Merge pull request #24614 from ziglang/flate

std.compress.flate: rework decompression and delete compression
Andrew Kelley
2025-08-01 16:34:43 -07:00
committed by GitHub
101 changed files with 3777 additions and 7261 deletions

View File: lib/std/Io.zig

@@ -438,8 +438,6 @@ pub fn GenericWriter(
pub const AnyReader = @import("Io/DeprecatedReader.zig");
/// Deprecated in favor of `Writer`.
pub const AnyWriter = @import("Io/DeprecatedWriter.zig");
/// Deprecated in favor of `File.Reader` and `File.Writer`.
pub const SeekableStream = @import("Io/seekable_stream.zig").SeekableStream;
/// Deprecated in favor of `Writer`.
pub const BufferedWriter = @import("Io/buffered_writer.zig").BufferedWriter;
/// Deprecated in favor of `Writer`.
@@ -467,12 +465,6 @@ pub const CountingReader = @import("Io/counting_reader.zig").CountingReader;
/// Deprecated with no replacement; inefficient pattern
pub const countingReader = @import("Io/counting_reader.zig").countingReader;
pub const BitReader = @import("Io/bit_reader.zig").BitReader;
pub const bitReader = @import("Io/bit_reader.zig").bitReader;
pub const BitWriter = @import("Io/bit_writer.zig").BitWriter;
pub const bitWriter = @import("Io/bit_writer.zig").bitWriter;
pub const tty = @import("Io/tty.zig");
/// Deprecated in favor of `Writer.Discarding`.
@@ -948,16 +940,12 @@ pub fn PollFiles(comptime StreamEnum: type) type {
test {
_ = Reader;
_ = Reader.Limited;
_ = Writer;
_ = BitReader;
_ = BitWriter;
_ = BufferedReader;
_ = BufferedWriter;
_ = CountingWriter;
_ = CountingReader;
_ = FixedBufferStream;
_ = SeekableStream;
_ = tty;
_ = @import("Io/test.zig");
}

View File: lib/std/Io/Reader.zig

@@ -74,6 +74,10 @@ pub const VTable = struct {
///
/// `data` may not contain an alias to `Reader.buffer`.
///
/// `data` is mutable because the implementation may need to temporarily
/// modify the fields in order to handle partial reads. Implementations must
/// restore the original values before returning.
///
/// Implementations may ignore `data`, writing directly to `Reader.buffer`,
/// modifying `seek` and `end` accordingly, and returning 0 from this
/// function. Implementations are encouraged to take advantage of this if
@@ -81,7 +85,7 @@ pub const VTable = struct {
///
/// The default implementation calls `stream` with either `data[0]` or
/// `Reader.buffer`, whichever is bigger.
readVec: *const fn (r: *Reader, data: []const []u8) Error!usize = defaultReadVec,
readVec: *const fn (r: *Reader, data: [][]u8) Error!usize = defaultReadVec,
/// Ensures `capacity` more data can be buffered without rebasing.
///
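As the `readVec` documentation above notes, an implementation may ignore `data` and deposit bytes directly into `Reader.buffer`. A minimal sketch of that pattern (`fillFromSource` is a hypothetical helper standing in for the underlying source, not part of this change):

fn streamingReadVec(r: *Reader, data: [][]u8) Error!usize {
    _ = data; // permitted: deposit bytes directly into `r.buffer` instead
    const dest = r.buffer[r.end..];
    const n = try fillFromSource(dest); // hypothetical read from the underlying source
    r.end += n;
    return 0; // zero directs the caller to consume from `Reader.buffer`
}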
@@ -262,8 +266,7 @@ pub fn streamRemaining(r: *Reader, w: *Writer) StreamRemainingError!usize {
/// number of bytes discarded.
pub fn discardRemaining(r: *Reader) ShortError!usize {
var offset: usize = r.end - r.seek;
r.seek = 0;
r.end = 0;
r.seek = r.end;
while (true) {
offset += r.vtable.discard(r, .unlimited) catch |err| switch (err) {
error.EndOfStream => return offset,
@@ -417,7 +420,7 @@ pub fn readVec(r: *Reader, data: [][]u8) Error!usize {
}
/// Writes to `Reader.buffer` or `data`, whichever has larger capacity.
pub fn defaultReadVec(r: *Reader, data: []const []u8) Error!usize {
pub fn defaultReadVec(r: *Reader, data: [][]u8) Error!usize {
assert(r.seek == r.end);
r.seek = 0;
r.end = 0;
@@ -438,23 +441,6 @@ pub fn defaultReadVec(r: *Reader, data: []const []u8) Error!usize {
return 0;
}
/// Always writes to `Reader.buffer` and returns 0.
pub fn indirectReadVec(r: *Reader, data: []const []u8) Error!usize {
_ = data;
assert(r.seek == r.end);
var writer: Writer = .{
.buffer = r.buffer,
.end = r.end,
.vtable = &.{ .drain = Writer.fixedDrain },
};
const limit: Limit = .limited(writer.buffer.len - writer.end);
r.end += r.vtable.stream(r, &writer, limit) catch |err| switch (err) {
error.WriteFailed => unreachable,
else => |e| return e,
};
return 0;
}
pub fn buffered(r: *Reader) []u8 {
return r.buffer[r.seek..r.end];
}
@@ -463,8 +449,8 @@ pub fn bufferedLen(r: *const Reader) usize {
return r.end - r.seek;
}
pub fn hashed(r: *Reader, hasher: anytype) Hashed(@TypeOf(hasher)) {
return .{ .in = r, .hasher = hasher };
pub fn hashed(r: *Reader, hasher: anytype, buffer: []u8) Hashed(@TypeOf(hasher)) {
return .init(r, hasher, buffer);
}
pub fn readVecAll(r: *Reader, data: [][]u8) Error!void {
@@ -539,8 +525,7 @@ pub fn toss(r: *Reader, n: usize) void {
/// Equivalent to `toss(r.bufferedLen())`.
pub fn tossBuffered(r: *Reader) void {
r.seek = 0;
r.end = 0;
r.seek = r.end;
}
/// Equivalent to `peek` followed by `toss`.
@@ -627,8 +612,7 @@ pub fn discardShort(r: *Reader, n: usize) ShortError!usize {
return n;
}
var remaining = n - (r.end - r.seek);
r.end = 0;
r.seek = 0;
r.seek = r.end;
while (true) {
const discard_len = r.vtable.discard(r, .limited(remaining)) catch |err| switch (err) {
error.EndOfStream => return n - remaining,
@@ -1678,7 +1662,7 @@ fn endingStream(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
return error.EndOfStream;
}
fn endingReadVec(r: *Reader, data: []const []u8) Error!usize {
fn endingReadVec(r: *Reader, data: [][]u8) Error!usize {
_ = r;
_ = data;
return error.EndOfStream;
@@ -1709,6 +1693,15 @@ fn failingDiscard(r: *Reader, limit: Limit) Error!usize {
return error.ReadFailed;
}
pub fn adaptToOldInterface(r: *Reader) std.Io.AnyReader {
return .{ .context = r, .readFn = derpRead };
}
fn derpRead(context: *const anyopaque, buffer: []u8) anyerror!usize {
const r: *Reader = @constCast(@alignCast(@ptrCast(context)));
return r.readSliceShort(buffer);
}
test "readAlloc when the backing reader provides one byte at a time" {
const str = "This is a test";
var tiny_buffer: [1]u8 = undefined;
@@ -1772,15 +1765,16 @@ pub fn Hashed(comptime Hasher: type) type {
return struct {
in: *Reader,
hasher: Hasher,
interface: Reader,
reader: Reader,
pub fn init(in: *Reader, hasher: Hasher, buffer: []u8) @This() {
return .{
.in = in,
.hasher = hasher,
.interface = .{
.reader = .{
.vtable = &.{
.read = @This().read,
.stream = @This().stream,
.readVec = @This().readVec,
.discard = @This().discard,
},
.buffer = buffer,
@@ -1790,33 +1784,39 @@ pub fn Hashed(comptime Hasher: type) type {
};
}
fn read(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
const this: *@This() = @alignCast(@fieldParentPtr("interface", r));
const data = w.writableVector(limit);
fn stream(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
const this: *@This() = @alignCast(@fieldParentPtr("reader", r));
const data = limit.slice(try w.writableSliceGreedy(1));
var vec: [1][]u8 = .{data};
const n = try this.in.readVec(&vec);
this.hasher.update(data[0..n]);
w.advance(n);
return n;
}
fn readVec(r: *Reader, data: [][]u8) Error!usize {
const this: *@This() = @alignCast(@fieldParentPtr("reader", r));
const n = try this.in.readVec(data);
const result = w.advanceVector(n);
var remaining: usize = n;
for (data) |slice| {
if (remaining < slice.len) {
this.hasher.update(slice[0..remaining]);
return result;
return n;
} else {
remaining -= slice.len;
this.hasher.update(slice);
}
}
assert(remaining == 0);
return result;
return n;
}
fn discard(r: *Reader, limit: Limit) Error!usize {
const this: *@This() = @alignCast(@fieldParentPtr("interface", r));
var w = this.hasher.writer(&.{});
const n = this.in.stream(&w, limit) catch |err| switch (err) {
error.WriteFailed => unreachable,
else => |e| return e,
};
return n;
const this: *@This() = @alignCast(@fieldParentPtr("reader", r));
const peeked = limit.slice(try this.in.peekGreedy(1));
this.hasher.update(peeked);
this.in.toss(peeked.len);
return peeked.len;
}
};
}
@@ -1874,3 +1874,7 @@ pub fn writableVectorWsa(
}
return .{ i, n };
}
test {
_ = Limited;
}
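The `hashed` signature change above means callers now supply the interface buffer themselves. A minimal usage sketch, assuming a `std.hash.Crc32` hasher and an existing `input: *Reader`:

var hash_buf: [256]u8 = undefined;
var hashed_in = input.hashed(std.hash.Crc32.init(), &hash_buf);
var out: [1024]u8 = undefined;
const n = try hashed_in.reader.readSliceShort(&out);
// hashed_in.hasher has now been updated with exactly the n bytes read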

View File: lib/std/Io/Writer.zig

@@ -2266,7 +2266,7 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
const pattern = data[data.len - 1];
const dest = w.buffer[w.end..];
switch (pattern.len) {
0 => return w.end,
0 => return 0,
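// A zero-length splat pattern consumes nothing from `data`; the previous
// `return w.end` over-reported the number of bytes consumed.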
1 => {
assert(splat >= dest.len);
@memset(dest, pattern[0]);
@@ -2286,6 +2286,13 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
}
}
pub fn unreachableDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usize {
_ = w;
_ = data;
_ = splat;
unreachable;
}
/// Provides a `Writer` implementation based on calling `Hasher.update`, sending
/// all data also to an underlying `Writer`.
///
@@ -2296,6 +2303,8 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
/// generic. A better solution will involve creating a writer for each hash
/// function, where the splat buffer can be tailored to the hash implementation
/// details.
///
/// Contrast with `Hashing` which terminates the stream pipeline.
pub fn Hashed(comptime Hasher: type) type {
return struct {
out: *Writer,
@@ -2341,7 +2350,7 @@ pub fn Hashed(comptime Hasher: type) type {
this.hasher.update(slice);
}
const pattern = data[data.len - 1];
assert(remaining == splat * pattern.len);
assert(remaining <= splat * pattern.len);
switch (pattern.len) {
0 => {
assert(remaining == 0);
@@ -2368,6 +2377,52 @@ pub fn Hashed(comptime Hasher: type) type {
};
}
/// Provides a `Writer` implementation based on calling `Hasher.update`,
/// discarding all data.
///
/// This implementation makes suboptimal buffering decisions due to being
/// generic. A better solution will involve creating a writer for each hash
/// function, where the splat buffer can be tailored to the hash implementation
/// details.
///
/// The total number of bytes written is stored in `hasher`.
///
/// Contrast with `Hashed` which also passes the data to an underlying stream.
pub fn Hashing(comptime Hasher: type) type {
return struct {
hasher: Hasher,
writer: Writer,
pub fn init(buffer: []u8) @This() {
return .initHasher(.init(.{}), buffer);
}
pub fn initHasher(hasher: Hasher, buffer: []u8) @This() {
return .{
.hasher = hasher,
.writer = .{
.buffer = buffer,
.vtable = &.{ .drain = @This().drain },
},
};
}
fn drain(w: *Writer, data: []const []const u8, splat: usize) Error!usize {
const this: *@This() = @alignCast(@fieldParentPtr("writer", w));
const hasher = &this.hasher;
hasher.update(w.buffered());
w.end = 0;
var n: usize = 0;
for (data[0 .. data.len - 1]) |slice| {
hasher.update(slice);
n += slice.len;
}
for (0..splat) |_| hasher.update(data[data.len - 1]);
return n + splat * data[data.len - 1].len;
}
};
}
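In use, `Hashing` terminates a write pipeline: everything written is hashed and then dropped. A minimal sketch, assuming `std.hash.Crc32` (whose `init`/`update`/`final` API matches what `drain` uses above):

var buf: [64]u8 = undefined;
var hashing: Hashing(std.hash.Crc32) = .initHasher(.init(), &buf);
try hashing.writer.writeAll("hello world");
try hashing.writer.flush(); // push any buffered bytes into the hasher
const checksum = hashing.hasher.final();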
/// Maintains `Writer` state such that it writes to the unused capacity of an
/// array list, filling it up completely before making a call through the
/// vtable, causing a resize. Consequently, the same, optimized, non-generic

View File: lib/std/Io/bit_reader.zig (deleted)

@@ -1,238 +0,0 @@
const std = @import("../std.zig");
//General note on endianness:
//Big endian is packed starting in the most significant part of the byte and subsequent
// bytes contain less significant bits. Thus we always take bits from the high
// end and place them below existing bits in our output.
//Little endian is packed starting in the least significant part of the byte and
// subsequent bytes contain more significant bits. Thus we always take bits from
// the low end and place them above existing bits in our output.
//Regardless of endianness, within any given byte the bits are always in most
// to least significant order.
//Also regardless of endianness, the buffer always aligns bits to the low end
// of the byte.
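//Example: given the byte 0b1100_1101, reading 3 bits big-endian yields
// 0b110 (taken from the high end), while little-endian yields 0b101 (taken
// from the low end); either way the result sits in the low bits of the output.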
/// Creates a bit reader which allows for reading bits from an underlying standard reader
pub fn BitReader(comptime endian: std.builtin.Endian, comptime Reader: type) type {
return struct {
reader: Reader,
bits: u8 = 0,
count: u4 = 0,
const low_bit_mask = [9]u8{
0b00000000,
0b00000001,
0b00000011,
0b00000111,
0b00001111,
0b00011111,
0b00111111,
0b01111111,
0b11111111,
};
fn Bits(comptime T: type) type {
return struct {
T,
u16,
};
}
fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) {
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
return .{
@bitCast(@as(UT, @intCast(out))),
num,
};
}
/// Reads `bits` bits from the reader and returns a specified type
/// containing them in the least significant end, returning an error if the
/// specified number of bits could not be read.
pub fn readBitsNoEof(self: *@This(), comptime T: type, num: u16) !T {
const b, const c = try self.readBitsTuple(T, num);
if (c < num) return error.EndOfStream;
return b;
}
/// Reads `bits` bits from the reader and returns a specified type
/// containing them in the least significant end. The number of bits successfully
/// read is placed in `out_bits`, as reaching the end of the stream is not an error.
pub fn readBits(self: *@This(), comptime T: type, num: u16, out_bits: *u16) !T {
const b, const c = try self.readBitsTuple(T, num);
out_bits.* = c;
return b;
}
/// Reads `bits` bits from the reader and returns a tuple of the specified type
/// containing them in the least significant end, and the number of bits successfully
/// read. Reaching the end of the stream is not an error.
pub fn readBitsTuple(self: *@This(), comptime T: type, num: u16) !Bits(T) {
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
const U = if (@bitSizeOf(T) < 8) u8 else UT; //it is a pain to work with <u8
//dump any bits in our buffer first
if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num);
var out_count: u16 = self.count;
var out: U = self.removeBits(self.count);
//grab all the full bytes we need and put their
//bits where they belong
const full_bytes_left = (num - out_count) / 8;
for (0..full_bytes_left) |_| {
const byte = self.reader.readByte() catch |err| switch (err) {
error.EndOfStream => return initBits(T, out, out_count),
else => |e| return e,
};
switch (endian) {
.big => {
if (U == u8) out = 0 else out <<= 8; //shifting u8 by 8 is illegal in Zig
out |= byte;
},
.little => {
const pos = @as(U, byte) << @intCast(out_count);
out |= pos;
},
}
out_count += 8;
}
const bits_left = num - out_count;
const keep = 8 - bits_left;
if (bits_left == 0) return initBits(T, out, out_count);
const final_byte = self.reader.readByte() catch |err| switch (err) {
error.EndOfStream => return initBits(T, out, out_count),
else => |e| return e,
};
switch (endian) {
.big => {
out <<= @intCast(bits_left);
out |= final_byte >> @intCast(keep);
self.bits = final_byte & low_bit_mask[keep];
},
.little => {
const pos = @as(U, final_byte & low_bit_mask[bits_left]) << @intCast(out_count);
out |= pos;
self.bits = final_byte >> @intCast(bits_left);
},
}
self.count = @intCast(keep);
return initBits(T, out, num);
}
//convenience function for removing bits from
//the appropriate part of the buffer based on
//endianness.
fn removeBits(self: *@This(), num: u4) u8 {
if (num == 8) {
self.count = 0;
return self.bits;
}
const keep = self.count - num;
const bits = switch (endian) {
.big => self.bits >> @intCast(keep),
.little => self.bits & low_bit_mask[num],
};
switch (endian) {
.big => self.bits &= low_bit_mask[keep],
.little => self.bits >>= @intCast(num),
}
self.count = keep;
return bits;
}
pub fn alignToByte(self: *@This()) void {
self.bits = 0;
self.count = 0;
}
};
}
pub fn bitReader(comptime endian: std.builtin.Endian, reader: anytype) BitReader(endian, @TypeOf(reader)) {
return .{ .reader = reader };
}
///////////////////////////////
test "api coverage" {
const mem_be = [_]u8{ 0b11001101, 0b00001011 };
const mem_le = [_]u8{ 0b00011101, 0b10010101 };
var mem_in_be = std.io.fixedBufferStream(&mem_be);
var bit_stream_be = bitReader(.big, mem_in_be.reader());
var out_bits: u16 = undefined;
const expect = std.testing.expect;
const expectError = std.testing.expectError;
try expect(1 == try bit_stream_be.readBits(u2, 1, &out_bits));
try expect(out_bits == 1);
try expect(2 == try bit_stream_be.readBits(u5, 2, &out_bits));
try expect(out_bits == 2);
try expect(3 == try bit_stream_be.readBits(u128, 3, &out_bits));
try expect(out_bits == 3);
try expect(4 == try bit_stream_be.readBits(u8, 4, &out_bits));
try expect(out_bits == 4);
try expect(5 == try bit_stream_be.readBits(u9, 5, &out_bits));
try expect(out_bits == 5);
try expect(1 == try bit_stream_be.readBits(u1, 1, &out_bits));
try expect(out_bits == 1);
mem_in_be.pos = 0;
bit_stream_be.count = 0;
try expect(0b110011010000101 == try bit_stream_be.readBits(u15, 15, &out_bits));
try expect(out_bits == 15);
mem_in_be.pos = 0;
bit_stream_be.count = 0;
try expect(0b1100110100001011 == try bit_stream_be.readBits(u16, 16, &out_bits));
try expect(out_bits == 16);
_ = try bit_stream_be.readBits(u0, 0, &out_bits);
try expect(0 == try bit_stream_be.readBits(u1, 1, &out_bits));
try expect(out_bits == 0);
try expectError(error.EndOfStream, bit_stream_be.readBitsNoEof(u1, 1));
var mem_in_le = std.io.fixedBufferStream(&mem_le);
var bit_stream_le = bitReader(.little, mem_in_le.reader());
try expect(1 == try bit_stream_le.readBits(u2, 1, &out_bits));
try expect(out_bits == 1);
try expect(2 == try bit_stream_le.readBits(u5, 2, &out_bits));
try expect(out_bits == 2);
try expect(3 == try bit_stream_le.readBits(u128, 3, &out_bits));
try expect(out_bits == 3);
try expect(4 == try bit_stream_le.readBits(u8, 4, &out_bits));
try expect(out_bits == 4);
try expect(5 == try bit_stream_le.readBits(u9, 5, &out_bits));
try expect(out_bits == 5);
try expect(1 == try bit_stream_le.readBits(u1, 1, &out_bits));
try expect(out_bits == 1);
mem_in_le.pos = 0;
bit_stream_le.count = 0;
try expect(0b001010100011101 == try bit_stream_le.readBits(u15, 15, &out_bits));
try expect(out_bits == 15);
mem_in_le.pos = 0;
bit_stream_le.count = 0;
try expect(0b1001010100011101 == try bit_stream_le.readBits(u16, 16, &out_bits));
try expect(out_bits == 16);
_ = try bit_stream_le.readBits(u0, 0, &out_bits);
try expect(0 == try bit_stream_le.readBits(u1, 1, &out_bits));
try expect(out_bits == 0);
try expectError(error.EndOfStream, bit_stream_le.readBitsNoEof(u1, 1));
}

View File: lib/std/Io/bit_writer.zig (deleted)

@@ -1,179 +0,0 @@
const std = @import("../std.zig");
//General note on endianness:
//Big endian is packed starting in the most significant part of the byte and subsequent
// bytes contain less significant bits. Thus we write out bits from the high end
// of our input first.
//Little endian is packed starting in the least significant part of the byte and
// subsequent bytes contain more significant bits. Thus we write out bits from
// the low end of our input first.
//Regardless of endianness, within any given byte the bits are always in most
// to least significant order.
//Also regardless of endianness, the buffer always aligns bits to the low end
// of the byte.
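//Example: writing the value 0b101 as 3 bits: big-endian places those bits at
// the high end of the pending byte, so a following 5-bit write fills the low
// end; little-endian places them at the low end, and later writes stack above.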
/// Creates a bit writer which allows for writing bits to an underlying standard writer
pub fn BitWriter(comptime endian: std.builtin.Endian, comptime Writer: type) type {
return struct {
writer: Writer,
bits: u8 = 0,
count: u4 = 0,
const low_bit_mask = [9]u8{
0b00000000,
0b00000001,
0b00000011,
0b00000111,
0b00001111,
0b00011111,
0b00111111,
0b01111111,
0b11111111,
};
/// Write the specified number of bits to the writer from the least significant bits of
/// the specified value. Bits will only be written to the writer when there
/// are enough to fill a byte.
pub fn writeBits(self: *@This(), value: anytype, num: u16) !void {
const T = @TypeOf(value);
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
const U = if (@bitSizeOf(T) < 8) u8 else UT; //<u8 is a pain to work with
var in: U = @as(UT, @bitCast(value));
var in_count: u16 = num;
if (self.count > 0) {
//if we can't fill the buffer, add what we have
const bits_free = 8 - self.count;
if (num < bits_free) {
self.addBits(@truncate(in), @intCast(num));
return;
}
//finish filling the buffer and flush it
if (num == bits_free) {
self.addBits(@truncate(in), @intCast(num));
return self.flushBits();
}
switch (endian) {
.big => {
const bits = in >> @intCast(in_count - bits_free);
self.addBits(@truncate(bits), bits_free);
},
.little => {
self.addBits(@truncate(in), bits_free);
in >>= @intCast(bits_free);
},
}
in_count -= bits_free;
try self.flushBits();
}
//write full bytes while we can
const full_bytes_left = in_count / 8;
for (0..full_bytes_left) |_| {
switch (endian) {
.big => {
const bits = in >> @intCast(in_count - 8);
try self.writer.writeByte(@truncate(bits));
},
.little => {
try self.writer.writeByte(@truncate(in));
if (U == u8) in = 0 else in >>= 8;
},
}
in_count -= 8;
}
//save the remaining bits in the buffer
self.addBits(@truncate(in), @intCast(in_count));
}
//convenience function for adding bits to the buffer
//in the appropriate position based on endianness
fn addBits(self: *@This(), bits: u8, num: u4) void {
if (num == 8) self.bits = bits else switch (endian) {
.big => {
self.bits <<= @intCast(num);
self.bits |= bits & low_bit_mask[num];
},
.little => {
const pos = bits << @intCast(self.count);
self.bits |= pos;
},
}
self.count += num;
}
/// Flush any remaining bits to the writer, filling
/// unused bits with 0s.
pub fn flushBits(self: *@This()) !void {
if (self.count == 0) return;
if (endian == .big) self.bits <<= @intCast(8 - self.count);
try self.writer.writeByte(self.bits);
self.bits = 0;
self.count = 0;
}
};
}
pub fn bitWriter(comptime endian: std.builtin.Endian, writer: anytype) BitWriter(endian, @TypeOf(writer)) {
return .{ .writer = writer };
}
///////////////////////////////
test "api coverage" {
var mem_be = [_]u8{0} ** 2;
var mem_le = [_]u8{0} ** 2;
var mem_out_be = std.io.fixedBufferStream(&mem_be);
var bit_stream_be = bitWriter(.big, mem_out_be.writer());
const testing = std.testing;
try bit_stream_be.writeBits(@as(u2, 1), 1);
try bit_stream_be.writeBits(@as(u5, 2), 2);
try bit_stream_be.writeBits(@as(u128, 3), 3);
try bit_stream_be.writeBits(@as(u8, 4), 4);
try bit_stream_be.writeBits(@as(u9, 5), 5);
try bit_stream_be.writeBits(@as(u1, 1), 1);
try testing.expect(mem_be[0] == 0b11001101 and mem_be[1] == 0b00001011);
mem_out_be.pos = 0;
try bit_stream_be.writeBits(@as(u15, 0b110011010000101), 15);
try bit_stream_be.flushBits();
try testing.expect(mem_be[0] == 0b11001101 and mem_be[1] == 0b00001010);
mem_out_be.pos = 0;
try bit_stream_be.writeBits(@as(u32, 0b110011010000101), 16);
try testing.expect(mem_be[0] == 0b01100110 and mem_be[1] == 0b10000101);
try bit_stream_be.writeBits(@as(u0, 0), 0);
var mem_out_le = std.io.fixedBufferStream(&mem_le);
var bit_stream_le = bitWriter(.little, mem_out_le.writer());
try bit_stream_le.writeBits(@as(u2, 1), 1);
try bit_stream_le.writeBits(@as(u5, 2), 2);
try bit_stream_le.writeBits(@as(u128, 3), 3);
try bit_stream_le.writeBits(@as(u8, 4), 4);
try bit_stream_le.writeBits(@as(u9, 5), 5);
try bit_stream_le.writeBits(@as(u1, 1), 1);
try testing.expect(mem_le[0] == 0b00011101 and mem_le[1] == 0b10010101);
mem_out_le.pos = 0;
try bit_stream_le.writeBits(@as(u15, 0b110011010000101), 15);
try bit_stream_le.flushBits();
try testing.expect(mem_le[0] == 0b10000101 and mem_le[1] == 0b01100110);
mem_out_le.pos = 0;
try bit_stream_le.writeBits(@as(u32, 0b1100110100001011), 16);
try testing.expect(mem_le[0] == 0b00001011 and mem_le[1] == 0b11001101);
try bit_stream_le.writeBits(@as(u0, 0), 0);
}

View File: lib/std/Io/fixed_buffer_stream.zig

@@ -4,8 +4,7 @@ const testing = std.testing;
const mem = std.mem;
const assert = std.debug.assert;
/// This turns a byte buffer into an `io.GenericWriter`, `io.GenericReader`, or `io.SeekableStream`.
/// If the supplied byte buffer is const, then `io.GenericWriter` is not available.
/// Deprecated in favor of `std.Io.Reader.fixed` and `std.Io.Writer.fixed`.
pub fn FixedBufferStream(comptime Buffer: type) type {
return struct {
/// `Buffer` is either a `[]u8` or `[]const u8`.
@@ -20,16 +19,6 @@ pub fn FixedBufferStream(comptime Buffer: type) type {
pub const Reader = io.GenericReader(*Self, ReadError, read);
pub const Writer = io.GenericWriter(*Self, WriteError, write);
pub const SeekableStream = io.SeekableStream(
*Self,
SeekError,
GetSeekPosError,
seekTo,
seekBy,
getPos,
getEndPos,
);
const Self = @This();
pub fn reader(self: *Self) Reader {
@@ -40,10 +29,6 @@ pub fn FixedBufferStream(comptime Buffer: type) type {
return .{ .context = self };
}
pub fn seekableStream(self: *Self) SeekableStream {
return .{ .context = self };
}
pub fn read(self: *Self, dest: []u8) ReadError!usize {
const size = @min(dest.len, self.buffer.len - self.pos);
const end = self.pos + size;
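Migration away from `FixedBufferStream` is mechanical; a hedged sketch using the replacements named in the deprecation note above (assuming `buffer` and `dest` are existing byte arrays):

// before (deprecated):
var fbs = std.io.fixedBufferStream(&buffer);
const n = try fbs.reader().read(&dest);
// after:
var reader: std.Io.Reader = .fixed(&buffer);
const n2 = try reader.readSliceShort(&dest);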

View File: lib/std/Io/seekable_stream.zig (deleted)

@@ -1,35 +0,0 @@
const std = @import("../std.zig");
pub fn SeekableStream(
comptime Context: type,
comptime SeekErrorType: type,
comptime GetSeekPosErrorType: type,
comptime seekToFn: fn (context: Context, pos: u64) SeekErrorType!void,
comptime seekByFn: fn (context: Context, pos: i64) SeekErrorType!void,
comptime getPosFn: fn (context: Context) GetSeekPosErrorType!u64,
comptime getEndPosFn: fn (context: Context) GetSeekPosErrorType!u64,
) type {
return struct {
context: Context,
const Self = @This();
pub const SeekError = SeekErrorType;
pub const GetSeekPosError = GetSeekPosErrorType;
pub fn seekTo(self: Self, pos: u64) SeekError!void {
return seekToFn(self.context, pos);
}
pub fn seekBy(self: Self, amt: i64) SeekError!void {
return seekByFn(self.context, amt);
}
pub fn getEndPos(self: Self) GetSeekPosError!u64 {
return getEndPosFn(self.context);
}
pub fn getPos(self: Self) GetSeekPosError!u64 {
return getPosFn(self.context);
}
};
}

View File: lib/std/Io/test.zig

@@ -57,51 +57,6 @@ test "write a file, read it, then delete it" {
try tmp.dir.deleteFile(tmp_file_name);
}
test "BitStreams with File Stream" {
var tmp = tmpDir(.{});
defer tmp.cleanup();
const tmp_file_name = "temp_test_file.txt";
{
var file = try tmp.dir.createFile(tmp_file_name, .{});
defer file.close();
var bit_stream = io.bitWriter(native_endian, file.deprecatedWriter());
try bit_stream.writeBits(@as(u2, 1), 1);
try bit_stream.writeBits(@as(u5, 2), 2);
try bit_stream.writeBits(@as(u128, 3), 3);
try bit_stream.writeBits(@as(u8, 4), 4);
try bit_stream.writeBits(@as(u9, 5), 5);
try bit_stream.writeBits(@as(u1, 1), 1);
try bit_stream.flushBits();
}
{
var file = try tmp.dir.openFile(tmp_file_name, .{});
defer file.close();
var bit_stream = io.bitReader(native_endian, file.deprecatedReader());
var out_bits: u16 = undefined;
try expect(1 == try bit_stream.readBits(u2, 1, &out_bits));
try expect(out_bits == 1);
try expect(2 == try bit_stream.readBits(u5, 2, &out_bits));
try expect(out_bits == 2);
try expect(3 == try bit_stream.readBits(u128, 3, &out_bits));
try expect(out_bits == 3);
try expect(4 == try bit_stream.readBits(u8, 4, &out_bits));
try expect(out_bits == 4);
try expect(5 == try bit_stream.readBits(u9, 5, &out_bits));
try expect(out_bits == 5);
try expect(1 == try bit_stream.readBits(u1, 1, &out_bits));
try expect(out_bits == 1);
try expectError(error.EndOfStream, bit_stream.readBitsNoEof(u1, 1));
}
try tmp.dir.deleteFile(tmp_file_name);
}
test "File seek ops" {
var tmp = tmpDir(.{});
defer tmp.cleanup();

View File: lib/std/compress.zig

@@ -1,8 +1,7 @@
//! Compression algorithms.
/// gzip and zlib are here.
pub const flate = @import("compress/flate.zig");
pub const gzip = @import("compress/gzip.zig");
pub const zlib = @import("compress/zlib.zig");
pub const lzma = @import("compress/lzma.zig");
pub const lzma2 = @import("compress/lzma2.zig");
pub const xz = @import("compress/xz.zig");
@@ -14,6 +13,4 @@ test {
_ = lzma2;
_ = xz;
_ = zstd;
_ = gzip;
_ = zlib;
}

View File: lib/std/compress/flate.zig

@@ -1,477 +1,180 @@
const std = @import("../std.zig");
/// When decompressing, the output buffer is used as the history window, so an
/// output buffer smaller than this may fail to decompress streams that were
/// compressed with a larger window.
pub const max_window_len = history_len * 2;
pub const history_len = 32768;
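// Practical implication: passing a `Decompress` output buffer of at least
// `max_window_len` bytes is always safe; anything smaller may fail on
// streams compressed with a larger window, per the note above.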
/// Deflate is a lossless data compression file format that uses a combination
/// of LZ77 and Huffman coding.
pub const deflate = @import("flate/deflate.zig");
pub const Compress = @import("flate/Compress.zig");
/// Inflate is the decoding process that takes a Deflate bitstream for
/// decompression and correctly produces the original full-size data or file.
pub const inflate = @import("flate/inflate.zig");
/// Inflate is the decoding process that consumes a Deflate bitstream and
/// produces the original full-size data.
pub const Decompress = @import("flate/Decompress.zig");
/// Decompress compressed data from reader and write plain data to the writer.
pub fn decompress(reader: anytype, writer: anytype) !void {
try inflate.decompress(.raw, reader, writer);
}
/// Compression without Lempel-Ziv match searching. Faster compression, less
/// memory requirements but bigger compressed sizes.
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
/// Decompressor type
pub fn Decompressor(comptime ReaderType: type) type {
return inflate.Decompressor(.raw, ReaderType);
}
/// Container of the deflate bit stream body. A container adds a header before
/// the deflate bit stream and a footer after it. It can be gzip, zlib, or raw
/// (no header, no footer, just the raw bit stream).
///
/// The zlib format is defined in RFC 1950. The header is 2 bytes and the
/// footer is a 4-byte Adler-32 checksum.
///
/// The gzip format is defined in RFC 1952. The header is 10+ bytes and the
/// footer is a 4-byte CRC-32 checksum followed by the 4-byte length of the
/// uncompressed data.
///
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
pub const Container = enum {
raw, // no header or footer
gzip, // gzip header and footer
zlib, // zlib header and footer
/// Create Decompressor which will read compressed data from reader.
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
return inflate.decompressor(.raw, reader);
}
/// Compression level, trades between speed and compression size.
pub const Options = deflate.Options;
/// Compress plain data from reader and write compressed data to the writer.
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
try deflate.compress(.raw, reader, writer, options);
}
/// Compressor type
pub fn Compressor(comptime WriterType: type) type {
return deflate.Compressor(.raw, WriterType);
}
/// Create Compressor which outputs compressed data to the writer.
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
return try deflate.compressor(.raw, writer, options);
}
/// Huffman only compression. Without Lempel-Ziv match searching. Faster
/// compression, less memory requirements but bigger compressed sizes.
pub const huffman = struct {
pub fn compress(reader: anytype, writer: anytype) !void {
try deflate.huffman.compress(.raw, reader, writer);
pub fn size(w: Container) usize {
return headerSize(w) + footerSize(w);
}
pub fn Compressor(comptime WriterType: type) type {
return deflate.huffman.Compressor(.raw, WriterType);
pub fn headerSize(w: Container) usize {
return header(w).len;
}
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
return deflate.huffman.compressor(.raw, writer);
}
};
// No compression, store only. Compressed size is slightly bigger than plain.
pub const store = struct {
pub fn compress(reader: anytype, writer: anytype) !void {
try deflate.store.compress(.raw, reader, writer);
pub fn footerSize(w: Container) usize {
return switch (w) {
.gzip => 8,
.zlib => 4,
.raw => 0,
};
}
pub fn Compressor(comptime WriterType: type) type {
return deflate.store.Compressor(.raw, WriterType);
}
pub const list = [_]Container{ .raw, .gzip, .zlib };
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
return deflate.store.compressor(.raw, writer);
}
};
/// Container defines header/footer around deflate bit stream. Gzip and zlib
/// compression algorithms are containers around deflate bit stream body.
const Container = @import("flate/container.zig").Container;
const std = @import("std");
const testing = std.testing;
const fixedBufferStream = std.io.fixedBufferStream;
const print = std.debug.print;
const builtin = @import("builtin");
test {
_ = deflate;
_ = inflate;
}
test "compress/decompress" {
var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer
var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer
const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
const cases = [_]struct {
data: []const u8, // uncompressed content
// compressed data sizes per level 4-9
gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
huffman_only_size: usize = 0,
store_size: usize = 0,
}{
.{
.data = @embedFile("flate/testdata/rfc1951.txt"),
.gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
.huffman_only_size = 20287,
.store_size = 36967,
},
.{
.data = @embedFile("flate/testdata/fuzz/roundtrip1.input"),
.gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
.huffman_only_size = 393,
.store_size = 393,
},
.{
.data = @embedFile("flate/testdata/fuzz/roundtrip2.input"),
.gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
.huffman_only_size = 394,
.store_size = 394,
},
.{
.data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"),
.gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
.huffman_only_size = 498,
.store_size = 747,
},
pub const Error = error{
BadGzipHeader,
BadZlibHeader,
WrongGzipChecksum,
WrongGzipSize,
WrongZlibChecksum,
};
for (cases, 0..) |case, case_no| { // for each case
const data = case.data;
pub fn header(container: Container) []const u8 {
return switch (container) {
// GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
// - ID1 (IDentification 1), always 0x1f
// - ID2 (IDentification 2), always 0x8b
// - CM (Compression Method), always 8 = deflate
// - FLG (Flags), all set to 0
// - 4 bytes, MTIME (Modification time), not used, all set to zero
// - XFL (eXtra FLags), all set to zero
// - OS (Operating System), 03 = Unix
.gzip => &[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 },
// ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
// 1st byte:
// - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
// - The next four bits is the CM (compression method), which is 8 for deflate.
// 2nd byte:
// - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
// - The next bit, FDICT, is set if a dictionary is given.
// - The final five FCHECK bits form a mod-31 checksum.
//
// CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
.zlib => &[_]u8{ 0x78, 0b10_0_11100 },
.raw => &.{},
};
}
for (levels, 0..) |level, i| { // for each compression level
pub const Hasher = union(Container) {
raw: void,
gzip: struct {
crc: std.hash.Crc32 = .init(),
count: u32 = 0,
},
zlib: std.hash.Adler32,
inline for (Container.list) |container| { // for each wrapping
var compressed_size: usize = if (case.gzip_sizes[i] > 0)
case.gzip_sizes[i] - Container.gzip.size() + container.size()
else
0;
// compress original stream to compressed stream
{
var original = fixedBufferStream(data);
var compressed = fixedBufferStream(&cmp_buf);
try deflate.compress(container, original.reader(), compressed.writer(), .{ .level = level });
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
compressed_size = compressed.pos;
}
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
{
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
var decompressed = fixedBufferStream(&dcm_buf);
try inflate.decompress(container, compressed.reader(), decompressed.writer());
try testing.expectEqualSlices(u8, data, decompressed.getWritten());
}
// compressor writer interface
{
var compressed = fixedBufferStream(&cmp_buf);
var cmp = try deflate.compressor(container, compressed.writer(), .{ .level = level });
var cmp_wrt = cmp.writer();
try cmp_wrt.writeAll(data);
try cmp.finish();
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompressor reader interface
{
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
var dcm = inflate.decompressor(container, compressed.reader());
var dcm_rdr = dcm.reader();
const n = try dcm_rdr.readAll(&dcm_buf);
try testing.expectEqual(data.len, n);
try testing.expectEqualSlices(u8, data, dcm_buf[0..n]);
}
}
pub fn init(container: Container) Hasher {
return switch (container) {
.gzip => .{ .gzip = .{} },
.zlib => .{ .zlib = .{} },
.raw => .raw,
};
}
// huffman only compression
{
inline for (Container.list) |container| { // for each wrapping
var compressed_size: usize = if (case.huffman_only_size > 0)
case.huffman_only_size - Container.gzip.size() + container.size()
else
0;
// compress original stream to compressed stream
{
var original = fixedBufferStream(data);
var compressed = fixedBufferStream(&cmp_buf);
var cmp = try deflate.huffman.compressor(container, compressed.writer());
try cmp.compress(original.reader());
try cmp.finish();
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
compressed_size = compressed.pos;
}
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
{
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
var decompressed = fixedBufferStream(&dcm_buf);
try inflate.decompress(container, compressed.reader(), decompressed.writer());
try testing.expectEqualSlices(u8, data, decompressed.getWritten());
}
pub fn container(h: Hasher) Container {
return h;
}
pub fn update(h: *Hasher, buf: []const u8) void {
switch (h.*) {
.raw => {},
.gzip => |*gzip| {
gzip.update(buf);
gzip.count +%= buf.len;
},
.zlib => |*zlib| {
zlib.update(buf);
},
inline .gzip, .zlib => |*x| x.update(buf),
}
}
// store only
{
inline for (Container.list) |container| { // for each wrapping
var compressed_size: usize = if (case.store_size > 0)
case.store_size - Container.gzip.size() + container.size()
else
0;
pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
var bits: [4]u8 = undefined;
switch (hasher.*) {
.gzip => |*gzip| {
// GZIP 8 bytes footer
// - 4 bytes, CRC32 (CRC-32)
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
std.mem.writeInt(u32, &bits, gzip.final(), .little);
try writer.writeAll(&bits);
// compress original stream to compressed stream
{
var original = fixedBufferStream(data);
var compressed = fixedBufferStream(&cmp_buf);
var cmp = try deflate.store.compressor(container, compressed.writer());
try cmp.compress(original.reader());
try cmp.finish();
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
compressed_size = compressed.pos;
}
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
{
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
var decompressed = fixedBufferStream(&dcm_buf);
try inflate.decompress(container, compressed.reader(), decompressed.writer());
try testing.expectEqualSlices(u8, data, decompressed.getWritten());
}
std.mem.writeInt(u32, &bits, gzip.bytes_read, .little);
try writer.writeAll(&bits);
},
.zlib => |*zlib| {
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
// 4 bytes of ADLER32 (Adler-32 checksum)
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to Adler-32
// algorithm.
std.mem.writeInt(u32, &bits, zlib.final, .big);
try writer.writeAll(&bits);
},
.raw => {},
}
}
}
}
};
fn testDecompress(comptime container: Container, compressed: []const u8, expected_plain: []const u8) !void {
var in = fixedBufferStream(compressed);
var out = std.ArrayList(u8).init(testing.allocator);
defer out.deinit();
pub const Metadata = union(Container) {
raw: void,
gzip: struct {
crc: u32 = 0,
count: u32 = 0,
},
zlib: struct {
adler: u32 = 0,
},
try inflate.decompress(container, in.reader(), out.writer());
try testing.expectEqualSlices(u8, expected_plain, out.items);
}
test "don't read past deflate stream's end" {
try testDecompress(.zlib, &[_]u8{
0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
0x83, 0x95, 0x0b, 0xf5,
}, &[_]u8{
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
0x00, 0x00, 0xff, 0xff, 0xff,
});
}
test "zlib header" {
// Truncated header
try testing.expectError(
error.EndOfStream,
testDecompress(.zlib, &[_]u8{0x78}, ""),
);
// Wrong CM
try testing.expectError(
error.BadZlibHeader,
testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
);
// Wrong CINFO
try testing.expectError(
error.BadZlibHeader,
testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
);
// Wrong checksum
try testing.expectError(
error.WrongZlibChecksum,
testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
);
// Truncated checksum
try testing.expectError(
error.EndOfStream,
testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
);
}
test "gzip header" {
// Truncated header
try testing.expectError(
error.EndOfStream,
testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
);
// Wrong CM
try testing.expectError(
error.BadGzipHeader,
testDecompress(.gzip, &[_]u8{
0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x03,
}, undefined),
);
// Wrong checksum
try testing.expectError(
error.WrongGzipChecksum,
testDecompress(.gzip, &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x00,
}, undefined),
);
// Truncated checksum
try testing.expectError(
error.EndOfStream,
testDecompress(.gzip, &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
}, undefined),
);
// Wrong initial size
try testing.expectError(
error.WrongGzipSize,
testDecompress(.gzip, &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x01,
}, undefined),
);
// Truncated initial size field
try testing.expectError(
error.EndOfStream,
testDecompress(.gzip, &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00,
}, undefined),
);
try testDecompress(.gzip, &[_]u8{
// GZIP header
0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
// header.FHCRC (should cover entire header)
0x99, 0xd6,
// GZIP data
0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}, "");
}
test "public interface" {
const plain_data = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a };
// deflate final stored block, header + plain (stored) data
const deflate_block = [_]u8{
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // final stored block header: flags byte, len, nlen
} ++ plain_data;
// gzip header/footer + deflate block
const gzip_data =
[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes)
deflate_block ++
[_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes)
// zlib header/footer + deflate block
const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)}
deflate_block ++
[_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum
const gzip = @import("gzip.zig");
const zlib = @import("zlib.zig");
const flate = @This();
try testInterface(gzip, &gzip_data, &plain_data);
try testInterface(zlib, &zlib_data, &plain_data);
try testInterface(flate, &deflate_block, &plain_data);
}
fn testInterface(comptime pkg: type, gzip_data: []const u8, plain_data: []const u8) !void {
var buffer1: [64]u8 = undefined;
var buffer2: [64]u8 = undefined;
var compressed = fixedBufferStream(&buffer1);
var plain = fixedBufferStream(&buffer2);
// decompress
{
var in = fixedBufferStream(gzip_data);
try pkg.decompress(in.reader(), plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
}
plain.reset();
compressed.reset();
// compress/decompress
{
var in = fixedBufferStream(plain_data);
try pkg.compress(in.reader(), compressed.writer(), .{});
compressed.reset();
try pkg.decompress(compressed.reader(), plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
}
plain.reset();
compressed.reset();
// compressor/decompressor
{
var in = fixedBufferStream(plain_data);
var cmp = try pkg.compressor(compressed.writer(), .{});
try cmp.compress(in.reader());
try cmp.finish();
compressed.reset();
var dcp = pkg.decompressor(compressed.reader());
try dcp.decompress(plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
}
plain.reset();
compressed.reset();
// huffman
{
// huffman compress/decompress
{
var in = fixedBufferStream(plain_data);
try pkg.huffman.compress(in.reader(), compressed.writer());
compressed.reset();
try pkg.decompress(compressed.reader(), plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
pub fn init(container: Container) Metadata {
return switch (container) {
.gzip => .{ .gzip = .{} },
.zlib => .{ .zlib = .{} },
.raw => .raw,
};
}
plain.reset();
compressed.reset();
// huffman compressor/decompressor
{
var in = fixedBufferStream(plain_data);
var cmp = try pkg.huffman.compressor(compressed.writer());
try cmp.compress(in.reader());
try cmp.finish();
compressed.reset();
try pkg.decompress(compressed.reader(), plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
pub fn container(m: Metadata) Container {
return m;
}
}
plain.reset();
compressed.reset();
};
};
// store
{
// store compress/decompress
{
var in = fixedBufferStream(plain_data);
try pkg.store.compress(in.reader(), compressed.writer());
compressed.reset();
try pkg.decompress(compressed.reader(), plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
}
plain.reset();
compressed.reset();
// store compressor/decompressor
{
var in = fixedBufferStream(plain_data);
var cmp = try pkg.store.compressor(compressed.writer());
try cmp.compress(in.reader());
try cmp.finish();
compressed.reset();
try pkg.decompress(compressed.reader(), plain.writer());
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
}
}
test {
_ = HuffmanEncoder;
_ = Compress;
_ = Decompress;
}
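Putting the new container plumbing together: a hedged sketch of writing a gzip footer with the `Hasher` union above (`std.Io.Writer.fixed` per the deprecation notes earlier in this diff; the gzip footer is 8 bytes per `footerSize`):

var footer_buf: [8]u8 = undefined; // gzip footer: 4-byte CRC-32 + 4-byte input size
var w: std.Io.Writer = .fixed(&footer_buf);
var hasher: Hasher = .init(.gzip);
hasher.update("hello"); // feed the uncompressed bytes as they stream through
try hasher.writeFooter(&w); // writes CRC-32 then the byte count, both little-endian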

View File: lib/std/compress/flate/BlockWriter.zig (new)

@@ -0,0 +1,592 @@
//! Accepts a list of tokens and decides which block type will provide the
//! best compression. Writes the header and body of the block.
const std = @import("std");
const io = std.io;
const assert = std.debug.assert;
const Writer = std.io.Writer;
const BlockWriter = @This();
const flate = @import("../flate.zig");
const Compress = flate.Compress;
const HuffmanEncoder = flate.HuffmanEncoder;
const Token = @import("Token.zig");
const codegen_order = HuffmanEncoder.codegen_order;
const end_code_mark = 255;
output: *Writer,
codegen_freq: [HuffmanEncoder.codegen_code_count]u16,
literal_freq: [HuffmanEncoder.max_num_lit]u16,
distance_freq: [HuffmanEncoder.distance_code_count]u16,
codegen: [HuffmanEncoder.max_num_lit + HuffmanEncoder.distance_code_count + 1]u8,
literal_encoding: HuffmanEncoder,
distance_encoding: HuffmanEncoder,
codegen_encoding: HuffmanEncoder,
fixed_literal_encoding: HuffmanEncoder,
fixed_distance_encoding: HuffmanEncoder,
huff_distance: HuffmanEncoder,
fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code,
fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
pub fn init(output: *Writer) BlockWriter {
return .{
.output = output,
.codegen_freq = undefined,
.literal_freq = undefined,
.distance_freq = undefined,
.codegen = undefined,
.literal_encoding = undefined,
.distance_encoding = undefined,
.codegen_encoding = undefined,
.fixed_literal_encoding = undefined,
.fixed_distance_encoding = undefined,
.huff_distance = undefined,
.fixed_literal_codes = undefined,
.fixed_distance_codes = undefined,
.distance_codes = undefined,
};
}
pub fn initBuffers(bw: *BlockWriter) void {
bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes);
bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes);
bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes);
}
/// Flush the internal bit buffer to the writer.
/// Should be called only when the bit stream is at a byte boundary:
/// after the final block, when the last byte could be incomplete, or
/// after a stored block, which is aligned to the byte boundary (it has
/// padding bits after the first 3 bits).
pub fn flush(self: *BlockWriter) Writer.Error!void {
try self.bit_writer.flush();
}
fn writeCode(self: *BlockWriter, c: Compress.HuffCode) Writer.Error!void {
try self.bit_writer.writeBits(c.code, c.len);
}
/// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
/// the literal and distance lengths arrays (which are concatenated into a single
/// array). This method generates that run-length encoding.
///
/// The result is written into the codegen array, and the frequencies
/// of each code is written into the codegen_freq array.
/// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
/// information. Code bad_code is an end marker
///
/// num_literals: The number of literals in literal_encoding
/// num_distances: The number of distances in distance_encoding
/// lit_enc: The literal encoder to use
/// dist_enc: The distance encoder to use
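///
/// Example: a run of five 4s is emitted as a literal 4 followed by code 16
/// with extra bits 1 (repeat the previous length 3 + 1 more times); a run of
/// eight 0s is emitted as code 17 with extra bits 5 (3 + 5 zeros).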
fn generateCodegen(
self: *BlockWriter,
num_literals: u32,
num_distances: u32,
lit_enc: *Compress.LiteralEncoder,
dist_enc: *Compress.DistanceEncoder,
) void {
for (self.codegen_freq, 0..) |_, i| {
self.codegen_freq[i] = 0;
}
// Note that we are using codegen both as a temporary variable for holding
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
var codegen = &self.codegen; // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
var cgnl = codegen[0..num_literals];
for (cgnl, 0..) |_, i| {
cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
}
cgnl = codegen[num_literals .. num_literals + num_distances];
for (cgnl, 0..) |_, i| {
cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
}
codegen[num_literals + num_distances] = end_code_mark;
var size = codegen[0];
var count: i32 = 1;
var out_index: u32 = 0;
var in_index: u32 = 1;
while (size != end_code_mark) : (in_index += 1) {
// INVARIANT: We have seen "count" copies of size that have not yet
// had output generated for them.
const next_size = codegen[in_index];
if (next_size == size) {
count += 1;
continue;
}
// We need to generate codegen indicating "count" of size.
if (size != 0) {
codegen[out_index] = size;
out_index += 1;
self.codegen_freq[size] += 1;
count -= 1;
while (count >= 3) {
var n: i32 = 6;
if (n > count) {
n = count;
}
codegen[out_index] = 16;
out_index += 1;
codegen[out_index] = @as(u8, @intCast(n - 3));
out_index += 1;
self.codegen_freq[16] += 1;
count -= n;
}
} else {
while (count >= 11) {
var n: i32 = 138;
if (n > count) {
n = count;
}
codegen[out_index] = 18;
out_index += 1;
codegen[out_index] = @as(u8, @intCast(n - 11));
out_index += 1;
self.codegen_freq[18] += 1;
count -= n;
}
if (count >= 3) {
// 3 <= count <= 10
codegen[out_index] = 17;
out_index += 1;
codegen[out_index] = @as(u8, @intCast(count - 3));
out_index += 1;
self.codegen_freq[17] += 1;
count = 0;
}
}
count -= 1;
while (count >= 0) : (count -= 1) {
codegen[out_index] = size;
out_index += 1;
self.codegen_freq[size] += 1;
}
// Set up invariant for next time through the loop.
size = next_size;
count = 1;
}
// Marker indicating the end of the codegen.
codegen[out_index] = end_code_mark;
}
const DynamicSize = struct {
size: u32,
num_codegens: u32,
};
/// dynamicSize returns the size of dynamically encoded data in bits.
fn dynamicSize(
self: *BlockWriter,
lit_enc: *Compress.LiteralEncoder, // literal encoder
dist_enc: *Compress.DistanceEncoder, // distance encoder
extra_bits: u32,
) DynamicSize {
var num_codegens = self.codegen_freq.len;
while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
num_codegens -= 1;
}
const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
self.codegen_freq[16] * 2 +
self.codegen_freq[17] * 3 +
self.codegen_freq[18] * 7;
const size = header +
lit_enc.bitLength(&self.literal_freq) +
dist_enc.bitLength(&self.distance_freq) +
extra_bits;
return DynamicSize{
.size = @as(u32, @intCast(size)),
.num_codegens = @as(u32, @intCast(num_codegens)),
};
}
/// fixedSize returns the size of fixed-Huffman-encoded data in bits.
fn fixedSize(self: *BlockWriter, extra_bits: u32) u32 {
return 3 +
self.fixed_literal_encoding.bitLength(&self.literal_freq) +
self.fixed_distance_encoding.bitLength(&self.distance_freq) +
extra_bits;
}
const StoredSize = struct {
size: u32,
storable: bool,
};
/// storedSizeFits calculates the stored size, including header.
/// The function returns the size in bits and whether the block
/// fits inside a single block.
fn storedSizeFits(in: ?[]const u8) StoredSize {
if (in == null) {
return .{ .size = 0, .storable = false };
}
if (in.?.len <= HuffmanEncoder.max_store_block_size) {
return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
}
return .{ .size = 0, .storable = false };
}
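// The 5 in `(len + 5) * 8` is the stored-block overhead in bytes: the 3-bit
// block header padded to a byte boundary, then 2-byte LEN and 2-byte NLEN.
// For example, a 100-byte block stores as (100 + 5) * 8 = 840 bits.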
/// Write the header of a dynamic Huffman block to the output stream.
///
/// num_literals: The number of literals specified in codegen
/// num_distances: The number of distances specified in codegen
/// num_codegens: The number of codegens used in codegen
/// eof: Is it the end-of-file? (end of stream)
fn dynamicHeader(
self: *BlockWriter,
num_literals: u32,
num_distances: u32,
num_codegens: u32,
eof: bool,
) Writer.Error!void {
const first_bits: u32 = if (eof) 5 else 4;
try self.bit_writer.writeBits(first_bits, 3);
try self.bit_writer.writeBits(num_literals - 257, 5);
try self.bit_writer.writeBits(num_distances - 1, 5);
try self.bit_writer.writeBits(num_codegens - 4, 4);
var i: u32 = 0;
while (i < num_codegens) : (i += 1) {
const value = self.codegen_encoding.codes[codegen_order[i]].len;
try self.bit_writer.writeBits(value, 3);
}
i = 0;
while (true) {
const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
i += 1;
if (code_word == end_code_mark) {
break;
}
try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
switch (code_word) {
16 => {
try self.bit_writer.writeBits(self.codegen[i], 2);
i += 1;
},
17 => {
try self.bit_writer.writeBits(self.codegen[i], 3);
i += 1;
},
18 => {
try self.bit_writer.writeBits(self.codegen[i], 7);
i += 1;
},
else => {},
}
}
}
fn storedHeader(self: *BlockWriter, length: usize, eof: bool) Writer.Error!void {
assert(length <= 65535);
const flag: u32 = if (eof) 1 else 0;
try self.bit_writer.writeBits(flag, 3);
try self.flush();
const l: u16 = @intCast(length);
try self.bit_writer.writeBits(l, 16);
try self.bit_writer.writeBits(~l, 16);
}
fn fixedHeader(self: *BlockWriter, eof: bool) Writer.Error!void {
// Indicate that we are a fixed Huffman block
var value: u32 = 2;
if (eof) {
value = 3;
}
try self.bit_writer.writeBits(value, 3);
}
/// Write a block of tokens with the smallest encoding. Will choose block type.
/// The original input can be supplied, and if the huffman encoded data
/// is larger than the original bytes, the data will be written as a
/// stored block.
/// If the input is null, the tokens will always be Huffman encoded.
pub fn write(self: *BlockWriter, tokens: []const Token, eof: bool, input: ?[]const u8) Writer.Error!void {
const lit_and_dist = self.indexTokens(tokens);
const num_literals = lit_and_dist.num_literals;
const num_distances = lit_and_dist.num_distances;
var extra_bits: u32 = 0;
const ret = storedSizeFits(input);
const stored_size = ret.size;
const storable = ret.storable;
if (storable) {
// We only bother calculating the costs of the extra bits required by
// the length of distance fields (which will be the same for both fixed
// and dynamic encoding), if we need to compare those two encodings
// against stored encoding.
var length_code: u16 = Token.length_codes_start + 8;
while (length_code < num_literals) : (length_code += 1) {
// First eight length codes have extra size = 0.
extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
@as(u32, @intCast(Token.lengthExtraBits(length_code)));
}
var distance_code: u16 = 4;
while (distance_code < num_distances) : (distance_code += 1) {
// First four distance codes have extra size = 0.
extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
@as(u32, @intCast(Token.distanceExtraBits(distance_code)));
}
}
// Figure out smallest code.
// Fixed Huffman baseline.
var literal_encoding = &self.fixed_literal_encoding;
var distance_encoding = &self.fixed_distance_encoding;
var size = self.fixedSize(extra_bits);
// Dynamic Huffman?
var num_codegens: u32 = 0;
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literal_encoding and the distance_encoding.
self.generateCodegen(
num_literals,
num_distances,
&self.literal_encoding,
&self.distance_encoding,
);
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
const dynamic_size = self.dynamicSize(
&self.literal_encoding,
&self.distance_encoding,
extra_bits,
);
const dyn_size = dynamic_size.size;
num_codegens = dynamic_size.num_codegens;
if (dyn_size < size) {
size = dyn_size;
literal_encoding = &self.literal_encoding;
distance_encoding = &self.distance_encoding;
}
// Stored bytes?
if (storable and stored_size < size) {
try self.storedBlock(input.?, eof);
return;
}
// Huffman.
if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
try self.fixedHeader(eof);
} else {
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
}
// Write the tokens.
try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
}
pub fn storedBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
try self.storedHeader(input.len, eof);
try self.bit_writer.writeBytes(input);
}
/// Encodes a block using a dynamic Huffman table.
/// This should be used if the symbols used have a disproportionate
/// histogram distribution.
/// If input is supplied and the compression savings are below 1/16th of the
/// input size, the block is stored.
fn dynamicBlock(
self: *BlockWriter,
tokens: []const Token,
eof: bool,
input: ?[]const u8,
) Writer.Error!void {
const total_tokens = self.indexTokens(tokens);
const num_literals = total_tokens.num_literals;
const num_distances = total_tokens.num_distances;
// Generate codegen and codegenFrequencies, which indicate how to encode
// the literal_encoding and the distance_encoding.
self.generateCodegen(
num_literals,
num_distances,
&self.literal_encoding,
&self.distance_encoding,
);
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
const size = dynamic_size.size;
const num_codegens = dynamic_size.num_codegens;
// Store bytes, if we don't get a reasonable improvement.
const stored_size = storedSizeFits(input);
const ssize = stored_size.size;
const storable = stored_size.storable;
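// `size + (size >> 4)` inflates the dynamic size by 1/16th: the block is
// stored unless Huffman encoding saves more than ~6% over storing.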
if (storable and ssize < (size + (size >> 4))) {
try self.storedBlock(input.?, eof);
return;
}
// Write Huffman table.
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
// Write the tokens.
try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
}
const TotalIndexedTokens = struct {
num_literals: u32,
num_distances: u32,
};
/// Indexes a slice of tokens, appends an end_block_marker, updates
/// literal_freq and distance_freq, and generates literal_encoding
/// and distance_encoding.
/// Returns the number of literal and distance tokens.
fn indexTokens(self: *BlockWriter, tokens: []const Token) TotalIndexedTokens {
var num_literals: u32 = 0;
var num_distances: u32 = 0;
for (self.literal_freq, 0..) |_, i| {
self.literal_freq[i] = 0;
}
for (self.distance_freq, 0..) |_, i| {
self.distance_freq[i] = 0;
}
for (tokens) |t| {
if (t.kind == Token.Kind.literal) {
self.literal_freq[t.literal()] += 1;
continue;
}
self.literal_freq[t.lengthCode()] += 1;
self.distance_freq[t.distanceCode()] += 1;
}
// add end_block_marker token at the end
self.literal_freq[HuffmanEncoder.end_block_marker] += 1;
// get the number of literals
num_literals = @as(u32, @intCast(self.literal_freq.len));
while (self.literal_freq[num_literals - 1] == 0) {
num_literals -= 1;
}
// get the number of distances
num_distances = @as(u32, @intCast(self.distance_freq.len));
while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
num_distances -= 1;
}
if (num_distances == 0) {
// We haven't found a single match. If we want to go with the dynamic encoding,
// we should count at least one distance to be sure that the distance huffman tree could be encoded.
self.distance_freq[0] = 1;
num_distances = 1;
}
self.literal_encoding.generate(&self.literal_freq, 15);
self.distance_encoding.generate(&self.distance_freq, 15);
return TotalIndexedTokens{
.num_literals = num_literals,
.num_distances = num_distances,
};
}
/// Writes a slice of tokens to the output followed by an end_block_marker.
/// Codes for literal and distance encoding must be supplied.
fn writeTokens(
self: *BlockWriter,
tokens: []const Token,
le_codes: []Compress.HuffCode,
oe_codes: []Compress.HuffCode,
) Writer.Error!void {
for (tokens) |t| {
if (t.kind == Token.Kind.literal) {
try self.writeCode(le_codes[t.literal()]);
continue;
}
// Write the length
const le = t.lengthEncoding();
try self.writeCode(le_codes[le.code]);
if (le.extra_bits > 0) {
try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
}
// Write the distance
const oe = t.distanceEncoding();
try self.writeCode(oe_codes[oe.code]);
if (oe.extra_bits > 0) {
try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
}
}
// add end_block_marker at the end
try self.writeCode(le_codes[HuffmanEncoder.end_block_marker]);
}
/// Encodes a block of bytes as either Huffman-encoded literals or uncompressed
/// bytes if the result gains very little from compression.
pub fn huffmanBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
// Add everything as literals
histogram(input, &self.literal_freq);
self.literal_freq[HuffmanEncoder.end_block_marker] = 1;
const num_literals = HuffmanEncoder.end_block_marker + 1;
self.distance_freq[0] = 1;
const num_distances = 1;
self.literal_encoding.generate(&self.literal_freq, 15);
// Figure out smallest code.
// Always use dynamic Huffman or Store
var num_codegens: u32 = 0;
// Generate codegen and codegenFrequencies, which indicate how to encode
// the literal_encoding and the distance_encoding.
self.generateCodegen(
num_literals,
num_distances,
&self.literal_encoding,
&self.huff_distance,
);
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
const size = dynamic_size.size;
num_codegens = dynamic_size.num_codegens;
// Store bytes, if we don't get a reasonable improvement.
const stored_size_ret = storedSizeFits(input);
const ssize = stored_size_ret.size;
const storable = stored_size_ret.storable;
if (storable and ssize < (size + (size >> 4))) {
try self.storedBlock(input, eof);
return;
}
// Huffman.
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
const encoding = self.literal_encoding.codes[0..257];
for (input) |t| {
const c = encoding[t];
try self.bit_writer.writeBits(c.code, c.len);
}
try self.writeCode(encoding[HuffmanEncoder.end_block_marker]);
}
fn histogram(b: []const u8, h: *[286]u16) void {
// Clear histogram
for (h, 0..) |_, i| {
h[i] = 0;
}
var lh = h.*[0..256];
for (b) |t| {
lh[t] += 1;
}
}
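// Hypothetical usage sketch (names taken from this file; the init call and
// the `output` writer follow how Compress.zig constructs a BlockWriter and
// are assumptions, not part of the upstream file):
//
//   var bw: BlockWriter = .init(output); // output: *std.Io.Writer
//   try bw.write(tokens, true, input_bytes); // final block; picks stored/fixed/dynamic
//   try bw.flush();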

View File

@@ -1,240 +0,0 @@
//! 64K buffer of uncompressed data created during inflate (decompression). Has
//! enough history to support writing match<length, distance>: copying `length`
//! bytes from the position `distance` backward from the current one.
//!
//! Reads can return fewer bytes than are available if the data is spread
//! across different wraps of the circle. So repeat reads until the required
//! number of bytes is obtained or the returned slice has zero length.
//!
//! Note on deflate limits:
//!  * a non-compressible block is limited to 65,535 bytes,
//!  * a backward pointer is limited to 32K bytes in distance and 258 bytes in
//!    length.
//!
//! A whole non-compressed block can be written without overlap. We always have
//! up to 64K of history, more than the 32K needed.
//!
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
const consts = @import("consts.zig").match;
const mask = 0xffff; // 64K - 1
const buffer_len = mask + 1; // 64K buffer
const Self = @This();
buffer: [buffer_len]u8 = undefined,
wp: usize = 0, // write position
rp: usize = 0, // read position
fn writeAll(self: *Self, buf: []const u8) void {
for (buf) |c| self.write(c);
}
/// Write literal.
pub fn write(self: *Self, b: u8) void {
assert(self.wp - self.rp < mask);
self.buffer[self.wp & mask] = b;
self.wp += 1;
}
/// Writes a match (back-reference into the same data), copying `length` bytes
/// starting `distance` back from the current write position.
pub fn writeMatch(self: *Self, length: u16, distance: u16) !void {
if (self.wp < distance or
length < consts.base_length or length > consts.max_length or
distance < consts.min_distance or distance > consts.max_distance)
{
return error.InvalidMatch;
}
assert(self.wp - self.rp < mask);
var from: usize = self.wp - distance & mask;
const from_end: usize = from + length;
var to: usize = self.wp & mask;
const to_end: usize = to + length;
self.wp += length;
// Fast path using memcpy
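// When length > distance the regions overlap, so each pass doubles the
// copy length: bytes written by earlier passes extend the valid source
// region, replicating the pattern until `length` bytes are written.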
if (from_end < buffer_len and to_end < buffer_len) // start and end at the same circle
{
var cur_len = distance;
var remaining_len = length;
while (cur_len < remaining_len) {
@memcpy(self.buffer[to..][0..cur_len], self.buffer[from..][0..cur_len]);
to += cur_len;
remaining_len -= cur_len;
cur_len = cur_len * 2;
}
@memcpy(self.buffer[to..][0..remaining_len], self.buffer[from..][0..remaining_len]);
return;
}
// Slow byte by byte
while (to < to_end) {
self.buffer[to & mask] = self.buffer[from & mask];
to += 1;
from += 1;
}
}
/// Returns a writable part of the internal buffer, of size `n` at most.
/// Advances the write pointer, assuming the returned buffer will be filled
/// with data.
pub fn getWritable(self: *Self, n: usize) []u8 {
const wp = self.wp & mask;
const len = @min(n, buffer_len - wp);
self.wp += len;
return self.buffer[wp .. wp + len];
}
/// Reads available data. Can return only part of the available data if it is
/// spread across a wrap of the circle, so read until this returns a
/// zero-length slice.
pub fn read(self: *Self) []const u8 {
return self.readAtMost(buffer_len);
}
/// Reads part of the available data. Can return fewer than `limit` bytes even
/// if more decoded data is available.
pub fn readAtMost(self: *Self, limit: usize) []const u8 {
const rb = self.readBlock(if (limit == 0) buffer_len else limit);
defer self.rp += rb.len;
return self.buffer[rb.head..rb.tail];
}
const ReadBlock = struct {
head: usize,
tail: usize,
len: usize,
};
/// Returns the position of a contiguous block of readable data.
fn readBlock(self: *Self, max: usize) ReadBlock {
const r = self.rp & mask;
const w = self.wp & mask;
const n = @min(
max,
if (w >= r) w - r else buffer_len - r,
);
return .{
.head = r,
.tail = r + n,
.len = n,
};
}
/// Number of free bytes for write.
pub fn free(self: *Self) usize {
return buffer_len - (self.wp - self.rp);
}
/// Full if the largest match can't fit. 258 is the largest match length; that
/// many bytes can be produced in a single decode step.
pub fn full(self: *Self) bool {
return self.free() < 258 + 1;
}
// example from: https://youtu.be/SJPvNi4HrWQ?t=3558
test writeMatch {
var cb: Self = .{};
cb.writeAll("a salad; ");
try cb.writeMatch(5, 9);
try cb.writeMatch(3, 3);
try testing.expectEqualStrings("a salad; a salsal", cb.read());
}
test "writeMatch overlap" {
var cb: Self = .{};
cb.writeAll("a b c ");
try cb.writeMatch(8, 4);
cb.write('d');
try testing.expectEqualStrings("a b c b c b c d", cb.read());
}
test readAtMost {
var cb: Self = .{};
cb.writeAll("0123456789");
try cb.writeMatch(50, 10);
try testing.expectEqualStrings("0123456789" ** 6, cb.buffer[cb.rp..cb.wp]);
for (0..6) |i| {
try testing.expectEqual(i * 10, cb.rp);
try testing.expectEqualStrings("0123456789", cb.readAtMost(10));
}
try testing.expectEqualStrings("", cb.readAtMost(10));
try testing.expectEqualStrings("", cb.read());
}
test Self {
var cb: Self = .{};
const data = "0123456789abcdef" ** (1024 / 16);
cb.writeAll(data);
try testing.expectEqual(@as(usize, 0), cb.rp);
try testing.expectEqual(@as(usize, 1024), cb.wp);
try testing.expectEqual(@as(usize, 1024 * 63), cb.free());
for (0..62 * 4) |_|
try cb.writeMatch(256, 1024); // write 62K
try testing.expectEqual(@as(usize, 0), cb.rp);
try testing.expectEqual(@as(usize, 63 * 1024), cb.wp);
try testing.expectEqual(@as(usize, 1024), cb.free());
cb.writeAll(data[0..200]);
_ = cb.readAtMost(1024); // make some space
cb.writeAll(data); // overflows write position
try testing.expectEqual(@as(usize, 200 + 65536), cb.wp);
try testing.expectEqual(@as(usize, 1024), cb.rp);
try testing.expectEqual(@as(usize, 1024 - 200), cb.free());
const rb = cb.readBlock(Self.buffer_len);
try testing.expectEqual(@as(usize, 65536 - 1024), rb.len);
try testing.expectEqual(@as(usize, 1024), rb.head);
try testing.expectEqual(@as(usize, 65536), rb.tail);
try testing.expectEqual(@as(usize, 65536 - 1024), cb.read().len); // read to the end of the buffer
try testing.expectEqual(@as(usize, 200 + 65536), cb.wp);
try testing.expectEqual(@as(usize, 65536), cb.rp);
try testing.expectEqual(@as(usize, 65536 - 200), cb.free());
try testing.expectEqual(@as(usize, 200), cb.read().len); // read the rest
}
test "write overlap" {
var cb: Self = .{};
cb.wp = cb.buffer.len - 15;
cb.rp = cb.wp;
cb.writeAll("0123456789");
cb.writeAll("abcdefghij");
try testing.expectEqual(cb.buffer.len + 5, cb.wp);
try testing.expectEqual(cb.buffer.len - 15, cb.rp);
try testing.expectEqualStrings("0123456789abcde", cb.read());
try testing.expectEqualStrings("fghij", cb.read());
try testing.expect(cb.wp == cb.rp);
}
test "writeMatch/read overlap" {
var cb: Self = .{};
cb.wp = cb.buffer.len - 15;
cb.rp = cb.wp;
cb.writeAll("0123456789");
try cb.writeMatch(15, 5);
try testing.expectEqualStrings("012345678956789", cb.read());
try testing.expectEqualStrings("5678956789", cb.read());
try cb.writeMatch(20, 25);
try testing.expectEqualStrings("01234567895678956789", cb.read());
}

View File

@@ -0,0 +1,332 @@
//! Default compression algorithm. Has two steps: tokenization and token
//! encoding.
//!
//! Tokenization takes the uncompressed input stream and produces a list of
//! tokens. Each token is either a literal (a byte of data) or a match
//! (back-reference to previous data, with length and distance). Tokenization
//! accumulates 32K tokens; when full, or when `flush` is called, the tokens
//! are passed to the `block_writer`. Level defines how hard (how slowly) it
//! tries to find a match.
//!
//! The block writer decides which type of deflate block to write (stored,
//! fixed, dynamic) and encodes the tokens to the output byte stream. The
//! client has to call `finish` to write the block with the final bit set.
//!
//! Container defines the type of header and footer, which can be gzip, zlib
//! or raw. They all share the same deflate body; raw has no header or footer,
//! just the deflate body.
//!
//! Compression algorithm explained in rfc-1951 (slightly edited for this case):
//!
//! The compressor uses a chained hash table `lookup` to find duplicated
//! strings, using a hash function that operates on 4-byte sequences. At any
//! given point during compression, let XYZW be the next 4 input bytes
//! (lookahead) to be examined (not necessarily all different, of course).
//! First, the compressor examines the hash chain for XYZW. If the chain is
//! empty, the compressor simply writes out X as a literal byte and advances
//! one byte in the input. If the hash chain is not empty, indicating that the
//! sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
//! hash function value) has occurred recently, the compressor compares all
//! strings on the XYZW hash chain with the actual input data sequence
//! starting at the current point, and selects the longest match.
//!
//! To improve overall compression, the compressor defers the selection of
//! matches ("lazy matching"): after a match of length N has been found, the
//! compressor searches for a longer match starting at the next input byte. If
//! it finds a longer match, it truncates the previous match to a length of
//! one (thus producing a single literal byte) and then emits the longer
//! match. Otherwise, it emits the original match, and, as described above,
//! advances N bytes before continuing.
//!
//!
//! Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
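//
// Illustrative sketch of the hash-chain search described above (assumed
// names, mirroring Lookup and the deleted SlidingWindow's `match`; the real
// tokenizer in this file is still TODO below):
//
//   var pos: usize = 0;
//   while (pos + 4 <= data.len) : (pos += 1) {
//       var prev = lookup.add(data[pos..], @intCast(pos)); // chain head for these 4 bytes
//       var best_len: u16 = 0;
//       var tries = level.chain;
//       while (prev > 0 and tries > 0) : (tries -= 1) {
//           best_len = @max(best_len, window.match(prev, @intCast(pos), best_len));
//           prev = lookup.prev(prev);
//       }
//       // emit a literal if best_len < Token.min_length, else a match token
//   }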
const builtin = @import("builtin");
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
const expect = testing.expect;
const mem = std.mem;
const math = std.math;
const Writer = std.Io.Writer;
const Compress = @This();
const Token = @import("Token.zig");
const BlockWriter = @import("BlockWriter.zig");
const flate = @import("../flate.zig");
const Container = flate.Container;
const Lookup = @import("Lookup.zig");
const HuffmanEncoder = flate.HuffmanEncoder;
const LiteralNode = HuffmanEncoder.LiteralNode;
lookup: Lookup = .{},
tokens: Tokens = .{},
block_writer: BlockWriter,
level: LevelArgs,
hasher: Container.Hasher,
writer: Writer,
state: State,
// Match and literal at the previous position.
// Used for lazy match finding in processWindow.
prev_match: ?Token = null,
prev_literal: ?u8 = null,
pub const State = enum { header, middle, ended };
/// Trades between speed and compression size.
/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
/// levels 1-3 use a different algorithm that is faster but compresses less.
/// That is not implemented here.
pub const Level = enum(u4) {
level_4 = 4,
level_5 = 5,
level_6 = 6,
level_7 = 7,
level_8 = 8,
level_9 = 9,
fast = 0xb,
default = 0xc,
best = 0xd,
};
/// Number of tokens to accumulate in deflate before starting block encoding.
///
/// In zlib this depends on memlevel: 6 + memlevel, where the default memlevel
/// is 8 and the max is 9, giving 14 or 15 bits.
pub const n_tokens = 1 << 15;
/// Algorithm knobs for each level.
const LevelArgs = struct {
good: u16, // Do fewer lookups if we already have a match of this length.
nice: u16, // Stop looking for a better match once we find one of at least this length.
lazy: u16, // Don't do a lazy match find if we got a match of at least this length.
chain: u16, // How many lookups for a previous match to perform.
pub fn get(level: Level) LevelArgs {
return switch (level) {
.fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 },
.level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 },
.default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
.level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 },
.level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
.best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
};
}
};
pub const Options = struct {
level: Level = .default,
container: Container = .raw,
};
pub fn init(output: *Writer, buffer: []u8, options: Options) Compress {
return .{
.block_writer = .init(output),
.level = .get(options.level),
.hasher = .init(options.container),
.state = .header,
.writer = .{
.buffer = buffer,
.vtable = &.{ .drain = drain },
},
};
}
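// Hypothetical usage (buffer sizing is an assumption; tokenization above is
// still TODO, so this only illustrates the intended shape of the API):
//
//   var buf: [2 * flate.history_len]u8 = undefined;
//   var c: Compress = .init(output, &buf, .{ .level = .best, .container = .gzip });
//   try c.writer.writeAll(data);
//   try c.end();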
// Tokens store
const Tokens = struct {
list: [n_tokens]Token = undefined,
pos: usize = 0,
fn add(self: *Tokens, t: Token) void {
self.list[self.pos] = t;
self.pos += 1;
}
fn full(self: *Tokens) bool {
return self.pos == self.list.len;
}
fn reset(self: *Tokens) void {
self.pos = 0;
}
fn tokens(self: *Tokens) []const Token {
return self.list[0..self.pos];
}
};
fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
_ = data;
_ = splat;
const c: *Compress = @fieldParentPtr("writer", me);
const out = c.block_writer.output;
switch (c.state) {
.header => {
c.state = .middle;
const header = c.hasher.container().header();
try out.writeAll(header);
return header.len;
},
.middle => {},
.ended => unreachable,
}
const buffered = me.buffered();
const min_lookahead = Token.min_length + Token.max_length;
const history_plus_lookahead_len = flate.history_len + min_lookahead;
if (buffered.len < history_plus_lookahead_len) return 0;
const lookahead = buffered[flate.history_len..];
// TODO tokenize
_ = lookahead;
//c.hasher.update(lookahead[0..n]);
@panic("TODO");
}
pub fn end(c: *Compress) !void {
try endUnflushed(c);
const out = c.block_writer.output;
try out.flush();
}
pub fn endUnflushed(c: *Compress) !void {
while (c.writer.end != 0) _ = try drain(&c.writer, &.{""}, 1);
c.state = .ended;
const out = c.block_writer.output;
// TODO flush tokens
switch (c.hasher) {
.gzip => |*gzip| {
// GZIP 8 bytes footer
// - 4 bytes, CRC32 (CRC-32)
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
const footer = try out.writableArray(8);
std.mem.writeInt(u32, footer[0..4], gzip.crc.final(), .little);
std.mem.writeInt(u32, footer[4..8], @truncate(gzip.count), .little);
},
.zlib => |*zlib| {
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
// 4 bytes of ADLER32 (Adler-32 checksum)
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to Adler-32
// algorithm.
std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big);
},
.raw => {},
}
}
pub const Simple = struct {
/// Note that store blocks are limited to 65535 bytes.
buffer: []u8,
wp: usize,
block_writer: BlockWriter,
hasher: Container.Hasher,
strategy: Strategy,
pub const Strategy = enum { huffman, store };
pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple {
const header = container.header();
try output.writeAll(header);
return .{
.buffer = buffer,
.wp = 0,
.block_writer = .init(output),
.hasher = .init(container),
.strategy = strategy,
};
}
pub fn flush(self: *Simple) !void {
try self.flushBuffer(false);
try self.block_writer.storedBlock("", false);
try self.block_writer.flush();
}
pub fn finish(self: *Simple) !void {
try self.flushBuffer(true);
try self.block_writer.flush();
try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output);
}
fn flushBuffer(self: *Simple, final: bool) !void {
const buf = self.buffer[0..self.wp];
switch (self.strategy) {
.huffman => try self.block_writer.huffmanBlock(buf, final),
.store => try self.block_writer.storedBlock(buf, final),
}
self.wp = 0;
}
};
test "generate a Huffman code from an array of frequencies" {
var freqs: [19]u16 = [_]u16{
8, // 0
1, // 1
1, // 2
2, // 3
5, // 4
10, // 5
9, // 6
1, // 7
0, // 8
0, // 9
0, // 10
0, // 11
0, // 12
0, // 13
0, // 14
0, // 15
1, // 16
3, // 17
5, // 18
};
var codes: [19]HuffmanEncoder.Code = undefined;
var enc: HuffmanEncoder = .{
.codes = &codes,
.freq_cache = undefined,
.bit_count = undefined,
.lns = undefined,
.lfs = undefined,
};
enc.generate(freqs[0..], 7);
try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,463 @@
const HuffmanEncoder = @This();
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
codes: []Code,
// Reusable buffer with the longest possible frequency table.
freq_cache: [max_num_frequencies + 1]LiteralNode,
bit_count: [17]u32,
lns: []LiteralNode, // sorted by literal, stored to avoid repeated allocation in generate
lfs: []LiteralNode, // sorted by frequency, stored to avoid repeated allocation in generate
pub const LiteralNode = struct {
literal: u16,
freq: u16,
pub fn max() LiteralNode {
return .{
.literal = std.math.maxInt(u16),
.freq = std.math.maxInt(u16),
};
}
};
pub const Code = struct {
code: u16 = 0,
len: u16 = 0,
};
/// The odd order in which the codegen code sizes are written.
pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
/// The number of codegen codes.
pub const codegen_code_count = 19;
/// The number of distance codes.
pub const distance_code_count = 30;
/// Maximum number of literals.
pub const max_num_lit = 286;
/// Max number of frequencies used for a Huffman Code
/// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
/// The largest of these is max_num_lit.
pub const max_num_frequencies = max_num_lit;
/// Biggest block size for uncompressed block.
pub const max_store_block_size = 65535;
/// The special code used to mark the end of a block.
pub const end_block_marker = 256;
/// Update this Huffman Code object to be the minimum code for the specified frequency count.
///
/// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
/// max_bits The maximum number of bits to use for any literal.
pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
var list = self.freq_cache[0 .. freq.len + 1];
// Number of non-zero literals
var count: u32 = 0;
// Set list to be the set of all non-zero literals and their frequencies
for (freq, 0..) |f, i| {
if (f != 0) {
list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
count += 1;
} else {
list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
self.codes[i].len = 0;
}
}
list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
list = list[0..count];
if (count <= 2) {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for (list, 0..) |node, i| {
// "list" is in order of increasing literal value.
self.codes[node.literal] = .{
.code = @intCast(i),
.len = 1,
};
}
return;
}
self.lfs = list;
std.mem.sort(LiteralNode, self.lfs, {}, byFreq);
// Get the number of literals for each bit count
const bit_count = self.bitCounts(list, max_bits);
// And do the assignment
self.assignEncodingAndSize(bit_count, list);
}
pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
var total: u32 = 0;
for (freq, 0..) |f, i| {
if (f != 0) {
total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
}
}
return total;
}
/// Return the number of literals assigned to each bit size in the Huffman encoding
///
/// This method is only called when list.len >= 3
/// The cases of 0, 1, and 2 literals are handled by special case code.
///
/// list: An array of the literals with non-zero frequencies
/// and their associated frequencies. The array is in order of increasing
/// frequency, and has as its last element a special element with frequency
/// `math.maxInt(i32)`
///
/// max_bits: The maximum number of bits that should be used to encode any literal.
/// Must be less than 16.
///
/// Returns an integer array in which array[i] indicates the number of literals
/// that should be encoded in i bits.
fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
var max_bits = max_bits_to_use;
const n = list.len;
const max_bits_limit = 16;
assert(max_bits < max_bits_limit);
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
max_bits = @min(max_bits, n - 1);
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed == 0. This makes level1.next_pair_freq
// be a legitimate value that never gets chosen.
var levels: [max_bits_limit]LevelInfo = std.mem.zeroes([max_bits_limit]LevelInfo);
// leaf_counts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leaf_counts[i][j] is the number of literals at the left
// of the level j ancestor.
var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0));
{
var level = @as(u32, 1);
while (level <= max_bits) : (level += 1) {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = LevelInfo{
.level = level,
.last_freq = list[1].freq,
.next_char_freq = list[2].freq,
.next_pair_freq = list[0].freq + list[1].freq,
.needed = 0,
};
leaf_counts[level][level] = 2;
if (level == 1) {
levels[level].next_pair_freq = std.math.maxInt(i32);
}
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
{
var level = max_bits;
while (true) {
var l = &levels[level];
if (l.next_pair_freq == std.math.maxInt(i32) and l.next_char_freq == std.math.maxInt(i32)) {
// We've run out of both leaves and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set next_pair_freq impossibly large.
l.needed = 0;
levels[level + 1].next_pair_freq = std.math.maxInt(i32);
level += 1;
continue;
}
const prev_freq = l.last_freq;
if (l.next_char_freq < l.next_pair_freq) {
// The next item on this row is a leaf node.
const next = leaf_counts[level][level] + 1;
l.last_freq = l.next_char_freq;
// Lower leaf_counts are the same as for the previous node.
leaf_counts[level][level] = next;
if (next >= list.len) {
l.next_char_freq = LiteralNode.max().freq;
} else {
l.next_char_freq = list[next].freq;
}
} else {
// The next item on this row is a pair from the previous row.
// next_pair_freq isn't valid until we generate two
// more values in the level below
l.last_freq = l.next_pair_freq;
// Take leaf counts from the lower level, except counts[level] remains the same.
@memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
levels[l.level - 1].needed = 2;
}
l.needed -= 1;
if (l.needed == 0) {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in next_pair_freq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if (l.level == max_bits) {
// All done!
break;
}
levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
level += 1;
} else {
// If we stole from below, move down temporarily to replenish it.
while (levels[level - 1].needed > 0) {
level -= 1;
if (level == 0) {
break;
}
}
}
}
}
// Something is wrong if, at the end, the top level hasn't used
// all of the leaves.
assert(leaf_counts[max_bits][max_bits] == n);
var bit_count = self.bit_count[0 .. max_bits + 1];
var bits: u32 = 1;
const counts = &leaf_counts[max_bits];
{
var level = max_bits;
while (level > 0) : (level -= 1) {
// counts[level] gives the number of literals requiring at least "bits"
// bits to encode.
bit_count[bits] = counts[level] - counts[level - 1];
bits += 1;
if (level == 0) {
break;
}
}
}
return bit_count;
}
/// Look at the leaves and assign them a bit count and an encoding as specified
/// in RFC 1951 3.2.2
fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
var code = @as(u16, 0);
var list = list_arg;
for (bit_count, 0..) |bits, n| {
code <<= 1;
if (n == 0 or bits == 0) {
continue;
}
// The literals list[list.len - bits] .. list[list.len - 1]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
self.lns = chunk;
std.mem.sort(LiteralNode, self.lns, {}, byLiteral);
for (chunk) |node| {
self.codes[node.literal] = .{
.code = bitReverse(u16, code, @as(u5, @intCast(n))),
.len = @as(u16, @intCast(n)),
};
code += 1;
}
list = list[0 .. list.len - @as(u32, @intCast(bits))];
}
}
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
_ = context;
if (a.freq == b.freq) {
return a.literal < b.literal;
}
return a.freq < b.freq;
}
/// Describes the state of the constructed tree for a given depth.
const LevelInfo = struct {
/// Our level, for better printing.
level: u32,
/// The frequency of the last node at this level
last_freq: u32,
/// The frequency of the next character to add to this level
next_char_freq: u32,
/// The frequency of the next pair (from level below) to add to this level.
/// Only valid if the "needed" value of the next lower level is 0.
next_pair_freq: u32,
/// The number of chains remaining to generate for this level before moving
/// up to the next level
needed: u32,
};
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
_ = context;
return a.literal < b.literal;
}
/// Reverses an N-bit code bit by bit.
fn bitReverse(comptime T: type, value: T, n: usize) T {
const r = @bitReverse(value);
return r >> @as(std.math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n));
}
test bitReverse {
const ReverseBitsTest = struct {
in: u16,
bit_count: u5,
out: u16,
};
const reverse_bits_tests = [_]ReverseBitsTest{
.{ .in = 1, .bit_count = 1, .out = 1 },
.{ .in = 1, .bit_count = 2, .out = 2 },
.{ .in = 1, .bit_count = 3, .out = 4 },
.{ .in = 1, .bit_count = 4, .out = 8 },
.{ .in = 1, .bit_count = 5, .out = 16 },
.{ .in = 17, .bit_count = 5, .out = 17 },
.{ .in = 257, .bit_count = 9, .out = 257 },
.{ .in = 29, .bit_count = 5, .out = 23 },
};
for (reverse_bits_tests) |h| {
const v = bitReverse(u16, h.in, h.bit_count);
try std.testing.expectEqual(h.out, v);
}
}
/// Generates a HuffmanCode corresponding to the fixed literal table
pub fn fixedLiteralEncoder(codes: *[max_num_frequencies]Code) HuffmanEncoder {
var h: HuffmanEncoder = undefined;
h.codes = codes;
var ch: u16 = 0;
while (ch < max_num_frequencies) : (ch += 1) {
var bits: u16 = undefined;
var size: u16 = undefined;
switch (ch) {
0...143 => {
// size 8, 00110000 .. 10111111
bits = ch + 48;
size = 8;
},
144...255 => {
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144;
size = 9;
},
256...279 => {
// size 7, 0000000 .. 0010111
bits = ch - 256;
size = 7;
},
else => {
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280;
size = 8;
},
}
h.codes[ch] = .{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
}
return h;
}
pub fn fixedDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
var h: HuffmanEncoder = undefined;
h.codes = codes;
for (h.codes, 0..) |_, ch| {
h.codes[ch] = .{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
}
return h;
}
pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
var distance_freq: [distance_code_count]u16 = @splat(0);
distance_freq[0] = 1;
// huff_distance is a static distance encoder used for huffman only encoding.
// It can be reused since we will not be encoding distance values.
var h: HuffmanEncoder = undefined;
h.codes = codes;
h.generate(distance_freq[0..], 15);
return h;
}
test "generate a Huffman code for the fixed literal table specific to Deflate" {
var codes: [max_num_frequencies]Code = undefined;
const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes);
for (enc.codes) |c| {
switch (c.len) {
7 => {
const v = @bitReverse(@as(u7, @intCast(c.code)));
try testing.expect(v <= 0b0010111);
},
8 => {
const v = @bitReverse(@as(u8, @intCast(c.code)));
try testing.expect((v >= 0b00110000 and v <= 0b10111111) or
(v >= 0b11000000 and v <= 0b11000111));
},
9 => {
const v = @bitReverse(@as(u9, @intCast(c.code)));
try testing.expect(v >= 0b110010000 and v <= 0b111111111);
},
else => unreachable,
}
}
}
test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
var codes: [distance_code_count]Code = undefined;
const enc = fixedDistanceEncoder(&codes);
for (enc.codes) |c| {
const v = @bitReverse(@as(u5, @intCast(c.code)));
try testing.expect(v <= 29);
try testing.expect(c.len == 5);
}
}
pub const fixed_codes = [_]u8{
0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
0b10100011,
};

View File

@@ -5,22 +5,27 @@
const std = @import("std");
const testing = std.testing;
const expect = testing.expect;
const consts = @import("consts.zig");
const flate = @import("../flate.zig");
const Token = @import("Token.zig");
const Self = @This();
const Lookup = @This();
const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
const chain_len = 2 * consts.history.len;
const chain_len = 2 * flate.history_len;
pub const bits = 15;
pub const len = 1 << bits;
pub const shift = 32 - bits;
// Maps hash => first position
head: [consts.lookup.len]u16 = [_]u16{0} ** consts.lookup.len,
head: [len]u16 = [_]u16{0} ** len,
// Maps position => previous positions for the same hash value
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
// Calculates hash of the 4 bytes from data.
// Inserts `pos` position of that hash in the lookup tables.
// Returns previous location with the same hash value.
pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
pub fn add(self: *Lookup, data: []const u8, pos: u16) u16 {
if (data.len < 4) return 0;
const h = hash(data[0..4]);
return self.set(h, pos);
@@ -28,11 +33,11 @@ pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
// Returns previous location with the same hash value given the current
// position.
pub fn prev(self: *Self, pos: u16) u16 {
pub fn prev(self: *Lookup, pos: u16) u16 {
return self.chain[pos];
}
fn set(self: *Self, h: u32, pos: u16) u16 {
fn set(self: *Lookup, h: u32, pos: u16) u16 {
const p = self.head[h];
self.head[h] = pos;
self.chain[pos] = p;
@@ -40,7 +45,7 @@ fn set(self: *Self, h: u32, pos: u16) u16 {
}
// Slide all positions in head and chain for `n`
pub fn slide(self: *Self, n: u16) void {
pub fn slide(self: *Lookup, n: u16) void {
for (&self.head) |*v| {
v.* -|= n;
}
@@ -52,8 +57,8 @@ pub fn slide(self: *Self, n: u16) void {
// Add `len` 4 bytes hashes from `data` into lookup.
// Position of the first byte is `pos`.
pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
if (len == 0 or data.len < consts.match.min_length) {
pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
if (length == 0 or data.len < Token.min_length) {
return;
}
var hb =
@@ -64,7 +69,7 @@ pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
_ = self.set(hashu(hb), pos);
var i = pos;
for (4..@min(len + 3, data.len)) |j| {
for (4..@min(length + 3, data.len)) |j| {
hb = (hb << 8) | @as(u32, data[j]);
i += 1;
_ = self.set(hashu(hb), i);
@@ -80,7 +85,7 @@ fn hash(b: *const [4]u8) u32 {
}
fn hashu(v: u32) u32 {
return @intCast((v *% prime4) >> consts.lookup.shift);
return @intCast((v *% prime4) >> shift);
}
test add {
@@ -91,7 +96,7 @@ test add {
0x01, 0x02, 0x03,
};
var h: Self = .{};
var h: Lookup = .{};
for (data, 0..) |_, i| {
const p = h.add(data[i..], @intCast(i));
if (i >= 8 and i < 24) {
@@ -101,7 +106,7 @@ test add {
}
}
const v = Self.hash(data[2 .. 2 + 4]);
const v = Lookup.hash(data[2 .. 2 + 4]);
try expect(h.head[v] == 2 + 16);
try expect(h.chain[2 + 16] == 2 + 8);
try expect(h.chain[2 + 8] == 2);
@@ -111,13 +116,13 @@ test bulkAdd {
const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
// one by one
var h: Self = .{};
var h: Lookup = .{};
for (data, 0..) |_, i| {
_ = h.add(data[i..], @intCast(i));
}
// in bulk
var bh: Self = .{};
var bh: Lookup = .{};
bh.bulkAdd(data, data.len, 0);
try testing.expectEqualSlices(u16, &h.head, &bh.head);

View File

@@ -1,160 +0,0 @@
//! Used in deflate (compression); holds uncompressed data from which Tokens
//! are produced. In combination with Lookup it is used to find matches in
//! history data.
//!
const std = @import("std");
const consts = @import("consts.zig");
const expect = testing.expect;
const assert = std.debug.assert;
const testing = std.testing;
const hist_len = consts.history.len;
const buffer_len = 2 * hist_len;
const min_lookahead = consts.match.min_length + consts.match.max_length;
const max_rp = buffer_len - min_lookahead;
const Self = @This();
buffer: [buffer_len]u8 = undefined,
wp: usize = 0, // write position
rp: usize = 0, // read position
fp: isize = 0, // last flush position, tokens are build from fp..rp
/// Returns the number of bytes written, or 0 if the buffer is full and a
/// slide is needed.
pub fn write(self: *Self, buf: []const u8) usize {
if (self.rp >= max_rp) return 0; // need to slide
const n = @min(buf.len, buffer_len - self.wp);
@memcpy(self.buffer[self.wp .. self.wp + n], buf[0..n]);
self.wp += n;
return n;
}
/// Slides the buffer by hist_len. Drops old history; preserves between
/// hist_len - min_lookahead and hist_len bytes.
/// Returns the number of bytes kept.
pub fn slide(self: *Self) u16 {
assert(self.rp >= max_rp and self.wp >= self.rp);
const n = self.wp - hist_len;
@memcpy(self.buffer[0..n], self.buffer[hist_len..self.wp]);
self.rp -= hist_len;
self.wp -= hist_len;
self.fp -= hist_len;
return @intCast(n);
}
/// Data from the current (read) position. This part of the buffer is not yet
/// converted to tokens.
fn lookahead(self: *Self) []const u8 {
assert(self.wp >= self.rp);
return self.buffer[self.rp..self.wp];
}
/// Returns part of the lookahead buffer. If should_flush is set, no lookahead
/// is preserved; otherwise enough data for the longest match is preserved.
/// Returns null if there is not enough data.
pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 {
const min: usize = if (should_flush) 0 else min_lookahead;
const lh = self.lookahead();
return if (lh.len > min) lh else null;
}
/// Advances read position, shrinks lookahead.
pub fn advance(self: *Self, n: u16) void {
assert(self.wp >= self.rp + n);
self.rp += n;
}
/// Returns writable part of the buffer, where new uncompressed data can be
/// written.
pub fn writable(self: *Self) []u8 {
return self.buffer[self.wp..];
}
/// Notification of how much of the writable buffer has been filled with data.
pub fn written(self: *Self, n: usize) void {
self.wp += n;
}
/// Finds match length between previous and current position.
/// Used in hot path!
pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length);
// lookahead buffers from previous and current positions
const prev_lh = self.buffer[prev_pos..][0..max_len];
const curr_lh = self.buffer[curr_pos..][0..max_len];
// If we already have a match (min_len > 0), first test the byte just
// past the previous length, prev_lh[min_len] != curr_lh[min_len], and
// then all the bytes from that position down to zero. Those are likelier
// positions to find a difference than looping from the first bytes.
var i: usize = min_len;
if (i > 0) {
if (max_len <= i) return 0;
while (true) {
if (prev_lh[i] != curr_lh[i]) return 0;
if (i == 0) break;
i -= 1;
}
i = min_len;
}
while (i < max_len) : (i += 1)
if (prev_lh[i] != curr_lh[i]) break;
return if (i >= consts.match.min_length) @intCast(i) else 0;
}
/// Current position of non-compressed data. Data before rp is already
/// converted to tokens.
pub fn pos(self: *Self) u16 {
return @intCast(self.rp);
}
/// Notification that token list is cleared.
pub fn flush(self: *Self) void {
self.fp = @intCast(self.rp);
}
/// Part of the buffer since the last flush, or null if there was a slide in
/// between (so fp became negative).
pub fn tokensBuffer(self: *Self) ?[]const u8 {
assert(self.fp <= self.rp);
if (self.fp < 0) return null;
return self.buffer[@intCast(self.fp)..self.rp];
}
test match {
const data = "Blah blah blah blah blah!";
var win: Self = .{};
try expect(win.write(data) == data.len);
try expect(win.wp == data.len);
try expect(win.rp == 0);
// length between l symbols
try expect(win.match(1, 6, 0) == 18);
try expect(win.match(1, 11, 0) == 13);
try expect(win.match(1, 16, 0) == 8);
try expect(win.match(1, 21, 0) == 0);
// position 15 = "blah blah!"
// position 20 = "blah!"
try expect(win.match(15, 20, 0) == 4);
try expect(win.match(15, 20, 3) == 4);
try expect(win.match(15, 20, 4) == 0);
}
test slide {
var win: Self = .{};
win.wp = Self.buffer_len - 11;
win.rp = Self.buffer_len - 111;
win.buffer[win.rp] = 0xab;
try expect(win.lookahead().len == 100);
try expect(win.tokensBuffer().?.len == win.rp);
const n = win.slide();
try expect(n == 32757);
try expect(win.buffer[win.rp] == 0xab);
try expect(win.rp == Self.hist_len - 111);
try expect(win.wp == Self.hist_len - 11);
try expect(win.lookahead().len == 100);
try expect(win.tokensBuffer() == null);
}

View File

@@ -6,7 +6,6 @@ const std = @import("std");
const assert = std.debug.assert;
const print = std.debug.print;
const expect = std.testing.expect;
const consts = @import("consts.zig").match;
const Token = @This();
@@ -21,16 +20,23 @@ dist: u15 = 0,
len_lit: u8 = 0,
kind: Kind = .literal,
pub const base_length = 3; // smallest match length per the RFC section 3.2.5
pub const min_length = 4; // min length used in this algorithm
pub const max_length = 258;
pub const min_distance = 1;
pub const max_distance = std.compress.flate.history_len;
pub fn literal(t: Token) u8 {
return t.len_lit;
}
pub fn distance(t: Token) u16 {
return @as(u16, t.dist) + consts.min_distance;
return @as(u16, t.dist) + min_distance;
}
pub fn length(t: Token) u16 {
return @as(u16, t.len_lit) + consts.base_length;
return @as(u16, t.len_lit) + base_length;
}
pub fn initLiteral(lit: u8) Token {
@@ -40,12 +46,12 @@ pub fn initLiteral(lit: u8) Token {
// distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
// length range 3 - 258, stored in len_lit as 0 - 255 (u8)
pub fn initMatch(dist: u16, len: u16) Token {
assert(len >= consts.min_length and len <= consts.max_length);
assert(dist >= consts.min_distance and dist <= consts.max_distance);
assert(len >= min_length and len <= max_length);
assert(dist >= min_distance and dist <= max_distance);
return .{
.kind = .match,
.dist = @intCast(dist - consts.min_distance),
.len_lit = @intCast(len - consts.base_length),
.dist = @intCast(dist - min_distance),
.len_lit = @intCast(len - base_length),
};
}

View File

@@ -1,422 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
pub fn bitReader(comptime T: type, reader: anytype) BitReader(T, @TypeOf(reader)) {
return BitReader(T, @TypeOf(reader)).init(reader);
}
pub fn BitReader64(comptime ReaderType: type) type {
return BitReader(u64, ReaderType);
}
pub fn BitReader32(comptime ReaderType: type) type {
return BitReader(u32, ReaderType);
}
/// Bit reader used during inflate (decompression). Has an internal buffer of
/// 64 bits which shifts right as bits are consumed. Uses forward_reader to
/// fill that internal buffer when needed.
///
/// readF is the core function. It supports a few different ways of getting
/// bits, controlled by flags. In the hot path we try to avoid checking
/// whether the buffer needs a refill from forward_reader by calling fill in
/// advance and then readF with the buffered flag set.
///
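/// Hot-path usage sketch (an illustration added here, following the
/// description above; `flag` is the nested flags namespace below):
///
///   try br.fill(15);                    // at most one refill check
///   const sym = try br.readF(u7, flag.buffered | flag.reverse);
///   const extra = try br.readN(4, flag.buffered);
///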
pub fn BitReader(comptime T: type, comptime ReaderType: type) type {
assert(T == u32 or T == u64);
const t_bytes: usize = @sizeOf(T);
const Tshift = if (T == u64) u6 else u5;
return struct {
// Underlying reader used for filling internal bits buffer
forward_reader: ReaderType = undefined,
// Internal buffer of 64 bits
bits: T = 0,
// Number of bits in the buffer
nbits: u32 = 0,
const Self = @This();
pub const Error = ReaderType.Error || error{EndOfStream};
pub fn init(rdr: ReaderType) Self {
var self = Self{ .forward_reader = rdr };
self.fill(1) catch {};
return self;
}
/// Tries to ensure `nice` bits are available in the buffer, reading from
/// the forward reader if fewer than `nice` bits are buffered. Returns an
/// error only when the end of the forward stream is reached and the
/// internal buffer is empty; it will not error if fewer than `nice` bits
/// are buffered. During inflate we usually know the maximum number of
/// bits the next step can need, but that step will usually need fewer
/// bits to decode. So `nice` is not a hard limit; this just tries to have
/// that many bits available. If the end of the forward stream is reached
/// there may be some extra zero bits in the buffer.
pub inline fn fill(self: *Self, nice: u6) !void {
if (self.nbits >= nice and nice != 0) {
return; // We have enough bits
}
// Read more bits from forward reader
// Number of empty bytes in bits, round nbits to whole bytes.
const empty_bytes =
@as(u8, if (self.nbits & 0x7 == 0) t_bytes else t_bytes - 1) - // 8 for 8, 16, 24..., 7 otherwise
(self.nbits >> 3); // 0 for 0-7, 1 for 8-16, ... same as / 8
var buf: [t_bytes]u8 = [_]u8{0} ** t_bytes;
const bytes_read = self.forward_reader.readAll(buf[0..empty_bytes]) catch 0;
if (bytes_read > 0) {
const u: T = std.mem.readInt(T, buf[0..t_bytes], .little);
self.bits |= u << @as(Tshift, @intCast(self.nbits));
self.nbits += 8 * @as(u8, @intCast(bytes_read));
return;
}
if (self.nbits == 0)
return error.EndOfStream;
}
/// Read exactly buf.len bytes into buf.
pub fn readAll(self: *Self, buf: []u8) !void {
assert(self.alignBits() == 0); // internal bits must be at byte boundary
// First read from internal bits buffer.
var n: usize = 0;
while (self.nbits > 0 and n < buf.len) {
buf[n] = try self.readF(u8, flag.buffered);
n += 1;
}
// Then use forward reader for all other bytes.
try self.forward_reader.readNoEof(buf[n..]);
}
pub const flag = struct {
pub const peek: u3 = 0b001; // don't advance the internal buffer, just get bits and leave them in it
pub const buffered: u3 = 0b010; // assume that there is no need to fill, fill should be called before
pub const reverse: u3 = 0b100; // bit reverse read bits
};
/// Alias for readF(U, 0).
pub fn read(self: *Self, comptime U: type) !U {
return self.readF(U, 0);
}
/// Alias for readF with flag.peek set.
pub inline fn peekF(self: *Self, comptime U: type, comptime how: u3) !U {
return self.readF(U, how | flag.peek);
}
/// Read with flags provided.
pub fn readF(self: *Self, comptime U: type, comptime how: u3) !U {
if (U == T) {
assert(how == 0);
assert(self.alignBits() == 0);
try self.fill(@bitSizeOf(T));
if (self.nbits != @bitSizeOf(T)) return error.EndOfStream;
const v = self.bits;
self.nbits = 0;
self.bits = 0;
return v;
}
const n: Tshift = @bitSizeOf(U);
switch (how) {
0 => { // `normal` read
try self.fill(n); // ensure that there are n bits in the buffer
const u: U = @truncate(self.bits); // get n bits
try self.shift(n); // advance buffer for n
return u;
},
(flag.peek) => { // no shift, leave bits in the buffer
try self.fill(n);
return @truncate(self.bits);
},
flag.buffered => { // no fill, assume that buffer has enough bits
const u: U = @truncate(self.bits);
try self.shift(n);
return u;
},
(flag.reverse) => { // same as 0 with bit reverse
try self.fill(n);
const u: U = @truncate(self.bits);
try self.shift(n);
return @bitReverse(u);
},
(flag.peek | flag.reverse) => {
try self.fill(n);
return @bitReverse(@as(U, @truncate(self.bits)));
},
(flag.buffered | flag.reverse) => {
const u: U = @truncate(self.bits);
try self.shift(n);
return @bitReverse(u);
},
(flag.peek | flag.buffered) => {
return @truncate(self.bits);
},
(flag.peek | flag.buffered | flag.reverse) => {
return @bitReverse(@as(U, @truncate(self.bits)));
},
}
}
/// Reads n bits.
/// Only the buffered flag can be used in `how`.
pub fn readN(self: *Self, n: u4, comptime how: u3) !u16 {
switch (how) {
0 => {
try self.fill(n);
},
flag.buffered => {},
else => unreachable,
}
const mask: u16 = (@as(u16, 1) << n) - 1;
const u: u16 = @as(u16, @truncate(self.bits)) & mask;
try self.shift(n);
return u;
}
/// Advances the buffer by n bits.
pub fn shift(self: *Self, n: Tshift) !void {
if (n > self.nbits) return error.EndOfStream;
self.bits >>= n;
self.nbits -= n;
}
/// Skip n bytes.
pub fn skipBytes(self: *Self, n: u16) !void {
for (0..n) |_| {
try self.fill(8);
try self.shift(8);
}
}
// Number of bits to align stream to the byte boundary.
fn alignBits(self: *Self) u3 {
return @intCast(self.nbits & 0x7);
}
/// Align stream to the byte boundary.
pub fn alignToByte(self: *Self) void {
const ab = self.alignBits();
if (ab > 0) self.shift(ab) catch unreachable;
}
/// Skips a zero-terminated string.
pub fn skipStringZ(self: *Self) !void {
while (true) {
if (try self.readF(u8, 0) == 0) break;
}
}
/// Reads a deflate fixed code.
/// Reads the first 7 bits, then maybe 1 or 2 more to get the full 7-, 8- or 9-bit code.
/// ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12
/// Lit Value Bits Codes
/// --------- ---- -----
/// 0 - 143 8 00110000 through
/// 10111111
/// 144 - 255 9 110010000 through
/// 111111111
/// 256 - 279 7 0000000 through
/// 0010111
/// 280 - 287 8 11000000 through
/// 11000111
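///
/// Worked example (added for illustration, derived from the table above):
/// the 8-bit code for literal 65 is 0b00110000 + 65 = 0b01110001. Its first
/// 7 bits, 0b0111000, exceed 0b0010111, so one more bit is read and
/// (0b0111000 << 1) + 1 - 0b00110000 recovers 65.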
pub fn readFixedCode(self: *Self) !u16 {
try self.fill(7 + 2);
const code7 = try self.readF(u7, flag.buffered | flag.reverse);
if (code7 <= 0b0010_111) { // 7 bits, 256-279, codes 0000_000 - 0010_111
return @as(u16, code7) + 256;
} else if (code7 <= 0b1011_111) { // 8 bits, 0-143, codes 0011_0000 through 1011_1111
return (@as(u16, code7) << 1) + @as(u16, try self.readF(u1, flag.buffered)) - 0b0011_0000;
} else if (code7 <= 0b1100_011) { // 8 bit, 280-287, codes 1100_0000 - 1100_0111
return (@as(u16, code7 - 0b1100000) << 1) + try self.readF(u1, flag.buffered) + 280;
} else { // 9 bit, 144-255, codes 1_1001_0000 - 1_1111_1111
return (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, try self.readF(u2, flag.buffered | flag.reverse)) + 144;
}
}
};
}
test "readF" {
var fbs = std.io.fixedBufferStream(&[_]u8{ 0xf3, 0x48, 0xcd, 0xc9, 0x00, 0x00 });
var br = bitReader(u64, fbs.reader());
const F = BitReader64(@TypeOf(fbs.reader())).flag;
try testing.expectEqual(@as(u8, 48), br.nbits);
try testing.expectEqual(@as(u64, 0xc9cd48f3), br.bits);
try testing.expect(try br.readF(u1, 0) == 0b0000_0001);
try testing.expect(try br.readF(u2, 0) == 0b0000_0001);
try testing.expectEqual(@as(u8, 48 - 3), br.nbits);
try testing.expectEqual(@as(u3, 5), br.alignBits());
try testing.expect(try br.readF(u8, F.peek) == 0b0001_1110);
try testing.expect(try br.readF(u9, F.peek) == 0b1_0001_1110);
try br.shift(9);
try testing.expectEqual(@as(u8, 36), br.nbits);
try testing.expectEqual(@as(u3, 4), br.alignBits());
try testing.expect(try br.readF(u4, 0) == 0b0100);
try testing.expectEqual(@as(u8, 32), br.nbits);
try testing.expectEqual(@as(u3, 0), br.alignBits());
try br.shift(1);
try testing.expectEqual(@as(u3, 7), br.alignBits());
try br.shift(1);
try testing.expectEqual(@as(u3, 6), br.alignBits());
br.alignToByte();
try testing.expectEqual(@as(u3, 0), br.alignBits());
try testing.expectEqual(@as(u64, 0xc9), br.bits);
try testing.expectEqual(@as(u16, 0x9), try br.readN(4, 0));
try testing.expectEqual(@as(u16, 0xc), try br.readN(4, 0));
}
test "read block type 1 data" {
inline for ([_]type{ u64, u32 }) |T| {
const data = [_]u8{
0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
0x0c, 0x01, 0x02, 0x03, //
0xaa, 0xbb, 0xcc, 0xdd,
};
var fbs = std.io.fixedBufferStream(&data);
var br = bitReader(T, fbs.reader());
const F = BitReader(T, @TypeOf(fbs.reader())).flag;
try testing.expectEqual(@as(u1, 1), try br.readF(u1, 0)); // bfinal
try testing.expectEqual(@as(u2, 1), try br.readF(u2, 0)); // block_type
for ("Hello world\n") |c| {
try testing.expectEqual(@as(u8, c), try br.readF(u8, F.reverse) - 0x30);
}
try testing.expectEqual(@as(u7, 0), try br.readF(u7, 0)); // end of block
br.alignToByte();
try testing.expectEqual(@as(u32, 0x0302010c), try br.readF(u32, 0));
try testing.expectEqual(@as(u16, 0xbbaa), try br.readF(u16, 0));
try testing.expectEqual(@as(u16, 0xddcc), try br.readF(u16, 0));
}
}
test "shift/fill" {
const data = [_]u8{
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
};
var fbs = std.io.fixedBufferStream(&data);
var br = bitReader(u64, fbs.reader());
try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits);
try br.shift(8);
try testing.expectEqual(@as(u64, 0x00_08_07_06_05_04_03_02), br.bits);
try br.fill(60); // fill with 1 byte
try testing.expectEqual(@as(u64, 0x01_08_07_06_05_04_03_02), br.bits);
try br.shift(8 * 4 + 4);
try testing.expectEqual(@as(u64, 0x00_00_00_00_00_10_80_70), br.bits);
try br.fill(60); // fill with 4 bytes (shift by 4)
try testing.expectEqual(@as(u64, 0x00_50_40_30_20_10_80_70), br.bits);
try testing.expectEqual(@as(u8, 8 * 7 + 4), br.nbits);
try br.shift(@intCast(br.nbits)); // clear buffer
try br.fill(8); // refill with the rest of the bytes
try testing.expectEqual(@as(u64, 0x00_00_00_00_00_08_07_06), br.bits);
}
test "readAll" {
inline for ([_]type{ u64, u32 }) |T| {
const data = [_]u8{
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
};
var fbs = std.io.fixedBufferStream(&data);
var br = bitReader(T, fbs.reader());
switch (T) {
u64 => try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits),
u32 => try testing.expectEqual(@as(u32, 0x04_03_02_01), br.bits),
else => unreachable,
}
var out: [16]u8 = undefined;
try br.readAll(out[0..]);
try testing.expect(br.nbits == 0);
try testing.expect(br.bits == 0);
try testing.expectEqualSlices(u8, data[0..16], &out);
}
}
test "readFixedCode" {
inline for ([_]type{ u64, u32 }) |T| {
const fixed_codes = @import("huffman_encoder.zig").fixed_codes;
var fbs = std.io.fixedBufferStream(&fixed_codes);
var rdr = bitReader(T, fbs.reader());
for (0..286) |c| {
try testing.expectEqual(c, try rdr.readFixedCode());
}
try testing.expect(rdr.nbits == 0);
}
}
test "u32 leaves no bits on u32 reads" {
const data = [_]u8{
0xff, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
};
var fbs = std.io.fixedBufferStream(&data);
var br = bitReader(u32, fbs.reader());
_ = try br.read(u3);
try testing.expectEqual(29, br.nbits);
br.alignToByte();
try testing.expectEqual(24, br.nbits);
try testing.expectEqual(0x04_03_02_01, try br.read(u32));
try testing.expectEqual(0, br.nbits);
try testing.expectEqual(0x08_07_06_05, try br.read(u32));
try testing.expectEqual(0, br.nbits);
_ = try br.read(u9);
try testing.expectEqual(23, br.nbits);
br.alignToByte();
try testing.expectEqual(16, br.nbits);
try testing.expectEqual(0x0e_0d_0c_0b, try br.read(u32));
try testing.expectEqual(0, br.nbits);
}
test "u64 need fill after alignToByte" {
const data = [_]u8{
0xff, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
};
// without fill
var fbs = std.io.fixedBufferStream(&data);
var br = bitReader(u64, fbs.reader());
_ = try br.read(u23);
try testing.expectEqual(41, br.nbits);
br.alignToByte();
try testing.expectEqual(40, br.nbits);
try testing.expectEqual(0x06_05_04_03, try br.read(u32));
try testing.expectEqual(8, br.nbits);
try testing.expectEqual(0x0a_09_08_07, try br.read(u32));
try testing.expectEqual(32, br.nbits);
// fill after align ensures all bits filled
fbs.reset();
br = bitReader(u64, fbs.reader());
_ = try br.read(u23);
try testing.expectEqual(41, br.nbits);
br.alignToByte();
try br.fill(0);
try testing.expectEqual(64, br.nbits);
try testing.expectEqual(0x06_05_04_03, try br.read(u32));
try testing.expectEqual(32, br.nbits);
try testing.expectEqual(0x0a_09_08_07, try br.read(u32));
try testing.expectEqual(0, br.nbits);
}

View File

@@ -1,99 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
/// Bit writer for use in deflate (compression).
///
/// Has an internal 64-bit bits buffer and an internal 248-byte bytes buffer.
/// When 48 bits accumulate, 6 bytes are moved to the bytes buffer. When 240
/// bytes accumulate, they are flushed to the underlying inner_writer.
///
pub fn BitWriter(comptime WriterType: type) type {
// buffer_flush_size indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
const buffer_flush_size = 240;
// buffer_size is the actual output byte buffer size.
// It must have additional headroom for a flush
// which can contain up to 8 bytes.
const buffer_size = buffer_flush_size + 8;
return struct {
inner_writer: WriterType,
// Data waiting to be written is bytes[0 .. nbytes]
// and then the low nbits of bits. Data is always written
// sequentially into the bytes array.
bits: u64 = 0,
nbits: u32 = 0, // number of bits
bytes: [buffer_size]u8 = undefined,
nbytes: u32 = 0, // number of bytes
const Self = @This();
pub const Error = WriterType.Error || error{UnfinishedBits};
pub fn init(writer: WriterType) Self {
return .{ .inner_writer = writer };
}
pub fn setWriter(self: *Self, new_writer: WriterType) void {
//assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0);
self.inner_writer = new_writer;
}
pub fn flush(self: *Self) Error!void {
var n = self.nbytes;
while (self.nbits != 0) {
self.bytes[n] = @as(u8, @truncate(self.bits));
self.bits >>= 8;
if (self.nbits > 8) { // Avoid underflow
self.nbits -= 8;
} else {
self.nbits = 0;
}
n += 1;
}
self.bits = 0;
_ = try self.inner_writer.write(self.bytes[0..n]);
self.nbytes = 0;
}
pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void {
self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
self.nbits += nb;
if (self.nbits < 48)
return;
var n = self.nbytes;
std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little);
n += 6;
if (n >= buffer_flush_size) {
_ = try self.inner_writer.write(self.bytes[0..n]);
n = 0;
}
self.nbytes = n;
self.bits >>= 48;
self.nbits -= 48;
}
pub fn writeBytes(self: *Self, bytes: []const u8) Error!void {
var n = self.nbytes;
if (self.nbits & 7 != 0) {
return error.UnfinishedBits;
}
while (self.nbits != 0) {
self.bytes[n] = @as(u8, @truncate(self.bits));
self.bits >>= 8;
self.nbits -= 8;
n += 1;
}
if (n != 0) {
_ = try self.inner_writer.write(self.bytes[0..n]);
}
self.nbytes = 0;
_ = try self.inner_writer.write(bytes);
}
};
}
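// Illustrative usage (a minimal sketch; `some_writer` is an assumed
// std.io-style writer, not defined in this file). Bits accumulate
// LSB-first and `flush` pads the final partial byte with zeros:
//
//   var bw = BitWriter(@TypeOf(some_writer)).init(some_writer);
//   try bw.writeBits(0b011, 3); // BFINAL = 1, BTYPE = 01 (fixed Huffman)
//   try bw.flush();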

View File

@@ -1,706 +0,0 @@
const std = @import("std");
const io = std.io;
const assert = std.debug.assert;
const hc = @import("huffman_encoder.zig");
const consts = @import("consts.zig").huffman;
const Token = @import("Token.zig");
const BitWriter = @import("bit_writer.zig").BitWriter;
pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) {
return BlockWriter(@TypeOf(writer)).init(writer);
}
/// Accepts a list of tokens and decides which block type to write, i.e.
/// which block type will provide the best compression. Writes the header
/// and body of the block.
///
pub fn BlockWriter(comptime WriterType: type) type {
const BitWriterType = BitWriter(WriterType);
return struct {
const codegen_order = consts.codegen_order;
const end_code_mark = 255;
const Self = @This();
pub const Error = BitWriterType.Error;
bit_writer: BitWriterType,
codegen_freq: [consts.codegen_code_count]u16 = undefined,
literal_freq: [consts.max_num_lit]u16 = undefined,
distance_freq: [consts.distance_code_count]u16 = undefined,
codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined,
literal_encoding: hc.LiteralEncoder = .{},
distance_encoding: hc.DistanceEncoder = .{},
codegen_encoding: hc.CodegenEncoder = .{},
fixed_literal_encoding: hc.LiteralEncoder,
fixed_distance_encoding: hc.DistanceEncoder,
huff_distance: hc.DistanceEncoder,
pub fn init(writer: WriterType) Self {
return .{
.bit_writer = BitWriterType.init(writer),
.fixed_literal_encoding = hc.fixedLiteralEncoder(),
.fixed_distance_encoding = hc.fixedDistanceEncoder(),
.huff_distance = hc.huffmanDistanceEncoder(),
};
}
/// Flush internal bit buffer to the writer.
/// Should be called only when the bit stream is at a byte boundary.
///
/// That is, after the final block, when the last byte may be incomplete, or
/// after a stored block, which is aligned to the byte boundary (it has
/// padding bits after the first 3 bits).
pub fn flush(self: *Self) Error!void {
try self.bit_writer.flush();
}
pub fn setWriter(self: *Self, new_writer: WriterType) void {
self.bit_writer.setWriter(new_writer);
}
fn writeCode(self: *Self, c: hc.HuffCode) Error!void {
try self.bit_writer.writeBits(c.code, c.len);
}
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and distance lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequency
// of each code is written into the codegen_freq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code end_code_mark is an end marker.
//
// num_literals: The number of literals in literal_encoding
// num_distances: The number of distances in distance_encoding
// lit_enc: The literal encoder to use
// dist_enc: The distance encoder to use
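//
// Worked example (illustrative): the concatenated code lengths
// {8, 8, 8, 8, 8, 0, 0, 0, 0, 4} produce the codegen sequence
// {8, 16, 1, 17, 1, 4}: a literal 8, then code 16 ("repeat previous
// 3-6 times") with extra value 1 (4 more copies), then code 17
// ("repeat zero 3-10 times") with extra value 1 (4 zeros), then a
// literal 4.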
fn generateCodegen(
self: *Self,
num_literals: u32,
num_distances: u32,
lit_enc: *hc.LiteralEncoder,
dist_enc: *hc.DistanceEncoder,
) void {
for (self.codegen_freq, 0..) |_, i| {
self.codegen_freq[i] = 0;
}
// Note that we are using codegen both as a temporary variable for holding
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
var codegen = &self.codegen; // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
var cgnl = codegen[0..num_literals];
for (cgnl, 0..) |_, i| {
cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
}
cgnl = codegen[num_literals .. num_literals + num_distances];
for (cgnl, 0..) |_, i| {
cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
}
codegen[num_literals + num_distances] = end_code_mark;
var size = codegen[0];
var count: i32 = 1;
var out_index: u32 = 0;
var in_index: u32 = 1;
while (size != end_code_mark) : (in_index += 1) {
// INVARIANT: We have seen "count" copies of size that have not yet
// had output generated for them.
const next_size = codegen[in_index];
if (next_size == size) {
count += 1;
continue;
}
// We need to generate codegen indicating "count" of size.
if (size != 0) {
codegen[out_index] = size;
out_index += 1;
self.codegen_freq[size] += 1;
count -= 1;
while (count >= 3) {
var n: i32 = 6;
if (n > count) {
n = count;
}
codegen[out_index] = 16;
out_index += 1;
codegen[out_index] = @as(u8, @intCast(n - 3));
out_index += 1;
self.codegen_freq[16] += 1;
count -= n;
}
} else {
while (count >= 11) {
var n: i32 = 138;
if (n > count) {
n = count;
}
codegen[out_index] = 18;
out_index += 1;
codegen[out_index] = @as(u8, @intCast(n - 11));
out_index += 1;
self.codegen_freq[18] += 1;
count -= n;
}
if (count >= 3) {
// 3 <= count <= 10
codegen[out_index] = 17;
out_index += 1;
codegen[out_index] = @as(u8, @intCast(count - 3));
out_index += 1;
self.codegen_freq[17] += 1;
count = 0;
}
}
count -= 1;
while (count >= 0) : (count -= 1) {
codegen[out_index] = size;
out_index += 1;
self.codegen_freq[size] += 1;
}
// Set up invariant for next time through the loop.
size = next_size;
count = 1;
}
// Marker indicating the end of the codegen.
codegen[out_index] = end_code_mark;
}
const DynamicSize = struct {
size: u32,
num_codegens: u32,
};
// dynamicSize returns the size of dynamically encoded data in bits.
fn dynamicSize(
self: *Self,
lit_enc: *hc.LiteralEncoder, // literal encoder
dist_enc: *hc.DistanceEncoder, // distance encoder
extra_bits: u32,
) DynamicSize {
var num_codegens = self.codegen_freq.len;
while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
num_codegens -= 1;
}
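// Header bit cost: 3 block-header bits + 5 (HLIT) + 5 (HDIST) + 4 (HCLEN)
// + 3 bits per transmitted codegen code length, plus the encoded code
// lengths themselves and the extra bits of codes 16, 17 and 18 (2, 3 and
// 7 bits respectively).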
const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
self.codegen_freq[16] * 2 +
self.codegen_freq[17] * 3 +
self.codegen_freq[18] * 7;
const size = header +
lit_enc.bitLength(&self.literal_freq) +
dist_enc.bitLength(&self.distance_freq) +
extra_bits;
return DynamicSize{
.size = @as(u32, @intCast(size)),
.num_codegens = @as(u32, @intCast(num_codegens)),
};
}
// fixedSize returns the size of fixed encoded data in bits.
fn fixedSize(self: *Self, extra_bits: u32) u32 {
return 3 +
self.fixed_literal_encoding.bitLength(&self.literal_freq) +
self.fixed_distance_encoding.bitLength(&self.distance_freq) +
extra_bits;
}
const StoredSize = struct {
size: u32,
storable: bool,
};
// storedSizeFits calculates the stored size, including header.
// The function returns the size in bits and whether the data
// fits inside a single stored block.
fn storedSizeFits(in: ?[]const u8) StoredSize {
if (in == null) {
return .{ .size = 0, .storable = false };
}
if (in.?.len <= consts.max_store_block_size) {
return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
}
return .{ .size = 0, .storable = false };
}
// Write the header of a dynamic Huffman block to the output stream.
//
// num_literals: The number of literals specified in codegen
// num_distances: The number of distances specified in codegen
// num_codegens: The number of codegens used in codegen
// eof: Is it the end-of-file? (end of stream)
fn dynamicHeader(
self: *Self,
num_literals: u32,
num_distances: u32,
num_codegens: u32,
eof: bool,
) Error!void {
const first_bits: u32 = if (eof) 5 else 4;
try self.bit_writer.writeBits(first_bits, 3);
try self.bit_writer.writeBits(num_literals - 257, 5);
try self.bit_writer.writeBits(num_distances - 1, 5);
try self.bit_writer.writeBits(num_codegens - 4, 4);
var i: u32 = 0;
while (i < num_codegens) : (i += 1) {
const value = self.codegen_encoding.codes[codegen_order[i]].len;
try self.bit_writer.writeBits(value, 3);
}
i = 0;
while (true) {
const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
i += 1;
if (code_word == end_code_mark) {
break;
}
try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
switch (code_word) {
16 => {
try self.bit_writer.writeBits(self.codegen[i], 2);
i += 1;
},
17 => {
try self.bit_writer.writeBits(self.codegen[i], 3);
i += 1;
},
18 => {
try self.bit_writer.writeBits(self.codegen[i], 7);
i += 1;
},
else => {},
}
}
}
fn storedHeader(self: *Self, length: usize, eof: bool) Error!void {
assert(length <= 65535);
const flag: u32 = if (eof) 1 else 0;
try self.bit_writer.writeBits(flag, 3);
try self.flush();
const l: u16 = @intCast(length);
try self.bit_writer.writeBits(l, 16);
try self.bit_writer.writeBits(~l, 16);
}
fn fixedHeader(self: *Self, eof: bool) Error!void {
// Indicate that we are a fixed Huffman block
var value: u32 = 2;
if (eof) {
value = 3;
}
try self.bit_writer.writeBits(value, 3);
}
// Write a block of tokens with the smallest encoding. Will choose block type.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is null, the tokens will always be Huffman encoded.
pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void {
const lit_and_dist = self.indexTokens(tokens);
const num_literals = lit_and_dist.num_literals;
const num_distances = lit_and_dist.num_distances;
var extra_bits: u32 = 0;
const ret = storedSizeFits(input);
const stored_size = ret.size;
const storable = ret.storable;
if (storable) {
// We only bother calculating the costs of the extra bits required by
// the length of distance fields (which will be the same for both fixed
// and dynamic encoding), if we need to compare those two encodings
// against stored encoding.
var length_code: u16 = Token.length_codes_start + 8;
while (length_code < num_literals) : (length_code += 1) {
// First eight length codes have extra size = 0.
extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
@as(u32, @intCast(Token.lengthExtraBits(length_code)));
}
var distance_code: u16 = 4;
while (distance_code < num_distances) : (distance_code += 1) {
// First four distance codes have extra size = 0.
extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
@as(u32, @intCast(Token.distanceExtraBits(distance_code)));
}
}
// Figure out smallest code.
// Fixed Huffman baseline.
var literal_encoding = &self.fixed_literal_encoding;
var distance_encoding = &self.fixed_distance_encoding;
var size = self.fixedSize(extra_bits);
// Dynamic Huffman?
var num_codegens: u32 = 0;
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literal_encoding and the distance_encoding.
self.generateCodegen(
num_literals,
num_distances,
&self.literal_encoding,
&self.distance_encoding,
);
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
const dynamic_size = self.dynamicSize(
&self.literal_encoding,
&self.distance_encoding,
extra_bits,
);
const dyn_size = dynamic_size.size;
num_codegens = dynamic_size.num_codegens;
if (dyn_size < size) {
size = dyn_size;
literal_encoding = &self.literal_encoding;
distance_encoding = &self.distance_encoding;
}
// Stored bytes?
if (storable and stored_size < size) {
try self.storedBlock(input.?, eof);
return;
}
// Huffman.
if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
try self.fixedHeader(eof);
} else {
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
}
// Write the tokens.
try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
}
pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void {
try self.storedHeader(input.len, eof);
try self.bit_writer.writeBytes(input);
}
// dynamicBlock encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
fn dynamicBlock(
self: *Self,
tokens: []const Token,
eof: bool,
input: ?[]const u8,
) Error!void {
const total_tokens = self.indexTokens(tokens);
const num_literals = total_tokens.num_literals;
const num_distances = total_tokens.num_distances;
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literal_encoding and the distance_encoding.
self.generateCodegen(
num_literals,
num_distances,
&self.literal_encoding,
&self.distance_encoding,
);
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
const size = dynamic_size.size;
const num_codegens = dynamic_size.num_codegens;
// Store bytes, if we don't get a reasonable improvement.
const stored_size = storedSizeFits(input);
const ssize = stored_size.size;
const storable = stored_size.storable;
if (storable and ssize < (size + (size >> 4))) {
try self.storedBlock(input.?, eof);
return;
}
// Write Huffman table.
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
// Write the tokens.
try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
}
const TotalIndexedTokens = struct {
num_literals: u32,
num_distances: u32,
};
// Indexes a slice of tokens followed by an end_block_marker, and updates
// literal_freq and distance_freq, and generates literal_encoding
// and distance_encoding.
// The number of literal and distance tokens is returned.
fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens {
var num_literals: u32 = 0;
var num_distances: u32 = 0;
for (self.literal_freq, 0..) |_, i| {
self.literal_freq[i] = 0;
}
for (self.distance_freq, 0..) |_, i| {
self.distance_freq[i] = 0;
}
for (tokens) |t| {
if (t.kind == Token.Kind.literal) {
self.literal_freq[t.literal()] += 1;
continue;
}
self.literal_freq[t.lengthCode()] += 1;
self.distance_freq[t.distanceCode()] += 1;
}
// add end_block_marker token at the end
self.literal_freq[consts.end_block_marker] += 1;
// get the number of literals
num_literals = @as(u32, @intCast(self.literal_freq.len));
while (self.literal_freq[num_literals - 1] == 0) {
num_literals -= 1;
}
// get the number of distances
num_distances = @as(u32, @intCast(self.distance_freq.len));
while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
num_distances -= 1;
}
if (num_distances == 0) {
// We haven't found a single match. If we want to go with the dynamic encoding,
// we should count at least one distance to be sure that the distance huffman tree could be encoded.
self.distance_freq[0] = 1;
num_distances = 1;
}
self.literal_encoding.generate(&self.literal_freq, 15);
self.distance_encoding.generate(&self.distance_freq, 15);
return TotalIndexedTokens{
.num_literals = num_literals,
.num_distances = num_distances,
};
}
// Writes a slice of tokens to the output followed by an end_block_marker.
// codes for literal and distance encoding must be supplied.
fn writeTokens(
self: *Self,
tokens: []const Token,
le_codes: []hc.HuffCode,
oe_codes: []hc.HuffCode,
) Error!void {
for (tokens) |t| {
if (t.kind == Token.Kind.literal) {
try self.writeCode(le_codes[t.literal()]);
continue;
}
// Write the length
const le = t.lengthEncoding();
try self.writeCode(le_codes[le.code]);
if (le.extra_bits > 0) {
try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
}
// Write the distance
const oe = t.distanceEncoding();
try self.writeCode(oe_codes[oe.code]);
if (oe.extra_bits > 0) {
try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
}
}
// add end_block_marker at the end
try self.writeCode(le_codes[consts.end_block_marker]);
}
// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
// if the result gains very little from compression.
pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void {
// Add everything as literals
histogram(input, &self.literal_freq);
self.literal_freq[consts.end_block_marker] = 1;
const num_literals = consts.end_block_marker + 1;
self.distance_freq[0] = 1;
const num_distances = 1;
self.literal_encoding.generate(&self.literal_freq, 15);
// Figure out smallest code.
// Always use dynamic Huffman or Store
var num_codegens: u32 = 0;
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literal_encoding and the distance_encoding.
self.generateCodegen(
num_literals,
num_distances,
&self.literal_encoding,
&self.huff_distance,
);
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
const size = dynamic_size.size;
num_codegens = dynamic_size.num_codegens;
// Store bytes, if we don't get a reasonable improvement.
const stored_size_ret = storedSizeFits(input);
const ssize = stored_size_ret.size;
const storable = stored_size_ret.storable;
if (storable and ssize < (size + (size >> 4))) {
try self.storedBlock(input, eof);
return;
}
// Huffman.
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
const encoding = self.literal_encoding.codes[0..257];
for (input) |t| {
const c = encoding[t];
try self.bit_writer.writeBits(c.code, c.len);
}
try self.writeCode(encoding[consts.end_block_marker]);
}
// histogram accumulates a histogram of b in h.
fn histogram(b: []const u8, h: *[286]u16) void {
// Clear histogram
for (h, 0..) |_, i| {
h[i] = 0;
}
var lh = h.*[0..256];
for (b) |t| {
lh[t] += 1;
}
}
};
}
// tests
const expect = std.testing.expect;
const fmt = std.fmt;
const testing = std.testing;
const ArrayList = std.ArrayList;
const TestCase = @import("testdata/block_writer.zig").TestCase;
const testCases = @import("testdata/block_writer.zig").testCases;
// tests if the writeBlock encoding has changed.
test "write" {
inline for (0..testCases.len) |i| {
try testBlock(testCases[i], .write_block);
}
}
// tests if the writeBlockDynamic encoding has changed.
test "dynamicBlock" {
inline for (0..testCases.len) |i| {
try testBlock(testCases[i], .write_dyn_block);
}
}
test "huffmanBlock" {
inline for (0..testCases.len) |i| {
try testBlock(testCases[i], .write_huffman_block);
}
try testBlock(.{
.tokens = &[_]Token{},
.input = "huffman-rand-max.input",
.want = "huffman-rand-max.{s}.expect",
}, .write_huffman_block);
}
const TestFn = enum {
write_block,
write_dyn_block, // write dynamic block
write_huffman_block,
fn to_s(self: TestFn) []const u8 {
return switch (self) {
.write_block => "wb",
.write_dyn_block => "dyn",
.write_huffman_block => "huff",
};
}
fn write(
comptime self: TestFn,
bw: anytype,
tok: []const Token,
input: ?[]const u8,
final: bool,
) !void {
switch (self) {
.write_block => try bw.write(tok, final, input),
.write_dyn_block => try bw.dynamicBlock(tok, final, input),
.write_huffman_block => try bw.huffmanBlock(input.?, final),
}
try bw.flush();
}
};
// testBlock tests a block against its references
//
// size
// 64K [file-name].input - input non compressed file
// 8.1K [file-name].golden -
// 78 [file-name].dyn.expect - output with writeBlockDynamic
// 78 [file-name].wb.expect - output with writeBlock
// 8.1K [file-name].huff.expect - output with writeBlockHuff
// 78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null
// 78 [file-name].wb.expect-noinput - output with writeBlock when input is null
//
// wb - writeBlock
// dyn - writeBlockDynamic
// huff - writeBlockHuff
//
fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void {
if (tc.input.len != 0 and tc.want.len != 0) {
const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()});
const input = @embedFile("testdata/block_writer/" ++ tc.input);
const want = @embedFile("testdata/block_writer/" ++ want_name);
try testWriteBlock(tfn, input, want, tc.tokens);
}
if (tfn == .write_huffman_block) {
return;
}
const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()});
const want = @embedFile("testdata/block_writer/" ++ want_name_no_input);
try testWriteBlock(tfn, null, want, tc.tokens);
}
// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output.
fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
var buf = ArrayList(u8).init(testing.allocator);
var bw = blockWriter(buf.writer());
try tfn.write(&bw, tokens, input, false);
var got = buf.items;
try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set
//
// Test if the writer produces the same output after reset.
buf.deinit();
buf = ArrayList(u8).init(testing.allocator);
defer buf.deinit();
bw.setWriter(buf.writer());
try tfn.write(&bw, tokens, input, true);
try bw.flush();
got = buf.items;
try expect(got[0] & 1 == 1); // bfinal is set
buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can compare with the expected slice
try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
}

View File

@@ -1,49 +0,0 @@
pub const deflate = struct {
// Number of tokens to accumulate in deflate before starting block encoding.
//
// In zlib this depends on memlevel: 6 + memlevel, where default memlevel is
// 8 and max 9 that gives 14 or 15 bits.
pub const tokens = 1 << 15;
};
pub const match = struct {
pub const base_length = 3; // smallest match length per the RFC section 3.2.5
pub const min_length = 4; // min length used in this algorithm
pub const max_length = 258;
pub const min_distance = 1;
pub const max_distance = 32768;
};
pub const history = struct {
pub const len = match.max_distance;
};
pub const lookup = struct {
pub const bits = 15;
pub const len = 1 << bits;
pub const shift = 32 - bits;
};
pub const huffman = struct {
// The odd order in which the codegen code sizes are written.
pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
// The number of codegen codes.
pub const codegen_code_count = 19;
// The number of distance codes.
pub const distance_code_count = 30;
// Maximum number of literals.
pub const max_num_lit = 286;
// Max number of frequencies used for a Huffman Code
// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
// The largest of these is max_num_lit.
pub const max_num_frequencies = max_num_lit;
// Biggest block size for uncompressed block.
pub const max_store_block_size = 65535;
// The special code used to mark the end of a block.
pub const end_block_marker = 256;
};

View File

@@ -1,208 +0,0 @@
//! Container of the deflate bit stream body. A container adds a header
//! before the deflate bit stream and a footer after it. It can be gzip,
//! zlib or raw (no header, no footer, just the raw bit stream).
//!
//! The zlib format is defined in RFC 1950. The header has 2 bytes and the
//! footer carries a 4-byte Adler-32 checksum.
//!
//! The gzip format is defined in RFC 1952. The header has 10+ bytes and the
//! footer carries a 4-byte CRC32 checksum and 4 bytes of uncompressed data
//! length.
//!
//! rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
//! rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
//!
const std = @import("std");
pub const Container = enum {
raw, // no header or footer
gzip, // gzip header and footer
zlib, // zlib header and footer
pub fn size(w: Container) usize {
return headerSize(w) + footerSize(w);
}
pub fn headerSize(w: Container) usize {
return switch (w) {
.gzip => 10,
.zlib => 2,
.raw => 0,
};
}
pub fn footerSize(w: Container) usize {
return switch (w) {
.gzip => 8,
.zlib => 4,
.raw => 0,
};
}
pub const list = [_]Container{ .raw, .gzip, .zlib };
pub const Error = error{
BadGzipHeader,
BadZlibHeader,
WrongGzipChecksum,
WrongGzipSize,
WrongZlibChecksum,
};
pub fn writeHeader(comptime wrap: Container, writer: anytype) !void {
switch (wrap) {
.gzip => {
// GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
// - ID1 (IDentification 1), always 0x1f
// - ID2 (IDentification 2), always 0x8b
// - CM (Compression Method), always 8 = deflate
// - FLG (Flags), all set to 0
// - 4 bytes, MTIME (Modification time), not used, all set to zero
// - XFL (eXtra FLags), all set to zero
// - OS (Operating System), 03 = Unix
const gzipHeader = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 };
try writer.writeAll(&gzipHeader);
},
.zlib => {
// ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
// 1st byte:
// - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
// - The next four bits is the CM (compression method), which is 8 for deflate.
// 2nd byte:
// - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
// - The next bit, FDICT, is set if a dictionary is given.
// - The final five FCHECK bits form a mod-31 checksum.
//
// CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
const zlibHeader = [_]u8{ 0x78, 0b10_0_11100 };
try writer.writeAll(&zlibHeader);
},
.raw => {},
}
}
pub fn writeFooter(comptime wrap: Container, hasher: *Hasher(wrap), writer: anytype) !void {
var bits: [4]u8 = undefined;
switch (wrap) {
.gzip => {
// GZIP 8 bytes footer
// - 4 bytes, CRC32 (CRC-32)
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
std.mem.writeInt(u32, &bits, hasher.chksum(), .little);
try writer.writeAll(&bits);
std.mem.writeInt(u32, &bits, hasher.bytesRead(), .little);
try writer.writeAll(&bits);
},
.zlib => {
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
// 4 bytes of ADLER32 (Adler-32 checksum)
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to Adler-32
// algorithm.
std.mem.writeInt(u32, &bits, hasher.chksum(), .big);
try writer.writeAll(&bits);
},
.raw => {},
}
}
pub fn parseHeader(comptime wrap: Container, reader: anytype) !void {
switch (wrap) {
.gzip => try parseGzipHeader(reader),
.zlib => try parseZlibHeader(reader),
.raw => {},
}
}
fn parseGzipHeader(reader: anytype) !void {
const magic1 = try reader.read(u8);
const magic2 = try reader.read(u8);
const method = try reader.read(u8);
const flags = try reader.read(u8);
try reader.skipBytes(6); // mtime(4), xflags, os
if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08)
return error.BadGzipHeader;
// Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5
if (flags != 0) {
if (flags & 0b0000_0100 != 0) { // FEXTRA
const extra_len = try reader.read(u16);
try reader.skipBytes(extra_len);
}
if (flags & 0b0000_1000 != 0) { // FNAME
try reader.skipStringZ();
}
if (flags & 0b0001_0000 != 0) { // FCOMMENT
try reader.skipStringZ();
}
if (flags & 0b0000_0010 != 0) { // FHCRC
try reader.skipBytes(2);
}
}
}
fn parseZlibHeader(reader: anytype) !void {
const cm = try reader.read(u4);
const cinfo = try reader.read(u4);
_ = try reader.read(u8);
if (cm != 8 or cinfo > 7) {
return error.BadZlibHeader;
}
}
pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void {
switch (wrap) {
.gzip => {
try reader.fill(0);
if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum;
if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize;
},
.zlib => {
const chksum: u32 = @byteSwap(hasher.chksum());
if (try reader.read(u32) != chksum) return error.WrongZlibChecksum;
},
.raw => {},
}
}
pub fn Hasher(comptime wrap: Container) type {
const HasherType = switch (wrap) {
.gzip => std.hash.Crc32,
.zlib => std.hash.Adler32,
.raw => struct {
pub fn init() @This() {
return .{};
}
},
};
return struct {
hasher: HasherType = HasherType.init(),
bytes: usize = 0,
const Self = @This();
pub fn update(self: *Self, buf: []const u8) void {
switch (wrap) {
.raw => {},
else => {
self.hasher.update(buf);
self.bytes += buf.len;
},
}
}
pub fn chksum(self: *Self) u32 {
switch (wrap) {
.raw => return 0,
else => return self.hasher.final(),
}
}
pub fn bytesRead(self: *Self) u32 {
return @truncate(self.bytes);
}
};
}
};

View File

@@ -1,744 +0,0 @@
const std = @import("std");
const io = std.io;
const assert = std.debug.assert;
const testing = std.testing;
const expect = testing.expect;
const print = std.debug.print;
const Token = @import("Token.zig");
const consts = @import("consts.zig");
const BlockWriter = @import("block_writer.zig").BlockWriter;
const Container = @import("container.zig").Container;
const SlidingWindow = @import("SlidingWindow.zig");
const Lookup = @import("Lookup.zig");
pub const Options = struct {
level: Level = .default,
};
/// Trades between speed and compression size.
/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
/// levels 1-3 use a different, faster algorithm with less compression.
/// That is not implemented here.
pub const Level = enum(u4) {
// zig fmt: off
fast = 0xb, level_4 = 4,
level_5 = 5,
default = 0xc, level_6 = 6,
level_7 = 7,
level_8 = 8,
best = 0xd, level_9 = 9,
// zig fmt: on
};
/// Algorithm knobs for each level.
const LevelArgs = struct {
good: u16, // Do fewer lookups if we already have a match of this length.
nice: u16, // Stop looking for a better match once we find one at least this long.
lazy: u16, // Skip lazy match finding if we already have a match at least this long.
chain: u16, // How many previous-match lookups to perform.
pub fn get(level: Level) LevelArgs {
// zig fmt: off
return switch (level) {
.fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 },
.level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 },
.default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
.level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 },
.level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
.best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
};
// zig fmt: on
}
};
/// Compress plain data from reader into compressed stream written to writer.
pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void {
var c = try compressor(container, writer, options);
try c.compress(reader);
try c.finish();
}
/// Create compressor for writer type.
pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor(
container,
@TypeOf(writer),
) {
return try Compressor(container, @TypeOf(writer)).init(writer, options);
}
/// Compressor type.
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
const TokenWriterType = BlockWriter(WriterType);
return Deflate(container, WriterType, TokenWriterType);
}
/// Default compression algorithm. Has two steps: tokenization and token
/// encoding.
///
/// Tokenization takes an uncompressed input stream and produces a list of
/// tokens. Each token can be a literal (byte of data) or a match
/// (back-reference to previous data with length and distance). Tokenization
/// accumulates 32K tokens; when full, or when `flush` is called, the tokens
/// are passed to the `block_writer`. Level defines how hard (how slowly) it
/// tries to find a match.
///
/// Block writer will decide which type of deflate block to write (stored, fixed,
/// dynamic) and encode tokens to the output byte stream. Client has to call
/// `finish` to write block with the final bit set.
///
/// Container defines the type of header and footer, which can be gzip, zlib
/// or raw. They all share the same deflate body; raw has no header or
/// footer, just the deflate body.
///
/// Compression algorithm explained in rfc-1951 (slightly edited for this case):
///
/// The compressor uses a chained hash table `lookup` to find duplicated
/// strings, using a hash function that operates on 4-byte sequences. At any
/// given point during compression, let XYZW be the next 4 input bytes
/// (lookahead) to be examined (not necessarily all different, of course).
/// First, the compressor examines the hash chain for XYZW. If the chain is
/// empty, the compressor simply writes out X as a literal byte and advances
/// one byte in the input. If the hash chain is not empty, indicating that the
/// sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
/// hash function value) has occurred recently, the compressor compares all
/// strings on the XYZW hash chain with the actual input data sequence
/// starting at the current point, and selects the longest match.
///
/// To improve overall compression, the compressor defers the selection of
/// matches ("lazy matching"): after a match of length N has been found, the
/// compressor searches for a longer match starting at the next input byte. If
/// it finds a longer match, it truncates the previous match to a length of
/// one (thus producing a single literal byte) and then emits the longer
/// match. Otherwise, it emits the original match, and, as described above,
/// advances N bytes before continuing.
///
///
/// Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
///
/// The Deflate function accepts a BlockWriterType so we can swap it in
/// tests to exercise just the tokenization part.
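///
/// For example (from the "tokenization" test below), the input
/// "Blah blah blah blah blah!" tokenizes to the literals 'B', 'l', 'a',
/// 'h', ' ', 'b' followed by a single match (length 18, distance 5) and
/// the final literal '!'.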
///
fn Deflate(comptime container: Container, comptime WriterType: type, comptime BlockWriterType: type) type {
return struct {
lookup: Lookup = .{},
win: SlidingWindow = .{},
tokens: Tokens = .{},
wrt: WriterType,
block_writer: BlockWriterType,
level: LevelArgs,
hasher: container.Hasher() = .{},
// Match and literal at the previous position.
// Used for lazy match finding in processWindow.
prev_match: ?Token = null,
prev_literal: ?u8 = null,
const Self = @This();
pub fn init(wrt: WriterType, options: Options) !Self {
const self = Self{
.wrt = wrt,
.block_writer = BlockWriterType.init(wrt),
.level = LevelArgs.get(options.level),
};
try container.writeHeader(self.wrt);
return self;
}
const FlushOption = enum { none, flush, final };
// Process data in the window and create tokens. If the token buffer is
// full, flush tokens to the token writer. With the `flush` or `final`
// option it will process all data from the window; with `none` it will
// preserve some data for the next match.
fn tokenize(self: *Self, flush_opt: FlushOption) !void {
// flush - process all data from window
const should_flush = (flush_opt != .none);
// While there is data in active lookahead buffer.
while (self.win.activeLookahead(should_flush)) |lh| {
var step: u16 = 1; // 1 in the case of literal, match length otherwise
const pos: u16 = self.win.pos();
const literal = lh[0]; // literal at current position
const min_len: u16 = if (self.prev_match) |m| m.length() else 0;
// Try to find match at least min_len long.
if (self.findMatch(pos, lh, min_len)) |match| {
// Found better match than previous.
try self.addPrevLiteral();
// Is found match length good enough?
if (match.length() >= self.level.lazy) {
// Don't try to lazy find better match, use this.
step = try self.addMatch(match);
} else {
// Store this match.
self.prev_literal = literal;
self.prev_match = match;
}
} else {
// There is no better match at the current position than the previous one.
// Write the previous match or literal.
if (self.prev_match) |m| {
// Write match from previous position.
step = try self.addMatch(m) - 1; // we already advanced 1 from previous position
} else {
// No match at previous position.
// Write previous literal if any, and remember this literal.
try self.addPrevLiteral();
self.prev_literal = literal;
}
}
// Advance window and add hashes.
self.windowAdvance(step, lh, pos);
}
if (should_flush) {
// In the case of flushing, the last few lookahead buffers were smaller than the min match length,
// so only the last literal can be unwritten.
assert(self.prev_match == null);
try self.addPrevLiteral();
self.prev_literal = null;
try self.flushTokens(flush_opt);
}
}
fn windowAdvance(self: *Self, step: u16, lh: []const u8, pos: u16) void {
// current position is already added in findMatch
self.lookup.bulkAdd(lh[1..], step - 1, pos + 1);
self.win.advance(step);
}
// Add previous literal (if any) to the tokens list.
fn addPrevLiteral(self: *Self) !void {
if (self.prev_literal) |l| try self.addToken(Token.initLiteral(l));
}
// Add match to the tokens list, reset prev pointers.
// Returns length of the added match.
fn addMatch(self: *Self, m: Token) !u16 {
try self.addToken(m);
self.prev_literal = null;
self.prev_match = null;
return m.length();
}
fn addToken(self: *Self, token: Token) !void {
self.tokens.add(token);
if (self.tokens.full()) try self.flushTokens(.none);
}
// Finds largest match in the history window with the data at current pos.
fn findMatch(self: *Self, pos: u16, lh: []const u8, min_len: u16) ?Token {
var len: u16 = min_len;
// Previous location with the same hash (same 4 bytes).
var prev_pos = self.lookup.add(lh, pos);
// Last found match.
var match: ?Token = null;
// How many back-references to try; performance knob.
var chain: usize = self.level.chain;
if (len >= self.level.good) {
// If we've got a match that's good enough, only look in 1/4 the chain.
chain >>= 2;
}
// Hot path loop!
while (prev_pos > 0 and chain > 0) : (chain -= 1) {
const distance = pos - prev_pos;
if (distance > consts.match.max_distance)
break;
const new_len = self.win.match(prev_pos, pos, len);
if (new_len > len) {
match = Token.initMatch(@intCast(distance), new_len);
if (new_len >= self.level.nice) {
// The match is good enough that we don't try to find a better one.
return match;
}
len = new_len;
}
prev_pos = self.lookup.prev(prev_pos);
}
return match;
}
fn flushTokens(self: *Self, flush_opt: FlushOption) !void {
// Pass tokens to the token writer
try self.block_writer.write(self.tokens.tokens(), flush_opt == .final, self.win.tokensBuffer());
// Stored block ensures byte alignment.
// It has 3 bits (final, block_type) and then padding until byte boundary.
// After that everything is aligned to the boundary in the stored block.
// Empty stored block is 0b000 + (0-7) bits of padding + 0x00 0x00 0xFF 0xFF.
// Last 4 bytes are byte aligned.
if (flush_opt == .flush) {
try self.block_writer.storedBlock("", false);
}
if (flush_opt != .none) {
// Safe to call only when byte aligned or it is OK to add
// padding bits (on last byte of the final block).
try self.block_writer.flush();
}
// Reset internal tokens store.
self.tokens.reset();
// Notify win that tokens are flushed.
self.win.flush();
}
// Slide win and if needed lookup tables.
fn slide(self: *Self) void {
const n = self.win.slide();
self.lookup.slide(n);
}
/// Compresses as much data as possible, stopping when the reader becomes
/// empty. It will introduce some output latency (reading input without
/// producing all output) because some data is still in internal
/// buffers.
///
/// It is up to the caller to call flush (if needed) or finish (required)
/// when pending data needs to be output or the stream completed.
///
pub fn compress(self: *Self, reader: anytype) !void {
while (true) {
// Fill window from reader
const buf = self.win.writable();
if (buf.len == 0) {
try self.tokenize(.none);
self.slide();
continue;
}
const n = try reader.readAll(buf);
self.hasher.update(buf[0..n]);
self.win.written(n);
// Process window
try self.tokenize(.none);
// Exit when no more data in reader
if (n < buf.len) break;
}
}
/// Flushes internal buffers to the output writer. Outputs an empty stored
/// block to sync the bit stream to the byte boundary, so that the
/// decompressor can get all input data available so far.
///
/// It is useful mainly in compressed network protocols, to ensure that the
/// deflate bit stream can be used as a byte stream. May degrade
/// compression, so it should be used only when necessary.
///
/// Completes the current deflate block and follows it with an empty
/// stored block that is three zero bits plus filler bits to the next
/// byte, followed by four bytes (00 00 ff ff).
///
pub fn flush(self: *Self) !void {
try self.tokenize(.flush);
}
/// Completes the deflate bit stream by writing any pending data as the
/// final deflate block. HAS to be called once all data is written to
/// the compressor, as a signal that the last block has to have the
/// final bit set.
///
pub fn finish(self: *Self) !void {
try self.tokenize(.final);
try container.writeFooter(&self.hasher, self.wrt);
}
/// Use another writer while preserving history. Most likely, flush
/// should be called on the old writer before setting a new one.
pub fn setWriter(self: *Self, new_writer: WriterType) void {
self.block_writer.setWriter(new_writer);
self.wrt = new_writer;
}
// Writer interface
pub const Writer = io.GenericWriter(*Self, Error, write);
pub const Error = BlockWriterType.Error;
/// Write `input` of uncompressed data.
/// See compress.
pub fn write(self: *Self, input: []const u8) !usize {
var fbs = io.fixedBufferStream(input);
try self.compress(fbs.reader());
return input.len;
}
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
};
}
// Tokens store
const Tokens = struct {
list: [consts.deflate.tokens]Token = undefined,
pos: usize = 0,
fn add(self: *Tokens, t: Token) void {
self.list[self.pos] = t;
self.pos += 1;
}
fn full(self: *Tokens) bool {
return self.pos == self.list.len;
}
fn reset(self: *Tokens) void {
self.pos = 0;
}
fn tokens(self: *Tokens) []const Token {
return self.list[0..self.pos];
}
};
/// Creates Huffman-only deflate blocks. Disables Lempel-Ziv match searching
/// and performs only Huffman entropy encoding. Results in faster compression
/// and much lower memory requirements during compression, but bigger
/// compressed sizes.
pub const huffman = struct {
pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
var c = try huffman.compressor(container, writer);
try c.compress(reader);
try c.finish();
}
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
return SimpleCompressor(.huffman, container, WriterType);
}
pub fn compressor(comptime container: Container, writer: anytype) !huffman.Compressor(container, @TypeOf(writer)) {
return try huffman.Compressor(container, @TypeOf(writer)).init(writer);
}
};
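// Illustrative usage (a minimal sketch; `allocator` is an assumed
// std.mem.Allocator, not defined in this file):
//
//   var in = std.io.fixedBufferStream("data to entropy-encode");
//   var out = std.ArrayList(u8).init(allocator);
//   defer out.deinit();
//   try huffman.compress(.gzip, in.reader(), out.writer());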
/// Creates stored blocks only. Data is not compressed, only packed into
/// deflate stored blocks. That adds 5 bytes of header for each block. Max
/// stored block size is 64K. A block is emitted when flush or finish is
/// called.
pub const store = struct {
pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
var c = try store.compressor(container, writer);
try c.compress(reader);
try c.finish();
}
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
return SimpleCompressor(.store, container, WriterType);
}
pub fn compressor(comptime container: Container, writer: anytype) !store.Compressor(container, @TypeOf(writer)) {
return try store.Compressor(container, @TypeOf(writer)).init(writer);
}
};
const SimpleCompressorKind = enum {
huffman,
store,
};
fn simpleCompressor(
comptime kind: SimpleCompressorKind,
comptime container: Container,
writer: anytype,
) !SimpleCompressor(kind, container, @TypeOf(writer)) {
return try SimpleCompressor(kind, container, @TypeOf(writer)).init(writer);
}
fn SimpleCompressor(
comptime kind: SimpleCompressorKind,
comptime container: Container,
comptime WriterType: type,
) type {
const BlockWriterType = BlockWriter(WriterType);
return struct {
buffer: [65535]u8 = undefined, // because store blocks are limited to 65535 bytes
wp: usize = 0,
wrt: WriterType,
block_writer: BlockWriterType,
hasher: container.Hasher() = .{},
const Self = @This();
pub fn init(wrt: WriterType) !Self {
const self = Self{
.wrt = wrt,
.block_writer = BlockWriterType.init(wrt),
};
try container.writeHeader(self.wrt);
return self;
}
pub fn flush(self: *Self) !void {
try self.flushBuffer(false);
try self.block_writer.storedBlock("", false);
try self.block_writer.flush();
}
pub fn finish(self: *Self) !void {
try self.flushBuffer(true);
try self.block_writer.flush();
try container.writeFooter(&self.hasher, self.wrt);
}
fn flushBuffer(self: *Self, final: bool) !void {
const buf = self.buffer[0..self.wp];
switch (kind) {
.huffman => try self.block_writer.huffmanBlock(buf, final),
.store => try self.block_writer.storedBlock(buf, final),
}
self.wp = 0;
}
// Reads all data from the input reader of uncompressed data.
// It is up to the caller to call flush or finish when compressed
// blocks need to be output.
pub fn compress(self: *Self, reader: anytype) !void {
while (true) {
// read from rdr into buffer
const buf = self.buffer[self.wp..];
if (buf.len == 0) {
try self.flushBuffer(false);
continue;
}
const n = try reader.readAll(buf);
self.hasher.update(buf[0..n]);
self.wp += n;
if (n < buf.len) break; // no more data in reader
}
}
// Writer interface
pub const Writer = io.GenericWriter(*Self, Error, write);
pub const Error = BlockWriterType.Error;
// Write `input` of uncompressed data.
pub fn write(self: *Self, input: []const u8) !usize {
var fbs = io.fixedBufferStream(input);
try self.compress(fbs.reader());
return input.len;
}
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
};
}
const builtin = @import("builtin");
test "tokenization" {
const L = Token.initLiteral;
const M = Token.initMatch;
const cases = [_]struct {
data: []const u8,
tokens: []const Token,
}{
.{
.data = "Blah blah blah blah blah!",
.tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') },
},
.{
.data = "ABCDEABCD ABCDEABCD",
.tokens = &[_]Token{
L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '),
L('A'), M(10, 8),
},
},
};
for (cases) |c| {
inline for (Container.list) |container| { // for each wrapping
var cw = io.countingWriter(io.null_writer);
const cww = cw.writer();
var df = try Deflate(container, @TypeOf(cww), TestTokenWriter).init(cww, .{});
_ = try df.write(c.data);
try df.flush();
// df.token_writer.show();
try expect(df.block_writer.pos == c.tokens.len); // number of tokens written
try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match
try testing.expectEqual(container.headerSize(), cw.bytes_written);
try df.finish();
try testing.expectEqual(container.size(), cw.bytes_written);
}
}
}
// Tests that tokens written are equal to expected token list.
const TestTokenWriter = struct {
const Self = @This();
pos: usize = 0,
actual: [128]Token = undefined,
pub fn init(_: anytype) Self {
return .{};
}
pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
for (tokens) |t| {
self.actual[self.pos] = t;
self.pos += 1;
}
}
pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
pub fn get(self: *Self) []Token {
return self.actual[0..self.pos];
}
pub fn show(self: *Self) void {
print("\n", .{});
for (self.get()) |t| {
t.show();
}
}
pub fn flush(_: *Self) !void {}
};
test "file tokenization" {
const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
const cases = [_]struct {
data: []const u8, // uncompressed content
// expected number of tokens produced in deflate tokenization
tokens_count: [levels.len]usize = .{0} ** levels.len,
}{
.{
.data = @embedFile("testdata/rfc1951.txt"),
.tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 },
},
.{
.data = @embedFile("testdata/block_writer/huffman-null-max.input"),
.tokens_count = .{ 257, 257, 257, 257, 257, 257 },
},
.{
.data = @embedFile("testdata/block_writer/huffman-pi.input"),
.tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 },
},
.{
.data = @embedFile("testdata/block_writer/huffman-text.input"),
.tokens_count = .{ 235, 234, 234, 234, 234, 234 },
},
.{
.data = @embedFile("testdata/fuzz/roundtrip1.input"),
.tokens_count = .{ 333, 331, 331, 331, 331, 331 },
},
.{
.data = @embedFile("testdata/fuzz/roundtrip2.input"),
.tokens_count = .{ 334, 334, 334, 334, 334, 334 },
},
};
for (cases) |case| { // for each case
const data = case.data;
for (levels, 0..) |level, i| { // for each compression level
var original = io.fixedBufferStream(data);
// buffer for decompressed data
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
const writer = al.writer();
// create compressor
const WriterType = @TypeOf(writer);
const TokenWriter = TokenDecoder(@TypeOf(writer));
var cmp = try Deflate(.raw, WriterType, TokenWriter).init(writer, .{ .level = level });
// Stream uncompressed `original` data to the compressor. It will
// produce a token list and pass that list to the TokenDecoder. This
// TokenDecoder uses CircularBuffer from inflate to convert the list of
// tokens back to the uncompressed stream.
try cmp.compress(original.reader());
try cmp.flush();
const expected_count = case.tokens_count[i];
const actual = cmp.block_writer.tokens_count;
if (expected_count == 0) {
print("actual token count {d}\n", .{actual});
} else {
try testing.expectEqual(expected_count, actual);
}
try testing.expectEqual(data.len, al.items.len);
try testing.expectEqualSlices(u8, data, al.items);
}
}
}
fn TokenDecoder(comptime WriterType: type) type {
return struct {
const CircularBuffer = @import("CircularBuffer.zig");
hist: CircularBuffer = .{},
wrt: WriterType,
tokens_count: usize = 0,
const Self = @This();
pub fn init(wrt: WriterType) Self {
return .{ .wrt = wrt };
}
pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
self.tokens_count += tokens.len;
for (tokens) |t| {
switch (t.kind) {
.literal => self.hist.write(t.literal()),
.match => try self.hist.writeMatch(t.length(), t.distance()),
}
if (self.hist.free() < 285) try self.flushWin();
}
try self.flushWin();
}
pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
fn flushWin(self: *Self) !void {
while (true) {
const buf = self.hist.read();
if (buf.len == 0) break;
try self.wrt.writeAll(buf);
}
}
pub fn flush(_: *Self) !void {}
};
}
test "store simple compressor" {
const data = "Hello world!";
const expected = [_]u8{
0x1, // block type 0, final bit set
0xc, 0x0, // len = 12
0xf3, 0xff, // ~len
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', //
//0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
};
var fbs = std.io.fixedBufferStream(data);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
var cmp = try store.compressor(.raw, al.writer());
try cmp.compress(fbs.reader());
try cmp.finish();
try testing.expectEqualSlices(u8, &expected, al.items);
fbs.reset();
try al.resize(0);
// the huffman-only compressor will also emit a stored block for this small sample
var hc = try huffman.compressor(.raw, al.writer());
try hc.compress(fbs.reader());
try hc.finish();
try testing.expectEqualSlices(u8, &expected, al.items);
}

View File

@@ -1,302 +0,0 @@
const std = @import("std");
const testing = std.testing;
pub const Symbol = packed struct {
pub const Kind = enum(u2) {
literal,
end_of_block,
match,
};
symbol: u8 = 0, // symbol from alphabet
code_bits: u4 = 0, // number of bits in code 0-15
kind: Kind = .literal,
code: u16 = 0, // huffman code of the symbol
next: u16 = 0, // pointer to the next symbol in linked list
// It is safe to use 0 as the null pointer: after sorting, the symbol at 0 has the shortest code and fits into the lookup table.
// Sorting less than function.
pub fn asc(_: void, a: Symbol, b: Symbol) bool {
if (a.code_bits == b.code_bits) {
if (a.kind == b.kind) {
return a.symbol < b.symbol;
}
return @intFromEnum(a.kind) < @intFromEnum(b.kind);
}
return a.code_bits < b.code_bits;
}
};
pub const LiteralDecoder = HuffmanDecoder(286, 15, 9);
pub const DistanceDecoder = HuffmanDecoder(30, 15, 9);
pub const CodegenDecoder = HuffmanDecoder(19, 7, 7);
pub const Error = error{
InvalidCode,
OversubscribedHuffmanTree,
IncompleteHuffmanTree,
MissingEndOfBlockCode,
};
/// Creates Huffman tree codes from a list of code lengths (in `generate`).
///
/// `find` then finds the symbol for the given code bits. A code can be any
/// length between 1 and 15 bits. When calling `find` we don't know how many
/// bits will be used to find the symbol; the returned symbol carries a
/// code_bits field which tells how far to advance in the bit stream.
///
/// A lookup table is used to map a 15-bit int to a symbol. The same symbol is
/// written many times into this table: 32K slots for (at most) 286 symbols.
/// The small lookup table is an optimization for faster search.
/// This is a variation of the algorithm explained in [zlib](https://github.com/madler/zlib/blob/643e17b7498d12ab8d15565662880579692f769d/doc/algorithm.txt#L92),
/// with the difference that statically allocated arrays are used here.
///
fn HuffmanDecoder(
comptime alphabet_size: u16,
comptime max_code_bits: u4,
comptime lookup_bits: u4,
) type {
const lookup_shift = max_code_bits - lookup_bits;
return struct {
// all symbols in the alphabet, sorted by code_bits, then kind, then symbol
symbols: [alphabet_size]Symbol = undefined,
// lookup table code -> symbol
lookup: [1 << lookup_bits]Symbol = undefined,
const Self = @This();
/// Generates the symbol and lookup tables from the list of code lengths, one per symbol.
pub fn generate(self: *Self, lens: []const u4) !void {
try checkCompleteness(lens);
// init alphabet with code_bits
for (self.symbols, 0..) |_, i| {
const cb: u4 = if (i < lens.len) lens[i] else 0;
self.symbols[i] = if (i < 256)
.{ .kind = .literal, .symbol = @intCast(i), .code_bits = cb }
else if (i == 256)
.{ .kind = .end_of_block, .symbol = 0xff, .code_bits = cb }
else
.{ .kind = .match, .symbol = @intCast(i - 257), .code_bits = cb };
}
std.sort.heap(Symbol, &self.symbols, {}, Symbol.asc);
// reset lookup table
for (0..self.lookup.len) |i| {
self.lookup[i] = .{};
}
// assign code to symbols
// reference: https://youtu.be/9_YEGLe33NA?list=PLU4IQLU9e_OrY8oASHx0u3IXAL9TOdidm&t=2639
var code: u16 = 0;
var idx: u16 = 0;
for (&self.symbols, 0..) |*sym, pos| {
if (sym.code_bits == 0) continue; // skip unused
sym.code = code;
const next_code = code + (@as(u16, 1) << (max_code_bits - sym.code_bits));
const next_idx = next_code >> lookup_shift;
if (next_idx > self.lookup.len or idx >= self.lookup.len) break;
if (sym.code_bits <= lookup_bits) {
// fill small lookup table
for (idx..next_idx) |j|
self.lookup[j] = sym.*;
} else {
// insert into linked table starting at root
const root = &self.lookup[idx];
const root_next = root.next;
root.next = @intCast(pos);
sym.next = root_next;
}
idx = next_idx;
code = next_code;
}
}
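// Illustration (with lookup_bits = 7): a 2-bit code 0b01 is written into
// every lookup slot whose top two bits are 01, i.e. indices
// 0b01_00000 .. 0b01_11111, 32 consecutive entries. Codes longer than
// lookup_bits land in the linked lists instead; the "init/find" test below
// walks through a complete example.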
/// Given the list of code lengths, check that it represents a canonical
/// Huffman code for n symbols.
///
/// Reference: https://github.com/madler/zlib/blob/5c42a230b7b468dff011f444161c0145b5efae59/contrib/puff/puff.c#L340
fn checkCompleteness(lens: []const u4) !void {
if (alphabet_size == 286)
if (lens[256] == 0) return error.MissingEndOfBlockCode;
var count = [_]u16{0} ** (@as(usize, max_code_bits) + 1);
var max: usize = 0;
for (lens) |n| {
if (n == 0) continue;
if (n > max) max = n;
count[n] += 1;
}
if (max == 0) // empty tree
return;
// check for an over-subscribed or incomplete set of lengths
var left: usize = 1; // one possible code of zero length
for (1..count.len) |len| {
left <<= 1; // one more bit, double codes left
if (count[len] > left)
return error.OversubscribedHuffmanTree;
left -= count[len]; // deduct count from possible codes
}
if (left > 0) { // left > 0 means incomplete
// an incomplete code is ok only for a single length-1 code
if (max_code_bits > 7 and max == count[0] + count[1]) return;
return error.IncompleteHuffmanTree;
}
}
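// Worked example (illustrative): lens = {1, 1, 1} gives count[1] = 3; after
// the shift left = 2 and count[1] > left, so three 1-bit codes
// over-subscribe the tree (only two 1-bit codes exist). Conversely,
// lens = {1} ends with left = 1 > 0: an incomplete tree, accepted only via
// the single-code special case above.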
/// Finds the symbol for a lookup table code.
pub fn find(self: *Self, code: u16) !Symbol {
// try to find in lookup table
const idx = code >> lookup_shift;
const sym = self.lookup[idx];
if (sym.code_bits != 0) return sym;
// if not found, follow the linked list of symbols with the same prefix
return self.findLinked(code, sym.next);
}
inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
var pos = start;
while (pos > 0) {
const sym = self.symbols[pos];
const shift = max_code_bits - sym.code_bits;
// compare code_bits number of upper bits
if ((code ^ sym.code) >> shift == 0) return sym;
pos = sym.next;
}
return error.InvalidCode;
}
};
}
test "init/find" {
// example data from: https://youtu.be/SJPvNi4HrWQ?t=8423
const code_lens = [_]u4{ 4, 3, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2 };
var h: CodegenDecoder = .{};
try h.generate(&code_lens);
const expected = [_]struct {
sym: Symbol,
code: u16,
}{
.{
.code = 0b00_00000,
.sym = .{ .symbol = 3, .code_bits = 2 },
},
.{
.code = 0b01_00000,
.sym = .{ .symbol = 18, .code_bits = 2 },
},
.{
.code = 0b100_0000,
.sym = .{ .symbol = 1, .code_bits = 3 },
},
.{
.code = 0b101_0000,
.sym = .{ .symbol = 4, .code_bits = 3 },
},
.{
.code = 0b110_0000,
.sym = .{ .symbol = 17, .code_bits = 3 },
},
.{
.code = 0b1110_000,
.sym = .{ .symbol = 0, .code_bits = 4 },
},
.{
.code = 0b1111_000,
.sym = .{ .symbol = 16, .code_bits = 4 },
},
};
// unused symbols
for (0..12) |i| {
try testing.expectEqual(0, h.symbols[i].code_bits);
}
// used, from index 12
for (expected, 12..) |e, i| {
try testing.expectEqual(e.sym.symbol, h.symbols[i].symbol);
try testing.expectEqual(e.sym.code_bits, h.symbols[i].code_bits);
const sym_from_code = try h.find(e.code);
try testing.expectEqual(e.sym.symbol, sym_from_code.symbol);
}
// All possible codes for each symbol.
// Lookup table has 128 elements, to cover all possible 7 bit codes.
for (0b0000_000..0b0100_000) |c| // 0..32 (32)
try testing.expectEqual(3, (try h.find(@intCast(c))).symbol);
for (0b0100_000..0b1000_000) |c| // 32..64 (32)
try testing.expectEqual(18, (try h.find(@intCast(c))).symbol);
for (0b1000_000..0b1010_000) |c| // 64..80 (16)
try testing.expectEqual(1, (try h.find(@intCast(c))).symbol);
for (0b1010_000..0b1100_000) |c| // 80..96 (16)
try testing.expectEqual(4, (try h.find(@intCast(c))).symbol);
for (0b1100_000..0b1110_000) |c| // 96..112 (16)
try testing.expectEqual(17, (try h.find(@intCast(c))).symbol);
for (0b1110_000..0b1111_000) |c| // 112..120 (8)
try testing.expectEqual(0, (try h.find(@intCast(c))).symbol);
for (0b1111_000..0b1_0000_000) |c| // 120...128 (8)
try testing.expectEqual(16, (try h.find(@intCast(c))).symbol);
}
test "encode/decode literals" {
const LiteralEncoder = @import("huffman_encoder.zig").LiteralEncoder;
for (1..286) |j| { // for all different number of codes
var enc: LiteralEncoder = .{};
// create frequencies
var freq = [_]u16{0} ** 286;
freq[256] = 1; // ensure we have end of block code
for (&freq, 1..) |*f, i| {
if (i % j == 0)
f.* = @intCast(i);
}
// encoder from frequencies
enc.generate(&freq, 15);
// get code_lens from encoder
var code_lens = [_]u4{0} ** 286;
for (code_lens, 0..) |_, i| {
code_lens[i] = @intCast(enc.codes[i].len);
}
// generate decoder from code lens
var dec: LiteralDecoder = .{};
try dec.generate(&code_lens);
// expect decoder code to match original encoder code
for (dec.symbols) |s| {
if (s.code_bits == 0) continue;
const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code)));
const symbol: u16 = switch (s.kind) {
.literal => s.symbol,
.end_of_block => 256,
.match => @as(u16, s.symbol) + 257,
};
const c = enc.codes[symbol];
try testing.expect(c.code == c_code);
}
// find each symbol by code
for (enc.codes) |c| {
if (c.len == 0) continue;
const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code)));
const s = try dec.find(s_code);
try testing.expect(s.code == s_code);
try testing.expect(s.code_bits == c.len);
}
}
}

View File

@@ -1,536 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const testing = std.testing;
const consts = @import("consts.zig").huffman;
const LiteralNode = struct {
literal: u16,
freq: u16,
};
// Describes the state of the constructed tree for a given depth.
const LevelInfo = struct {
// Our level, for better printing.
level: u32,
// The frequency of the last node at this level
last_freq: u32,
// The frequency of the next character to add to this level
next_char_freq: u32,
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
next_pair_freq: u32,
// The number of chains remaining to generate for this level before moving
// up to the next level
needed: u32,
};
// HuffCode is a Huffman code with a bit code and bit length.
pub const HuffCode = struct {
code: u16 = 0,
len: u16 = 0,
// Sets the code and length of a HuffCode.
fn set(self: *HuffCode, code: u16, length: u16) void {
self.len = length;
self.code = code;
}
};
pub fn HuffmanEncoder(comptime size: usize) type {
return struct {
codes: [size]HuffCode = undefined,
// Reusable buffer with the longest possible frequency table.
freq_cache: [consts.max_num_frequencies + 1]LiteralNode = undefined,
bit_count: [17]u32 = undefined,
lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate
const Self = @This();
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// max_bits The maximum number of bits to use for any literal.
pub fn generate(self: *Self, freq: []u16, max_bits: u32) void {
var list = self.freq_cache[0 .. freq.len + 1];
// Number of non-zero literals
var count: u32 = 0;
// Set list to be the set of all non-zero literals and their frequencies
for (freq, 0..) |f, i| {
if (f != 0) {
list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
count += 1;
} else {
list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
self.codes[i].len = 0;
}
}
list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
list = list[0..count];
if (count <= 2) {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for (list, 0..) |node, i| {
// "list" is in order of increasing literal value.
self.codes[node.literal].set(@as(u16, @intCast(i)), 1);
}
return;
}
self.lfs = list;
mem.sort(LiteralNode, self.lfs, {}, byFreq);
// Get the number of literals for each bit count
const bit_count = self.bitCounts(list, max_bits);
// And do the assignment
self.assignEncodingAndSize(bit_count, list);
}
pub fn bitLength(self: *Self, freq: []u16) u32 {
var total: u32 = 0;
for (freq, 0..) |f, i| {
if (f != 0) {
total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
}
}
return total;
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.len >= 3
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list: An array of the literals with non-zero frequencies
// and their associated frequencies. The array is in order of increasing
// frequency, and has as its last element a special element with frequency
// std.math.maxInt(i32)
//
// max_bits: The maximum number of bits that should be used to encode any literal.
// Must be less than 16.
//
// Returns an integer array in which array[i] indicates the number of literals
// that should be encoded in i bits.
fn bitCounts(self: *Self, list: []LiteralNode, max_bits_to_use: usize) []u32 {
var max_bits = max_bits_to_use;
const n = list.len;
const max_bits_limit = 16;
assert(max_bits < max_bits_limit);
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
max_bits = @min(max_bits, n - 1);
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed == 0. This makes level1.next_pair_freq
// be a legitimate value that never gets chosen.
var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
// leaf_counts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leaf_counts[i][j] is the number of literals at the left
// of the level j ancestor.
var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);
{
var level = @as(u32, 1);
while (level <= max_bits) : (level += 1) {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = LevelInfo{
.level = level,
.last_freq = list[1].freq,
.next_char_freq = list[2].freq,
.next_pair_freq = list[0].freq + list[1].freq,
.needed = 0,
};
leaf_counts[level][level] = 2;
if (level == 1) {
levels[level].next_pair_freq = math.maxInt(i32);
}
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
{
var level = max_bits;
while (true) {
var l = &levels[level];
if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
// We've run out of both leaves and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set next_pair_freq impossibly large.
l.needed = 0;
levels[level + 1].next_pair_freq = math.maxInt(i32);
level += 1;
continue;
}
const prev_freq = l.last_freq;
if (l.next_char_freq < l.next_pair_freq) {
// The next item on this row is a leaf node.
const next = leaf_counts[level][level] + 1;
l.last_freq = l.next_char_freq;
// Lower leaf_counts are the same as those of the previous node.
leaf_counts[level][level] = next;
if (next >= list.len) {
l.next_char_freq = maxNode().freq;
} else {
l.next_char_freq = list[next].freq;
}
} else {
// The next item on this row is a pair from the previous row.
// next_pair_freq isn't valid until we generate two
// more values in the level below
l.last_freq = l.next_pair_freq;
// Take leaf counts from the lower level, except counts[level] remains the same.
@memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
levels[l.level - 1].needed = 2;
}
l.needed -= 1;
if (l.needed == 0) {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in next_pair_freq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if (l.level == max_bits) {
// All done!
break;
}
levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
level += 1;
} else {
// If we stole from below, move down temporarily to replenish it.
while (levels[level - 1].needed > 0) {
level -= 1;
if (level == 0) {
break;
}
}
}
}
}
// Something is wrong if, at the end, the top level is null or hasn't used
// all of the leaves.
assert(leaf_counts[max_bits][max_bits] == n);
var bit_count = self.bit_count[0 .. max_bits + 1];
var bits: u32 = 1;
const counts = &leaf_counts[max_bits];
{
var level = max_bits;
while (level > 0) : (level -= 1) {
// counts[level] gives the number of literals requiring at least "bits"
// bits to encode.
bit_count[bits] = counts[level] - counts[level - 1];
bits += 1;
if (level == 0) {
break;
}
}
}
return bit_count;
}
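// Note: bitCounts is a length-limited Huffman construction ported from Go's
// compress/flate. Instead of building an explicit tree it tracks, per level,
// only the boundary chain frequencies and leaf counts, which is what the
// LevelInfo bookkeeping above implements.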
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 3.2.2
fn assignEncodingAndSize(self: *Self, bit_count: []u32, list_arg: []LiteralNode) void {
var code = @as(u16, 0);
var list = list_arg;
for (bit_count, 0..) |bits, n| {
code <<= 1;
if (n == 0 or bits == 0) {
continue;
}
// The literals list[list.len-bits] .. list[list.len-1]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
self.lns = chunk;
mem.sort(LiteralNode, self.lns, {}, byLiteral);
for (chunk) |node| {
self.codes[node.literal] = HuffCode{
.code = bitReverse(u16, code, @as(u5, @intCast(n))),
.len = @as(u16, @intCast(n)),
};
code += 1;
}
list = list[0 .. list.len - @as(u32, @intCast(bits))];
}
}
};
}
fn maxNode() LiteralNode {
return LiteralNode{
.literal = math.maxInt(u16),
.freq = math.maxInt(u16),
};
}
pub fn huffmanEncoder(comptime size: u32) HuffmanEncoder(size) {
return .{};
}
pub const LiteralEncoder = HuffmanEncoder(consts.max_num_frequencies);
pub const DistanceEncoder = HuffmanEncoder(consts.distance_code_count);
pub const CodegenEncoder = HuffmanEncoder(19);
// Generates a HuffmanCode corresponding to the fixed literal table
pub fn fixedLiteralEncoder() LiteralEncoder {
var h: LiteralEncoder = undefined;
var ch: u16 = 0;
while (ch < consts.max_num_frequencies) : (ch += 1) {
var bits: u16 = undefined;
var size: u16 = undefined;
switch (ch) {
0...143 => {
// size 8, 00110000 .. 10111111
bits = ch + 48;
size = 8;
},
144...255 => {
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144;
size = 9;
},
256...279 => {
// size 7, 0000000 .. 0010111
bits = ch - 256;
size = 7;
},
else => {
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280;
size = 8;
},
}
h.codes[ch] = HuffCode{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
}
return h;
}
pub fn fixedDistanceEncoder() DistanceEncoder {
var h: DistanceEncoder = undefined;
for (h.codes, 0..) |_, ch| {
h.codes[ch] = HuffCode{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
}
return h;
}
pub fn huffmanDistanceEncoder() DistanceEncoder {
var distance_freq = [1]u16{0} ** consts.distance_code_count;
distance_freq[0] = 1;
// A static distance encoder used for huffman-only encoding.
// It can be reused since we will not be encoding distance values.
var h: DistanceEncoder = .{};
h.generate(distance_freq[0..], 15);
return h;
}
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
_ = context;
return a.literal < b.literal;
}
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
_ = context;
if (a.freq == b.freq) {
return a.literal < b.literal;
}
return a.freq < b.freq;
}
test "generate a Huffman code from an array of frequencies" {
var freqs: [19]u16 = [_]u16{
8, // 0
1, // 1
1, // 2
2, // 3
5, // 4
10, // 5
9, // 6
1, // 7
0, // 8
0, // 9
0, // 10
0, // 11
0, // 12
0, // 13
0, // 14
0, // 15
1, // 16
3, // 17
5, // 18
};
var enc = huffmanEncoder(19);
enc.generate(freqs[0..], 7);
try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
}
test "generate a Huffman code for the fixed literal table specific to Deflate" {
const enc = fixedLiteralEncoder();
for (enc.codes) |c| {
switch (c.len) {
7 => {
const v = @bitReverse(@as(u7, @intCast(c.code)));
try testing.expect(v <= 0b0010111);
},
8 => {
const v = @bitReverse(@as(u8, @intCast(c.code)));
try testing.expect((v >= 0b00110000 and v <= 0b10111111) or
(v >= 0b11000000 and v <= 0b11000111));
},
9 => {
const v = @bitReverse(@as(u9, @intCast(c.code)));
try testing.expect(v >= 0b110010000 and v <= 0b111111111);
},
else => unreachable,
}
}
}
test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
const enc = fixedDistanceEncoder();
for (enc.codes) |c| {
const v = @bitReverse(@as(u5, @intCast(c.code)));
try testing.expect(v <= 29);
try testing.expect(c.len == 5);
}
}
// Reverses, bit by bit, an n-bit code.
fn bitReverse(comptime T: type, value: T, n: usize) T {
const r = @bitReverse(value);
return r >> @as(math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n));
}
test bitReverse {
const ReverseBitsTest = struct {
in: u16,
bit_count: u5,
out: u16,
};
const reverse_bits_tests = [_]ReverseBitsTest{
.{ .in = 1, .bit_count = 1, .out = 1 },
.{ .in = 1, .bit_count = 2, .out = 2 },
.{ .in = 1, .bit_count = 3, .out = 4 },
.{ .in = 1, .bit_count = 4, .out = 8 },
.{ .in = 1, .bit_count = 5, .out = 16 },
.{ .in = 17, .bit_count = 5, .out = 17 },
.{ .in = 257, .bit_count = 9, .out = 257 },
.{ .in = 29, .bit_count = 5, .out = 23 },
};
for (reverse_bits_tests) |h| {
const v = bitReverse(u16, h.in, h.bit_count);
try std.testing.expectEqual(h.out, v);
}
}
test "fixedLiteralEncoder codes" {
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
var bw = std.io.bitWriter(.little, al.writer());
const f = fixedLiteralEncoder();
for (f.codes) |c| {
try bw.writeBits(c.code, c.len);
}
try testing.expectEqualSlices(u8, &fixed_codes, al.items);
}
pub const fixed_codes = [_]u8{
0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
0b10100011,
};

View File

@@ -1,570 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
const hfd = @import("huffman_decoder.zig");
const BitReader = @import("bit_reader.zig").BitReader;
const CircularBuffer = @import("CircularBuffer.zig");
const Container = @import("container.zig").Container;
const Token = @import("Token.zig");
const codegen_order = @import("consts.zig").huffman.codegen_order;
/// Decompresses deflate bit stream `reader` and writes uncompressed data to the
/// `writer` stream.
pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void {
var d = decompressor(container, reader);
try d.decompress(writer);
}
/// Inflate decompressor for the reader type.
pub fn decompressor(comptime container: Container, reader: anytype) Decompressor(container, @TypeOf(reader)) {
return Decompressor(container, @TypeOf(reader)).init(reader);
}
pub fn Decompressor(comptime container: Container, comptime ReaderType: type) type {
// zlib has a 4-byte footer; a lookahead of 4 bytes ensures that we will not overshoot.
// gzip has an 8-byte footer, so we will not overshoot even with 8 bytes of lookahead.
// For raw deflate there is always a possibility of overshooting, so we use 8 bytes of lookahead.
const lookahead: type = if (container == .zlib) u32 else u64;
return Inflate(container, lookahead, ReaderType);
}
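// A minimal usage sketch (illustrative, not part of the original file):
// drive the decompressor through its iterator interface instead of
// `decompress`. The input is the stored-block sample reused by the tests
// below.
test "iterator interface sketch" {
    const input = &[_]u8{
        0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: final bit, type 0, len, nlen
        'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a,
    };
    var in = std.io.fixedBufferStream(input);
    var inflate = decompressor(.raw, in.reader());
    var out = std.ArrayList(u8).init(testing.allocator);
    defer out.deinit();
    while (try inflate.next()) |buf|
        try out.appendSlice(buf);
    try testing.expectEqualStrings("Hello world\n", out.items);
}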
/// Inflate decompresses a deflate bit stream. It reads compressed data from
/// the reader provided in init. Decompressed data is stored in the internal
/// hist buffer and can be accessed via the iterator-like `next` or the
/// reader interface.
///
/// Container defines the header/footer wrapper around the deflate bit
/// stream; it can be gzip or zlib.
///
/// A deflate bit stream consists of multiple blocks. A block can be one of
/// three types:
/// * stored, not compressed, max 64K in size
/// * fixed, huffman codes are predefined
/// * dynamic, huffman code tables are encoded at the block start
///
/// The `step` function runs the decoder until the internal `hist` buffer is
/// full. The client then needs to read that data in order to proceed with
/// decoding.
///
/// Allocates 74.5K of internal buffers, the most important being:
/// * 64K for history (CircularBuffer)
/// * ~10K for huffman decoders (Literal and DistanceDecoder)
///
pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comptime ReaderType: type) type {
assert(LookaheadType == u32 or LookaheadType == u64);
const BitReaderType = BitReader(LookaheadType, ReaderType);
return struct {
const F = BitReaderType.flag;
bits: BitReaderType = .{},
hist: CircularBuffer = .{},
// Hashes (computes the checksum of) uncompressed data for the gzip/zlib footer.
hasher: container.Hasher() = .{},
// dynamic block huffman code decoders
lit_dec: hfd.LiteralDecoder = .{}, // literals
dst_dec: hfd.DistanceDecoder = .{}, // distances
// current read state
bfinal: u1 = 0,
block_type: u2 = 0b11,
state: ReadState = .protocol_header,
const ReadState = enum {
protocol_header,
block_header,
block,
protocol_footer,
end,
};
const Self = @This();
pub const Error = BitReaderType.Error || Container.Error || hfd.Error || error{
InvalidCode,
InvalidMatch,
InvalidBlockType,
WrongStoredBlockNlen,
InvalidDynamicBlockHeader,
};
pub fn init(rt: ReaderType) Self {
return .{ .bits = BitReaderType.init(rt) };
}
fn blockHeader(self: *Self) !void {
self.bfinal = try self.bits.read(u1);
self.block_type = try self.bits.read(u2);
}
fn storedBlock(self: *Self) !bool {
self.bits.alignToByte(); // skip padding until byte boundary
// everything after this is byte aligned in stored block
var len = try self.bits.read(u16);
const nlen = try self.bits.read(u16);
if (len != ~nlen) return error.WrongStoredBlockNlen;
while (len > 0) {
const buf = self.hist.getWritable(len);
try self.bits.readAll(buf);
len -= @intCast(buf.len);
}
return true;
}
fn fixedBlock(self: *Self) !bool {
while (!self.hist.full()) {
const code = try self.bits.readFixedCode();
switch (code) {
0...255 => self.hist.write(@intCast(code)),
256 => return true, // end of block
257...285 => try self.fixedDistanceCode(@intCast(code - 257)),
else => return error.InvalidCode,
}
}
return false;
}
// Handles fixed block non literal (length) code.
// Length code is followed by 5 bits of distance code.
fn fixedDistanceCode(self: *Self, code: u8) !void {
try self.bits.fill(5 + 5 + 13);
const length = try self.decodeLength(code);
const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse));
try self.hist.writeMatch(length, distance);
}
inline fn decodeLength(self: *Self, code: u8) !u16 {
if (code > 28) return error.InvalidCode;
const ml = Token.matchLength(code);
return if (ml.extra_bits == 0) // 0 - 5 extra bits
ml.base
else
ml.base + try self.bits.readN(ml.extra_bits, F.buffered);
}
fn decodeDistance(self: *Self, code: u8) !u16 {
if (code > 29) return error.InvalidCode;
const md = Token.matchDistance(code);
return if (md.extra_bits == 0) // 0 - 13 extra bits
md.base
else
md.base + try self.bits.readN(md.extra_bits, F.buffered);
}
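// Worked example (per RFC 1951, 3.2.5): length code 0 (bit-stream code 257)
// has base 3 and no extra bits; distance code 4 has base 5 and one extra
// bit, covering distances 5-6. Length codes above 28 and distance codes
// above 29 do not exist, hence the InvalidCode checks above.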
fn dynamicBlockHeader(self: *Self) !void {
const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of ll code entries present - 257
const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance code entries - 1
const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // hclen + 4 code lengths are encoded
if (hlit > 286 or hdist > 30)
return error.InvalidDynamicBlockHeader;
// lengths for code lengths
var cl_lens = [_]u4{0} ** 19;
for (0..hclen) |i| {
cl_lens[codegen_order[i]] = try self.bits.read(u3);
}
var cl_dec: hfd.CodegenDecoder = .{};
try cl_dec.generate(&cl_lens);
// decoded code lengths
var dec_lens = [_]u4{0} ** (286 + 30);
var pos: usize = 0;
while (pos < hlit + hdist) {
const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse));
try self.bits.shift(sym.code_bits);
pos += try self.dynamicCodeLength(sym.symbol, &dec_lens, pos);
}
if (pos > hlit + hdist) {
return error.InvalidDynamicBlockHeader;
}
// literal code lengths to literal decoder
try self.lit_dec.generate(dec_lens[0..hlit]);
// distance code lengths to distance decoder
try self.dst_dec.generate(dec_lens[hlit .. hlit + hdist]);
}
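// Dynamic header recap (RFC 1951, 3.2.7): 5 bits HLIT, 5 bits HDIST and
// 4 bits HCLEN are followed by hclen (= HCLEN + 4) code lengths of 3 bits
// each, stored in codegen_order. The code-length code built from them then
// decodes the hlit + hdist literal/length and distance code lengths,
// including the 16/17/18 repeat codes expanded in dynamicCodeLength below.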
// Decodes a code length symbol, writing the resulting code lengths into
// the lens slice starting at position pos. Returns the number of positions
// advanced.
fn dynamicCodeLength(self: *Self, code: u16, lens: []u4, pos: usize) !usize {
if (pos >= lens.len)
return error.InvalidDynamicBlockHeader;
switch (code) {
0...15 => {
// Represent code lengths of 0 - 15
lens[pos] = @intCast(code);
return 1;
},
16 => {
// Copy the previous code length 3 - 6 times.
// The next 2 bits indicate repeat length
const n: u8 = @as(u8, try self.bits.read(u2)) + 3;
if (pos == 0 or pos + n > lens.len)
return error.InvalidDynamicBlockHeader;
for (0..n) |i| {
lens[pos + i] = lens[pos + i - 1];
}
return n;
},
// Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
17 => return @as(u8, try self.bits.read(u3)) + 3,
// Repeat a code length of 0 for 11 - 138 times (7 bits of length)
18 => return @as(u8, try self.bits.read(u7)) + 11,
else => return error.InvalidDynamicBlockHeader,
}
}
// In larger archives most blocks are usually dynamic, so decompression
// performance depends on this function.
fn dynamicBlock(self: *Self) !bool {
// Hot path loop!
while (!self.hist.full()) {
try self.bits.fill(15); // optimization so other bit reads can be buffered (avoiding one `if` in hot path)
const sym = try self.decodeSymbol(&self.lit_dec);
switch (sym.kind) {
.literal => self.hist.write(sym.symbol),
.match => { // Decode match backreference <length, distance>
// fill so we can use buffered reads
if (LookaheadType == u32)
try self.bits.fill(5 + 15)
else
try self.bits.fill(5 + 15 + 13);
const length = try self.decodeLength(sym.symbol);
const dsm = try self.decodeSymbol(&self.dst_dec);
if (LookaheadType == u32) try self.bits.fill(13);
const distance = try self.decodeDistance(dsm.symbol);
try self.hist.writeMatch(length, distance);
},
.end_of_block => return true,
}
}
return false;
}
// Peek 15 bits from the bit reader (the maximum code length is 15 bits).
// Use the decoder to find the symbol for that code; the returned symbol's
// code_bits tells how many bits were actually used. Shift the bit reader
// past those consumed bits and return the symbol.
fn decodeSymbol(self: *Self, decoder: anytype) !hfd.Symbol {
const sym = try decoder.find(try self.bits.peekF(u15, F.buffered | F.reverse));
try self.bits.shift(sym.code_bits);
return sym;
}
fn step(self: *Self) !void {
switch (self.state) {
.protocol_header => {
try container.parseHeader(&self.bits);
self.state = .block_header;
},
.block_header => {
try self.blockHeader();
self.state = .block;
if (self.block_type == 2) try self.dynamicBlockHeader();
},
.block => {
const done = switch (self.block_type) {
0 => try self.storedBlock(),
1 => try self.fixedBlock(),
2 => try self.dynamicBlock(),
else => return error.InvalidBlockType,
};
if (done) {
self.state = if (self.bfinal == 1) .protocol_footer else .block_header;
}
},
.protocol_footer => {
self.bits.alignToByte();
try container.parseFooter(&self.hasher, &self.bits);
self.state = .end;
},
.end => {},
}
}
/// Replaces the inner reader with a new reader.
pub fn setReader(self: *Self, new_reader: ReaderType) void {
self.bits.forward_reader = new_reader;
if (self.state == .end or self.state == .protocol_footer) {
self.state = .protocol_header;
}
}
// Reads all compressed data from the internal reader and outputs plain
// (uncompressed) data to the provided writer.
pub fn decompress(self: *Self, writer: anytype) !void {
while (try self.next()) |buf| {
try writer.writeAll(buf);
}
}
/// Returns the number of bytes that have been read from the internal
/// reader but not yet consumed by the decompressor.
pub fn unreadBytes(self: Self) usize {
// There can be no error here: the denominator is not zero, and
// overflow is not possible since the type is unsigned.
return std.math.divCeil(usize, self.bits.nbits, 8) catch unreachable;
}
// Iterator interface
/// Can be used in an iterator-like loop without a memcpy to another buffer:
/// while (try inflate.next()) |buf| { ... }
pub fn next(self: *Self) Error!?[]const u8 {
const out = try self.get(0);
if (out.len == 0) return null;
return out;
}
/// Returns decompressed data from the internal sliding window buffer. The
/// returned buffer can be any length between 0 and `limit` bytes; 0 returned
/// bytes means the end of the stream has been reached. With limit=0 it
/// returns as much data as it can. It will never be more than 65536 bytes,
/// which is the size of the internal buffer.
pub fn get(self: *Self, limit: usize) Error![]const u8 {
while (true) {
const out = self.hist.readAtMost(limit);
if (out.len > 0) {
self.hasher.update(out);
return out;
}
if (self.state == .end) return out;
try self.step();
}
}
// Reader interface
pub const Reader = std.io.GenericReader(*Self, Error, read);
/// Returns the number of bytes read. It may be less than buffer.len.
/// If the number of bytes read is 0, it means end of stream.
/// End of stream is not an error condition.
pub fn read(self: *Self, buffer: []u8) Error!usize {
if (buffer.len == 0) return 0;
const out = try self.get(buffer.len);
@memcpy(buffer[0..out.len], out);
return out.len;
}
pub fn reader(self: *Self) Reader {
return .{ .context = self };
}
};
}
test "decompress" {
const cases = [_]struct {
in: []const u8,
out: []const u8,
}{
// non compressed block (type 0)
.{
.in = &[_]u8{
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: final bit, type 0, len, nlen
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
},
.out = "Hello world\n",
},
// fixed code block (type 1)
.{
.in = &[_]u8{
0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
},
.out = "Hello world\n",
},
// dynamic block (type 2)
.{
.in = &[_]u8{
0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
},
.out = "ABCDEABCD ABCDEABCD",
},
};
for (cases) |c| {
var fb = std.io.fixedBufferStream(c.in);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
try decompress(.raw, fb.reader(), al.writer());
try testing.expectEqualStrings(c.out, al.items);
}
}
test "gzip decompress" {
const cases = [_]struct {
in: []const u8,
out: []const u8,
}{
// non compressed block (type 0)
.{
.in = &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: final bit, type 0, len, nlen
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
0xd5, 0xe0, 0x39, 0xb7, // gzip footer: checksum
0x0c, 0x00, 0x00, 0x00, // gzip footer: size
},
.out = "Hello world\n",
},
// fixed code block (type 1)
.{
.in = &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, // gzip header (10 bytes)
0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
},
.out = "Hello world\n",
},
// dynamic block (type 2)
.{
.in = &[_]u8{
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
0x17, 0x1c, 0x39, 0xb4, 0x13, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
},
.out = "ABCDEABCD ABCDEABCD",
},
// gzip header with name
.{
.in = &[_]u8{
0x1f, 0x8b, 0x08, 0x08, 0xe5, 0x70, 0xb1, 0x65, 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
0x02, 0x00, 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00,
},
.out = "Hello world\n",
},
};
for (cases) |c| {
var fb = std.io.fixedBufferStream(c.in);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
try decompress(.gzip, fb.reader(), al.writer());
try testing.expectEqualStrings(c.out, al.items);
}
}
test "zlib decompress" {
const cases = [_]struct {
in: []const u8,
out: []const u8,
}{
// non compressed block (type 0)
.{
.in = &[_]u8{
0x78, 0b10_0_11100, // zlib header (2 bytes)
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: final bit, type 0, len, nlen
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
0x1c, 0xf2, 0x04, 0x47, // zlib footer: checksum
},
.out = "Hello world\n",
},
};
for (cases) |c| {
var fb = std.io.fixedBufferStream(c.in);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
try decompress(.zlib, fb.reader(), al.writer());
try testing.expectEqualStrings(c.out, al.items);
}
}
test "fuzzing tests" {
const cases = [_]struct {
input: []const u8,
out: []const u8 = "",
err: ?anyerror = null,
}{
.{ .input = "deflate-stream", .out = @embedFile("testdata/fuzz/deflate-stream.expect") }, // 0
.{ .input = "empty-distance-alphabet01" },
.{ .input = "empty-distance-alphabet02" },
.{ .input = "end-of-stream", .err = error.EndOfStream },
.{ .input = "invalid-distance", .err = error.InvalidMatch },
.{ .input = "invalid-tree01", .err = error.IncompleteHuffmanTree }, // 5
.{ .input = "invalid-tree02", .err = error.IncompleteHuffmanTree },
.{ .input = "invalid-tree03", .err = error.IncompleteHuffmanTree },
.{ .input = "lengths-overflow", .err = error.InvalidDynamicBlockHeader },
.{ .input = "out-of-codes", .err = error.InvalidCode },
.{ .input = "puff01", .err = error.WrongStoredBlockNlen }, // 10
.{ .input = "puff02", .err = error.EndOfStream },
.{ .input = "puff03", .out = &[_]u8{0xa} },
.{ .input = "puff04", .err = error.InvalidCode },
.{ .input = "puff05", .err = error.EndOfStream },
.{ .input = "puff06", .err = error.EndOfStream },
.{ .input = "puff08", .err = error.InvalidCode },
.{ .input = "puff09", .out = "P" },
.{ .input = "puff10", .err = error.InvalidCode },
.{ .input = "puff11", .err = error.InvalidMatch },
.{ .input = "puff12", .err = error.InvalidDynamicBlockHeader }, // 20
.{ .input = "puff13", .err = error.IncompleteHuffmanTree },
.{ .input = "puff14", .err = error.EndOfStream },
.{ .input = "puff15", .err = error.IncompleteHuffmanTree },
.{ .input = "puff16", .err = error.InvalidDynamicBlockHeader },
.{ .input = "puff17", .err = error.MissingEndOfBlockCode }, // 25
.{ .input = "fuzz1", .err = error.InvalidDynamicBlockHeader },
.{ .input = "fuzz2", .err = error.InvalidDynamicBlockHeader },
.{ .input = "fuzz3", .err = error.InvalidMatch },
.{ .input = "fuzz4", .err = error.OversubscribedHuffmanTree },
.{ .input = "puff18", .err = error.OversubscribedHuffmanTree }, // 30
.{ .input = "puff19", .err = error.OversubscribedHuffmanTree },
.{ .input = "puff20", .err = error.OversubscribedHuffmanTree },
.{ .input = "puff21", .err = error.OversubscribedHuffmanTree },
.{ .input = "puff22", .err = error.OversubscribedHuffmanTree },
.{ .input = "puff23", .err = error.OversubscribedHuffmanTree }, // 35
.{ .input = "puff24", .err = error.IncompleteHuffmanTree },
.{ .input = "puff25", .err = error.OversubscribedHuffmanTree },
.{ .input = "puff26", .err = error.InvalidDynamicBlockHeader },
.{ .input = "puff27", .err = error.InvalidDynamicBlockHeader },
};
inline for (cases, 0..) |c, case_no| {
var in = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ c.input ++ ".input"));
var out = std.ArrayList(u8).init(testing.allocator);
defer out.deinit();
errdefer std.debug.print("test case failed {}\n", .{case_no});
if (c.err) |expected_err| {
try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer()));
} else {
try decompress(.raw, in.reader(), out.writer());
try testing.expectEqualStrings(c.out, out.items);
}
}
}
test "bug 18966" {
const input = @embedFile("testdata/fuzz/bug_18966.input");
const expect = @embedFile("testdata/fuzz/bug_18966.expect");
var in = std.io.fixedBufferStream(input);
var out = std.ArrayList(u8).init(testing.allocator);
defer out.deinit();
try decompress(.gzip, in.reader(), out.writer());
try testing.expectEqualStrings(expect, out.items);
}
test "bug 19895" {
const input = &[_]u8{
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: final bit, type 0, len, nlen
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
};
var in = std.io.fixedBufferStream(input);
var decomp = decompressor(.raw, in.reader());
var buf: [0]u8 = undefined;
try testing.expectEqual(0, try decomp.read(&buf));
}

View File

@@ -1,606 +0,0 @@
const Token = @import("../Token.zig");
pub const TestCase = struct {
tokens: []const Token,
input: []const u8 = "", // File name of input data matching the tokens.
want: []const u8 = "", // File name of data with the expected output with input available.
want_no_input: []const u8 = "", // File name of the expected output when no input is available.
};
pub const testCases = blk: {
@setEvalBranchQuota(4096 * 2);
const L = Token.initLiteral;
const M = Token.initMatch;
const ml = M(1, 258); // Maximum length token. Used to reduce the size of writeBlockTests
break :blk &[_]TestCase{
TestCase{
.input = "huffman-null-max.input",
.want = "huffman-null-max.{s}.expect",
.want_no_input = "huffman-null-max.{s}.expect-noinput",
.tokens = &[_]Token{
L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, L(0x0), L(0x0),
},
},
TestCase{
.input = "huffman-pi.input",
.want = "huffman-pi.{s}.expect",
.want_no_input = "huffman-pi.{s}.expect-noinput",
.tokens = &[_]Token{
L('3'), L('.'), L('1'), L('4'), L('1'), L('5'), L('9'), L('2'),
L('6'), L('5'), L('3'), L('5'), L('8'), L('9'), L('7'), L('9'),
L('3'), L('2'), L('3'), L('8'), L('4'), L('6'), L('2'), L('6'),
L('4'), L('3'), L('3'), L('8'), L('3'), L('2'), L('7'), L('9'),
L('5'), L('0'), L('2'), L('8'), L('8'), L('4'), L('1'), L('9'),
L('7'), L('1'), L('6'), L('9'), L('3'), L('9'), L('9'), L('3'),
L('7'), L('5'), L('1'), L('0'), L('5'), L('8'), L('2'), L('0'),
L('9'), L('7'), L('4'), L('9'), L('4'), L('4'), L('5'), L('9'),
L('2'), L('3'), L('0'), L('7'), L('8'), L('1'), L('6'), L('4'),
L('0'), L('6'), L('2'), L('8'), L('6'), L('2'), L('0'), L('8'),
L('9'), L('9'), L('8'), L('6'), L('2'), L('8'), L('0'), L('3'),
L('4'), L('8'), L('2'), L('5'), L('3'), L('4'), L('2'), L('1'),
L('1'), L('7'), L('0'), L('6'), L('7'), L('9'), L('8'), L('2'),
L('1'), L('4'), L('8'), L('0'), L('8'), L('6'), L('5'), L('1'),
L('3'), L('2'), L('8'), L('2'), L('3'), L('0'), L('6'), L('6'),
L('4'), L('7'), L('0'), L('9'), L('3'), L('8'), L('4'), L('4'),
L('6'), L('0'), L('9'), L('5'), L('5'), L('0'), L('5'), L('8'),
L('2'), L('2'), L('3'), L('1'), L('7'), L('2'), L('5'), L('3'),
L('5'), L('9'), L('4'), L('0'), L('8'), L('1'), L('2'), L('8'),
L('4'), L('8'), L('1'), L('1'), L('1'), L('7'), L('4'), M(127, 4),
L('4'), L('1'), L('0'), L('2'), L('7'), L('0'), L('1'), L('9'),
L('3'), L('8'), L('5'), L('2'), L('1'), L('1'), L('0'), L('5'),
L('5'), L('5'), L('9'), L('6'), L('4'), L('4'), L('6'), L('2'),
L('2'), L('9'), L('4'), L('8'), L('9'), L('5'), L('4'), L('9'),
L('3'), L('0'), L('3'), L('8'), L('1'), M(19, 4), L('2'), L('8'),
L('8'), L('1'), L('0'), L('9'), L('7'), L('5'), L('6'), L('6'),
L('5'), L('9'), L('3'), L('3'), L('4'), L('4'), L('6'), M(72, 4),
L('7'), L('5'), L('6'), L('4'), L('8'), L('2'), L('3'), L('3'),
L('7'), L('8'), L('6'), L('7'), L('8'), L('3'), L('1'), L('6'),
L('5'), L('2'), L('7'), L('1'), L('2'), L('0'), L('1'), L('9'),
L('0'), L('9'), L('1'), L('4'), M(27, 4), L('5'), L('6'), L('6'),
L('9'), L('2'), L('3'), L('4'), L('6'), M(179, 4), L('6'), L('1'),
L('0'), L('4'), L('5'), L('4'), L('3'), L('2'), L('6'), M(51, 4),
L('1'), L('3'), L('3'), L('9'), L('3'), L('6'), L('0'), L('7'),
L('2'), L('6'), L('0'), L('2'), L('4'), L('9'), L('1'), L('4'),
L('1'), L('2'), L('7'), L('3'), L('7'), L('2'), L('4'), L('5'),
L('8'), L('7'), L('0'), L('0'), L('6'), L('6'), L('0'), L('6'),
L('3'), L('1'), L('5'), L('5'), L('8'), L('8'), L('1'), L('7'),
L('4'), L('8'), L('8'), L('1'), L('5'), L('2'), L('0'), L('9'),
L('2'), L('0'), L('9'), L('6'), L('2'), L('8'), L('2'), L('9'),
L('2'), L('5'), L('4'), L('0'), L('9'), L('1'), L('7'), L('1'),
L('5'), L('3'), L('6'), L('4'), L('3'), L('6'), L('7'), L('8'),
L('9'), L('2'), L('5'), L('9'), L('0'), L('3'), L('6'), L('0'),
L('0'), L('1'), L('1'), L('3'), L('3'), L('0'), L('5'), L('3'),
L('0'), L('5'), L('4'), L('8'), L('8'), L('2'), L('0'), L('4'),
L('6'), L('6'), L('5'), L('2'), L('1'), L('3'), L('8'), L('4'),
L('1'), L('4'), L('6'), L('9'), L('5'), L('1'), L('9'), L('4'),
L('1'), L('5'), L('1'), L('1'), L('6'), L('0'), L('9'), L('4'),
L('3'), L('3'), L('0'), L('5'), L('7'), L('2'), L('7'), L('0'),
L('3'), L('6'), L('5'), L('7'), L('5'), L('9'), L('5'), L('9'),
L('1'), L('9'), L('5'), L('3'), L('0'), L('9'), L('2'), L('1'),
L('8'), L('6'), L('1'), L('1'), L('7'), M(234, 4), L('3'), L('2'),
M(10, 4), L('9'), L('3'), L('1'), L('0'), L('5'), L('1'), L('1'),
L('8'), L('5'), L('4'), L('8'), L('0'), L('7'), M(271, 4), L('3'),
L('7'), L('9'), L('9'), L('6'), L('2'), L('7'), L('4'), L('9'),
L('5'), L('6'), L('7'), L('3'), L('5'), L('1'), L('8'), L('8'),
L('5'), L('7'), L('5'), L('2'), L('7'), L('2'), L('4'), L('8'),
L('9'), L('1'), L('2'), L('2'), L('7'), L('9'), L('3'), L('8'),
L('1'), L('8'), L('3'), L('0'), L('1'), L('1'), L('9'), L('4'),
L('9'), L('1'), L('2'), L('9'), L('8'), L('3'), L('3'), L('6'),
L('7'), L('3'), L('3'), L('6'), L('2'), L('4'), L('4'), L('0'),
L('6'), L('5'), L('6'), L('6'), L('4'), L('3'), L('0'), L('8'),
L('6'), L('0'), L('2'), L('1'), L('3'), L('9'), L('4'), L('9'),
L('4'), L('6'), L('3'), L('9'), L('5'), L('2'), L('2'), L('4'),
L('7'), L('3'), L('7'), L('1'), L('9'), L('0'), L('7'), L('0'),
L('2'), L('1'), L('7'), L('9'), L('8'), M(154, 5), L('7'), L('0'),
L('2'), L('7'), L('7'), L('0'), L('5'), L('3'), L('9'), L('2'),
L('1'), L('7'), L('1'), L('7'), L('6'), L('2'), L('9'), L('3'),
L('1'), L('7'), L('6'), L('7'), L('5'), M(563, 5), L('7'), L('4'),
L('8'), L('1'), M(7, 4), L('6'), L('6'), L('9'), L('4'), L('0'),
M(488, 4), L('0'), L('0'), L('0'), L('5'), L('6'), L('8'), L('1'),
L('2'), L('7'), L('1'), L('4'), L('5'), L('2'), L('6'), L('3'),
L('5'), L('6'), L('0'), L('8'), L('2'), L('7'), L('7'), L('8'),
L('5'), L('7'), L('7'), L('1'), L('3'), L('4'), L('2'), L('7'),
L('5'), L('7'), L('7'), L('8'), L('9'), L('6'), M(298, 4), L('3'),
L('6'), L('3'), L('7'), L('1'), L('7'), L('8'), L('7'), L('2'),
L('1'), L('4'), L('6'), L('8'), L('4'), L('4'), L('0'), L('9'),
L('0'), L('1'), L('2'), L('2'), L('4'), L('9'), L('5'), L('3'),
L('4'), L('3'), L('0'), L('1'), L('4'), L('6'), L('5'), L('4'),
L('9'), L('5'), L('8'), L('5'), L('3'), L('7'), L('1'), L('0'),
L('5'), L('0'), L('7'), L('9'), M(203, 4), L('6'), M(340, 4), L('8'),
L('9'), L('2'), L('3'), L('5'), L('4'), M(458, 4), L('9'), L('5'),
L('6'), L('1'), L('1'), L('2'), L('1'), L('2'), L('9'), L('0'),
L('2'), L('1'), L('9'), L('6'), L('0'), L('8'), L('6'), L('4'),
L('0'), L('3'), L('4'), L('4'), L('1'), L('8'), L('1'), L('5'),
L('9'), L('8'), L('1'), L('3'), L('6'), L('2'), L('9'), L('7'),
L('7'), L('4'), M(117, 4), L('0'), L('9'), L('9'), L('6'), L('0'),
L('5'), L('1'), L('8'), L('7'), L('0'), L('7'), L('2'), L('1'),
L('1'), L('3'), L('4'), L('9'), M(1, 5), L('8'), L('3'), L('7'),
L('2'), L('9'), L('7'), L('8'), L('0'), L('4'), L('9'), L('9'),
M(731, 4), L('9'), L('7'), L('3'), L('1'), L('7'), L('3'), L('2'),
L('8'), M(395, 4), L('6'), L('3'), L('1'), L('8'), L('5'), M(770, 4),
M(745, 4), L('4'), L('5'), L('5'), L('3'), L('4'), L('6'), L('9'),
L('0'), L('8'), L('3'), L('0'), L('2'), L('6'), L('4'), L('2'),
L('5'), L('2'), L('2'), L('3'), L('0'), M(740, 4), M(616, 4), L('8'),
L('5'), L('0'), L('3'), L('5'), L('2'), L('6'), L('1'), L('9'),
L('3'), L('1'), L('1'), M(531, 4), L('1'), L('0'), L('1'), L('0'),
L('0'), L('0'), L('3'), L('1'), L('3'), L('7'), L('8'), L('3'),
L('8'), L('7'), L('5'), L('2'), L('8'), L('8'), L('6'), L('5'),
L('8'), L('7'), L('5'), L('3'), L('3'), L('2'), L('0'), L('8'),
L('3'), L('8'), L('1'), L('4'), L('2'), L('0'), L('6'), M(321, 4),
M(300, 4), L('1'), L('4'), L('7'), L('3'), L('0'), L('3'), L('5'),
L('9'), M(815, 5), L('9'), L('0'), L('4'), L('2'), L('8'), L('7'),
L('5'), L('5'), L('4'), L('6'), L('8'), L('7'), L('3'), L('1'),
L('1'), L('5'), L('9'), L('5'), M(854, 4), L('3'), L('8'), L('8'),
L('2'), L('3'), L('5'), L('3'), L('7'), L('8'), L('7'), L('5'),
M(896, 5), L('9'), M(315, 4), L('1'), M(329, 4), L('8'), L('0'), L('5'),
L('3'), M(395, 4), L('2'), L('2'), L('6'), L('8'), L('0'), L('6'),
L('6'), L('1'), L('3'), L('0'), L('0'), L('1'), L('9'), L('2'),
L('7'), L('8'), L('7'), L('6'), L('6'), L('1'), L('1'), L('1'),
L('9'), L('5'), L('9'), M(568, 4), L('6'), M(293, 5), L('8'), L('9'),
L('3'), L('8'), L('0'), L('9'), L('5'), L('2'), L('5'), L('7'),
L('2'), L('0'), L('1'), L('0'), L('6'), L('5'), L('4'), L('8'),
L('5'), L('8'), L('6'), L('3'), L('2'), L('7'), M(155, 4), L('9'),
L('3'), L('6'), L('1'), L('5'), L('3'), M(545, 4), M(349, 5), L('2'),
L('3'), L('0'), L('3'), L('0'), L('1'), L('9'), L('5'), L('2'),
L('0'), L('3'), L('5'), L('3'), L('0'), L('1'), L('8'), L('5'),
L('2'), M(370, 4), M(118, 4), L('3'), L('6'), L('2'), L('2'), L('5'),
L('9'), L('9'), L('4'), L('1'), L('3'), M(597, 4), L('4'), L('9'),
L('7'), L('2'), L('1'), L('7'), M(223, 4), L('3'), L('4'), L('7'),
L('9'), L('1'), L('3'), L('1'), L('5'), L('1'), L('5'), L('5'),
L('7'), L('4'), L('8'), L('5'), L('7'), L('2'), L('4'), L('2'),
L('4'), L('5'), L('4'), L('1'), L('5'), L('0'), L('6'), L('9'),
M(320, 4), L('8'), L('2'), L('9'), L('5'), L('3'), L('3'), L('1'),
L('1'), L('6'), L('8'), L('6'), L('1'), L('7'), L('2'), L('7'),
L('8'), M(824, 4), L('9'), L('0'), L('7'), L('5'), L('0'), L('9'),
M(270, 4), L('7'), L('5'), L('4'), L('6'), L('3'), L('7'), L('4'),
L('6'), L('4'), L('9'), L('3'), L('9'), L('3'), L('1'), L('9'),
L('2'), L('5'), L('5'), L('0'), L('6'), L('0'), L('4'), L('0'),
L('0'), L('9'), M(620, 4), L('1'), L('6'), L('7'), L('1'), L('1'),
L('3'), L('9'), L('0'), L('0'), L('9'), L('8'), M(822, 4), L('4'),
L('0'), L('1'), L('2'), L('8'), L('5'), L('8'), L('3'), L('6'),
L('1'), L('6'), L('0'), L('3'), L('5'), L('6'), L('3'), L('7'),
L('0'), L('7'), L('6'), L('6'), L('0'), L('1'), L('0'), L('4'),
M(371, 4), L('8'), L('1'), L('9'), L('4'), L('2'), L('9'), M(1055, 5),
M(240, 4), M(652, 4), L('7'), L('8'), L('3'), L('7'), L('4'), M(1193, 4),
L('8'), L('2'), L('5'), L('5'), L('3'), L('7'), M(522, 5), L('2'),
L('6'), L('8'), M(47, 4), L('4'), L('0'), L('4'), L('7'), M(466, 4),
L('4'), M(1206, 4), M(910, 4), L('8'), L('4'), M(937, 4), L('6'), M(800, 6),
L('3'), L('3'), L('1'), L('3'), L('6'), L('7'), L('7'), L('0'),
L('2'), L('8'), L('9'), L('8'), L('9'), L('1'), L('5'), L('2'),
M(99, 4), L('5'), L('2'), L('1'), L('6'), L('2'), L('0'), L('5'),
L('6'), L('9'), L('6'), M(1042, 4), L('0'), L('5'), L('8'), M(1144, 4),
L('5'), M(1177, 4), L('5'), L('1'), L('1'), M(522, 4), L('8'), L('2'),
L('4'), L('3'), L('0'), L('0'), L('3'), L('5'), L('5'), L('8'),
L('7'), L('6'), L('4'), L('0'), L('2'), L('4'), L('7'), L('4'),
L('9'), L('6'), L('4'), L('7'), L('3'), L('2'), L('6'), L('3'),
M(1087, 4), L('9'), L('9'), L('2'), M(1100, 4), L('4'), L('2'), L('6'),
L('9'), M(710, 6), L('7'), M(471, 4), L('4'), M(1342, 4), M(1054, 4), L('9'),
L('3'), L('4'), L('1'), L('7'), M(430, 4), L('1'), L('2'), M(43, 4),
L('4'), M(415, 4), L('1'), L('5'), L('0'), L('3'), L('0'), L('2'),
L('8'), L('6'), L('1'), L('8'), L('2'), L('9'), L('7'), L('4'),
L('5'), L('5'), L('5'), L('7'), L('0'), L('6'), L('7'), L('4'),
M(310, 4), L('5'), L('0'), L('5'), L('4'), L('9'), L('4'), L('5'),
L('8'), M(454, 4), L('9'), M(82, 4), L('5'), L('6'), M(493, 4), L('7'),
L('2'), L('1'), L('0'), L('7'), L('9'), M(346, 4), L('3'), L('0'),
M(267, 4), L('3'), L('2'), L('1'), L('1'), L('6'), L('5'), L('3'),
L('4'), L('4'), L('9'), L('8'), L('7'), L('2'), L('0'), L('2'),
L('7'), M(284, 4), L('0'), L('2'), L('3'), L('6'), L('4'), M(559, 4),
L('5'), L('4'), L('9'), L('9'), L('1'), L('1'), L('9'), L('8'),
M(1049, 4), L('4'), M(284, 4), L('5'), L('3'), L('5'), L('6'), L('6'),
L('3'), L('6'), L('9'), M(1105, 4), L('2'), L('6'), L('5'), M(741, 4),
L('7'), L('8'), L('6'), L('2'), L('5'), L('5'), L('1'), M(987, 4),
L('1'), L('7'), L('5'), L('7'), L('4'), L('6'), L('7'), L('2'),
L('8'), L('9'), L('0'), L('9'), L('7'), L('7'), L('7'), L('7'),
M(1108, 5), L('0'), L('0'), L('0'), M(1534, 4), L('7'), L('0'), M(1248, 4),
L('6'), M(1002, 4), L('4'), L('9'), L('1'), M(1055, 4), M(664, 4), L('2'),
L('1'), L('4'), L('7'), L('7'), L('2'), L('3'), L('5'), L('0'),
L('1'), L('4'), L('1'), L('4'), M(1604, 4), L('3'), L('5'), L('6'),
M(1200, 4), L('1'), L('6'), L('1'), L('3'), L('6'), L('1'), L('1'),
L('5'), L('7'), L('3'), L('5'), L('2'), L('5'), M(1285, 4), L('3'),
L('4'), M(92, 4), L('1'), L('8'), M(1148, 4), L('8'), L('4'), M(1512, 4),
L('3'), L('3'), L('2'), L('3'), L('9'), L('0'), L('7'), L('3'),
L('9'), L('4'), L('1'), L('4'), L('3'), L('3'), L('3'), L('4'),
L('5'), L('4'), L('7'), L('7'), L('6'), L('2'), L('4'), M(579, 4),
L('2'), L('5'), L('1'), L('8'), L('9'), L('8'), L('3'), L('5'),
L('6'), L('9'), L('4'), L('8'), L('5'), L('5'), L('6'), L('2'),
L('0'), L('9'), L('9'), L('2'), L('1'), L('9'), L('2'), L('2'),
L('2'), L('1'), L('8'), L('4'), L('2'), L('7'), M(575, 4), L('2'),
M(187, 4), L('6'), L('8'), L('8'), L('7'), L('6'), L('7'), L('1'),
L('7'), L('9'), L('0'), M(86, 4), L('0'), M(263, 5), L('6'), L('6'),
M(1000, 4), L('8'), L('8'), L('6'), L('2'), L('7'), L('2'), M(1757, 4),
L('1'), L('7'), L('8'), L('6'), L('0'), L('8'), L('5'), L('7'),
M(116, 4), L('3'), M(765, 5), L('7'), L('9'), L('7'), L('6'), L('6'),
L('8'), L('1'), M(702, 4), L('0'), L('0'), L('9'), L('5'), L('3'),
L('8'), L('8'), M(1593, 4), L('3'), M(1702, 4), L('0'), L('6'), L('8'),
L('0'), L('0'), L('6'), L('4'), L('2'), L('2'), L('5'), L('1'),
L('2'), L('5'), L('2'), M(1404, 4), L('7'), L('3'), L('9'), L('2'),
M(664, 4), M(1141, 4), L('4'), M(1716, 5), L('8'), L('6'), L('2'), L('6'),
L('9'), L('4'), L('5'), M(486, 4), L('4'), L('1'), L('9'), L('6'),
L('5'), L('2'), L('8'), L('5'), L('0'), M(154, 4), M(925, 4), L('1'),
L('8'), L('6'), L('3'), M(447, 4), L('4'), M(341, 5), L('2'), L('0'),
L('3'), L('9'), M(1420, 4), L('4'), L('5'), M(701, 4), L('2'), L('3'),
L('7'), M(1069, 4), L('6'), M(1297, 4), L('5'), L('6'), M(1593, 4), L('7'),
L('1'), L('9'), L('1'), L('7'), L('2'), L('8'), M(370, 4), L('7'),
L('6'), L('4'), L('6'), L('5'), L('7'), L('5'), L('7'), L('3'),
L('9'), M(258, 4), L('3'), L('8'), L('9'), M(1865, 4), L('8'), L('3'),
L('2'), L('6'), L('4'), L('5'), L('9'), L('9'), L('5'), L('8'),
M(1704, 4), L('0'), L('4'), L('7'), L('8'), M(479, 4), M(809, 4), L('9'),
M(46, 4), L('6'), L('4'), L('0'), L('7'), L('8'), L('9'), L('5'),
L('1'), M(143, 4), L('6'), L('8'), L('3'), M(304, 4), L('2'), L('5'),
L('9'), L('5'), L('7'), L('0'), M(1129, 4), L('8'), L('2'), L('2'),
M(713, 4), L('2'), M(1564, 4), L('4'), L('0'), L('7'), L('7'), L('2'),
L('6'), L('7'), L('1'), L('9'), L('4'), L('7'), L('8'), M(794, 4),
L('8'), L('2'), L('6'), L('0'), L('1'), L('4'), L('7'), L('6'),
L('9'), L('9'), L('0'), L('9'), M(1257, 4), L('0'), L('1'), L('3'),
L('6'), L('3'), L('9'), L('4'), L('4'), L('3'), M(640, 4), L('3'),
L('0'), M(262, 4), L('2'), L('0'), L('3'), L('4'), L('9'), L('6'),
L('2'), L('5'), L('2'), L('4'), L('5'), L('1'), L('7'), M(950, 4),
L('9'), L('6'), L('5'), L('1'), L('4'), L('3'), L('1'), L('4'),
L('2'), L('9'), L('8'), L('0'), L('9'), L('1'), L('9'), L('0'),
L('6'), L('5'), L('9'), L('2'), M(643, 4), L('7'), L('2'), L('2'),
L('1'), L('6'), L('9'), L('6'), L('4'), L('6'), M(1050, 4), M(123, 4),
L('5'), M(1295, 4), L('4'), M(1382, 5), L('8'), M(1370, 4), L('9'), L('7'),
M(1404, 4), L('5'), L('4'), M(1182, 4), M(575, 4), L('7'), M(1627, 4), L('8'),
L('4'), L('6'), L('8'), L('1'), L('3'), M(141, 4), L('6'), L('8'),
L('3'), L('8'), L('6'), L('8'), L('9'), L('4'), L('2'), L('7'),
L('7'), L('4'), L('1'), L('5'), L('5'), L('9'), L('9'), L('1'),
L('8'), L('5'), M(91, 4), L('2'), L('4'), L('5'), L('9'), L('5'),
L('3'), L('9'), L('5'), L('9'), L('4'), L('3'), L('1'), M(1464, 4),
L('7'), M(19, 4), L('6'), L('8'), L('0'), L('8'), L('4'), L('5'),
M(744, 4), L('7'), L('3'), M(2079, 4), L('9'), L('5'), L('8'), L('4'),
L('8'), L('6'), L('5'), L('3'), L('8'), M(1769, 4), L('6'), L('2'),
M(243, 4), L('6'), L('0'), L('9'), M(1207, 4), L('6'), L('0'), L('8'),
L('0'), L('5'), L('1'), L('2'), L('4'), L('3'), L('8'), L('8'),
L('4'), M(315, 4), M(12, 4), L('4'), L('1'), L('3'), M(784, 4), L('7'),
L('6'), L('2'), L('7'), L('8'), M(834, 4), L('7'), L('1'), L('5'),
M(1436, 4), L('3'), L('5'), L('9'), L('9'), L('7'), L('7'), L('0'),
L('0'), L('1'), L('2'), L('9'), M(1139, 4), L('8'), L('9'), L('4'),
L('4'), L('1'), M(632, 4), L('6'), L('8'), L('5'), L('5'), M(96, 4),
L('4'), L('0'), L('6'), L('3'), M(2279, 4), L('2'), L('0'), L('7'),
L('2'), L('2'), M(345, 4), M(516, 5), L('4'), L('8'), L('1'), L('5'),
L('8'), M(518, 4), M(511, 4), M(635, 4), M(665, 4), L('3'), L('9'), L('4'),
L('5'), L('2'), L('2'), L('6'), L('7'), M(1175, 6), L('8'), M(1419, 4),
L('2'), L('1'), M(747, 4), L('2'), M(904, 4), L('5'), L('4'), L('6'),
L('6'), L('6'), M(1308, 4), L('2'), L('3'), L('9'), L('8'), L('6'),
L('4'), L('5'), L('6'), M(1221, 4), L('1'), L('6'), L('3'), L('5'),
M(596, 5), M(2066, 4), L('7'), M(2222, 4), L('9'), L('8'), M(1119, 4), L('9'),
L('3'), L('6'), L('3'), L('4'), M(1884, 4), L('7'), L('4'), L('3'),
L('2'), L('4'), M(1148, 4), L('1'), L('5'), L('0'), L('7'), L('6'),
M(1212, 4), L('7'), L('9'), L('4'), L('5'), L('1'), L('0'), L('9'),
M(63, 4), L('0'), L('9'), L('4'), L('0'), M(1703, 4), L('8'), L('8'),
L('7'), L('9'), L('7'), L('1'), L('0'), L('8'), L('9'), L('3'),
M(2289, 4), L('6'), L('9'), L('1'), L('3'), L('6'), L('8'), L('6'),
L('7'), L('2'), M(604, 4), M(511, 4), L('5'), M(1344, 4), M(1129, 4), M(2050, 4),
L('1'), L('7'), L('9'), L('2'), L('8'), L('6'), L('8'), M(2253, 4),
L('8'), L('7'), L('4'), L('7'), M(1951, 5), L('8'), L('2'), L('4'),
M(2427, 4), L('8'), M(604, 4), L('7'), L('1'), L('4'), L('9'), L('0'),
L('9'), L('6'), L('7'), L('5'), L('9'), L('8'), M(1776, 4), L('3'),
L('6'), L('5'), M(309, 4), L('8'), L('1'), M(93, 4), M(1862, 4), M(2359, 4),
L('6'), L('8'), L('2'), L('9'), M(1407, 4), L('8'), L('7'), L('2'),
L('2'), L('6'), L('5'), L('8'), L('8'), L('0'), M(1554, 4), L('5'),
M(586, 4), L('4'), L('2'), L('7'), L('0'), L('4'), L('7'), L('7'),
L('5'), L('5'), M(2079, 4), L('3'), L('7'), L('9'), L('6'), L('4'),
L('1'), L('4'), L('5'), L('1'), L('5'), L('2'), M(1534, 4), L('2'),
L('3'), L('4'), L('3'), L('6'), L('4'), L('5'), L('4'), M(1503, 4),
L('4'), L('4'), L('4'), L('7'), L('9'), L('5'), M(61, 4), M(1316, 4),
M(2279, 5), L('4'), L('1'), M(1323, 4), L('3'), M(773, 4), L('5'), L('2'),
L('3'), L('1'), M(2114, 5), L('1'), L('6'), L('6'), L('1'), M(2227, 4),
L('5'), L('9'), L('6'), L('9'), L('5'), L('3'), L('6'), L('2'),
L('3'), L('1'), L('4'), M(1536, 4), L('2'), L('4'), L('8'), L('4'),
L('9'), L('3'), L('7'), L('1'), L('8'), L('7'), L('1'), L('1'),
L('0'), L('1'), L('4'), L('5'), L('7'), L('6'), L('5'), L('4'),
M(1890, 4), L('0'), L('2'), L('7'), L('9'), L('9'), L('3'), L('4'),
L('4'), L('0'), L('3'), L('7'), L('4'), L('2'), L('0'), L('0'),
L('7'), M(2368, 4), L('7'), L('8'), L('5'), L('3'), L('9'), L('0'),
L('6'), L('2'), L('1'), L('9'), M(666, 5), M(838, 4), L('8'), L('4'),
L('7'), M(979, 5), L('8'), L('3'), L('3'), L('2'), L('1'), L('4'),
L('4'), L('5'), L('7'), L('1'), M(645, 4), M(1911, 4), L('4'), L('3'),
L('5'), L('0'), M(2345, 4), M(1129, 4), L('5'), L('3'), L('1'), L('9'),
L('1'), L('0'), L('4'), L('8'), L('4'), L('8'), L('1'), L('0'),
L('0'), L('5'), L('3'), L('7'), L('0'), L('6'), M(2237, 4), M(1438, 5),
M(1922, 5), L('1'), M(1370, 4), L('7'), M(796, 4), L('5'), M(2029, 4), M(1037, 4),
L('6'), L('3'), M(2013, 5), L('4'), M(2418, 4), M(847, 5), M(1014, 5), L('8'),
M(1326, 5), M(2184, 5), L('9'), M(392, 4), L('9'), L('1'), M(2255, 4), L('8'),
L('1'), L('4'), L('6'), L('7'), L('5'), L('1'), M(1580, 4), L('1'),
L('2'), L('3'), L('9'), M(426, 6), L('9'), L('0'), L('7'), L('1'),
L('8'), L('6'), L('4'), L('9'), L('4'), L('2'), L('3'), L('1'),
L('9'), L('6'), L('1'), L('5'), L('6'), M(493, 4), M(1725, 4), L('9'),
L('5'), M(2343, 4), M(1130, 4), M(284, 4), L('6'), L('0'), L('3'), L('8'),
M(2598, 4), M(368, 4), M(901, 4), L('6'), L('2'), M(1115, 4), L('5'), M(2125, 4),
L('6'), L('3'), L('8'), L('9'), L('3'), L('7'), L('7'), L('8'),
L('7'), M(2246, 4), M(249, 4), L('9'), L('7'), L('9'), L('2'), L('0'),
L('7'), L('7'), L('3'), M(1496, 4), L('2'), L('1'), L('8'), L('2'),
L('5'), L('6'), M(2016, 4), L('6'), L('6'), M(1751, 4), L('4'), L('2'),
M(1663, 5), L('6'), M(1767, 4), L('4'), L('4'), M(37, 4), L('5'), L('4'),
L('9'), L('2'), L('0'), L('2'), L('6'), L('0'), L('5'), M(2740, 4),
M(997, 5), L('2'), L('0'), L('1'), L('4'), L('9'), M(1235, 4), L('8'),
L('5'), L('0'), L('7'), L('3'), M(1434, 4), L('6'), L('6'), L('6'),
L('0'), M(405, 4), L('2'), L('4'), L('3'), L('4'), L('0'), M(136, 4),
L('0'), M(1900, 4), L('8'), L('6'), L('3'), M(2391, 4), M(2021, 4), M(1068, 4),
M(373, 4), L('5'), L('7'), L('9'), L('6'), L('2'), L('6'), L('8'),
L('5'), L('6'), M(321, 4), L('5'), L('0'), L('8'), M(1316, 4), L('5'),
L('8'), L('7'), L('9'), L('6'), L('9'), L('9'), M(1810, 4), L('5'),
L('7'), L('4'), M(2585, 4), L('8'), L('4'), L('0'), M(2228, 4), L('1'),
L('4'), L('5'), L('9'), L('1'), M(1933, 4), L('7'), L('0'), M(565, 4),
L('0'), L('1'), M(3048, 4), L('1'), L('2'), M(3189, 4), L('0'), M(964, 4),
L('3'), L('9'), M(2859, 4), M(275, 4), L('7'), L('1'), L('5'), M(945, 4),
L('4'), L('2'), L('0'), M(3059, 5), L('9'), M(3011, 4), L('0'), L('7'),
M(834, 4), M(1942, 4), M(2736, 4), M(3171, 4), L('2'), L('1'), M(2401, 4), L('2'),
L('5'), L('1'), M(1404, 4), M(2373, 4), L('9'), L('2'), M(435, 4), L('8'),
L('2'), L('6'), M(2919, 4), L('2'), M(633, 4), L('3'), L('2'), L('1'),
L('5'), L('7'), L('9'), L('1'), L('9'), L('8'), L('4'), L('1'),
L('4'), M(2172, 5), L('9'), L('1'), L('6'), L('4'), M(1769, 5), L('9'),
M(2905, 5), M(2268, 4), L('7'), L('2'), L('2'), M(802, 4), L('5'), M(2213, 4),
M(322, 4), L('9'), L('1'), L('0'), M(189, 4), M(3164, 4), L('5'), L('2'),
L('8'), L('0'), L('1'), L('7'), M(562, 4), L('7'), L('1'), L('2'),
M(2325, 4), L('8'), L('3'), L('2'), M(884, 4), L('1'), M(1418, 4), L('0'),
L('9'), L('3'), L('5'), L('3'), L('9'), L('6'), L('5'), L('7'),
M(1612, 4), L('1'), L('0'), L('8'), L('3'), M(106, 4), L('5'), L('1'),
M(1915, 4), M(3419, 4), L('1'), L('4'), L('4'), L('4'), L('2'), L('1'),
L('0'), L('0'), M(515, 4), L('0'), L('3'), M(413, 4), L('1'), L('1'),
L('0'), L('3'), M(3202, 4), M(10, 4), M(39, 4), M(1539, 6), L('5'), L('1'),
L('6'), M(1498, 4), M(2180, 5), M(2347, 4), L('5'), M(3139, 5), L('8'), L('5'),
L('1'), L('7'), L('1'), L('4'), L('3'), L('7'), M(1542, 4), M(110, 4),
L('1'), L('5'), L('5'), L('6'), L('5'), L('0'), L('8'), L('8'),
M(954, 4), L('9'), L('8'), L('9'), L('8'), L('5'), L('9'), L('9'),
L('8'), L('2'), L('3'), L('8'), M(464, 4), M(2491, 4), L('3'), M(365, 4),
M(1087, 4), M(2500, 4), L('8'), M(3590, 5), L('3'), L('2'), M(264, 4), L('5'),
M(774, 4), L('3'), M(459, 4), L('9'), M(1052, 4), L('9'), L('8'), M(2174, 4),
L('4'), M(3257, 4), L('7'), M(1612, 4), L('0'), L('7'), M(230, 4), L('4'),
L('8'), L('1'), L('4'), L('1'), M(1338, 4), L('8'), L('5'), L('9'),
L('4'), L('6'), L('1'), M(3018, 4), L('8'), L('0'),
},
},
TestCase{
.input = "huffman-rand-1k.input",
.want = "huffman-rand-1k.{s}.expect",
.want_no_input = "huffman-rand-1k.{s}.expect-noinput",
.tokens = &[_]Token{
L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xd), L(0x85), L(0x94), L(0x25), L(0x80), L(0xaf), L(0xc2), L(0xfe), L(0x8d),
L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde), L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b),
L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73), L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4),
L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15), L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde),
L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9), L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14),
L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xd), L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c),
L(0x99), L(0xdf), L(0xfd), L(0x70), L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5),
L(0xd4), L(0x80), L(0x59), L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20),
L(0x1b), L(0x46), L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4),
L(0x7b), L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f), L(0xa0),
L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4), L(0x75), L(0xda),
L(0xea), L(0x9b), L(0xa), L(0x64), L(0xb), L(0xe0), L(0x23), L(0x29), L(0xbd), L(0xf7), L(0xe7), L(0x83), L(0x3c), L(0xfb),
L(0xdf), L(0xb3), L(0xae), L(0x4f), L(0xa4), L(0x47), L(0x55), L(0x99), L(0xde), L(0x2f), L(0x96), L(0x6e), L(0x1c), L(0x43),
L(0x4c), L(0x87), L(0xe2), L(0x7c), L(0xd9), L(0x5f), L(0x4c), L(0x7c), L(0xe8), L(0x90), L(0x3), L(0xdb), L(0x30), L(0x95),
L(0xd6), L(0x22), L(0xc), L(0x47), L(0xb8), L(0x4d), L(0x6b), L(0xbd), L(0x24), L(0x11), L(0xab), L(0x2c), L(0xd7), L(0xbe),
L(0x6e), L(0x7a), L(0xd6), L(0x8), L(0xa3), L(0x98), L(0xd8), L(0xdd), L(0x15), L(0x6a), L(0xfa), L(0x93), L(0x30), L(0x1),
L(0x25), L(0x1d), L(0xa2), L(0x74), L(0x86), L(0x4b), L(0x6a), L(0x95), L(0xe8), L(0xe1), L(0x4e), L(0xe), L(0x76), L(0xb9),
L(0x49), L(0xa9), L(0x5f), L(0xa0), L(0xa6), L(0x63), L(0x3c), L(0x7e), L(0x7e), L(0x20), L(0x13), L(0x4f), L(0xbb), L(0x66),
L(0x92), L(0xb8), L(0x2e), L(0xa4), L(0xfa), L(0x48), L(0xcb), L(0xae), L(0xb9), L(0x3c), L(0xaf), L(0xd3), L(0x1f), L(0xe1),
L(0xd5), L(0x8d), L(0x42), L(0x6d), L(0xf0), L(0xfc), L(0x8c), L(0xc), L(0x0), L(0xde), L(0x40), L(0xab), L(0x8b), L(0x47),
L(0x97), L(0x4e), L(0xa8), L(0xcf), L(0x8e), L(0xdb), L(0xa6), L(0x8b), L(0x20), L(0x9), L(0x84), L(0x7a), L(0x66), L(0xe5),
L(0x98), L(0x29), L(0x2), L(0x95), L(0xe6), L(0x38), L(0x32), L(0x60), L(0x3), L(0xe3), L(0x9a), L(0x1e), L(0x54), L(0xe8),
L(0x63), L(0x80), L(0x48), L(0x9c), L(0xe7), L(0x63), L(0x33), L(0x6e), L(0xa0), L(0x65), L(0x83), L(0xfa), L(0xc6), L(0xba),
L(0x7a), L(0x43), L(0x71), L(0x5), L(0xf5), L(0x68), L(0x69), L(0x85), L(0x9c), L(0xba), L(0x45), L(0xcd), L(0x6b), L(0xb),
L(0x19), L(0xd1), L(0xbb), L(0x7f), L(0x70), L(0x85), L(0x92), L(0xd1), L(0xb4), L(0x64), L(0x82), L(0xb1), L(0xe4), L(0x62),
L(0xc5), L(0x3c), L(0x46), L(0x1f), L(0x92), L(0x31), L(0x1c), L(0x4e), L(0x41), L(0x77), L(0xf7), L(0xe7), L(0x87), L(0xa2),
L(0xf), L(0x6e), L(0xe8), L(0x92), L(0x3), L(0x6b), L(0xa), L(0xe7), L(0xa9), L(0x3b), L(0x11), L(0xda), L(0x66), L(0x8a),
L(0x29), L(0xda), L(0x79), L(0xe1), L(0x64), L(0x8d), L(0xe3), L(0x54), L(0xd4), L(0xf5), L(0xef), L(0x64), L(0x87), L(0x3b),
L(0xf4), L(0xc2), L(0xf4), L(0x71), L(0x13), L(0xa9), L(0xe9), L(0xe0), L(0xa2), L(0x6), L(0x14), L(0xab), L(0x5d), L(0xa7),
L(0x96), L(0x0), L(0xd6), L(0xc3), L(0xcc), L(0x57), L(0xed), L(0x39), L(0x6a), L(0x25), L(0xcd), L(0x76), L(0xea), L(0xba),
L(0x3a), L(0xf2), L(0xa1), L(0x95), L(0x5d), L(0xe5), L(0x71), L(0xcf), L(0x9c), L(0x62), L(0x9e), L(0x6a), L(0xfa), L(0xd5),
L(0x31), L(0xd1), L(0xa8), L(0x66), L(0x30), L(0x33), L(0xaa), L(0x51), L(0x17), L(0x13), L(0x82), L(0x99), L(0xc8), L(0x14),
L(0x60), L(0x9f), L(0x4d), L(0x32), L(0x6d), L(0xda), L(0x19), L(0x26), L(0x21), L(0xdc), L(0x7e), L(0x2e), L(0x25), L(0x67),
L(0x72), L(0xca), L(0xf), L(0x92), L(0xcd), L(0xf6), L(0xd6), L(0xcb), L(0x97), L(0x8a), L(0x33), L(0x58), L(0x73), L(0x70),
L(0x91), L(0x1d), L(0xbf), L(0x28), L(0x23), L(0xa3), L(0xc), L(0xf1), L(0x83), L(0xc3), L(0xc8), L(0x56), L(0x77), L(0x68),
L(0xe3), L(0x82), L(0xba), L(0xb9), L(0x57), L(0x56), L(0x57), L(0x9c), L(0xc3), L(0xd6), L(0x14), L(0x5), L(0x3c), L(0xb1),
L(0xaf), L(0x93), L(0xc8), L(0x8a), L(0x57), L(0x7f), L(0x53), L(0xfa), L(0x2f), L(0xaa), L(0x6e), L(0x66), L(0x83), L(0xfa),
L(0x33), L(0xd1), L(0x21), L(0xab), L(0x1b), L(0x71), L(0xb4), L(0x7c), L(0xda), L(0xfd), L(0xfb), L(0x7f), L(0x20), L(0xab),
L(0x5e), L(0xd5), L(0xca), L(0xfd), L(0xdd), L(0xe0), L(0xee), L(0xda), L(0xba), L(0xa8), L(0x27), L(0x99), L(0x97), L(0x69),
L(0xc1), L(0x3c), L(0x82), L(0x8c), L(0xa), L(0x5c), L(0x2d), L(0x5b), L(0x88), L(0x3e), L(0x34), L(0x35), L(0x86), L(0x37),
L(0x46), L(0x79), L(0xe1), L(0xaa), L(0x19), L(0xfb), L(0xaa), L(0xde), L(0x15), L(0x9), L(0xd), L(0x1a), L(0x57), L(0xff),
L(0xb5), L(0xf), L(0xf3), L(0x2b), L(0x5a), L(0x6a), L(0x4d), L(0x19), L(0x77), L(0x71), L(0x45), L(0xdf), L(0x4f), L(0xb3),
L(0xec), L(0xf1), L(0xeb), L(0x18), L(0x53), L(0x3e), L(0x3b), L(0x47), L(0x8), L(0x9a), L(0x73), L(0xa0), L(0x5c), L(0x8c),
L(0x5f), L(0xeb), L(0xf), L(0x3a), L(0xc2), L(0x43), L(0x67), L(0xb4), L(0x66), L(0x67), L(0x80), L(0x58), L(0xe), L(0xc1),
L(0xec), L(0x40), L(0xd4), L(0x22), L(0x94), L(0xca), L(0xf9), L(0xe8), L(0x92), L(0xe4), L(0x69), L(0x38), L(0xbe), L(0x67),
L(0x64), L(0xca), L(0x50), L(0xc7), L(0x6), L(0x67), L(0x42), L(0x6e), L(0xa3), L(0xf0), L(0xb7), L(0x6c), L(0xf2), L(0xe8),
L(0x5f), L(0xb1), L(0xaf), L(0xe7), L(0xdb), L(0xbb), L(0x77), L(0xb5), L(0xf8), L(0xcb), L(0x8), L(0xc4), L(0x75), L(0x7e),
L(0xc0), L(0xf9), L(0x1c), L(0x7f), L(0x3c), L(0x89), L(0x2f), L(0xd2), L(0x58), L(0x3a), L(0xe2), L(0xf8), L(0x91), L(0xb6),
L(0x7b), L(0x24), L(0x27), L(0xe9), L(0xae), L(0x84), L(0x8b), L(0xde), L(0x74), L(0xac), L(0xfd), L(0xd9), L(0xb7), L(0x69),
L(0x2a), L(0xec), L(0x32), L(0x6f), L(0xf0), L(0x92), L(0x84), L(0xf1), L(0x40), L(0xc), L(0x8a), L(0xbc), L(0x39), L(0x6e),
L(0x2e), L(0x73), L(0xd4), L(0x6e), L(0x8a), L(0x74), L(0x2a), L(0xdc), L(0x60), L(0x1f), L(0xa3), L(0x7), L(0xde), L(0x75),
L(0x8b), L(0x74), L(0xc8), L(0xfe), L(0x63), L(0x75), L(0xf6), L(0x3d), L(0x63), L(0xac), L(0x33), L(0x89), L(0xc3), L(0xf0),
L(0xf8), L(0x2d), L(0x6b), L(0xb4), L(0x9e), L(0x74), L(0x8b), L(0x5c), L(0x33), L(0xb4), L(0xca), L(0xa8), L(0xe4), L(0x99),
L(0xb6), L(0x90), L(0xa1), L(0xef), L(0xf), L(0xd3), L(0x61), L(0xb2), L(0xc6), L(0x1a), L(0x94), L(0x7c), L(0x44), L(0x55),
L(0xf4), L(0x45), L(0xff), L(0x9e), L(0xa5), L(0x5a), L(0xc6), L(0xa0), L(0xe8), L(0x2a), L(0xc1), L(0x8d), L(0x6f), L(0x34),
L(0x11), L(0xb9), L(0xbe), L(0x4e), L(0xd9), L(0x87), L(0x97), L(0x73), L(0xcf), L(0x3d), L(0x23), L(0xae), L(0xd5), L(0x1a),
L(0x5e), L(0xae), L(0x5d), L(0x6a), L(0x3), L(0xf9), L(0x22), L(0xd), L(0x10), L(0xd9), L(0x47), L(0x69), L(0x15), L(0x3f),
L(0xee), L(0x52), L(0xa3), L(0x8), L(0xd2), L(0x3c), L(0x51), L(0xf4), L(0xf8), L(0x9d), L(0xe4), L(0x98), L(0x89), L(0xc8),
L(0x67), L(0x39), L(0xd5), L(0x5e), L(0x35), L(0x78), L(0x27), L(0xe8), L(0x3c), L(0x80), L(0xae), L(0x79), L(0x71), L(0xd2),
L(0x93), L(0xf4), L(0xaa), L(0x51), L(0x12), L(0x1c), L(0x4b), L(0x1b), L(0xe5), L(0x6e), L(0x15), L(0x6f), L(0xe4), L(0xbb),
L(0x51), L(0x9b), L(0x45), L(0x9f), L(0xf9), L(0xc4), L(0x8c), L(0x2a), L(0xfb), L(0x1a), L(0xdf), L(0x55), L(0xd3), L(0x48),
L(0x93), L(0x27), L(0x1), L(0x26), L(0xc2), L(0x6b), L(0x55), L(0x6d), L(0xa2), L(0xfb), L(0x84), L(0x8b), L(0xc9), L(0x9e),
L(0x28), L(0xc2), L(0xef), L(0x1a), L(0x24), L(0xec), L(0x9b), L(0xae), L(0xbd), L(0x60), L(0xe9), L(0x15), L(0x35), L(0xee),
L(0x42), L(0xa4), L(0x33), L(0x5b), L(0xfa), L(0xf), L(0xb6), L(0xf7), L(0x1), L(0xa6), L(0x2), L(0x4c), L(0xca), L(0x90),
L(0x58), L(0x3a), L(0x96), L(0x41), L(0xe7), L(0xcb), L(0x9), L(0x8c), L(0xdb), L(0x85), L(0x4d), L(0xa8), L(0x89), L(0xf3),
L(0xb5), L(0x8e), L(0xfd), L(0x75), L(0x5b), L(0x4f), L(0xed), L(0xde), L(0x3f), L(0xeb), L(0x38), L(0xa3), L(0xbe), L(0xb0),
L(0x73), L(0xfc), L(0xb8), L(0x54), L(0xf7), L(0x4c), L(0x30), L(0x67), L(0x2e), L(0x38), L(0xa2), L(0x54), L(0x18), L(0xba),
L(0x8), L(0xbf), L(0xf2), L(0x39), L(0xd5), L(0xfe), L(0xa5), L(0x41), L(0xc6), L(0x66), L(0x66), L(0xba), L(0x81), L(0xef),
L(0x67), L(0xe4), L(0xe6), L(0x3c), L(0xc), L(0xca), L(0xa4), L(0xa), L(0x79), L(0xb3), L(0x57), L(0x8b), L(0x8a), L(0x75),
L(0x98), L(0x18), L(0x42), L(0x2f), L(0x29), L(0xa3), L(0x82), L(0xef), L(0x9f), L(0x86), L(0x6), L(0x23), L(0xe1), L(0x75),
L(0xfa), L(0x8), L(0xb1), L(0xde), L(0x17), L(0x4a),
},
},
TestCase{
.input = "huffman-rand-limit.input",
.want = "huffman-rand-limit.{s}.expect",
.want_no_input = "huffman-rand-limit.{s}.expect-noinput",
.tokens = &[_]Token{
L(0x61), M(1, 74), L(0xa), L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xa), L(0x85), L(0x94), L(0x25), L(0x80),
L(0xaf), L(0xc2), L(0xfe), L(0x8d), L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde),
L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b), L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73),
L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4), L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15),
L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde), L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9),
L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14), L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xa),
L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c), L(0x99), L(0xdf), L(0xfd), L(0x70),
L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5), L(0xd4), L(0x80), L(0x59),
L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20), L(0x1b), L(0x46),
L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4), L(0x7b),
L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f),
L(0xa0), L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4),
L(0x75), L(0xda), L(0xea), L(0x9b), L(0xa),
},
},
TestCase{
.input = "huffman-shifts.input",
.want = "huffman-shifts.{s}.expect",
.want_no_input = "huffman-shifts.{s}.expect-noinput",
.tokens = &[_]Token{
L('1'), L('0'), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
M(2, 258), M(2, 76), L(0xd), L(0xa), L('2'), L('3'), M(2, 258), M(2, 258),
M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 256),
},
},
TestCase{
.input = "huffman-text-shift.input",
.want = "huffman-text-shift.{s}.expect",
.want_no_input = "huffman-text-shift.{s}.expect-noinput",
.tokens = &[_]Token{
L('/'), L('/'), L('C'), L('o'), L('p'), L('y'), L('r'), L('i'),
L('g'), L('h'), L('t'), L('2'), L('0'), L('0'), L('9'), L('T'),
L('h'), L('G'), L('o'), L('A'), L('u'), L('t'), L('h'), L('o'),
L('r'), L('.'), L('A'), L('l'), L('l'), M(23, 5), L('r'), L('r'),
L('v'), L('d'), L('.'), L(0xd), L(0xa), L('/'), L('/'), L('U'),
L('o'), L('f'), L('t'), L('h'), L('i'), L('o'), L('u'), L('r'),
L('c'), L('c'), L('o'), L('d'), L('i'), L('g'), L('o'), L('v'),
L('r'), L('n'), L('d'), L('b'), L('y'), L('B'), L('S'), L('D'),
L('-'), L('t'), L('y'), L('l'), M(33, 4), L('l'), L('i'), L('c'),
L('n'), L('t'), L('h'), L('t'), L('c'), L('n'), L('b'), L('f'),
L('o'), L('u'), L('n'), L('d'), L('i'), L('n'), L('t'), L('h'),
L('L'), L('I'), L('C'), L('E'), L('N'), L('S'), L('E'), L('f'),
L('i'), L('l'), L('.'), L(0xd), L(0xa), L(0xd), L(0xa), L('p'),
L('c'), L('k'), L('g'), L('m'), L('i'), L('n'), M(11, 4), L('i'),
L('m'), L('p'), L('o'), L('r'), L('t'), L('"'), L('o'), L('"'),
M(13, 4), L('f'), L('u'), L('n'), L('c'), L('m'), L('i'), L('n'),
L('('), L(')'), L('{'), L(0xd), L(0xa), L(0x9), L('v'), L('r'),
L('b'), L('='), L('m'), L('k'), L('('), L('['), L(']'), L('b'),
L('y'), L('t'), L(','), L('6'), L('5'), L('5'), L('3'), L('5'),
L(')'), L(0xd), L(0xa), L(0x9), L('f'), L(','), L('_'), L(':'),
L('='), L('o'), L('.'), L('C'), L('r'), L('t'), L('('), L('"'),
L('h'), L('u'), L('f'), L('f'), L('m'), L('n'), L('-'), L('n'),
L('u'), L('l'), L('l'), L('-'), L('m'), L('x'), L('.'), L('i'),
L('n'), L('"'), M(34, 5), L('.'), L('W'), L('r'), L('i'), L('t'),
L('('), L('b'), L(')'), L(0xd), L(0xa), L('}'), L(0xd), L(0xa),
L('A'), L('B'), L('C'), L('D'), L('E'), L('F'), L('G'), L('H'),
L('I'), L('J'), L('K'), L('L'), L('M'), L('N'), L('O'), L('P'),
L('Q'), L('R'), L('S'), L('T'), L('U'), L('V'), L('X'), L('x'),
L('y'), L('z'), L('!'), L('"'), L('#'), L(0xc2), L(0xa4), L('%'),
L('&'), L('/'), L('?'), L('"'),
},
},
TestCase{
.input = "huffman-text.input",
.want = "huffman-text.{s}.expect",
.want_no_input = "huffman-text.{s}.expect-noinput",
.tokens = &[_]Token{
L('/'), L('/'), L(' '), L('z'), L('i'), L('g'), L(' '), L('v'),
L('0'), L('.'), L('1'), L('0'), L('.'), L('0'), L(0xa), L('/'),
L('/'), L(' '), L('c'), L('r'), L('e'), L('a'), L('t'), L('e'),
L(' '), L('a'), L(' '), L('f'), L('i'), L('l'), L('e'), M(5, 4),
L('l'), L('e'), L('d'), L(' '), L('w'), L('i'), L('t'), L('h'),
L(' '), L('0'), L('x'), L('0'), L('0'), L(0xa), L('c'), L('o'),
L('n'), L('s'), L('t'), L(' '), L('s'), L('t'), L('d'), L(' '),
L('='), L(' '), L('@'), L('i'), L('m'), L('p'), L('o'), L('r'),
L('t'), L('('), L('"'), L('s'), L('t'), L('d'), L('"'), L(')'),
L(';'), L(0xa), L(0xa), L('p'), L('u'), L('b'), L(' '), L('f'),
L('n'), L(' '), L('m'), L('a'), L('i'), L('n'), L('('), L(')'),
L(' '), L('!'), L('v'), L('o'), L('i'), L('d'), L(' '), L('{'),
L(0xa), L(' '), L(' '), L(' '), L(' '), L('v'), L('a'), L('r'),
L(' '), L('b'), L(' '), L('='), L(' '), L('['), L('1'), L(']'),
L('u'), L('8'), L('{'), L('0'), L('}'), L(' '), L('*'), L('*'),
L(' '), L('6'), L('5'), L('5'), L('3'), L('5'), L(';'), M(31, 5),
M(86, 6), L('f'), L(' '), L('='), L(' '), L('t'), L('r'), L('y'),
M(94, 4), L('.'), L('f'), L('s'), L('.'), L('c'), L('w'), L('d'),
L('('), L(')'), L('.'), M(144, 6), L('F'), L('i'), L('l'), L('e'),
L('('), M(43, 5), M(1, 4), L('"'), L('h'), L('u'), L('f'), L('f'),
L('m'), L('a'), L('n'), L('-'), L('n'), L('u'), L('l'), L('l'),
L('-'), L('m'), L('a'), L('x'), L('.'), L('i'), L('n'), L('"'),
L(','), M(31, 9), L('.'), L('{'), L(' '), L('.'), L('r'), L('e'),
L('a'), L('d'), M(79, 5), L('u'), L('e'), L(' '), L('}'), M(27, 6),
L(')'), M(108, 6), L('d'), L('e'), L('f'), L('e'), L('r'), L(' '),
L('f'), L('.'), L('c'), L('l'), L('o'), L('s'), L('e'), L('('),
M(183, 4), M(22, 4), L('_'), M(124, 7), L('f'), L('.'), L('w'), L('r'),
L('i'), L('t'), L('e'), L('A'), L('l'), L('l'), L('('), L('b'),
L('['), L('0'), L('.'), L('.'), L(']'), L(')'), L(';'), L(0xa),
L('}'), L(0xa),
},
},
TestCase{
.input = "huffman-zero.input",
.want = "huffman-zero.{s}.expect",
.want_no_input = "huffman-zero.{s}.expect-noinput",
.tokens = &[_]Token{ L(0x30), ml, M(1, 49) },
},
TestCase{
.input = "",
.want = "",
.want_no_input = "null-long-match.{s}.expect-noinput",
.tokens = &[_]Token{
L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
ml, ml, ml, M(1, 8),
},
},
};
};
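
The L/M/ml helpers used throughout the token tables above come from the test harness, which is not part of this hunk. A rough sketch of what they stand for follows; the constructor names are assumptions on my part, but the semantics (literal byte, back-reference by distance and length, maximal-length match) follow from comparing the tables against the deleted input files below.

// Hypothetical reconstruction of the test-table helpers; the actual
// names in the harness may differ.
fn L(c: u8) Token {
    return Token.initLiteral(c); // a single literal byte
}
fn M(distance: u16, length: u16) Token {
    return Token.initMatch(distance, length); // back-reference into the window
}
// A match of the maximum DEFLATE length (258) at distance 1, i.e.
// "repeat the previous byte 258 more times".
const ml = M(1, 258);

For example, the huffman-rand-limit table begins with L(0x61), M(1, 74): one literal 'a' followed by a distance-1 match of length 74, which reproduces the run of 75 'a' bytes at the start of the corresponding deleted input file.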

View File

@@ -1 +0,0 @@
3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989380952572010654858632788659361533818279682303019520353018529689957736225994138912497217752834791315155748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035637076601047101819429555961989467678374494482553797747268471040475346462080466842590694912933136770289891521047521620569660240580381501935112533824300355876402474964732639141992726042699227967823547816360093417216412199245863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818347977535663698074265425278625518184175746728909777727938000816470600161452491921732172147723501414419735685481613611573525521334757418494684385233239073941433345477624168625189835694855620992192221842725502542568876717904946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886269456042419652850222106611863067442786220391949450471237137869609563643719172874677646575739624138908658326459958133904780275900994657640789512694683983525957098258226205224894077267194782684826014769909026401363944374553050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382686838689427741559918559252459539594310499725246808459872736446958486538367362226260991246080512438843904512441365497627807977156914359977001296160894416948685558484063534220722258284886481584560285060168427394522674676788952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288797108931456691368672287489405601015033086179286809208747609178249385890097149096759852613655497818931297848216829989487226588048575640142704775551323796414515237462343645428584447952658678210511413547357395231134271661021359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435064302184531910484810053706146806749192781911979399520614196634287544406437451237181921799983910159195618146751426912397489409071864942319615679452080951465502252316038819301420937621378559566389377870830390697920773467221825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539057962685610055081066587969981635747363840525714591028970641401109712062804390397595156771577004203378699360072305587631763594218731251471205329281918261861258673215791984148488291644706095752706957220917567116722910981690915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398315019701651511685171437657618351
55650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180

View File

@@ -1,4 +0,0 @@
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
řvH
…”%€ŻÂţŤč ë†É·ĹŢę}‹ç>Úß˙lsŢĚçmŤIGH°čžň1YŢ4´[ĺŕ <30>[|]o#©
Ľ-#ľŮíul™ßýpfćîٱžn<C5BE>YŐÔ€Y<E282AC>w‰C8ÉŻ02š F=gn×ržN!OĆŕÔ{ŤĄökÜ*“w(ý´bÚ ç«kQC9/ lu>ô5ýC.÷¤uÚę

View File

@@ -1,2 +0,0 @@
101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323

View File

@@ -1,14 +0,0 @@
//Copyright2009ThGoAuthor.Allrightrrvd.
//UofthiourccodigovrndbyBSD-tyl
//licnthtcnbfoundinthLICENSEfil.
pckgmin
import"o"
funcmin(){
vrb=mk([]byt,65535)
f,_:=o.Crt("huffmn-null-mx.in")
f.Writ(b)
}
ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"

View File

@@ -1,14 +0,0 @@
// zig v0.10.0
// create a file filled with 0x00
const std = @import("std");
pub fn main() !void {
var b = [1]u8{0} ** 65535;
const f = try std.fs.cwd().createFile(
"huffman-null-max.in",
.{ .read = true },
);
defer f.close();
_ = try f.writeAll(b[0..]);
}

View File

@@ -1 +0,0 @@
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

View File

@@ -1,66 +0,0 @@
const deflate = @import("flate/deflate.zig");
const inflate = @import("flate/inflate.zig");
/// Decompress compressed data from reader and write plain data to the writer.
pub fn decompress(reader: anytype, writer: anytype) !void {
try inflate.decompress(.gzip, reader, writer);
}
/// Decompressor type
pub fn Decompressor(comptime ReaderType: type) type {
return inflate.Decompressor(.gzip, ReaderType);
}
/// Create Decompressor which will read compressed data from reader.
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
return inflate.decompressor(.gzip, reader);
}
/// Compression level, trades between speed and compression size.
pub const Options = deflate.Options;
/// Compress plain data from reader and write compressed data to the writer.
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
try deflate.compress(.gzip, reader, writer, options);
}
/// Compressor type
pub fn Compressor(comptime WriterType: type) type {
return deflate.Compressor(.gzip, WriterType);
}
/// Create Compressor which outputs compressed data to the writer.
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
return try deflate.compressor(.gzip, writer, options);
}
/// Huffman-only compression, without Lempel-Ziv match searching: faster
/// compression and lower memory requirements, but larger compressed sizes.
pub const huffman = struct {
pub fn compress(reader: anytype, writer: anytype) !void {
try deflate.huffman.compress(.gzip, reader, writer);
}
pub fn Compressor(comptime WriterType: type) type {
return deflate.huffman.Compressor(.gzip, WriterType);
}
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
return deflate.huffman.compressor(.gzip, writer);
}
};
// No compression, store only. The compressed size is slightly larger than the plain input.
pub const store = struct {
pub fn compress(reader: anytype, writer: anytype) !void {
try deflate.store.compress(.gzip, reader, writer);
}
pub fn Compressor(comptime WriterType: type) type {
return deflate.store.Compressor(.gzip, WriterType);
}
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
return deflate.store.compressor(.gzip, writer);
}
};
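
For contrast with the reworked std.Io-based API, here is a minimal sketch of how the reader/writer wrappers deleted above were typically called. The helper names are mine and `in`/`out` stand for any legacy reader/writer pair; the gzip calls follow the signatures in the deleted file.

const std = @import("std");
const gzip = std.compress.gzip; // the pre-rework API deleted above

// Sketch: one-shot decompression of a .gz stream.
fn inflateGzip(in: anytype, out: anytype) !void {
    try gzip.decompress(in, out);
}

// Sketch: one-shot compression; passing `.{}` assumes the default
// compression level in `Options`.
fn deflateGzip(in: anytype, out: anytype) !void {
    try gzip.compress(in, out, .{});
}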

View File

@@ -1,101 +0,0 @@
const deflate = @import("flate/deflate.zig");
const inflate = @import("flate/inflate.zig");
/// Decompress compressed data from reader and write plain data to the writer.
pub fn decompress(reader: anytype, writer: anytype) !void {
try inflate.decompress(.zlib, reader, writer);
}
/// Decompressor type
pub fn Decompressor(comptime ReaderType: type) type {
return inflate.Decompressor(.zlib, ReaderType);
}
/// Create Decompressor which will read compressed data from reader.
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
return inflate.decompressor(.zlib, reader);
}
/// Compression level, trades between speed and compression size.
pub const Options = deflate.Options;
/// Compress plain data from reader and write compressed data to the writer.
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
try deflate.compress(.zlib, reader, writer, options);
}
/// Compressor type
pub fn Compressor(comptime WriterType: type) type {
return deflate.Compressor(.zlib, WriterType);
}
/// Create Compressor which outputs compressed data to the writer.
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
return try deflate.compressor(.zlib, writer, options);
}
/// Huffman-only compression, without Lempel-Ziv match searching: faster
/// compression and lower memory requirements, but larger compressed sizes.
pub const huffman = struct {
pub fn compress(reader: anytype, writer: anytype) !void {
try deflate.huffman.compress(.zlib, reader, writer);
}
pub fn Compressor(comptime WriterType: type) type {
return deflate.huffman.Compressor(.zlib, WriterType);
}
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
return deflate.huffman.compressor(.zlib, writer);
}
};
// No compression, store only. The compressed size is slightly larger than the plain input.
pub const store = struct {
pub fn compress(reader: anytype, writer: anytype) !void {
try deflate.store.compress(.zlib, reader, writer);
}
pub fn Compressor(comptime WriterType: type) type {
return deflate.store.Compressor(.zlib, WriterType);
}
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
return deflate.store.compressor(.zlib, writer);
}
};
test "should not overshoot" {
const std = @import("std");
// Compressed zlib data with extra 4 bytes at the end.
const data = [_]u8{
0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08,
0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34,
0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
};
var stream = std.io.fixedBufferStream(data[0..]);
const reader = stream.reader();
var dcp = decompressor(reader);
var out: [128]u8 = undefined;
// Decompress
var n = try dcp.reader().readAll(out[0..]);
// Expected decompressed data
try std.testing.expectEqual(46, n);
try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]);
// The decompressor doesn't overshoot the underlying reader;
// it leaves it positioned at the end of the compressed data chunk.
try std.testing.expectEqual(data.len - 4, stream.getPos());
try std.testing.expectEqual(0, dcp.unreadBytes());
// 4 bytes after compressed chunk are available in reader.
n = try reader.readAll(out[0..]);
try std.testing.expectEqual(n, 4);
try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]);
}

View File

@@ -89,7 +89,7 @@ pub fn init(input: *Reader, buffer: []u8, options: Options) Decompress {
.stream = stream,
.rebase = rebase,
.discard = discard,
.readVec = Reader.indirectReadVec,
.readVec = readVec,
},
.buffer = buffer,
.seek = 0,
@@ -109,10 +109,24 @@ fn rebase(r: *Reader, capacity: usize) Reader.RebaseError!void {
r.seek -= discard_n;
}
fn discard(r: *Reader, limit: Limit) Reader.Error!usize {
r.rebase(zstd.block_size_max) catch unreachable;
var d: Writer.Discarding = .init(r.buffer);
const n = r.stream(&d.writer, limit) catch |err| switch (err) {
/// This could be improved: when the discarded amount covers an entire
/// frame, skip decoding that frame altogether.
fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
r.rebase(d.window_len) catch unreachable;
var writer: Writer = .{
.vtable = &.{
.drain = std.Io.Writer.Discarding.drain,
.sendFile = std.Io.Writer.Discarding.sendFile,
},
.buffer = r.buffer,
.end = r.end,
};
defer {
r.end = writer.end;
r.seek = r.end;
}
const n = r.stream(&writer, limit) catch |err| switch (err) {
error.WriteFailed => unreachable,
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => return error.EndOfStream,
@@ -121,6 +135,23 @@ fn discard(r: *Reader, limit: Limit) Reader.Error!usize {
return n;
}
fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
_ = data;
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
assert(r.seek == r.end);
r.rebase(d.window_len) catch unreachable;
var writer: Writer = .{
.buffer = r.buffer,
.end = r.end,
.vtable = &.{ .drain = Writer.fixedDrain },
};
r.end += r.vtable.stream(r, &writer, .limited(writer.buffer.len - writer.end)) catch |err| switch (err) {
error.WriteFailed => unreachable,
else => |e| return e,
};
return 0;
}
fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize {
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
const in = d.input;
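
From a consumer's point of view the rewritten discard path is invisible: discarding decompressed bytes simply gets cheaper. A rough usage sketch follows; the `discardAll` method name, the Options default, and the buffer-sizing requirement are assumptions on my part, while the `reader` field and the `discard` vtable entry come from the hunk above.

const std = @import("std");

// Hypothetical helper: skip `skip_n` bytes of decompressed output, then
// stream the remainder to `out`.
fn skipThenStream(
    input: *std.Io.Reader,
    window_buffer: []u8, // assumed large enough for the zstd window
    skip_n: usize,
    out: *std.Io.Writer,
) !void {
    var decompress: std.compress.zstd.Decompress = .init(input, window_buffer, .{});
    try decompress.reader.discardAll(skip_n); // exercises the `discard` implementation above
    _ = try decompress.reader.streamRemaining(out);
}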

View File

@@ -2019,10 +2019,14 @@ pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 {
/// This function is to make it handy to comment out the return and make it
/// into a crash when working on this file.
pub fn bad() error{InvalidDebugInfo} {
if (debug_debug_mode) @panic("bad dwarf");
invalidDebugInfoDetected();
return error.InvalidDebugInfo;
}
fn invalidDebugInfoDetected() void {
if (debug_debug_mode) @panic("bad dwarf");
}
fn missing() error{MissingDebugInfo} {
if (debug_debug_mode) @panic("missing dwarf");
return error.MissingDebugInfo;
@@ -2235,21 +2239,23 @@ pub const ElfModule = struct {
const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
var section_stream = std.io.fixedBufferStream(section_bytes);
const section_reader = section_stream.reader();
const chdr = section_reader.readStruct(elf.Chdr) catch continue;
var section_reader: std.Io.Reader = .fixed(section_bytes);
const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue;
if (chdr.ch_type != .ZLIB) continue;
var zlib_stream = std.compress.zlib.decompressor(section_reader);
const decompressed_section = try gpa.alloc(u8, chdr.ch_size);
errdefer gpa.free(decompressed_section);
const read = zlib_stream.reader().readAll(decompressed_section) catch continue;
assert(read == decompressed_section.len);
var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{});
var decompressed_section: std.ArrayListUnmanaged(u8) = .empty;
defer decompressed_section.deinit(gpa);
decompress.reader.appendRemainingUnlimited(gpa, null, &decompressed_section, std.compress.flate.history_len) catch {
invalidDebugInfoDetected();
continue;
};
if (chdr.ch_size != decompressed_section.items.len) {
invalidDebugInfoDetected();
continue;
}
break :blk .{
.data = decompressed_section,
.data = try decompressed_section.toOwnedSlice(gpa),
.virtual_address = shdr.sh_addr,
.owned = true,
};
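
The same call sequence works for any in-memory zlib stream, not just compressed ELF sections. Here is a minimal sketch mirroring the hunk above; the helper name and error handling are mine, while the `Decompress.init` and `appendRemainingUnlimited` calls are exactly the ones used in the new DWARF path.

const std = @import("std");

// Hypothetical helper: inflate a zlib-wrapped buffer into an owned slice.
fn inflateZlibAlloc(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
    var in: std.Io.Reader = .fixed(compressed);
    var decompress: std.compress.flate.Decompress = .init(&in, .zlib, &.{});
    var out: std.ArrayListUnmanaged(u8) = .empty;
    defer out.deinit(gpa);
    try decompress.reader.appendRemainingUnlimited(gpa, null, &out, std.compress.flate.history_len);
    return try out.toOwnedSlice(gpa);
}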

View File

@@ -1105,22 +1105,6 @@ pub fn deprecatedWriter(file: File) DeprecatedWriter {
return .{ .context = file };
}
/// Deprecated in favor of `Reader` and `Writer`.
pub const SeekableStream = io.SeekableStream(
File,
SeekError,
GetSeekPosError,
seekTo,
seekBy,
getPos,
getEndPos,
);
/// Deprecated in favor of `Reader` and `Writer`.
pub fn seekableStream(file: File) SeekableStream {
return .{ .context = file };
}
/// Memoizes key information about a file handle such as:
/// * The size from calling stat, or the error that occurred therein.
/// * The current seek position.
@@ -1321,7 +1305,7 @@ pub const Reader = struct {
}
}
fn readVec(io_reader: *std.Io.Reader, data: []const []u8) std.Io.Reader.Error!usize {
fn readVec(io_reader: *std.Io.Reader, data: [][]u8) std.Io.Reader.Error!usize {
const r: *Reader = @alignCast(@fieldParentPtr("interface", io_reader));
switch (r.mode) {
.positional, .positional_reading => {

View File

@@ -1,5 +1,4 @@
const adler = @import("hash/adler.zig");
pub const Adler32 = adler.Adler32;
pub const Adler32 = @import("hash/Adler32.zig");
const auto_hash = @import("hash/auto_hash.zig");
pub const autoHash = auto_hash.autoHash;
@@ -116,7 +115,7 @@ test int {
}
test {
_ = adler;
_ = Adler32;
_ = auto_hash;
_ = crc;
_ = fnv;

117
lib/std/hash/Adler32.zig Normal file
View File

@@ -0,0 +1,117 @@
//! https://tools.ietf.org/html/rfc1950#section-9
//! https://github.com/madler/zlib/blob/master/adler32.c
const Adler32 = @This();
const std = @import("std");
const testing = std.testing;
adler: u32 = 1,
pub fn permute(state: u32, input: []const u8) u32 {
const base = 65521;
const nmax = 5552;
var s1 = state & 0xffff;
var s2 = (state >> 16) & 0xffff;
if (input.len == 1) {
s1 +%= input[0];
if (s1 >= base) {
s1 -= base;
}
s2 +%= s1;
if (s2 >= base) {
s2 -= base;
}
} else if (input.len < 16) {
for (input) |b| {
s1 +%= b;
s2 +%= s1;
}
if (s1 >= base) {
s1 -= base;
}
s2 %= base;
} else {
const n = nmax / 16; // note: 16 | nmax
var i: usize = 0;
while (i + nmax <= input.len) {
var rounds: usize = 0;
while (rounds < n) : (rounds += 1) {
comptime var j: usize = 0;
inline while (j < 16) : (j += 1) {
s1 +%= input[i + j];
s2 +%= s1;
}
i += 16;
}
s1 %= base;
s2 %= base;
}
if (i < input.len) {
while (i + 16 <= input.len) : (i += 16) {
comptime var j: usize = 0;
inline while (j < 16) : (j += 1) {
s1 +%= input[i + j];
s2 +%= s1;
}
}
while (i < input.len) : (i += 1) {
s1 +%= input[i];
s2 +%= s1;
}
s1 %= base;
s2 %= base;
}
}
return s1 | (s2 << 16);
}
pub fn update(a: *Adler32, input: []const u8) void {
a.adler = permute(a.adler, input);
}
pub fn hash(input: []const u8) u32 {
return permute(1, input);
}
test "sanity" {
try testing.expectEqual(@as(u32, 0x620062), hash("a"));
try testing.expectEqual(@as(u32, 0xbc002ed), hash("example"));
}
test "long" {
const long1 = [_]u8{1} ** 1024;
try testing.expectEqual(@as(u32, 0x06780401), hash(long1[0..]));
const long2 = [_]u8{1} ** 1025;
try testing.expectEqual(@as(u32, 0x0a7a0402), hash(long2[0..]));
}
test "very long" {
const long = [_]u8{1} ** 5553;
try testing.expectEqual(@as(u32, 0x707f15b2), hash(long[0..]));
}
test "very long with variation" {
const long = comptime blk: {
@setEvalBranchQuota(7000);
var result: [6000]u8 = undefined;
var i: usize = 0;
while (i < result.len) : (i += 1) {
result[i] = @as(u8, @truncate(i));
}
break :blk result;
};
try testing.expectEqual(@as(u32, 0x5af38d6e), hash(long[0..]));
}
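For orientation on the new file above: Adler-32 keeps s1 = 1 + sum of bytes (mod 65521) in the low 16 bits and s2 = sum of the successive s1 values (mod 65521) in the high 16 bits, so hash("a") is 1 + 0x61 = 0x62 in both halves, matching the 0x620062 asserted in the "sanity" test. A short sketch of one-shot versus streaming use of the relocated API:

const std = @import("std");

test "Adler32 one-shot matches streaming" {
    const oneshot = std.hash.Adler32.hash("hello world");
    var streaming: std.hash.Adler32 = .{}; // state starts at 1 per RFC 1950
    streaming.update("hello ");
    streaming.update("world");
    try std.testing.expectEqual(oneshot, streaming.adler);
}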

View File

@@ -1,134 +0,0 @@
// Adler32 checksum.
//
// https://tools.ietf.org/html/rfc1950#section-9
// https://github.com/madler/zlib/blob/master/adler32.c
const std = @import("std");
const testing = std.testing;
pub const Adler32 = struct {
const base = 65521;
const nmax = 5552;
adler: u32,
pub fn init() Adler32 {
return Adler32{ .adler = 1 };
}
// This fast variant is taken from zlib. It reduces the required modulos and unrolls longer
// buffer inputs and should be much quicker.
pub fn update(self: *Adler32, input: []const u8) void {
var s1 = self.adler & 0xffff;
var s2 = (self.adler >> 16) & 0xffff;
if (input.len == 1) {
s1 +%= input[0];
if (s1 >= base) {
s1 -= base;
}
s2 +%= s1;
if (s2 >= base) {
s2 -= base;
}
} else if (input.len < 16) {
for (input) |b| {
s1 +%= b;
s2 +%= s1;
}
if (s1 >= base) {
s1 -= base;
}
s2 %= base;
} else {
const n = nmax / 16; // note: 16 | nmax
var i: usize = 0;
while (i + nmax <= input.len) {
var rounds: usize = 0;
while (rounds < n) : (rounds += 1) {
comptime var j: usize = 0;
inline while (j < 16) : (j += 1) {
s1 +%= input[i + j];
s2 +%= s1;
}
i += 16;
}
s1 %= base;
s2 %= base;
}
if (i < input.len) {
while (i + 16 <= input.len) : (i += 16) {
comptime var j: usize = 0;
inline while (j < 16) : (j += 1) {
s1 +%= input[i + j];
s2 +%= s1;
}
}
while (i < input.len) : (i += 1) {
s1 +%= input[i];
s2 +%= s1;
}
s1 %= base;
s2 %= base;
}
}
self.adler = s1 | (s2 << 16);
}
pub fn final(self: *Adler32) u32 {
return self.adler;
}
pub fn hash(input: []const u8) u32 {
var c = Adler32.init();
c.update(input);
return c.final();
}
};
test "adler32 sanity" {
try testing.expectEqual(@as(u32, 0x620062), Adler32.hash("a"));
try testing.expectEqual(@as(u32, 0xbc002ed), Adler32.hash("example"));
}
test "adler32 long" {
const long1 = [_]u8{1} ** 1024;
try testing.expectEqual(@as(u32, 0x06780401), Adler32.hash(long1[0..]));
const long2 = [_]u8{1} ** 1025;
try testing.expectEqual(@as(u32, 0x0a7a0402), Adler32.hash(long2[0..]));
}
test "adler32 very long" {
const long = [_]u8{1} ** 5553;
try testing.expectEqual(@as(u32, 0x707f15b2), Adler32.hash(long[0..]));
}
test "adler32 very long with variation" {
const long = comptime blk: {
@setEvalBranchQuota(7000);
var result: [6000]u8 = undefined;
var i: usize = 0;
while (i < result.len) : (i += 1) {
result[i] = @as(u8, @truncate(i));
}
break :blk result;
};
try testing.expectEqual(@as(u32, 0x5af38d6e), std.hash.Adler32.hash(long[0..]));
}
const verify = @import("verify.zig");
test "adler32 iterative" {
try verify.iterativeApi(Adler32);
}

View File

@@ -45,7 +45,7 @@ pub fn smhasher(comptime hash_fn: anytype) u32 {
pub fn iterativeApi(comptime Hash: anytype) !void {
// Sum(1..32) = 528
var buf: [528]u8 = [_]u8{0} ** 528;
var buf: [528]u8 = @splat(0);
var len: usize = 0;
const seed = 0;

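The one-line change above works because @splat infers its result type from the destination, covering arrays as well as vectors. An illustrative check:

test "@splat fills arrays and vectors" {
    const buf: [528]u8 = @splat(0);
    const lanes: @Vector(4, u16) = @splat(0xabcd);
    try @import("std").testing.expectEqual(@as(u8, 0), buf[527]);
    try @import("std").testing.expectEqual(@as(u16, 0xabcd), lanes[3]);
}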
View File

@@ -405,13 +405,8 @@ pub const RequestTransfer = union(enum) {
/// The decompressor for response messages.
pub const Compression = union(enum) {
pub const DeflateDecompressor = std.compress.zlib.Decompressor(Request.TransferReader);
pub const GzipDecompressor = std.compress.gzip.Decompressor(Request.TransferReader);
// https://github.com/ziglang/zig/issues/18937
//pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Request.TransferReader, .{});
deflate: DeflateDecompressor,
gzip: GzipDecompressor,
//deflate: std.compress.flate.Decompress,
//gzip: std.compress.flate.Decompress,
// https://github.com/ziglang/zig/issues/18937
//zstd: ZstdDecompressor,
none: void,
@@ -1079,12 +1074,10 @@ pub const Request = struct {
switch (req.response.transfer_compression) {
.identity => req.response.compression = .none,
.compress, .@"x-compress" => return error.CompressionUnsupported,
.deflate => req.response.compression = .{
.deflate = std.compress.zlib.decompressor(req.transferReader()),
},
.gzip, .@"x-gzip" => req.response.compression = .{
.gzip = std.compress.gzip.decompressor(req.transferReader()),
},
// I'm about to upstream my http.Client rewrite
.deflate => return error.CompressionUnsupported,
// I'm about to upstream my http.Client rewrite
.gzip, .@"x-gzip" => return error.CompressionUnsupported,
// https://github.com/ziglang/zig/issues/18937
//.zstd => req.response.compression = .{
// .zstd = std.compress.zstd.decompressStream(req.client.allocator, req.transferReader()),
@@ -1110,8 +1103,9 @@ pub const Request = struct {
/// Reads data from the response body. Must be called after `wait`.
pub fn read(req: *Request, buffer: []u8) ReadError!usize {
const out_index = switch (req.response.compression) {
.deflate => |*deflate| deflate.read(buffer) catch return error.DecompressionFailure,
.gzip => |*gzip| gzip.read(buffer) catch return error.DecompressionFailure,
// I'm about to upstream my http client rewrite
//.deflate => |*deflate| deflate.readSlice(buffer) catch return error.DecompressionFailure,
//.gzip => |*gzip| gzip.read(buffer) catch return error.DecompressionFailure,
// https://github.com/ziglang/zig/issues/18937
//.zstd => |*zstd| zstd.read(buffer) catch return error.DecompressionFailure,
else => try req.transferRead(buffer),

View File

@@ -130,8 +130,8 @@ pub const Request = struct {
pub const DeflateDecompressor = std.compress.zlib.Decompressor(std.io.AnyReader);
pub const GzipDecompressor = std.compress.gzip.Decompressor(std.io.AnyReader);
deflate: DeflateDecompressor,
gzip: GzipDecompressor,
deflate: std.compress.flate.Decompress,
gzip: std.compress.flate.Decompress,
zstd: std.compress.zstd.Decompress,
none: void,
};

View File

@@ -1973,12 +1973,13 @@ pub const Stream = struct {
fn stream(io_r: *Io.Reader, io_w: *Io.Writer, limit: Io.Limit) Io.Reader.StreamError!usize {
const dest = limit.slice(try io_w.writableSliceGreedy(1));
const n = try readVec(io_r, &.{dest});
var bufs: [1][]u8 = .{dest};
const n = try readVec(io_r, &bufs);
io_w.advance(n);
return n;
}
fn readVec(io_r: *std.Io.Reader, data: []const []u8) Io.Reader.Error!usize {
fn readVec(io_r: *std.Io.Reader, data: [][]u8) Io.Reader.Error!usize {
const r: *Reader = @alignCast(@fieldParentPtr("interface_state", io_r));
var iovecs: [max_buffers_len]windows.ws2_32.WSABUF = undefined;
const bufs_n, const data_size = try io_r.writableVectorWsa(&iovecs, data);

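Note the two-step `var bufs: [1][]u8` dance above: with the parameter now [][]u8, a literal like `&.{dest}` is a *const [1][]u8 and no longer coerces, so the call site must materialize a mutable array. The same point in isolation:

test "mutable slice-of-slices coercion" {
    var dest_buf: [8]u8 = undefined;
    // `&.{...}` would be *const [1][]u8 and cannot coerce to [][]u8,
    // so build a mutable one-element array instead:
    var bufs: [1][]u8 = .{&dest_buf};
    const data: [][]u8 = &bufs;
    try @import("std").testing.expectEqual(@as(usize, 8), data[0].len);
}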
File diff suppressed because it is too large

View File

@@ -1,298 +0,0 @@
const std = @import("std");
const testing = std.testing;
const zip = @import("../zip.zig");
const maxInt = std.math.maxInt;
pub const File = struct {
name: []const u8,
content: []const u8,
compression: zip.CompressionMethod,
};
pub fn expectFiles(
test_files: []const File,
dir: std.fs.Dir,
opt: struct {
strip_prefix: ?[]const u8 = null,
},
) !void {
for (test_files) |test_file| {
var normalized_sub_path_buf: [std.fs.max_path_bytes]u8 = undefined;
const name = blk: {
if (opt.strip_prefix) |strip_prefix| {
try testing.expect(test_file.name.len >= strip_prefix.len);
try testing.expectEqualStrings(strip_prefix, test_file.name[0..strip_prefix.len]);
break :blk test_file.name[strip_prefix.len..];
}
break :blk test_file.name;
};
const normalized_sub_path = normalized_sub_path_buf[0..name.len];
@memcpy(normalized_sub_path, name);
std.mem.replaceScalar(u8, normalized_sub_path, '\\', '/');
var file = try dir.openFile(normalized_sub_path, .{});
defer file.close();
var content_buf: [4096]u8 = undefined;
const n = try file.deprecatedReader().readAll(&content_buf);
try testing.expectEqualStrings(test_file.content, content_buf[0..n]);
}
}
// Used to store any data from writing a file to the zip archive that's needed
// when writing the corresponding central directory record.
pub const FileStore = struct {
compression: zip.CompressionMethod,
file_offset: u64,
crc32: u32,
compressed_size: u32,
uncompressed_size: usize,
};
pub fn makeZip(
buf: []u8,
comptime files: []const File,
options: WriteZipOptions,
) !std.io.FixedBufferStream([]u8) {
var store: [files.len]FileStore = undefined;
return try makeZipWithStore(buf, files, options, &store);
}
pub fn makeZipWithStore(
buf: []u8,
files: []const File,
options: WriteZipOptions,
store: []FileStore,
) !std.io.FixedBufferStream([]u8) {
var fbs = std.io.fixedBufferStream(buf);
try writeZip(fbs.writer(), files, store, options);
return std.io.fixedBufferStream(buf[0..fbs.pos]);
}
pub const WriteZipOptions = struct {
end: ?EndRecordOptions = null,
local_header: ?LocalHeaderOptions = null,
};
pub const LocalHeaderOptions = struct {
zip64: ?LocalHeaderZip64Options = null,
compressed_size: ?u32 = null,
uncompressed_size: ?u32 = null,
extra_len: ?u16 = null,
};
pub const LocalHeaderZip64Options = struct {
data_size: ?u16 = null,
};
pub const EndRecordOptions = struct {
zip64: ?Zip64Options = null,
sig: ?[4]u8 = null,
disk_number: ?u16 = null,
central_directory_disk_number: ?u16 = null,
record_count_disk: ?u16 = null,
record_count_total: ?u16 = null,
central_directory_size: ?u32 = null,
central_directory_offset: ?u32 = null,
comment_len: ?u16 = null,
comment: ?[]const u8 = null,
};
pub const Zip64Options = struct {
locator_sig: ?[4]u8 = null,
locator_zip64_disk_count: ?u32 = null,
locator_record_file_offset: ?u64 = null,
locator_total_disk_count: ?u32 = null,
//record_size: ?u64 = null,
central_directory_size: ?u64 = null,
};
pub fn writeZip(
writer: anytype,
files: []const File,
store: []FileStore,
options: WriteZipOptions,
) !void {
if (store.len < files.len) return error.FileStoreTooSmall;
var zipper = initZipper(writer);
for (files, 0..) |file, i| {
store[i] = try zipper.writeFile(.{
.name = file.name,
.content = file.content,
.compression = file.compression,
.write_options = options,
});
}
for (files, 0..) |file, i| {
try zipper.writeCentralRecord(store[i], .{
.name = file.name,
});
}
try zipper.writeEndRecord(if (options.end) |e| e else .{});
}
pub fn initZipper(writer: anytype) Zipper(@TypeOf(writer)) {
return .{ .counting_writer = std.io.countingWriter(writer) };
}
/// Provides methods to format and write the contents of a zip archive
/// to the underlying Writer.
pub fn Zipper(comptime Writer: type) type {
return struct {
counting_writer: std.io.CountingWriter(Writer),
central_count: u64 = 0,
first_central_offset: ?u64 = null,
last_central_limit: ?u64 = null,
const Self = @This();
pub fn writeFile(
self: *Self,
opt: struct {
name: []const u8,
content: []const u8,
compression: zip.CompressionMethod,
write_options: WriteZipOptions,
},
) !FileStore {
const writer = self.counting_writer.writer();
const file_offset: u64 = @intCast(self.counting_writer.bytes_written);
const crc32 = std.hash.Crc32.hash(opt.content);
const header_options = opt.write_options.local_header;
{
var compressed_size: u32 = 0;
var uncompressed_size: u32 = 0;
var extra_len: u16 = 0;
if (header_options) |hdr_options| {
compressed_size = if (hdr_options.compressed_size) |size| size else 0;
uncompressed_size = if (hdr_options.uncompressed_size) |size| size else @intCast(opt.content.len);
extra_len = if (hdr_options.extra_len) |len| len else 0;
}
const hdr: zip.LocalFileHeader = .{
.signature = zip.local_file_header_sig,
.version_needed_to_extract = 10,
.flags = .{ .encrypted = false, ._ = 0 },
.compression_method = opt.compression,
.last_modification_time = 0,
.last_modification_date = 0,
.crc32 = crc32,
.compressed_size = compressed_size,
.uncompressed_size = uncompressed_size,
.filename_len = @intCast(opt.name.len),
.extra_len = extra_len,
};
try writer.writeStructEndian(hdr, .little);
}
try writer.writeAll(opt.name);
if (header_options) |hdr| {
if (hdr.zip64) |options| {
try writer.writeInt(u16, 0x0001, .little);
const data_size = if (options.data_size) |size| size else 8;
try writer.writeInt(u16, data_size, .little);
try writer.writeInt(u64, 0, .little);
try writer.writeInt(u64, @intCast(opt.content.len), .little);
}
}
var compressed_size: u32 = undefined;
switch (opt.compression) {
.store => {
try writer.writeAll(opt.content);
compressed_size = @intCast(opt.content.len);
},
.deflate => {
const offset = self.counting_writer.bytes_written;
var fbs = std.io.fixedBufferStream(opt.content);
try std.compress.flate.deflate.compress(.raw, fbs.reader(), writer, .{});
std.debug.assert(fbs.pos == opt.content.len);
compressed_size = @intCast(self.counting_writer.bytes_written - offset);
},
else => unreachable,
}
return .{
.compression = opt.compression,
.file_offset = file_offset,
.crc32 = crc32,
.compressed_size = compressed_size,
.uncompressed_size = opt.content.len,
};
}
pub fn writeCentralRecord(
self: *Self,
store: FileStore,
opt: struct {
name: []const u8,
version_needed_to_extract: u16 = 10,
},
) !void {
if (self.first_central_offset == null) {
self.first_central_offset = self.counting_writer.bytes_written;
}
self.central_count += 1;
const hdr: zip.CentralDirectoryFileHeader = .{
.signature = zip.central_file_header_sig,
.version_made_by = 0,
.version_needed_to_extract = opt.version_needed_to_extract,
.flags = .{ .encrypted = false, ._ = 0 },
.compression_method = store.compression,
.last_modification_time = 0,
.last_modification_date = 0,
.crc32 = store.crc32,
.compressed_size = store.compressed_size,
.uncompressed_size = @intCast(store.uncompressed_size),
.filename_len = @intCast(opt.name.len),
.extra_len = 0,
.comment_len = 0,
.disk_number = 0,
.internal_file_attributes = 0,
.external_file_attributes = 0,
.local_file_header_offset = @intCast(store.file_offset),
};
try self.counting_writer.writer().writeStructEndian(hdr, .little);
try self.counting_writer.writer().writeAll(opt.name);
self.last_central_limit = self.counting_writer.bytes_written;
}
pub fn writeEndRecord(self: *Self, opt: EndRecordOptions) !void {
const cd_offset = self.first_central_offset orelse 0;
const cd_end = self.last_central_limit orelse 0;
if (opt.zip64) |zip64| {
const end64_off = cd_end;
const fixed: zip.EndRecord64 = .{
.signature = zip.end_record64_sig,
.end_record_size = @sizeOf(zip.EndRecord64) - 12,
.version_made_by = 0,
.version_needed_to_extract = 45,
.disk_number = 0,
.central_directory_disk_number = 0,
.record_count_disk = @intCast(self.central_count),
.record_count_total = @intCast(self.central_count),
.central_directory_size = @intCast(cd_end - cd_offset),
.central_directory_offset = @intCast(cd_offset),
};
try self.counting_writer.writer().writeStructEndian(fixed, .little);
const locator: zip.EndLocator64 = .{
.signature = if (zip64.locator_sig) |s| s else zip.end_locator64_sig,
.zip64_disk_count = if (zip64.locator_zip64_disk_count) |c| c else 0,
.record_file_offset = if (zip64.locator_record_file_offset) |o| o else @intCast(end64_off),
.total_disk_count = if (zip64.locator_total_disk_count) |c| c else 1,
};
try self.counting_writer.writer().writeStructEndian(locator, .little);
}
const hdr: zip.EndRecord = .{
.signature = if (opt.sig) |s| s else zip.end_record_sig,
.disk_number = if (opt.disk_number) |n| n else 0,
.central_directory_disk_number = if (opt.central_directory_disk_number) |n| n else 0,
.record_count_disk = if (opt.record_count_disk) |c| c else @intCast(self.central_count),
.record_count_total = if (opt.record_count_total) |c| c else @intCast(self.central_count),
.central_directory_size = if (opt.central_directory_size) |s| s else @intCast(cd_end - cd_offset),
.central_directory_offset = if (opt.central_directory_offset) |o| o else @intCast(cd_offset),
.comment_len = if (opt.comment_len) |l| l else (if (opt.comment) |c| @as(u16, @intCast(c.len)) else 0),
};
try self.counting_writer.writer().writeStructEndian(hdr, .little);
if (opt.comment) |c|
try self.counting_writer.writer().writeAll(c);
}
};
}

View File

@@ -1203,12 +1203,11 @@ fn unpackResource(
return unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
},
.@"tar.gz" => {
const reader = resource.reader();
var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
var dcp = std.compress.gzip.decompressor(br.reader());
var adapter_buffer: [1024]u8 = undefined;
var adapter = dcp.reader().adaptToNewApi(&adapter_buffer);
return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
var adapter_buffer: [std.crypto.tls.max_ciphertext_record_len]u8 = undefined;
var adapter = resource.reader().adaptToNewApi(&adapter_buffer);
var flate_buffer: [std.compress.flate.max_window_len]u8 = undefined;
var decompress: std.compress.flate.Decompress = .init(&adapter.new_interface, .gzip, &flate_buffer);
return try unpackTarball(f, tmp_directory.handle, &decompress.reader);
},
.@"tar.xz" => {
const gpa = f.arena.child_allocator;
@@ -1352,7 +1351,10 @@ fn unzip(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackResult {
));
defer zip_file.close();
std.zip.extract(out_dir, zip_file.seekableStream(), .{
var zip_file_buffer: [1024]u8 = undefined;
var zip_file_reader = zip_file.reader(&zip_file_buffer);
std.zip.extract(out_dir, &zip_file_reader, .{
.allow_backslashes = true,
.diagnostics = &diagnostics,
}) catch |err| return f.fail(f.location_tok, try eb.printString(
@@ -1384,23 +1386,28 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource.Git) anyerror!U
defer pack_dir.close();
var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
defer pack_file.close();
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
var pack_file_buffer: [4096]u8 = undefined;
var fifo = std.fifo.LinearFifo(u8, .{ .Slice = {} }).init(&pack_file_buffer);
try fifo.pump(resource.fetch_stream.reader(), pack_file.deprecatedWriter());
var pack_file_reader = pack_file.reader(&pack_file_buffer);
var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
defer index_file.close();
var index_file_buffer: [2000]u8 = undefined;
var index_file_writer = index_file.writer(&index_file_buffer);
{
const index_prog_node = f.prog_node.start("Index pack", 0);
defer index_prog_node.end();
var index_buffered_writer = std.io.bufferedWriter(index_file.deprecatedWriter());
try git.indexPack(gpa, object_format, pack_file, index_buffered_writer.writer());
try index_buffered_writer.flush();
try git.indexPack(gpa, object_format, &pack_file_reader, &index_file_writer);
}
{
var index_file_reader = index_file.reader(&index_file_buffer);
const checkout_prog_node = f.prog_node.start("Checkout", 0);
defer checkout_prog_node.end();
var repository = try git.Repository.init(gpa, object_format, pack_file, index_file);
var repository: git.Repository = undefined;
try repository.init(gpa, object_format, &pack_file_reader, &index_file_reader);
defer repository.deinit();
var diagnostics: git.Diagnostics = .{ .allocator = arena };
try repository.checkout(out_dir, resource.want_oid, &diagnostics);
@@ -2071,72 +2078,6 @@ const UnpackResult = struct {
}
};
test "zip" {
const gpa = std.testing.allocator;
var tmp = std.testing.tmpDir(.{});
defer tmp.cleanup();
const test_files = [_]std.zip.testutil.File{
.{ .name = "foo", .content = "this is just foo\n", .compression = .store },
.{ .name = "bar", .content = "another file\n", .compression = .deflate },
};
{
var zip_file = try tmp.dir.createFile("test.zip", .{});
defer zip_file.close();
var bw = std.io.bufferedWriter(zip_file.deprecatedWriter());
var store: [test_files.len]std.zip.testutil.FileStore = undefined;
try std.zip.testutil.writeZip(bw.writer(), &test_files, &store, .{});
try bw.flush();
}
const zip_path = try std.fmt.allocPrint(gpa, ".zig-cache/tmp/{s}/test.zip", .{tmp.sub_path});
defer gpa.free(zip_path);
var fb: TestFetchBuilder = undefined;
var fetch = try fb.build(gpa, tmp.dir, zip_path);
defer fb.deinit();
try fetch.run();
var out = try fb.packageDir();
defer out.close();
try std.zip.testutil.expectFiles(&test_files, out, .{});
}
test "zip with one root folder" {
const gpa = std.testing.allocator;
var tmp = std.testing.tmpDir(.{});
defer tmp.cleanup();
const test_files = [_]std.zip.testutil.File{
.{ .name = "the_root_folder/foo.zig", .content = "// this is foo.zig\n", .compression = .store },
.{ .name = "the_root_folder/README.md", .content = "# The foo.zig README\n", .compression = .store },
};
{
var zip_file = try tmp.dir.createFile("test.zip", .{});
defer zip_file.close();
var bw = std.io.bufferedWriter(zip_file.deprecatedWriter());
var store: [test_files.len]std.zip.testutil.FileStore = undefined;
try std.zip.testutil.writeZip(bw.writer(), &test_files, &store, .{});
try bw.flush();
}
const zip_path = try std.fmt.allocPrint(gpa, ".zig-cache/tmp/{s}/test.zip", .{tmp.sub_path});
defer gpa.free(zip_path);
var fb: TestFetchBuilder = undefined;
var fetch = try fb.build(gpa, tmp.dir, zip_path);
defer fb.deinit();
try fetch.run();
var out = try fb.packageDir();
defer out.close();
try std.zip.testutil.expectFiles(&test_files, out, .{ .strip_prefix = "the_root_folder/" });
}
test "tarball with duplicate paths" {
// This tarball has duplicate path 'dir1/file1' to simulate a
// case-sensitive file system on any file system.

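The tar.gz path above shows the migration bridge: adaptToNewApi wraps an old-style reader in a std.Io.Reader, and the flate decompressor stacks on top of that interface. A condensed sketch under the same assumptions (helper name hypothetical, buffer sizes illustrative):

const std = @import("std");

/// Hypothetical helper: stream a gzip body from an old-style reader
/// into a new-API writer, following unpackResource above.
fn gunzipTo(old_reader: anytype, out: *std.Io.Writer) !void {
    var adapter_buffer: [4096]u8 = undefined;
    var adapter = old_reader.adaptToNewApi(&adapter_buffer);
    var flate_buffer: [std.compress.flate.max_window_len]u8 = undefined;
    var decompress: std.compress.flate.Decompress = .init(&adapter.new_interface, .gzip, &flate_buffer);
    _ = try decompress.reader.streamRemaining(out);
}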
View File

@@ -66,6 +66,33 @@ pub const Oid = union(Format) {
}
};
const Hashing = union(Format) {
sha1: std.Io.Writer.Hashing(Sha1),
sha256: std.Io.Writer.Hashing(Sha256),
fn init(oid_format: Format, buffer: []u8) Hashing {
return switch (oid_format) {
.sha1 => .{ .sha1 = .init(buffer) },
.sha256 => .{ .sha256 = .init(buffer) },
};
}
fn writer(h: *@This()) *std.Io.Writer {
return switch (h.*) {
inline else => |*inner| &inner.writer,
};
}
fn final(h: *@This()) Oid {
switch (h.*) {
inline else => |*inner, tag| {
inner.writer.flush() catch unreachable; // hashers cannot fail
return @unionInit(Oid, @tagName(tag), inner.hasher.finalResult());
},
}
}
};
pub fn fromBytes(oid_format: Format, bytes: []const u8) Oid {
assert(bytes.len == oid_format.byteLength());
return switch (oid_format) {
@@ -73,9 +100,9 @@ pub const Oid = union(Format) {
};
}
pub fn readBytes(oid_format: Format, reader: anytype) @TypeOf(reader).NoEofError!Oid {
pub fn readBytes(oid_format: Format, reader: *std.Io.Reader) !Oid {
return switch (oid_format) {
inline else => |tag| @unionInit(Oid, @tagName(tag), try reader.readBytesNoEof(tag.byteLength())),
inline else => |tag| @unionInit(Oid, @tagName(tag), (try reader.takeArray(tag.byteLength())).*),
};
}
@@ -166,8 +193,15 @@ pub const Diagnostics = struct {
pub const Repository = struct {
odb: Odb,
pub fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
return .{ .odb = try Odb.init(allocator, format, pack_file, index_file) };
pub fn init(
repo: *Repository,
allocator: Allocator,
format: Oid.Format,
pack_file: *std.fs.File.Reader,
index_file: *std.fs.File.Reader,
) !void {
repo.* = .{ .odb = undefined };
try repo.odb.init(allocator, format, pack_file, index_file);
}
pub fn deinit(repository: *Repository) void {
@@ -335,24 +369,30 @@ pub const Repository = struct {
/// [pack-format](https://git-scm.com/docs/pack-format).
const Odb = struct {
format: Oid.Format,
pack_file: std.fs.File,
pack_file: *std.fs.File.Reader,
index_header: IndexHeader,
index_file: std.fs.File,
index_file: *std.fs.File.Reader,
cache: ObjectCache = .{},
allocator: Allocator,
/// Initializes the database from open pack and index files.
fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
fn init(
odb: *Odb,
allocator: Allocator,
format: Oid.Format,
pack_file: *std.fs.File.Reader,
index_file: *std.fs.File.Reader,
) !void {
try pack_file.seekTo(0);
try index_file.seekTo(0);
const index_header = try IndexHeader.read(index_file.deprecatedReader());
return .{
odb.* = .{
.format = format,
.pack_file = pack_file,
.index_header = index_header,
.index_header = undefined,
.index_file = index_file,
.allocator = allocator,
};
try odb.index_header.read(&index_file.interface);
}
fn deinit(odb: *Odb) void {
@@ -362,14 +402,14 @@ const Odb = struct {
/// Reads the object at the current position in the database.
fn readObject(odb: *Odb) !Object {
var base_offset = try odb.pack_file.getPos();
var base_offset = odb.pack_file.logicalPos();
var base_header: EntryHeader = undefined;
var delta_offsets: std.ArrayListUnmanaged(u64) = .empty;
defer delta_offsets.deinit(odb.allocator);
const base_object = while (true) {
if (odb.cache.get(base_offset)) |base_object| break base_object;
base_header = try EntryHeader.read(odb.format, odb.pack_file.deprecatedReader());
base_header = try EntryHeader.read(odb.format, &odb.pack_file.interface);
switch (base_header) {
.ofs_delta => |ofs_delta| {
try delta_offsets.append(odb.allocator, base_offset);
@@ -379,10 +419,10 @@ const Odb = struct {
.ref_delta => |ref_delta| {
try delta_offsets.append(odb.allocator, base_offset);
try odb.seekOid(ref_delta.base_object);
base_offset = try odb.pack_file.getPos();
base_offset = odb.pack_file.logicalPos();
},
else => {
const base_data = try readObjectRaw(odb.allocator, odb.pack_file.deprecatedReader(), base_header.uncompressedLength());
const base_data = try readObjectRaw(odb.allocator, &odb.pack_file.interface, base_header.uncompressedLength());
errdefer odb.allocator.free(base_data);
const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
try odb.cache.put(odb.allocator, base_offset, base_object);
@@ -412,7 +452,7 @@ const Odb = struct {
const found_index = while (start_index < end_index) {
const mid_index = start_index + (end_index - start_index) / 2;
try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
const mid_oid = try Oid.readBytes(odb.format, odb.index_file.deprecatedReader());
const mid_oid = try Oid.readBytes(odb.format, &odb.index_file.interface);
switch (mem.order(u8, mid_oid.slice(), oid.slice())) {
.lt => start_index = mid_index + 1,
.gt => end_index = mid_index,
@@ -423,12 +463,12 @@ const Odb = struct {
const n_objects = odb.index_header.fan_out_table[255];
const offset_values_start = IndexHeader.size + n_objects * (oid_length + 4);
try odb.index_file.seekTo(offset_values_start + found_index * 4);
const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.deprecatedReader().readInt(u32, .big));
const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.interface.takeInt(u32, .big));
const pack_offset = pack_offset: {
if (l1_offset.big) {
const l2_offset_values_start = offset_values_start + n_objects * 4;
try odb.index_file.seekTo(l2_offset_values_start + l1_offset.value * 4);
break :pack_offset try odb.index_file.deprecatedReader().readInt(u64, .big);
break :pack_offset try odb.index_file.interface.takeInt(u64, .big);
} else {
break :pack_offset l1_offset.value;
}
@@ -1080,18 +1120,18 @@ const PackHeader = struct {
const signature = "PACK";
const supported_version = 2;
fn read(reader: anytype) !PackHeader {
const actual_signature = reader.readBytesNoEof(4) catch |e| switch (e) {
fn read(reader: *std.Io.Reader) !PackHeader {
const actual_signature = reader.take(4) catch |e| switch (e) {
error.EndOfStream => return error.InvalidHeader,
else => |other| return other,
};
if (!mem.eql(u8, &actual_signature, signature)) return error.InvalidHeader;
const version = reader.readInt(u32, .big) catch |e| switch (e) {
if (!mem.eql(u8, actual_signature, signature)) return error.InvalidHeader;
const version = reader.takeInt(u32, .big) catch |e| switch (e) {
error.EndOfStream => return error.InvalidHeader,
else => |other| return other,
};
if (version != supported_version) return error.UnsupportedVersion;
const total_objects = reader.readInt(u32, .big) catch |e| switch (e) {
const total_objects = reader.takeInt(u32, .big) catch |e| switch (e) {
error.EndOfStream => return error.InvalidHeader,
else => |other| return other,
};
@@ -1143,13 +1183,13 @@ const EntryHeader = union(Type) {
};
}
fn read(format: Oid.Format, reader: anytype) !EntryHeader {
fn read(format: Oid.Format, reader: *std.Io.Reader) !EntryHeader {
const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
const initial: InitialByte = @bitCast(reader.readByte() catch |e| switch (e) {
const initial: InitialByte = @bitCast(reader.takeByte() catch |e| switch (e) {
error.EndOfStream => return error.InvalidFormat,
else => |other| return other,
});
const rest_len = if (initial.has_next) try readSizeVarInt(reader) else 0;
const rest_len = if (initial.has_next) try reader.takeLeb128(u64) else 0;
var uncompressed_length: u64 = initial.len;
uncompressed_length |= std.math.shlExact(u64, rest_len, 4) catch return error.InvalidFormat;
const @"type" = std.enums.fromInt(EntryHeader.Type, initial.type) orelse return error.InvalidFormat;
@@ -1172,25 +1212,12 @@ const EntryHeader = union(Type) {
}
};
fn readSizeVarInt(r: anytype) !u64 {
fn readOffsetVarInt(r: *std.Io.Reader) !u64 {
const Byte = packed struct { value: u7, has_next: bool };
var b: Byte = @bitCast(try r.readByte());
var value: u64 = b.value;
var shift: u6 = 0;
while (b.has_next) {
b = @bitCast(try r.readByte());
shift = std.math.add(u6, shift, 7) catch return error.InvalidFormat;
value |= @as(u64, b.value) << shift;
}
return value;
}
fn readOffsetVarInt(r: anytype) !u64 {
const Byte = packed struct { value: u7, has_next: bool };
var b: Byte = @bitCast(try r.readByte());
var b: Byte = @bitCast(try r.takeByte());
var value: u64 = b.value;
while (b.has_next) {
b = @bitCast(try r.readByte());
b = @bitCast(try r.takeByte());
value = std.math.shlExact(u64, value + 1, 7) catch return error.InvalidFormat;
value |= b.value;
}
@@ -1204,19 +1231,12 @@ const IndexHeader = struct {
const supported_version = 2;
const size = 4 + 4 + @sizeOf([256]u32);
fn read(reader: anytype) !IndexHeader {
var header_bytes = try reader.readBytesNoEof(size);
if (!mem.eql(u8, header_bytes[0..4], signature)) return error.InvalidHeader;
const version = mem.readInt(u32, header_bytes[4..8], .big);
fn read(index_header: *IndexHeader, reader: *std.Io.Reader) !void {
const sig = try reader.take(4);
if (!mem.eql(u8, sig, signature)) return error.InvalidHeader;
const version = try reader.takeInt(u32, .big);
if (version != supported_version) return error.UnsupportedVersion;
var fan_out_table: [256]u32 = undefined;
var fan_out_table_stream = std.io.fixedBufferStream(header_bytes[8..]);
const fan_out_table_reader = fan_out_table_stream.reader();
for (&fan_out_table) |*entry| {
entry.* = fan_out_table_reader.readInt(u32, .big) catch unreachable;
}
return .{ .fan_out_table = fan_out_table };
try reader.readSliceEndian(u32, &index_header.fan_out_table, .big);
}
};
@@ -1227,7 +1247,12 @@ const IndexEntry = struct {
/// Writes out a version 2 index for the given packfile, as documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, index_writer: anytype) !void {
pub fn indexPack(
allocator: Allocator,
format: Oid.Format,
pack: *std.fs.File.Reader,
index_writer: *std.fs.File.Writer,
) !void {
try pack.seekTo(0);
var index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry) = .empty;
@@ -1280,8 +1305,8 @@ pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, in
}
@memset(fan_out_table[fan_out_index..], count);
var index_hashed_writer = hashedWriter(index_writer, Oid.Hasher.init(format));
const writer = index_hashed_writer.writer();
var index_hashed_writer = std.Io.Writer.hashed(&index_writer.interface, Oid.Hasher.init(format), &.{});
const writer = &index_hashed_writer.writer;
try writer.writeAll(IndexHeader.signature);
try writer.writeInt(u32, IndexHeader.supported_version, .big);
for (fan_out_table) |fan_out_entry| {
@@ -1314,7 +1339,8 @@ pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, in
try writer.writeAll(pack_checksum.slice());
const index_checksum = index_hashed_writer.hasher.finalResult();
try index_writer.writeAll(index_checksum.slice());
try index_writer.interface.writeAll(index_checksum.slice());
try index_writer.end();
}
/// Performs the first pass over the packfile data for index construction.
@@ -1324,68 +1350,51 @@ pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, in
fn indexPackFirstPass(
allocator: Allocator,
format: Oid.Format,
pack: std.fs.File,
pack: *std.fs.File.Reader,
index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
pending_deltas: *std.ArrayListUnmanaged(IndexEntry),
) !Oid {
var pack_buffered_reader = std.io.bufferedReader(pack.deprecatedReader());
var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader());
var pack_hashed_reader = hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
const pack_reader = pack_hashed_reader.reader();
var flate_buffer: [std.compress.flate.max_window_len]u8 = undefined;
var pack_buffer: [2048]u8 = undefined; // Reasonably large buffer for file system.
var pack_hashed = pack.interface.hashed(Oid.Hasher.init(format), &pack_buffer);
const pack_header = try PackHeader.read(pack_reader);
const pack_header = try PackHeader.read(&pack_hashed.reader);
var current_entry: u32 = 0;
while (current_entry < pack_header.total_objects) : (current_entry += 1) {
const entry_offset = pack_counting_reader.bytes_read;
var entry_crc32_reader = hashedReader(pack_reader, std.hash.Crc32.init());
const entry_header = try EntryHeader.read(format, entry_crc32_reader.reader());
for (0..pack_header.total_objects) |_| {
const entry_offset = pack.logicalPos() - pack_hashed.reader.bufferedLen();
const entry_header = try EntryHeader.read(format, &pack_hashed.reader);
switch (entry_header) {
.commit, .tree, .blob, .tag => |object| {
var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader());
var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
var entry_hashed_writer = hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
const entry_writer = entry_hashed_writer.writer();
var entry_decompress: std.compress.flate.Decompress = .init(&pack_hashed.reader, .zlib, &.{});
var oid_hasher: Oid.Hashing = .init(format, &flate_buffer);
const oid_hasher_w = oid_hasher.writer();
// The object header is not included in the pack data but is
// part of the object's ID
try entry_writer.print("{s} {}\x00", .{ @tagName(entry_header), object.uncompressed_length });
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(entry_counting_reader.reader(), entry_writer);
if (entry_counting_reader.bytes_read != object.uncompressed_length) {
return error.InvalidObject;
}
const oid = entry_hashed_writer.hasher.finalResult();
try oid_hasher_w.print("{t} {d}\x00", .{ entry_header, object.uncompressed_length });
const n = try entry_decompress.reader.streamRemaining(oid_hasher_w);
if (n != object.uncompressed_length) return error.InvalidObject;
const oid = oid_hasher.final();
if (!skip_checksums) @compileError("TODO");
try index_entries.put(allocator, oid, .{
.offset = entry_offset,
.crc32 = entry_crc32_reader.hasher.final(),
.crc32 = 0,
});
},
inline .ofs_delta, .ref_delta => |delta| {
var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader());
var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(entry_counting_reader.reader(), std.io.null_writer);
if (entry_counting_reader.bytes_read != delta.uncompressed_length) {
return error.InvalidObject;
}
var entry_decompress: std.compress.flate.Decompress = .init(&pack_hashed.reader, .zlib, &flate_buffer);
const n = try entry_decompress.reader.discardRemaining();
if (n != delta.uncompressed_length) return error.InvalidObject;
if (!skip_checksums) @compileError("TODO");
try pending_deltas.append(allocator, .{
.offset = entry_offset,
.crc32 = entry_crc32_reader.hasher.final(),
.crc32 = 0,
});
},
}
}
const pack_checksum = pack_hashed_reader.hasher.finalResult();
const recorded_checksum = try Oid.readBytes(format, pack_buffered_reader.reader());
if (!mem.eql(u8, pack_checksum.slice(), recorded_checksum.slice())) {
return error.CorruptedPack;
}
_ = pack_reader.readByte() catch |e| switch (e) {
error.EndOfStream => return pack_checksum,
else => |other| return other,
};
return error.InvalidFormat;
if (!skip_checksums) @compileError("TODO");
return pack_hashed.hasher.finalResult();
}
/// Attempts to determine the final object ID of the given deltified object.
@@ -1394,7 +1403,7 @@ fn indexPackFirstPass(
fn indexPackHashDelta(
allocator: Allocator,
format: Oid.Format,
pack: std.fs.File,
pack: *std.fs.File.Reader,
delta: IndexEntry,
index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
cache: *ObjectCache,
@@ -1408,7 +1417,7 @@ fn indexPackHashDelta(
if (cache.get(base_offset)) |base_object| break base_object;
try pack.seekTo(base_offset);
base_header = try EntryHeader.read(format, pack.deprecatedReader());
base_header = try EntryHeader.read(format, &pack.interface);
switch (base_header) {
.ofs_delta => |ofs_delta| {
try delta_offsets.append(allocator, base_offset);
@@ -1419,7 +1428,7 @@ fn indexPackHashDelta(
base_offset = (index_entries.get(ref_delta.base_object) orelse return null).offset;
},
else => {
const base_data = try readObjectRaw(allocator, pack.deprecatedReader(), base_header.uncompressedLength());
const base_data = try readObjectRaw(allocator, &pack.interface, base_header.uncompressedLength());
errdefer allocator.free(base_data);
const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
try cache.put(allocator, base_offset, base_object);
@@ -1430,11 +1439,13 @@ fn indexPackHashDelta(
const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);
var entry_hasher: Oid.Hasher = .init(format);
var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher);
try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len });
entry_hasher.update(base_data);
return entry_hasher.finalResult();
var entry_hasher_buffer: [64]u8 = undefined;
var entry_hasher: Oid.Hashing = .init(format, &entry_hasher_buffer);
const entry_hasher_w = entry_hasher.writer();
// Writes to hashers cannot fail.
entry_hasher_w.print("{t} {d}\x00", .{ base_object.type, base_data.len }) catch unreachable;
entry_hasher_w.writeAll(base_data) catch unreachable;
return entry_hasher.final();
}
/// Resolves a chain of deltas, returning the final base object data. `pack` is
@@ -1444,7 +1455,7 @@ fn indexPackHashDelta(
fn resolveDeltaChain(
allocator: Allocator,
format: Oid.Format,
pack: std.fs.File,
pack: *std.fs.File.Reader,
base_object: Object,
delta_offsets: []const u64,
cache: *ObjectCache,
@@ -1456,21 +1467,19 @@ fn resolveDeltaChain(
const delta_offset = delta_offsets[i];
try pack.seekTo(delta_offset);
const delta_header = try EntryHeader.read(format, pack.deprecatedReader());
const delta_data = try readObjectRaw(allocator, pack.deprecatedReader(), delta_header.uncompressedLength());
const delta_header = try EntryHeader.read(format, &pack.interface);
const delta_data = try readObjectRaw(allocator, &pack.interface, delta_header.uncompressedLength());
defer allocator.free(delta_data);
var delta_stream = std.io.fixedBufferStream(delta_data);
const delta_reader = delta_stream.reader();
_ = try readSizeVarInt(delta_reader); // base object size
const expanded_size = try readSizeVarInt(delta_reader);
var delta_reader: std.Io.Reader = .fixed(delta_data);
_ = try delta_reader.takeLeb128(u64); // base object size
const expanded_size = try delta_reader.takeLeb128(u64);
const expanded_alloc_size = std.math.cast(usize, expanded_size) orelse return error.ObjectTooLarge;
const expanded_data = try allocator.alloc(u8, expanded_alloc_size);
errdefer allocator.free(expanded_data);
var expanded_delta_stream = std.io.fixedBufferStream(expanded_data);
var base_stream = std.io.fixedBufferStream(base_data);
try expandDelta(&base_stream, delta_reader, expanded_delta_stream.writer());
if (expanded_delta_stream.pos != expanded_size) return error.InvalidObject;
var expanded_delta_stream: std.Io.Writer = .fixed(expanded_data);
try expandDelta(base_data, &delta_reader, &expanded_delta_stream);
if (expanded_delta_stream.end != expanded_size) return error.InvalidObject;
try cache.put(allocator, delta_offset, .{ .type = base_object.type, .data = expanded_data });
base_data = expanded_data;
@@ -1481,28 +1490,23 @@ fn resolveDeltaChain(
/// Reads the complete contents of an object from `reader`. This function may
/// read more bytes than required from `reader`, so the reader position after
/// returning is not reliable.
fn readObjectRaw(allocator: Allocator, reader: anytype, size: u64) ![]u8 {
fn readObjectRaw(allocator: Allocator, reader: *std.Io.Reader, size: u64) ![]u8 {
const alloc_size = std.math.cast(usize, size) orelse return error.ObjectTooLarge;
var buffered_reader = std.io.bufferedReader(reader);
var decompress_stream = std.compress.zlib.decompressor(buffered_reader.reader());
const data = try allocator.alloc(u8, alloc_size);
errdefer allocator.free(data);
try decompress_stream.reader().readNoEof(data);
_ = decompress_stream.reader().readByte() catch |e| switch (e) {
error.EndOfStream => return data,
else => |other| return other,
};
return error.InvalidFormat;
var aw: std.Io.Writer.Allocating = .init(allocator);
try aw.ensureTotalCapacity(alloc_size + std.compress.flate.max_window_len);
defer aw.deinit();
var decompress: std.compress.flate.Decompress = .init(reader, .zlib, &.{});
try decompress.reader.streamExact(&aw.writer, alloc_size);
return aw.toOwnedSlice();
}
/// Expands delta data from `delta_reader` to `writer`. `base_object` must
/// support `reader` and `seekTo` (such as a `std.io.FixedBufferStream`).
/// Expands delta data from `delta_reader` to `writer`.
///
/// The format of the delta data is documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !void {
fn expandDelta(base_object: []const u8, delta_reader: *std.Io.Reader, writer: *std.Io.Writer) !void {
while (true) {
const inst: packed struct { value: u7, copy: bool } = @bitCast(delta_reader.readByte() catch |e| switch (e) {
const inst: packed struct { value: u7, copy: bool } = @bitCast(delta_reader.takeByte() catch |e| switch (e) {
error.EndOfStream => return,
else => |other| return other,
});
@@ -1517,27 +1521,22 @@ fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !vo
size3: bool,
} = @bitCast(inst.value);
const offset_parts: packed struct { offset1: u8, offset2: u8, offset3: u8, offset4: u8 } = .{
.offset1 = if (available.offset1) try delta_reader.readByte() else 0,
.offset2 = if (available.offset2) try delta_reader.readByte() else 0,
.offset3 = if (available.offset3) try delta_reader.readByte() else 0,
.offset4 = if (available.offset4) try delta_reader.readByte() else 0,
.offset1 = if (available.offset1) try delta_reader.takeByte() else 0,
.offset2 = if (available.offset2) try delta_reader.takeByte() else 0,
.offset3 = if (available.offset3) try delta_reader.takeByte() else 0,
.offset4 = if (available.offset4) try delta_reader.takeByte() else 0,
};
const offset: u32 = @bitCast(offset_parts);
const base_offset: u32 = @bitCast(offset_parts);
const size_parts: packed struct { size1: u8, size2: u8, size3: u8 } = .{
.size1 = if (available.size1) try delta_reader.readByte() else 0,
.size2 = if (available.size2) try delta_reader.readByte() else 0,
.size3 = if (available.size3) try delta_reader.readByte() else 0,
.size1 = if (available.size1) try delta_reader.takeByte() else 0,
.size2 = if (available.size2) try delta_reader.takeByte() else 0,
.size3 = if (available.size3) try delta_reader.takeByte() else 0,
};
var size: u24 = @bitCast(size_parts);
if (size == 0) size = 0x10000;
try base_object.seekTo(offset);
var copy_reader = std.io.limitedReader(base_object.reader(), size);
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(copy_reader.reader(), writer);
try writer.writeAll(base_object[base_offset..][0..size]);
} else if (inst.value != 0) {
var data_reader = std.io.limitedReader(delta_reader, inst.value);
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(data_reader.reader(), writer);
try delta_reader.streamExact(writer, inst.value);
} else {
return error.InvalidDeltaInstruction;
}
@@ -1567,23 +1566,32 @@ fn runRepositoryTest(comptime format: Oid.Format, head_commit: []const u8) !void
defer pack_file.close();
try pack_file.writeAll(testrepo_pack);
var pack_file_buffer: [2000]u8 = undefined;
var pack_file_reader = pack_file.reader(&pack_file_buffer);
var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true });
defer index_file.close();
try indexPack(testing.allocator, format, pack_file, index_file.deprecatedWriter());
var index_file_buffer: [2000]u8 = undefined;
var index_file_writer = index_file.writer(&index_file_buffer);
try indexPack(testing.allocator, format, &pack_file_reader, &index_file_writer);
// Arbitrary size limit on files read while checking the repository contents
// (all files in the test repo are known to be smaller than this)
const max_file_size = 8192;
const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size);
defer testing.allocator.free(index_file_data);
// testrepo.idx is generated by Git. The index created by this file should
// match it exactly. Running `git verify-pack -v testrepo.pack` can verify
// this.
const testrepo_idx = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".idx");
try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
if (!skip_checksums) {
const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size);
defer testing.allocator.free(index_file_data);
// testrepo.idx is generated by Git. The index created by this file should
// match it exactly. Running `git verify-pack -v testrepo.pack` can verify
// this.
const testrepo_idx = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".idx");
try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
}
var repository = try Repository.init(testing.allocator, format, pack_file, index_file);
var index_file_reader = index_file.reader(&index_file_buffer);
var repository: Repository = undefined;
try repository.init(testing.allocator, format, &pack_file_reader, &index_file_reader);
defer repository.deinit();
var worktree = testing.tmpDir(.{ .iterate = true });
@@ -1653,6 +1661,12 @@ fn runRepositoryTest(comptime format: Oid.Format, head_commit: []const u8) !void
try testing.expectEqualStrings(expected_file_contents, actual_file_contents);
}
/// Checksum calculation is useful for troubleshooting and debugging, but it's
/// redundant since the package manager already does content hashing at the
/// end. Let's save time by not doing that work, but I left a breadcrumb
/// trail here if you want to restore the functionality for tinkering purposes.
const skip_checksums = true;
test "SHA-1 packfile indexing and checkout" {
try runRepositoryTest(.sha1, "dd582c0720819ab7130b103635bd7271b9fd4feb");
}
@@ -1676,6 +1690,9 @@ pub fn main() !void {
var pack_file = try std.fs.cwd().openFile(args[2], .{});
defer pack_file.close();
var pack_file_buffer: [4096]u8 = undefined;
var pack_file_reader = pack_file.reader(&pack_file_buffer);
const commit = try Oid.parse(format, args[3]);
var worktree = try std.fs.cwd().makeOpenPath(args[4], .{});
defer worktree.close();
@@ -1687,11 +1704,11 @@ pub fn main() !void {
var index_file = try git_dir.createFile("idx", .{ .read = true });
defer index_file.close();
var index_buffered_writer = std.io.bufferedWriter(index_file.deprecatedWriter());
try indexPack(allocator, format, pack_file, index_buffered_writer.writer());
try index_buffered_writer.flush();
var index_file_buffer: [2000]u8 = undefined;
var index_file_writer = index_file.writer(&index_file_buffer);
try indexPack(allocator, format, &pack_file_reader, &index_file_writer);
std.debug.print("Starting checkout...\n", .{});
var repository = try Repository.init(allocator, format, pack_file, index_file);
var index_file_reader = index_file.reader(&index_file_buffer);
var repository: Repository = undefined;
try repository.init(allocator, format, &pack_file_reader, &index_file_reader);
defer repository.deinit();
var diagnostics: Diagnostics = .{ .allocator = allocator };
defer diagnostics.deinit();
@@ -1701,58 +1718,3 @@ pub fn main() !void {
std.debug.print("Diagnostic: {}\n", .{err});
}
}
/// Deprecated
fn hashedReader(reader: anytype, hasher: anytype) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
return .{ .child_reader = reader, .hasher = hasher };
}
/// Deprecated
fn HashedReader(ReaderType: type, HasherType: type) type {
return struct {
child_reader: ReaderType,
hasher: HasherType,
pub const Error = ReaderType.Error;
pub const Reader = std.io.GenericReader(*@This(), Error, read);
pub fn read(self: *@This(), buf: []u8) Error!usize {
const amt = try self.child_reader.read(buf);
self.hasher.update(buf[0..amt]);
return amt;
}
pub fn reader(self: *@This()) Reader {
return .{ .context = self };
}
};
}
/// Deprecated
pub fn HashedWriter(WriterType: type, HasherType: type) type {
return struct {
child_writer: WriterType,
hasher: HasherType,
pub const Error = WriterType.Error;
pub const Writer = std.io.GenericWriter(*@This(), Error, write);
pub fn write(self: *@This(), buf: []const u8) Error!usize {
const amt = try self.child_writer.write(buf);
self.hasher.update(buf[0..amt]);
return amt;
}
pub fn writer(self: *@This()) Writer {
return .{ .context = self };
}
};
}
/// Deprecated
pub fn hashedWriter(
writer: anytype,
hasher: anytype,
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
return .{ .child_writer = writer, .hasher = hasher };
}
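To make the rewritten expandDelta easier to audit, here is a worked example of the copy instruction it parses (per git's pack-format): the high bit of the leading byte selects a copy, the low seven bits flag which little-endian offset/size bytes follow, and a decoded size of 0 means 0x10000. A self-contained decode of the bytes 0x91 0x10 0x05 (assumed inputs, not from the commit):

const std = @import("std");

test "git copy-instruction layout (worked example)" {
    // 0x91 = 0b1_0010001: copy bit set; bit 0 says one offset byte
    // follows; bit 4 says one size byte follows.
    var r: std.Io.Reader = .fixed(&.{ 0x91, 0x10, 0x05 });
    const inst: packed struct { value: u7, copy: bool } = @bitCast(try r.takeByte());
    try std.testing.expect(inst.copy);
    const offset: u32 = try r.takeByte(); // only offset1 present -> 0x10
    const size: u24 = try r.takeByte(); // only size1 present -> 0x05
    // Meaning: copy base_object[0x10..][0..5] into the output.
    try std.testing.expectEqual(@as(u32, 16), offset);
    try std.testing.expectEqual(@as(u24, 5), size);
}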

View File

@@ -142,8 +142,8 @@ const DebugInfo = struct {
&abbrev_code_buf,
debug_info.section.off(dwarf) + unit_ptr.off + unit_ptr.header_len + entry_ptr.off,
) != abbrev_code_buf.len) return error.InputOutput;
var abbrev_code_fbs = std.io.fixedBufferStream(&abbrev_code_buf);
return @enumFromInt(std.leb.readUleb128(@typeInfo(AbbrevCode).@"enum".tag_type, abbrev_code_fbs.reader()) catch unreachable);
var abbrev_code_reader: std.Io.Reader = .fixed(&abbrev_code_buf);
return @enumFromInt(abbrev_code_reader.takeLeb128(@typeInfo(AbbrevCode).@"enum".tag_type) catch unreachable);
}
const trailer_bytes = 1 + 1;
@@ -2077,7 +2077,7 @@ pub const WipNav = struct {
.generic_decl_const,
.generic_decl_func,
=> true,
else => unreachable,
else => |t| std.debug.panic("bad decl abbrev code: {t}", .{t}),
};
if (parent_type.getCaptures(zcu).len == 0) {
if (was_generic_decl) try dwarf.freeCommonEntry(wip_nav.unit, decl_gop.value_ptr.*);
@@ -6021,15 +6021,17 @@ fn sectionOffsetBytes(dwarf: *Dwarf) u32 {
}
fn uleb128Bytes(value: anytype) u32 {
var cw = std.io.countingWriter(std.io.null_writer);
try uleb128(cw.writer(), value);
return @intCast(cw.bytes_written);
var trash_buffer: [64]u8 = undefined;
var d: std.Io.Writer.Discarding = .init(&trash_buffer);
d.writer.writeUleb128(value) catch unreachable;
return @intCast(d.count + d.writer.end);
}
fn sleb128Bytes(value: anytype) u32 {
var cw = std.io.countingWriter(std.io.null_writer);
try sleb128(cw.writer(), value);
return @intCast(cw.bytes_written);
var trash_buffer: [64]u8 = undefined;
var d: std.Io.Writer.Discarding = .init(&trash_buffer);
d.writer.writeSleb128(value) catch unreachable;
return @intCast(d.count + d.writer.end);
}
/// overrides `-fno-incremental` for testing incremental debug info until `-fincremental` is functional

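The Discarding writer above is just a byte counter for the LEB128 encoders. ULEB128 stores 7 payload bits per byte with the top bit as a continuation flag, so 624485 (0x98765, 20 significant bits) needs ceil(20/7) = 3 bytes: 0xE5 0x8E 0x26. The same counting pattern, standalone:

const std = @import("std");

test "counting ULEB128 bytes with a discarding writer" {
    var trash_buffer: [64]u8 = undefined;
    var d: std.Io.Writer.Discarding = .init(&trash_buffer);
    d.writer.writeUleb128(@as(u32, 624485)) catch unreachable;
    // Total emitted = bytes already discarded + bytes still buffered.
    try std.testing.expectEqual(@as(usize, 3), d.count + d.writer.end);
}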
View File

@@ -1198,15 +1198,14 @@ pub fn codeDecompressAlloc(self: *Object, elf_file: *Elf, atom_index: Atom.Index
const chdr = @as(*align(1) const elf.Elf64_Chdr, @ptrCast(data.ptr)).*;
switch (chdr.ch_type) {
.ZLIB => {
var stream = std.io.fixedBufferStream(data[@sizeOf(elf.Elf64_Chdr)..]);
var zlib_stream = std.compress.zlib.decompressor(stream.reader());
var stream: std.Io.Reader = .fixed(data[@sizeOf(elf.Elf64_Chdr)..]);
var zlib_stream: std.compress.flate.Decompress = .init(&stream, .zlib, &.{});
const size = std.math.cast(usize, chdr.ch_size) orelse return error.Overflow;
const decomp = try gpa.alloc(u8, size);
const nread = zlib_stream.reader().readAll(decomp) catch return error.InputOutput;
if (nread != decomp.len) {
return error.InputOutput;
}
return decomp;
var aw: std.Io.Writer.Allocating = .init(gpa);
try aw.ensureUnusedCapacity(size);
defer aw.deinit();
_ = try zlib_stream.reader.streamRemaining(&aw.writer);
return aw.toOwnedSlice();
},
else => @panic("TODO unhandled compression scheme"),
}

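The Allocating writer above replaces the alloc-then-readAll pattern: reserve the expected capacity, stream the decompressor into the writer, then take ownership of the bytes. A minimal sketch (hypothetical helper) using only calls shown elsewhere in this diff:

const std = @import("std");

/// Hypothetical helper mirroring the new codeDecompressAlloc body.
fn inflateAlloc(gpa: std.mem.Allocator, compressed: []const u8, size_hint: usize) ![]u8 {
    var in: std.Io.Reader = .fixed(compressed);
    var zlib_stream: std.compress.flate.Decompress = .init(&in, .zlib, &.{});
    var aw: std.Io.Writer.Allocating = .init(gpa);
    defer aw.deinit();
    try aw.ensureUnusedCapacity(size_hint);
    _ = try zlib_stream.reader.streamRemaining(&aw.writer);
    return aw.toOwnedSlice();
}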
View File

@@ -1,4 +1,3 @@
#target=x86_64-linux-selfhosted
#target=x86_64-linux-cbe
#target=x86_64-windows-cbe
#update=initial version

View File

@@ -1,4 +1,3 @@
#target=x86_64-linux-selfhosted
#target=x86_64-linux-cbe
#target=x86_64-windows-cbe
#update=initial version

Some files were not shown because too many files have changed in this diff