zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit ea9ded87582a8b9d0ed3afd3360a1d75f0359a5c (tree)
parent 06ce15e8f719756cc12d928cfdae12be99a9e4c2
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Tue, 24 Jan 2023 15:04:56 -0700

std.compress.xz public API cleanup

 * add xz to std.compress
 * prefer importing std.zig by file name, to reduce reliance on the
   standard library being a special case.
 * extract some types from inside generic functions. These types are the
   same regardless of the generic parameters.
 * expose some more types in the std.compress.xz namespace.
 * rename xz.stream to xz.decompress
 * rename check.Kind to Check
 * use std.leb for LEB instead of a redundant implementation

Diffstat:
M lib/std/compress.zig | 2 ++
M lib/std/compress/xz.zig | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M lib/std/compress/xz/block.zig | 26 ++++++++++++--------------
D lib/std/compress/xz/check.zig | 7 -------
M lib/std/compress/xz/lzma.zig | 2 +-
D lib/std/compress/xz/multibyte.zig | 23 -----------------------
D lib/std/compress/xz/stream.zig | 136 -------------------------------------------------------------------------------
D lib/std/compress/xz/stream_test.zig | 80 -------------------------------------------------------------------------------
A lib/std/compress/xz/test.zig | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 234 insertions(+), 263 deletions(-)

diff --git a/lib/std/compress.zig b/lib/std/compress.zig @@ -3,6 +3,7 @@ const std = @import("std.zig"); pub const deflate = @import("compress/deflate.zig"); pub const gzip = @import("compress/gzip.zig"); pub const zlib = @import("compress/zlib.zig"); +pub const xz = @import("compress/xz.zig"); pub fn HashedReader( comptime ReaderType: anytype, @@ -38,4 +39,5 @@ test { _ = deflate; _ = gzip; _ = zlib; + _ = xz; } diff --git a/lib/std/compress/xz.zig b/lib/std/compress/xz.zig @@ -1,5 +1,142 @@ -pub usingnamespace @import("xz/stream.zig"); +const std = @import("std"); +const block = @import("xz/block.zig"); +const Allocator = std.mem.Allocator; +const Crc32 = std.hash.Crc32; + +pub const Flags = packed struct(u16) { + reserved1: u8, + check_kind: Check, + reserved2: u4, +}; + +pub const Header = extern struct { + magic: [6]u8, + flags: Flags, + crc32: u32, +}; + +pub const Footer = extern struct { + crc32: u32, + backward_size: u32, + flags: Flags, + magic: [2]u8, +}; + +pub const Check = enum(u4) { + none = 0x00, + crc32 = 0x01, + crc64 = 0x04, + sha256 = 0x0A, + _, +}; + +pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) { + return Decompress(@TypeOf(reader)).init(allocator, reader); +} + +pub fn Decompress(comptime ReaderType: type) type { + return struct { + const Self = @This(); + + pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error; + pub const Reader = std.io.Reader(*Self, Error, read); + + allocator: Allocator, + block_decoder: block.Decoder(ReaderType), + in_reader: ReaderType, + + fn init(allocator: Allocator, source: ReaderType) !Self { + const header = try source.readStruct(Header); + + if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 })) + return error.BadHeader; + + if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0) + return error.BadHeader; + + const hash = Crc32.hash(std.mem.asBytes(&header.flags)); + if (hash != header.crc32) + return error.WrongChecksum; + + 
return Self{ + .allocator = allocator, + .block_decoder = try block.decoder(allocator, source, header.flags.check_kind), + .in_reader = source, + }; + } + + pub fn deinit(self: *Self) void { + self.block_decoder.deinit(); + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + + pub fn read(self: *Self, buffer: []u8) Error!usize { + if (buffer.len == 0) + return 0; + + const r = try self.block_decoder.read(buffer); + if (r != 0) + return r; + + const index_size = blk: { + var hasher = std.compress.hashedReader(self.in_reader, Crc32.init()); + hasher.hasher.update(&[1]u8{0x00}); + + var counter = std.io.countingReader(hasher.reader()); + counter.bytes_read += 1; + + const counting_reader = counter.reader(); + + const record_count = try std.leb.readULEB128(u64, counting_reader); + if (record_count != self.block_decoder.block_count) + return error.CorruptInput; + + var i: usize = 0; + while (i < record_count) : (i += 1) { + // TODO: validate records + _ = try std.leb.readULEB128(u64, counting_reader); + _ = try std.leb.readULEB128(u64, counting_reader); + } + + while (counter.bytes_read % 4 != 0) { + if (try counting_reader.readByte() != 0) + return error.CorruptInput; + } + + const hash_a = hasher.hasher.final(); + const hash_b = try counting_reader.readIntLittle(u32); + if (hash_a != hash_b) + return error.WrongChecksum; + + break :blk counter.bytes_read; + }; + + const footer = try self.in_reader.readStruct(Footer); + const backward_size = (footer.backward_size + 1) * 4; + if (backward_size != index_size) + return error.CorruptInput; + + if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0) + return error.CorruptInput; + + var hasher = Crc32.init(); + hasher.update(std.mem.asBytes(&footer.backward_size)); + hasher.update(std.mem.asBytes(&footer.flags)); + const hash = hasher.final(); + if (hash != footer.crc32) + return error.WrongChecksum; + + if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' })) + return error.CorruptInput; + + 
return 0; + } + }; +} test { - _ = @import("xz/stream.zig"); + _ = @import("xz/test.zig"); } diff --git a/lib/std/compress/xz/block.zig b/lib/std/compress/xz/block.zig @@ -1,11 +1,10 @@ -const std = @import("std"); -const check = @import("check.zig"); +const std = @import("../../std.zig"); const lzma = @import("lzma.zig"); -const multibyte = @import("multibyte.zig"); const Allocator = std.mem.Allocator; const Crc32 = std.hash.Crc32; const Crc64 = std.hash.crc.Crc64Xz; const Sha256 = std.crypto.hash.sha2.Sha256; +const xz = std.compress.xz; const DecodeError = error{ CorruptInput, @@ -16,8 +15,8 @@ const DecodeError = error{ Overflow, }; -pub fn decoder(allocator: Allocator, reader: anytype, check_kind: check.Kind) !Decoder(@TypeOf(reader)) { - return Decoder(@TypeOf(reader)).init(allocator, reader, check_kind); +pub fn decoder(allocator: Allocator, reader: anytype, check: xz.Check) !Decoder(@TypeOf(reader)) { + return Decoder(@TypeOf(reader)).init(allocator, reader, check); } pub fn Decoder(comptime ReaderType: type) type { @@ -31,17 +30,17 @@ pub fn Decoder(comptime ReaderType: type) type { allocator: Allocator, inner_reader: ReaderType, - check_kind: check.Kind, + check: xz.Check, err: ?Error, accum: lzma.LzAccumBuffer, lzma_state: lzma.DecoderState, block_count: usize, - fn init(allocator: Allocator, in_reader: ReaderType, check_kind: check.Kind) !Self { + fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self { return Self{ .allocator = allocator, .inner_reader = in_reader, - .check_kind = check_kind, + .check = check, .err = null, .accum = .{}, .lzma_state = try lzma.DecoderState.init(allocator), @@ -116,10 +115,10 @@ pub fn Decoder(comptime ReaderType: type) type { return error.Unsupported; if (flags.has_packed_size) - packed_size = try multibyte.readInt(header_reader); + packed_size = try std.leb.readULEB128(u64, header_reader); if (flags.has_unpacked_size) - unpacked_size = try multibyte.readInt(header_reader); + unpacked_size = try 
std.leb.readULEB128(u64, header_reader); const FilterId = enum(u64) { lzma2 = 0x21, @@ -128,7 +127,7 @@ pub fn Decoder(comptime ReaderType: type) type { const filter_id = @intToEnum( FilterId, - try multibyte.readInt(header_reader), + try std.leb.readULEB128(u64, header_reader), ); if (@enumToInt(filter_id) >= 0x4000_0000_0000_0000) @@ -137,7 +136,7 @@ pub fn Decoder(comptime ReaderType: type) type { if (filter_id != .lzma2) return error.Unsupported; - const properties_size = try multibyte.readInt(header_reader); + const properties_size = try std.leb.readULEB128(u64, header_reader); if (properties_size != 1) return error.CorruptInput; @@ -177,8 +176,7 @@ pub fn Decoder(comptime ReaderType: type) type { return error.CorruptInput; } - // Check - switch (self.check_kind) { + switch (self.check) { .none => {}, .crc32 => { const hash_a = Crc32.hash(unpacked_bytes); diff --git a/lib/std/compress/xz/check.zig b/lib/std/compress/xz/check.zig @@ -1,7 +0,0 @@ -pub const Kind = enum(u4) { - none = 0x00, - crc32 = 0x01, - crc64 = 0x04, - sha256 = 0x0A, - _, -}; diff --git a/lib/std/compress/xz/lzma.zig b/lib/std/compress/xz/lzma.zig @@ -1,6 +1,6 @@ // Ported from https://github.com/gendx/lzma-rs -const std = @import("std"); +const std = @import("../../std.zig"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ArrayListUnmanaged = std.ArrayListUnmanaged; diff --git a/lib/std/compress/xz/multibyte.zig b/lib/std/compress/xz/multibyte.zig @@ -1,23 +0,0 @@ -const Multibyte = packed struct(u8) { - value: u7, - more: bool, -}; - -pub fn readInt(reader: anytype) !u64 { - const max_size = 9; - - var chunk = try reader.readStruct(Multibyte); - var num: u64 = chunk.value; - var i: u6 = 0; - - while (chunk.more) { - chunk = try reader.readStruct(Multibyte); - i += 1; - if (i >= max_size or @bitCast(u8, chunk) == 0x00) - return error.CorruptInput; - - num |= @as(u64, chunk.value) << (i * 7); - } - - return num; -} diff --git a/lib/std/compress/xz/stream.zig 
b/lib/std/compress/xz/stream.zig @@ -1,136 +0,0 @@ -const std = @import("std"); -const block = @import("block.zig"); -const check = @import("check.zig"); -const multibyte = @import("multibyte.zig"); -const Allocator = std.mem.Allocator; -const Crc32 = std.hash.Crc32; - -test { - _ = @import("stream_test.zig"); -} - -const Flags = packed struct(u16) { - reserved1: u8, - check_kind: check.Kind, - reserved2: u4, -}; - -pub fn stream(allocator: Allocator, reader: anytype) !Stream(@TypeOf(reader)) { - return Stream(@TypeOf(reader)).init(allocator, reader); -} - -pub fn Stream(comptime ReaderType: type) type { - return struct { - const Self = @This(); - - pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error; - pub const Reader = std.io.Reader(*Self, Error, read); - - allocator: Allocator, - block_decoder: block.Decoder(ReaderType), - in_reader: ReaderType, - - fn init(allocator: Allocator, source: ReaderType) !Self { - const Header = extern struct { - magic: [6]u8, - flags: Flags, - crc32: u32, - }; - - const header = try source.readStruct(Header); - - if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 })) - return error.BadHeader; - - if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0) - return error.BadHeader; - - const hash = Crc32.hash(std.mem.asBytes(&header.flags)); - if (hash != header.crc32) - return error.WrongChecksum; - - return Self{ - .allocator = allocator, - .block_decoder = try block.decoder(allocator, source, header.flags.check_kind), - .in_reader = source, - }; - } - - pub fn deinit(self: *Self) void { - self.block_decoder.deinit(); - } - - pub fn reader(self: *Self) Reader { - return .{ .context = self }; - } - - pub fn read(self: *Self, buffer: []u8) Error!usize { - if (buffer.len == 0) - return 0; - - const r = try self.block_decoder.read(buffer); - if (r != 0) - return r; - - const index_size = blk: { - var hasher = std.compress.hashedReader(self.in_reader, Crc32.init()); - 
hasher.hasher.update(&[1]u8{0x00}); - - var counter = std.io.countingReader(hasher.reader()); - counter.bytes_read += 1; - - const counting_reader = counter.reader(); - - const record_count = try multibyte.readInt(counting_reader); - if (record_count != self.block_decoder.block_count) - return error.CorruptInput; - - var i: usize = 0; - while (i < record_count) : (i += 1) { - // TODO: validate records - _ = try multibyte.readInt(counting_reader); - _ = try multibyte.readInt(counting_reader); - } - - while (counter.bytes_read % 4 != 0) { - if (try counting_reader.readByte() != 0) - return error.CorruptInput; - } - - const hash_a = hasher.hasher.final(); - const hash_b = try counting_reader.readIntLittle(u32); - if (hash_a != hash_b) - return error.WrongChecksum; - - break :blk counter.bytes_read; - }; - - const Footer = extern struct { - crc32: u32, - backward_size: u32, - flags: Flags, - magic: [2]u8, - }; - - const footer = try self.in_reader.readStruct(Footer); - const backward_size = (footer.backward_size + 1) * 4; - if (backward_size != index_size) - return error.CorruptInput; - - if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0) - return error.CorruptInput; - - var hasher = Crc32.init(); - hasher.update(std.mem.asBytes(&footer.backward_size)); - hasher.update(std.mem.asBytes(&footer.flags)); - const hash = hasher.final(); - if (hash != footer.crc32) - return error.WrongChecksum; - - if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' })) - return error.CorruptInput; - - return 0; - } - }; -} diff --git a/lib/std/compress/xz/stream_test.zig b/lib/std/compress/xz/stream_test.zig @@ -1,80 +0,0 @@ -const std = @import("std"); -const testing = std.testing; -const stream = @import("stream.zig").stream; - -fn decompress(data: []const u8) ![]u8 { - var in_stream = std.io.fixedBufferStream(data); - - var xz_stream = try stream(testing.allocator, in_stream.reader()); - defer xz_stream.deinit(); - - return xz_stream.reader().readAllAlloc(testing.allocator, 
std.math.maxInt(usize)); -} - -fn testReader(data: []const u8, comptime expected: []const u8) !void { - const buf = try decompress(data); - defer testing.allocator.free(buf); - - try testing.expectEqualSlices(u8, expected, buf); -} - -test "compressed data" { - try testReader(@embedFile("testdata/good-0-empty.xz"), ""); - - inline for ([_][]const u8{ - "good-1-check-none.xz", - "good-1-check-crc32.xz", - "good-1-check-crc64.xz", - "good-1-check-sha256.xz", - "good-2-lzma2.xz", - "good-1-block_header-1.xz", - "good-1-block_header-2.xz", - "good-1-block_header-3.xz", - }) |filename| { - try testReader(@embedFile("testdata/" ++ filename), - \\Hello - \\World! - \\ - ); - } - - inline for ([_][]const u8{ - "good-1-lzma2-1.xz", - "good-1-lzma2-2.xz", - "good-1-lzma2-3.xz", - "good-1-lzma2-4.xz", - }) |filename| { - try testReader(@embedFile("testdata/" ++ filename), - \\Lorem ipsum dolor sit amet, consectetur adipisicing - \\elit, sed do eiusmod tempor incididunt ut - \\labore et dolore magna aliqua. Ut enim - \\ad minim veniam, quis nostrud exercitation ullamco - \\laboris nisi ut aliquip ex ea commodo - \\consequat. Duis aute irure dolor in reprehenderit - \\in voluptate velit esse cillum dolore eu - \\fugiat nulla pariatur. Excepteur sint occaecat cupidatat - \\non proident, sunt in culpa qui officia - \\deserunt mollit anim id est laborum. 
- \\ - ); - } - - try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), ""); -} - -test "unsupported" { - inline for ([_][]const u8{ - "good-1-delta-lzma2.tiff.xz", - "good-1-x86-lzma2.xz", - "good-1-sparc-lzma2.xz", - "good-1-arm64-lzma2-1.xz", - "good-1-arm64-lzma2-2.xz", - "good-1-3delta-lzma2.xz", - "good-1-empty-bcj-lzma2.xz", - }) |filename| { - try testing.expectError( - error.Unsupported, - decompress(@embedFile("testdata/" ++ filename)), - ); - } -} diff --git a/lib/std/compress/xz/test.zig b/lib/std/compress/xz/test.zig @@ -0,0 +1,80 @@ +const std = @import("../../std.zig"); +const testing = std.testing; +const xz = std.compress.xz; + +fn decompress(data: []const u8) ![]u8 { + var in_stream = std.io.fixedBufferStream(data); + + var xz_stream = try xz.decompress(testing.allocator, in_stream.reader()); + defer xz_stream.deinit(); + + return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize)); +} + +fn testReader(data: []const u8, comptime expected: []const u8) !void { + const buf = try decompress(data); + defer testing.allocator.free(buf); + + try testing.expectEqualSlices(u8, expected, buf); +} + +test "compressed data" { + try testReader(@embedFile("testdata/good-0-empty.xz"), ""); + + inline for ([_][]const u8{ + "good-1-check-none.xz", + "good-1-check-crc32.xz", + "good-1-check-crc64.xz", + "good-1-check-sha256.xz", + "good-2-lzma2.xz", + "good-1-block_header-1.xz", + "good-1-block_header-2.xz", + "good-1-block_header-3.xz", + }) |filename| { + try testReader(@embedFile("testdata/" ++ filename), + \\Hello + \\World! + \\ + ); + } + + inline for ([_][]const u8{ + "good-1-lzma2-1.xz", + "good-1-lzma2-2.xz", + "good-1-lzma2-3.xz", + "good-1-lzma2-4.xz", + }) |filename| { + try testReader(@embedFile("testdata/" ++ filename), + \\Lorem ipsum dolor sit amet, consectetur adipisicing + \\elit, sed do eiusmod tempor incididunt ut + \\labore et dolore magna aliqua. 
Ut enim + \\ad minim veniam, quis nostrud exercitation ullamco + \\laboris nisi ut aliquip ex ea commodo + \\consequat. Duis aute irure dolor in reprehenderit + \\in voluptate velit esse cillum dolore eu + \\fugiat nulla pariatur. Excepteur sint occaecat cupidatat + \\non proident, sunt in culpa qui officia + \\deserunt mollit anim id est laborum. + \\ + ); + } + + try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), ""); +} + +test "unsupported" { + inline for ([_][]const u8{ + "good-1-delta-lzma2.tiff.xz", + "good-1-x86-lzma2.xz", + "good-1-sparc-lzma2.xz", + "good-1-arm64-lzma2-1.xz", + "good-1-arm64-lzma2-2.xz", + "good-1-3delta-lzma2.xz", + "good-1-empty-bcj-lzma2.xz", + }) |filename| { + try testing.expectError( + error.Unsupported, + decompress(@embedFile("testdata/" ++ filename)), + ); + } +}