zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob 0f273bad (39586B) - Raw


//! A tar archive is a single ordinary file which can contain many files (or
//! directories, symlinks, ...). It is built from a series of blocks, each 512
//! bytes in size. The first block of each entry is a header which defines the
//! type, name, size, permissions and other attributes. The header is followed
//! by a series of blocks of file content, if the entry has any content. Content
//! is padded to the block size, so the next header always starts at a block
//! boundary.
//!
//! This simple format is extended by GNU and POSIX pax extensions to support
//! file names longer than 256 bytes and additional attributes.
//!
//! This is not a comprehensive tar parser. Here we only handle the file types
//! needed to support the Zig package manager: normal file, directory, symbolic
//! link. And a subset of attributes: name, size, permissions.
//!
//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
     17 
     18 const std = @import("std");
     19 const assert = std.debug.assert;
     20 const testing = std.testing;
     21 
     22 pub const output = @import("tar/output.zig");
     23 
     24 /// Provide this to receive detailed error messages.
     25 /// When this is provided, some errors which would otherwise be returned
     26 /// immediately will instead be added to this structure. The API user must check
     27 /// the errors in diagnostics to know whether the operation succeeded or failed.
     28 pub const Diagnostics = struct {
     29     allocator: std.mem.Allocator,
     30     errors: std.ArrayListUnmanaged(Error) = .{},
     31 
     32     pub const Error = union(enum) {
     33         unable_to_create_sym_link: struct {
     34             code: anyerror,
     35             file_name: []const u8,
     36             link_name: []const u8,
     37         },
     38         unable_to_create_file: struct {
     39             code: anyerror,
     40             file_name: []const u8,
     41         },
     42         unsupported_file_type: struct {
     43             file_name: []const u8,
     44             file_type: Header.Kind,
     45         },
     46     };
     47 
     48     pub fn deinit(d: *Diagnostics) void {
     49         for (d.errors.items) |item| {
     50             switch (item) {
     51                 .unable_to_create_sym_link => |info| {
     52                     d.allocator.free(info.file_name);
     53                     d.allocator.free(info.link_name);
     54                 },
     55                 .unable_to_create_file => |info| {
     56                     d.allocator.free(info.file_name);
     57                 },
     58                 .unsupported_file_type => |info| {
     59                     d.allocator.free(info.file_name);
     60                 },
     61             }
     62         }
     63         d.errors.deinit(d.allocator);
     64         d.* = undefined;
     65     }
     66 };
     67 
/// Options accepted by `pipeToFileSystem`.
pub const PipeOptions = struct {
    /// Number of leading path components to strip from each entry name
    /// when extracting files.
    strip_components: u32 = 0,
    /// How to handle the "mode" property of files from within the tar file.
    mode_mode: ModeMode = .executable_bit_only,
    /// When true, directory entries are not created on their own.
    exclude_empty_directories: bool = false,
    /// Collects error messages during unpacking.
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode from the tar file is completely ignored. Files are created
        /// with the default mode when creating files.
        ignore,
        /// The mode from the tar file is inspected for the owner executable bit
        /// only. This bit is copied to the group and other executable bits.
        /// Other bits of the mode are left as the default when creating files.
        executable_bit_only,
    };
};
     89 
     90 const Header = struct {
     91     const SIZE = 512;
     92     const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
     93     const LINK_NAME_SIZE = 100;
     94 
     95     bytes: *const [SIZE]u8,
     96 
     97     const Kind = enum(u8) {
     98         normal_alias = 0,
     99         normal = '0',
    100         hard_link = '1',
    101         symbolic_link = '2',
    102         character_special = '3',
    103         block_special = '4',
    104         directory = '5',
    105         fifo = '6',
    106         contiguous = '7',
    107         global_extended_header = 'g',
    108         extended_header = 'x',
    109         // Types 'L' and 'K' are used by the GNU format for a meta file
    110         // used to store the path or link name for the next file.
    111         gnu_long_name = 'L',
    112         gnu_long_link = 'K',
    113         gnu_sparse = 'S',
    114         solaris_extended_header = 'X',
    115         _,
    116     };
    117 
    118     /// Includes prefix concatenated, if any.
    119     /// TODO: check against "../" and other nefarious things
    120     pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
    121         const n = name(header);
    122         const p = prefix(header);
    123         if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer;
    124         if (!is_ustar(header) or p.len == 0) {
    125             @memcpy(buffer[0..n.len], n);
    126             return buffer[0..n.len];
    127         }
    128         @memcpy(buffer[0..p.len], p);
    129         buffer[p.len] = '/';
    130         @memcpy(buffer[p.len + 1 ..][0..n.len], n);
    131         return buffer[0 .. p.len + 1 + n.len];
    132     }
    133 
    134     /// When kind is symbolic_link linked-to name (target_path) is specified in
    135     /// the linkname field.
    136     pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
    137         const link_name = header.str(157, 100);
    138         if (link_name.len == 0) {
    139             return buffer[0..0];
    140         }
    141         if (buffer.len < link_name.len) return error.TarInsufficientBuffer;
    142         const buf = buffer[0..link_name.len];
    143         @memcpy(buf, link_name);
    144         return buf;
    145     }
    146 
    147     pub fn name(header: Header) []const u8 {
    148         return header.str(0, 100);
    149     }
    150 
    151     pub fn mode(header: Header) !u32 {
    152         return @intCast(try header.octal(100, 8));
    153     }
    154 
    155     pub fn size(header: Header) !u64 {
    156         const start = 124;
    157         const len = 12;
    158         const raw = header.bytes[start..][0..len];
    159         //  If the leading byte is 0xff (255), all the bytes of the field
    160         //  (including the leading byte) are concatenated in big-endian order,
    161         //  with the result being a negative number expressed in two’s
    162         //  complement form.
    163         if (raw[0] == 0xff) return error.TarNumericValueNegative;
    164         // If the leading byte is 0x80 (128), the non-leading bytes of the
    165         // field are concatenated in big-endian order.
    166         if (raw[0] == 0x80) {
    167             if (raw[1] != 0 or raw[2] != 0 or raw[3] != 0) return error.TarNumericValueTooBig;
    168             return std.mem.readInt(u64, raw[4..12], .big);
    169         }
    170         return try header.octal(start, len);
    171     }
    172 
    173     pub fn chksum(header: Header) !u64 {
    174         return header.octal(148, 8);
    175     }
    176 
    177     pub fn is_ustar(header: Header) bool {
    178         const magic = header.bytes[257..][0..6];
    179         return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
    180     }
    181 
    182     pub fn prefix(header: Header) []const u8 {
    183         return header.str(345, 155);
    184     }
    185 
    186     pub fn kind(header: Header) Kind {
    187         const result: Kind = @enumFromInt(header.bytes[156]);
    188         if (result == .normal_alias) return .normal;
    189         return result;
    190     }
    191 
    192     fn str(header: Header, start: usize, len: usize) []const u8 {
    193         return nullStr(header.bytes[start .. start + len]);
    194     }
    195 
    196     fn octal(header: Header, start: usize, len: usize) !u64 {
    197         const raw = header.bytes[start..][0..len];
    198         // Zero-filled octal number in ASCII. Each numeric field of width w
    199         // contains w minus 1 digits, and a null
    200         const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
    201         const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
    202         if (rtrimmed.len == 0) return 0;
    203         return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
    204     }
    205 
    206     const Chksums = struct {
    207         unsigned: u64,
    208         signed: i64,
    209     };
    210 
    211     // Sum of all bytes in the header block. The chksum field is treated as if
    212     // it were filled with spaces (ASCII 32).
    213     fn computeChksum(header: Header) Chksums {
    214         var cs: Chksums = .{ .signed = 0, .unsigned = 0 };
    215         for (header.bytes, 0..) |v, i| {
    216             const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces.
    217             cs.unsigned += b;
    218             cs.signed += @as(i8, @bitCast(b));
    219         }
    220         return cs;
    221     }
    222 
    223     // Checks calculated chksum with value of chksum field.
    224     // Returns error or valid chksum value.
    225     // Zero value indicates empty block.
    226     pub fn checkChksum(header: Header) !u64 {
    227         const field = try header.chksum();
    228         const cs = header.computeChksum();
    229         if (field == 0 and cs.unsigned == 256) return 0;
    230         if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum;
    231         return field;
    232     }
    233 };
    234 
    235 // Breaks string on first null character.
    236 fn nullStr(str: []const u8) []const u8 {
    237     for (str, 0..) |c, i| {
    238         if (c == 0) return str[0..i];
    239     }
    240     return str;
    241 }
    242 
/// Options for `iterator`.
/// The buffers are provided by the caller; the iterator stores file and link
/// names in them.
pub const IteratorOptions = struct {
    /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
    file_name_buffer: []u8,
    /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
    link_name_buffer: []u8,
    /// Collects error messages during unpacking.
    diagnostics: ?*Diagnostics = null,
};
    253 
    254 /// Iterates over files in tar archive.
    255 /// `next` returns each file in tar archive.
    256 pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
    257     return .{
    258         .reader = reader,
    259         .diagnostics = options.diagnostics,
    260         .file_name_buffer = options.file_name_buffer,
    261         .link_name_buffer = options.link_name_buffer,
    262     };
    263 }
    264 
/// Kind of the entry returned by the iterator's `next` method.
pub const FileKind = enum {
    directory,
    sym_link,
    file,
};
    271 
    272 /// Iteartor over entries in the tar file represented by reader.
    273 pub fn Iterator(comptime ReaderType: type) type {
    274     return struct {
    275         reader: ReaderType,
    276         diagnostics: ?*Diagnostics = null,
    277 
    278         // buffers for heeader and file attributes
    279         header_buffer: [Header.SIZE]u8 = undefined,
    280         file_name_buffer: []u8,
    281         link_name_buffer: []u8,
    282 
    283         // bytes of padding to the end of the block
    284         padding: usize = 0,
    285         // not consumed bytes of file from last next iteration
    286         unread_file_bytes: u64 = 0,
    287 
    288         pub const File = struct {
    289             name: []const u8, // name of file, symlink or directory
    290             link_name: []const u8, // target name of symlink
    291             size: u64 = 0, // size of the file in bytes
    292             mode: u32 = 0,
    293             kind: FileKind = .file,
    294 
    295             unread_bytes: *u64,
    296             parent_reader: ReaderType,
    297 
    298             pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);
    299 
    300             pub fn reader(self: File) Reader {
    301                 return .{ .context = self };
    302             }
    303 
    304             pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
    305                 const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
    306                 const n = try self.parent_reader.read(buf);
    307                 self.unread_bytes.* -= n;
    308                 return n;
    309             }
    310 
    311             // Writes file content to writer.
    312             pub fn writeAll(self: File, writer: anytype) !void {
    313                 var buffer: [4096]u8 = undefined;
    314 
    315                 while (self.unread_bytes.* > 0) {
    316                     const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
    317                     try self.parent_reader.readNoEof(buf);
    318                     try writer.writeAll(buf);
    319                     self.unread_bytes.* -= buf.len;
    320                 }
    321             }
    322         };
    323 
    324         const Self = @This();
    325 
    326         fn readHeader(self: *Self) !?Header {
    327             if (self.padding > 0) {
    328                 try self.reader.skipBytes(self.padding, .{});
    329             }
    330             const n = try self.reader.readAll(&self.header_buffer);
    331             if (n == 0) return null;
    332             if (n < Header.SIZE) return error.UnexpectedEndOfStream;
    333             const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
    334             if (try header.checkChksum() == 0) return null;
    335             return header;
    336         }
    337 
    338         fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
    339             if (size > buffer.len) return error.TarInsufficientBuffer;
    340             const buf = buffer[0..size];
    341             try self.reader.readNoEof(buf);
    342             return nullStr(buf);
    343         }
    344 
    345         fn newFile(self: *Self) File {
    346             return .{
    347                 .name = self.file_name_buffer[0..0],
    348                 .link_name = self.link_name_buffer[0..0],
    349                 .parent_reader = self.reader,
    350                 .unread_bytes = &self.unread_file_bytes,
    351             };
    352         }
    353 
    354         // Number of padding bytes in the last file block.
    355         fn blockPadding(size: u64) usize {
    356             const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary
    357             return @intCast(block_rounded - size);
    358         }
    359 
    360         /// Iterates through the tar archive as if it is a series of files.
    361         /// Internally, the tar format often uses entries (header with optional
    362         /// content) to add meta data that describes the next file. These
    363         /// entries should not normally be visible to the outside. As such, this
    364         /// loop iterates through one or more entries until it collects a all
    365         /// file attributes.
    366         pub fn next(self: *Self) !?File {
    367             if (self.unread_file_bytes > 0) {
    368                 // If file content was not consumed by caller
    369                 try self.reader.skipBytes(self.unread_file_bytes, .{});
    370                 self.unread_file_bytes = 0;
    371             }
    372             var file: File = self.newFile();
    373 
    374             while (try self.readHeader()) |header| {
    375                 const kind = header.kind();
    376                 const size: u64 = try header.size();
    377                 self.padding = blockPadding(size);
    378 
    379                 switch (kind) {
    380                     // File types to retrun upstream
    381                     .directory, .normal, .symbolic_link => {
    382                         file.kind = switch (kind) {
    383                             .directory => .directory,
    384                             .normal => .file,
    385                             .symbolic_link => .sym_link,
    386                             else => unreachable,
    387                         };
    388                         file.mode = try header.mode();
    389 
    390                         // set file attributes if not already set by prefix/extended headers
    391                         if (file.size == 0) {
    392                             file.size = size;
    393                         }
    394                         if (file.link_name.len == 0) {
    395                             file.link_name = try header.linkName(self.link_name_buffer);
    396                         }
    397                         if (file.name.len == 0) {
    398                             file.name = try header.fullName(self.file_name_buffer);
    399                         }
    400 
    401                         self.padding = blockPadding(file.size);
    402                         self.unread_file_bytes = file.size;
    403                         return file;
    404                     },
    405                     // Prefix header types
    406                     .gnu_long_name => {
    407                         file.name = try self.readString(@intCast(size), self.file_name_buffer);
    408                     },
    409                     .gnu_long_link => {
    410                         file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
    411                     },
    412                     .extended_header => {
    413                         // Use just attributes from last extended header.
    414                         file = self.newFile();
    415 
    416                         var rdr = paxIterator(self.reader, @intCast(size));
    417                         while (try rdr.next()) |attr| {
    418                             switch (attr.kind) {
    419                                 .path => {
    420                                     file.name = try attr.value(self.file_name_buffer);
    421                                 },
    422                                 .linkpath => {
    423                                     file.link_name = try attr.value(self.link_name_buffer);
    424                                 },
    425                                 .size => {
    426                                     var buf: [pax_max_size_attr_len]u8 = undefined;
    427                                     file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
    428                                 },
    429                             }
    430                         }
    431                     },
    432                     // Ignored header type
    433                     .global_extended_header => {
    434                         self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
    435                     },
    436                     // All other are unsupported header types
    437                     else => {
    438                         const d = self.diagnostics orelse return error.TarUnsupportedHeader;
    439                         try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
    440                             .file_name = try d.allocator.dupe(u8, header.name()),
    441                             .file_type = kind,
    442                         } });
    443                         if (kind == .gnu_sparse) {
    444                             try self.skipGnuSparseExtendedHeaders(header);
    445                         }
    446                         self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
    447                     },
    448                 }
    449             }
    450             return null;
    451         }
    452 
    453         fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
    454             var is_extended = header.bytes[482] > 0;
    455             while (is_extended) {
    456                 var buf: [Header.SIZE]u8 = undefined;
    457                 const n = try self.reader.readAll(&buf);
    458                 if (n < Header.SIZE) return error.UnexpectedEndOfStream;
    459                 is_extended = buf[504] > 0;
    460             }
    461         }
    462     };
    463 }
    464 
    465 /// Pax attributes iterator.
    466 /// Size is length of pax extended header in reader.
    467 fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
    468     return PaxIterator(@TypeOf(reader)){
    469         .reader = reader,
    470         .size = size,
    471     };
    472 }
    473 
// Pax keywords that the pax iterator reports; records with any other
// keyword are skipped.
const PaxAttributeKind = enum {
    path,
    linkpath,
    size,
};
    479 
// maxInt(u64) has 20 digits in base 10; in practice we have seen 24 chars.
    481 const pax_max_size_attr_len = 64;
    482 
    483 fn PaxIterator(comptime ReaderType: type) type {
    484     return struct {
    485         size: usize, // cumulative size of all pax attributes
    486         reader: ReaderType,
    487         // scratch buffer used for reading attribute length and keyword
    488         scratch: [128]u8 = undefined,
    489 
    490         const Self = @This();
    491 
    492         const Attribute = struct {
    493             kind: PaxAttributeKind,
    494             len: usize, // length of the attribute value
    495             reader: ReaderType, // reader positioned at value start
    496 
    497             // Copies pax attribute value into destination buffer.
    498             // Must be called with destination buffer of size at least Attribute.len.
    499             pub fn value(self: Attribute, dst: []u8) ![]const u8 {
    500                 if (self.len > dst.len) return error.TarInsufficientBuffer;
    501                 // assert(self.len <= dst.len);
    502                 const buf = dst[0..self.len];
    503                 const n = try self.reader.readAll(buf);
    504                 if (n < self.len) return error.UnexpectedEndOfStream;
    505                 try validateAttributeEnding(self.reader);
    506                 if (hasNull(buf)) return error.PaxNullInValue;
    507                 return buf;
    508             }
    509         };
    510 
    511         // Iterates over pax attributes. Returns known only known attributes.
    512         // Caller has to call value in Attribute, to advance reader across value.
    513         pub fn next(self: *Self) !?Attribute {
    514             // Pax extended header consists of one or more attributes, each constructed as follows:
    515             // "%d %s=%s\n", <length>, <keyword>, <value>
    516             while (self.size > 0) {
    517                 const length_buf = try self.readUntil(' ');
    518                 const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
    519 
    520                 const keyword = try self.readUntil('=');
    521                 if (hasNull(keyword)) return error.PaxNullInKeyword;
    522 
    523                 // calculate value_len
    524                 const value_start = length_buf.len + keyword.len + 2; // 2 separators
    525                 if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
    526                 const value_len = length - value_start - 1; // \n separator at end
    527                 self.size -= length;
    528 
    529                 const kind: PaxAttributeKind = if (eql(keyword, "path"))
    530                     .path
    531                 else if (eql(keyword, "linkpath"))
    532                     .linkpath
    533                 else if (eql(keyword, "size"))
    534                     .size
    535                 else {
    536                     try self.reader.skipBytes(value_len, .{});
    537                     try validateAttributeEnding(self.reader);
    538                     continue;
    539                 };
    540                 if (kind == .size and value_len > pax_max_size_attr_len) {
    541                     return error.PaxSizeAttrOverflow;
    542                 }
    543                 return Attribute{
    544                     .kind = kind,
    545                     .len = value_len,
    546                     .reader = self.reader,
    547                 };
    548             }
    549 
    550             return null;
    551         }
    552 
    553         fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
    554             var fbs = std.io.fixedBufferStream(&self.scratch);
    555             try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
    556             return fbs.getWritten();
    557         }
    558 
    559         fn eql(a: []const u8, b: []const u8) bool {
    560             return std.mem.eql(u8, a, b);
    561         }
    562 
    563         fn hasNull(str: []const u8) bool {
    564             return (std.mem.indexOfScalar(u8, str, 0)) != null;
    565         }
    566 
    567         // Checks that each record ends with new line.
    568         fn validateAttributeEnding(reader: ReaderType) !void {
    569             if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
    570         }
    571     };
    572 }
    573 
    574 /// Saves tar file content to the file systems.
    575 pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
    576     var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    577     var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    578     var iter = iterator(reader, .{
    579         .file_name_buffer = &file_name_buffer,
    580         .link_name_buffer = &link_name_buffer,
    581         .diagnostics = options.diagnostics,
    582     });
    583     while (try iter.next()) |file| {
    584         switch (file.kind) {
    585             .directory => {
    586                 const file_name = stripComponents(file.name, options.strip_components);
    587                 if (file_name.len != 0 and !options.exclude_empty_directories) {
    588                     try dir.makePath(file_name);
    589                 }
    590             },
    591             .file => {
    592                 if (file.size == 0 and file.name.len == 0) return;
    593                 const file_name = stripComponents(file.name, options.strip_components);
    594                 if (file_name.len == 0) return error.BadFileName;
    595 
    596                 if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| {
    597                     defer fs_file.close();
    598                     try file.writeAll(fs_file);
    599                 } else |err| {
    600                     const d = options.diagnostics orelse return err;
    601                     try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
    602                         .code = err,
    603                         .file_name = try d.allocator.dupe(u8, file_name),
    604                     } });
    605                 }
    606             },
    607             .sym_link => {
    608                 // The file system path of the symbolic link.
    609                 const file_name = stripComponents(file.name, options.strip_components);
    610                 if (file_name.len == 0) return error.BadFileName;
    611                 // The data inside the symbolic link.
    612                 const link_name = file.link_name;
    613 
    614                 createDirAndSymlink(dir, link_name, file_name) catch |err| {
    615                     const d = options.diagnostics orelse return error.UnableToCreateSymLink;
    616                     try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
    617                         .code = err,
    618                         .file_name = try d.allocator.dupe(u8, file_name),
    619                         .link_name = try d.allocator.dupe(u8, link_name),
    620                     } });
    621                 };
    622             },
    623         }
    624     }
    625 }
    626 
    627 fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8, mode: std.fs.File.Mode) !std.fs.File {
    628     const fs_file = dir.createFile(file_name, .{ .exclusive = true, .mode = mode }) catch |err| {
    629         if (err == error.FileNotFound) {
    630             if (std.fs.path.dirname(file_name)) |dir_name| {
    631                 try dir.makePath(dir_name);
    632                 return try dir.createFile(file_name, .{ .exclusive = true, .mode = mode });
    633             }
    634         }
    635         return err;
    636     };
    637     return fs_file;
    638 }
    639 
    640 // Creates a symbolic link at path `file_name` which points to `link_name`.
    641 fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
    642     dir.symLink(link_name, file_name, .{}) catch |err| {
    643         if (err == error.FileNotFound) {
    644             if (std.fs.path.dirname(file_name)) |dir_name| {
    645                 try dir.makePath(dir_name);
    646                 return try dir.symLink(link_name, file_name, .{});
    647             }
    648         }
    649         return err;
    650     };
    651 }
    652 
    653 fn stripComponents(path: []const u8, count: u32) []const u8 {
    654     var i: usize = 0;
    655     var c = count;
    656     while (c > 0) : (c -= 1) {
    657         if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| {
    658             i = pos + 1;
    659         } else {
    660             i = path.len;
    661             break;
    662         }
    663     }
    664     return path[i..];
    665 }
    666 
    667 test stripComponents {
    668     const expectEqualStrings = testing.expectEqualStrings;
    669     try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
    670     try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
    671     try expectEqualStrings("c", stripComponents("a/b/c", 2));
    672     try expectEqualStrings("", stripComponents("a/b/c", 3));
    673     try expectEqualStrings("", stripComponents("a/b/c", 4));
    674 }
    675 
    // Table-driven test of the pax attribute iterator.
    //
    // Every case feeds `data` to `paxIterator` and compares the attributes it
    // yields with `attrs`, in order. A case may end in one of two expected
    // failures:
    //   * `case.err`  - the error `iter.next()` must return,
    //   * `Attr.err`  - the error `attr.value()` must return for that attribute.
    // In both situations the case is finished and the loop moves on to the next
    // case, so that every entry of the table is exercised.
    test PaxIterator {
        const Attr = struct {
            kind: PaxAttributeKind,
            value: []const u8 = undefined,
            err: ?anyerror = null,
        };
        const cases = [_]struct {
            data: []const u8,
            attrs: []const Attr,
            err: ?anyerror = null,
        }{
            .{ // valid but unknown keys
                .data =
                \\30 mtime=1350244992.023960108
                \\6 k=1
                \\13 key1=val1
                \\10 a=name
                \\9 a=name
                \\
                ,
                .attrs = &[_]Attr{},
            },
            .{ // mix of known and unknown keys
                .data =
                \\6 k=1
                \\13 path=name
                \\17 linkpath=link
                \\13 key1=val1
                \\12 size=123
                \\13 key2=val2
                \\
                ,
                .attrs = &[_]Attr{
                    .{ .kind = .path, .value = "name" },
                    .{ .kind = .linkpath, .value = "link" },
                    .{ .kind = .size, .value = "123" },
                },
            },
            .{ // too short size of the second key-value pair
                .data =
                \\13 path=name
                \\10 linkpath=value
                \\
                ,
                .attrs = &[_]Attr{
                    .{ .kind = .path, .value = "name" },
                },
                .err = error.UnexpectedEndOfStream,
            },
            .{ // too long size of the last key-value pair: 19 declared, 18 bytes present
                .data =
                \\13 path=name
                \\6 k=1
                \\19 linkpath=value
                \\
                ,
                .attrs = &[_]Attr{
                    .{ .kind = .path, .value = "name" },
                },
                .err = error.UnexpectedEndOfStream,
            },

            .{ // too long size of the second key-value pair, reaching into the next record
                .data =
                \\13 path=name
                \\19 linkpath=value
                \\6 k=1
                \\
                ,
                .attrs = &[_]Attr{
                    .{ .kind = .path, .value = "name" },
                    .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
                },
            },
            .{ // null in keyword is not valid
                .data = "13 path=name\n" ++ "7 k\x00b=1\n",
                .attrs = &[_]Attr{
                    .{ .kind = .path, .value = "name" },
                },
                .err = error.PaxNullInKeyword,
            },
            .{ // null in value is not valid
                .data = "23 path=name\x00with null\n",
                .attrs = &[_]Attr{
                    .{ .kind = .path, .err = error.PaxNullInValue },
                },
            },
            .{ // 1000 characters path
                .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
                .attrs = &[_]Attr{
                    .{ .kind = .path, .value = "0123456789" ** 100 },
                },
            },
        };
        var buffer: [1024]u8 = undefined;

        outer: for (cases) |case| {
            var stream = std.io.fixedBufferStream(case.data);
            var iter = paxIterator(stream.reader(), case.data.len);

            var i: usize = 0;
            while (iter.next() catch |err| {
                if (case.err) |e| {
                    try testing.expectEqual(e, err);
                    // Expected iterator failure: this case is done.
                    continue :outer;
                }
                return err;
            }) |attr| : (i += 1) {
                // Fail the test (instead of indexing out of bounds) when the
                // iterator yields more attributes than the case lists.
                try testing.expect(i < case.attrs.len);
                const exp = case.attrs[i];
                try testing.expectEqual(exp.kind, attr.kind);
                const value = attr.value(&buffer) catch |err| {
                    if (exp.err) |e| {
                        try testing.expectEqual(e, err);
                        // Expected value failure: go on with the next case.
                        // Breaking out of `outer` here would silently skip
                        // every case that follows in the table.
                        continue :outer;
                    }
                    return err;
                };
                try testing.expectEqualStrings(exp.value, value);
            }
            // Reached only when iteration ended without any error.
            try testing.expectEqual(case.attrs.len, i);
            try testing.expect(case.err == null);
        }
    }
    799 
    // Reference the companion test file so that it is imported whenever this
    // file's tests are built.
    test {
        _ = @import("tar/test.zig");
    }
    803 
    // Checks `Header.size()` against raw field contents. Each case's bytes are
    // copied into an otherwise zeroed header starting at byte 124 before the
    // size is parsed.
    test "header parse size" {
        const Case = struct {
            in: []const u8,
            want: u64 = 0,
            err: ?anyerror = null,
        };
        const cases = [_]Case{
            // Test base-256 (binary) encoded values.
            .{ .in = "", .want = 0 },
            .{ .in = "\x80", .want = 0 },
            .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
            .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
            .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
            .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
            .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
            .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },

            // Test base-8 (octal) encoded values.
            .{ .in = "00000000227\x00", .want = 0o227 },
            .{ .in = "  000000227\x00", .want = 0o227 },
            .{ .in = "00000000228\x00", .err = error.TarHeader },
            .{ .in = "11111111111\x00", .want = 0o11111111111 },
        };

        const field_offset = 124;
        for (cases) |case| {
            var bytes = [_]u8{0} ** Header.SIZE;
            @memcpy(bytes[field_offset..][0..case.in.len], case.in);
            var header = Header{ .bytes = &bytes };

            const expected_err = case.err orelse {
                // No error expected: the parsed value must match.
                try testing.expectEqual(case.want, try header.size());
                continue;
            };
            try testing.expectError(expected_err, header.size());
        }
    }
    838 
    // Checks `Header.mode()` against raw field contents. Each case's bytes are
    // copied into an otherwise zeroed header starting at byte 100 before the
    // mode is parsed.
    test "header parse mode" {
        const Case = struct {
            in: []const u8,
            want: u64 = 0,
            err: ?anyerror = null,
        };
        const cases = [_]Case{
            .{ .in = "0000644\x00", .want = 0o644 },
            .{ .in = "0000777\x00", .want = 0o777 },
            .{ .in = "7777777\x00", .want = 0o7777777 },
            // '8' is not an octal digit.
            .{ .in = "7777778\x00", .err = error.TarHeader },
            .{ .in = "77777777", .want = 0o77777777 },
            // Twelve digits are written, yet only eight are expected in the result.
            .{ .in = "777777777777", .want = 0o77777777 },
        };

        const field_offset = 100;
        for (cases) |case| {
            var bytes = [_]u8{0} ** Header.SIZE;
            @memcpy(bytes[field_offset..][0..case.in.len], case.in);
            var header = Header{ .bytes = &bytes };

            const expected_err = case.err orelse {
                // No error expected: the parsed value must match.
                try testing.expectEqual(case.want, try header.mode());
                continue;
            };
            try testing.expectError(expected_err, header.mode());
        }
    }
    863 
    // Exercises `createDirAndFile` and `createDirAndSymlink` inside a temporary
    // directory, with and without parent directories that do not exist yet.
    test "create file and symlink" {
        var root = testing.tmpDir(.{});
        defer root.cleanup();

        // A file directly in the root, then one below directories that are missing.
        for ([_][]const u8{ "file1", "a/b/c/file2" }) |path| {
            const created = try createDirAndFile(root.dir, path, default_mode);
            created.close();
        }

        createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| {
            // On Windows when developer mode is not enabled
            return if (err == error.AccessDenied) error.SkipZigTest else err;
        };
        try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");

        // Dangling symlink, its target file is created afterwards
        try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
        const target = try createDirAndFile(root.dir, "g/h/i/file4", default_mode);
        target.close();
    }
    885 
    // Walks the entries of the embedded example archive with `iterator` and
    // checks the name, kind, link target and content of each of them.
    test iterator {
        // Example tar file is created from this tree structure:
        // $ tree example
        //    example
        //    ├── a
        //    │   └── file
        //    ├── b
        //    │   └── symlink -> ../a/file
        //    └── empty
        // $ cat example/a/file
        //   content
        // $ tar -cf example.tar example
        // $ tar -tvf example.tar
        //    example/
        //    example/b/
        //    example/b/symlink -> ../a/file
        //    example/a/
        //    example/a/file
        //    example/empty/

        const data = @embedFile("tar/testdata/example.tar");
        var fbs = std.io.fixedBufferStream(data);

        // Buffers the iterator is given for file names and link names.
        var name_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
        var link_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
        var iter = iterator(fbs.reader(), .{
            .file_name_buffer = &name_buf,
            .link_name_buffer = &link_buf,
        });

        // Visit every entry of example.tar; `file_no` is its position in the archive.
        var file_no: usize = 0;
        while (try iter.next()) |file| : (file_no += 1) {
            switch (file.kind) {
                .directory => {
                    // Directories are expected only at these positions.
                    const expected_name: []const u8 = switch (file_no) {
                        0 => "example/",
                        1 => "example/b/",
                        3 => "example/a/",
                        5 => "example/empty/",
                        else => unreachable,
                    };
                    try testing.expectEqualStrings(expected_name, file.name);
                },
                .file => {
                    try testing.expectEqualStrings("example/a/file", file.name);
                    // Read file content
                    var content: [16]u8 = undefined;
                    const len = try file.reader().readAll(&content);
                    try testing.expectEqualStrings("content\n", content[0..len]);
                },
                .sym_link => {
                    try testing.expectEqualStrings("example/b/symlink", file.name);
                    try testing.expectEqualStrings("../a/file", file.link_name);
                },
            }
        }
    }
    944 
    // Unpacks the embedded example archive into a temporary directory with
    // `pipeToFileSystem` and inspects what ended up on disk.
    test pipeToFileSystem {
        // Example tar file is created from this tree structure:
        // $ tree example
        //    example
        //    ├── a
        //    │   └── file
        //    ├── b
        //    │   └── symlink -> ../a/file
        //    └── empty
        // $ cat example/a/file
        //   content
        // $ tar -cf example.tar example
        // $ tar -tvf example.tar
        //    example/
        //    example/b/
        //    example/b/symlink -> ../a/file
        //    example/a/
        //    example/a/file
        //    example/empty/

        const data = @embedFile("tar/testdata/example.tar");
        var fbs = std.io.fixedBufferStream(data);
        const reader = fbs.reader();

        var tmp = testing.tmpDir(.{ .no_follow = true });
        defer tmp.cleanup();
        const dir = tmp.dir;

        // Save tar from `reader` to the file system `dir`
        pipeToFileSystem(dir, reader, .{
            .mode_mode = .ignore,
            .strip_components = 1,
            .exclude_empty_directories = true,
        }) catch |err| {
            // Skip on platform which don't support symlinks
            return if (err == error.UnableToCreateSymLink) error.SkipZigTest else err;
        };

        // The empty directory was excluded; the leading "example/" was stripped.
        try testing.expectError(error.FileNotFound, dir.statFile("empty"));

        const file_stat = try dir.statFile("a/file");
        try testing.expect(file_stat.kind == .file);

        // statFile follows symlink
        const link_stat = try dir.statFile("b/symlink");
        try testing.expect(link_stat.kind == .file);

        var link_buf: [32]u8 = undefined;
        const link_target = try dir.readLink("b/symlink", &link_buf);
        try testing.expectEqualSlices(u8, "../a/file", normalizePath(link_target));
    }
    995 
    // Rewrites `bytes` in place so that every native path separator becomes the
    // POSIX separator, and returns the same slice. Nothing is touched when the
    // native separator already is the POSIX one.
    fn normalizePath(bytes: []u8) []u8 {
        const native_sep = std.fs.path.sep;
        const posix_sep = std.fs.path.sep_posix;
        if (native_sep != posix_sep) {
            for (bytes) |*byte| {
                if (byte.* == native_sep) byte.* = posix_sep;
            }
        }
        return bytes;
    }
   1002 
   // Shorthand for the standard library's default file mode; `fileMode` returns
   // it whenever the archive's executable bit is not being applied.
   const default_mode = std.fs.File.default_mode;
   1004 
   // File system mode based on tar header mode and mode_mode options.
   //
   // Returns `default_mode` when the target has no executable bit, when
   // `options.mode_mode` is `.ignore`, or when the owner executable bit is not
   // set in `mode`. Otherwise returns `default_mode` with the executable bit
   // added for user, group and others.
   fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode {
       if (!std.fs.has_executable_bit or options.mode_mode == .ignore)
           return default_mode;

       const S = std.posix.S;

       // Only the owner executable bit of the tar header mode is looked at.
       const owner_can_execute = mode & S.IXUSR != 0;

       // That single bit is fanned out to user, group and others; all other
       // bits stay as in the default mode.
       return if (owner_can_execute)
           default_mode | S.IXUSR | S.IXGRP | S.IXOTH
       else
           default_mode;
   }
   1020 
   // Checks the mode `fileMode` derives from a tar header mode, both when modes
   // are ignored and with default options.
   test fileMode {
       if (!std.fs.has_executable_bit) return error.SkipZigTest;

       // With `.ignore` the header mode has no influence at all.
       const ignoring = PipeOptions{ .mode_mode = .ignore };
       try testing.expectEqual(default_mode, fileMode(0o744, ignoring));

       // With default options only the owner executable bit of the input matters.
       const defaults = PipeOptions{};
       try testing.expectEqual(0o777, fileMode(0o744, defaults));
       try testing.expectEqual(0o666, fileMode(0o644, defaults));
       try testing.expectEqual(0o666, fileMode(0o655, defaults));
   }
   1028 
   // Unpacks the example archive once per `ModeMode` listed below and checks the
   // executable bits of the extracted `a/file`.
   test "executable bit" {
       if (!std.fs.has_executable_bit) return error.SkipZigTest;

       const S = std.posix.S;
       const data = @embedFile("tar/testdata/example.tar");

       for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| {
           var fbs = std.io.fixedBufferStream(data);
           const reader = fbs.reader();

           var tmp = testing.tmpDir(.{ .no_follow = true });
           // Runs at the end of every iteration (and on every early return
           // below), so no temporary directory is left behind.
           defer tmp.cleanup();

           pipeToFileSystem(tmp.dir, reader, .{
               .strip_components = 1,
               .exclude_empty_directories = true,
               .mode_mode = opt,
           }) catch |err| {
               // Skip on platform which don't support symlinks
               if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
               return err;
           };

           const stat = try tmp.dir.statFile("a/file");
           try testing.expect(stat.kind == .file);

           if (opt == .executable_bit_only) {
               // Executable bit is set for user, group and others
               try testing.expect(stat.mode & S.IXUSR > 0);
               try testing.expect(stat.mode & S.IXGRP > 0);
               try testing.expect(stat.mode & S.IXOTH > 0);
           }
           if (opt == .ignore) {
               // Executable bit is not set for anyone
               try testing.expect(stat.mode & S.IXUSR == 0);
               try testing.expect(stat.mode & S.IXGRP == 0);
               try testing.expect(stat.mode & S.IXOTH == 0);
           }
       }
   }