blob 0f273bad (39586B) - Raw
//! A tar archive is a single ordinary file which can contain many files (or
//! directories, symlinks, ...). It is built from a series of blocks, each 512
//! bytes in size. The first block of each entry is a header which defines the
//! type, name, size, permissions and other attributes. The header is followed
//! by a series of blocks of file content, if the entry has any content. Content
//! is padded to the block size, so the next header always starts at a block
//! boundary.
//!
//! This simple format is extended by GNU and POSIX pax extensions to support
//! file names longer than 256 bytes and additional attributes.
//!
//! This is not a comprehensive tar parser. Only the file types needed to
//! support the Zig package manager are handled: normal file, directory and
//! symbolic link, with a subset of attributes: name, size, permissions.
//!
//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13

const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;

pub const output = @import("tar/output.zig");

/// Provide this to receive detailed error messages.
/// When this is provided, some errors which would otherwise be returned
/// immediately will instead be added to this structure. The API user must check
/// the errors in diagnostics to know whether the operation succeeded or failed.
pub const Diagnostics = struct {
    /// Allocator used for the error list and for the strings stored in it.
    allocator: std.mem.Allocator,
    /// Errors collected so far. The names inside are freed by `deinit`.
    errors: std.ArrayListUnmanaged(Error) = .{},

    pub const Error = union(enum) {
        unable_to_create_sym_link: struct {
            code: anyerror,
            file_name: []const u8,
            link_name: []const u8,
        },
        unable_to_create_file: struct {
            code: anyerror,
            file_name: []const u8,
        },
        unsupported_file_type: struct {
            file_name: []const u8,
            file_type: Header.Kind,
        },
    };

    /// Frees every collected error, including the names stored in it, and
    /// leaves the structure undefined.
    pub fn deinit(d: *Diagnostics) void {
        for (d.errors.items) |item| {
            switch (item) {
                .unable_to_create_sym_link => |info| {
                    d.allocator.free(info.file_name);
                    d.allocator.free(info.link_name);
                },
                .unable_to_create_file => |info| {
                    d.allocator.free(info.file_name);
                },
                .unsupported_file_type => |info| {
                    d.allocator.free(info.file_name);
                },
            }
        }
        d.errors.deinit(d.allocator);
        d.* = undefined;
    }
};

/// pipeToFileSystem options
pub const PipeOptions = struct {
    /// Number of directory levels to skip when extracting files.
    strip_components: u32 = 0,
    /// How to handle the "mode" property of files from within the tar file.
    mode_mode: ModeMode = .executable_bit_only,
    /// Prevents creation of empty directories.
    exclude_empty_directories: bool = false,
    /// Collects error messages during unpacking
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode from the tar file is completely ignored. Files are created
        /// with the default mode when creating files.
        ignore,
        /// The mode from the tar file is inspected for the owner executable bit
        /// only. This bit is copied to the group and other executable bits.
        /// Other bits of the mode are left as the default when creating files.
        executable_bit_only,
    };
};

/// View over a single 512 byte tar header block.
const Header = struct {
    const SIZE = 512;
    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
    const LINK_NAME_SIZE = 100;

    bytes: *const [SIZE]u8,

    const Kind = enum(u8) {
        normal_alias = 0,
        normal = '0',
        hard_link = '1',
        symbolic_link = '2',
        character_special = '3',
        block_special = '4',
        directory = '5',
        fifo = '6',
        contiguous = '7',
        global_extended_header = 'g',
        extended_header = 'x',
        // Types 'L' and 'K' are used by the GNU format for a meta file
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
        gnu_sparse = 'S',
        solaris_extended_header = 'X',
        _,
    };

    /// Copies the entry name into `buffer` and returns the written slice.
    /// Includes prefix concatenated, if any.
    /// Returns `error.TarInsufficientBuffer` when `buffer` can not hold
    /// name, separator and prefix.
    /// TODO: check against "../" and other nefarious things
    pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
        const n = name(header);
        const p = prefix(header);
        if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer;
        if (!is_ustar(header) or p.len == 0) {
            @memcpy(buffer[0..n.len], n);
            return buffer[0..n.len];
        }
        @memcpy(buffer[0..p.len], p);
        buffer[p.len] = '/';
        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
        return buffer[0 .. p.len + 1 + n.len];
    }

    /// When kind is symbolic_link linked-to name (target_path) is specified in
    /// the linkname field. The name is copied into `buffer`; an empty slice is
    /// returned when the field is empty.
    pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
        const link_name = header.str(157, LINK_NAME_SIZE);
        if (link_name.len == 0) {
            return buffer[0..0];
        }
        if (buffer.len < link_name.len) return error.TarInsufficientBuffer;
        const buf = buffer[0..link_name.len];
        @memcpy(buf, link_name);
        return buf;
    }

    /// Content of the name field, up to the first null byte.
    pub fn name(header: Header) []const u8 {
        return header.str(0, 100);
    }

    /// Value of the octal mode field.
    pub fn mode(header: Header) !u32 {
        // At most 8 octal digits are parsed, so the value always fits in u32.
        return @intCast(try header.octal(100, 8));
    }

    /// Value of the size field, either octal or base-256 (binary) encoded.
    pub fn size(header: Header) !u64 {
        const start = 124;
        const len = 12;
        const raw = header.bytes[start..][0..len];
        // If the leading byte is 0xff (255), all the bytes of the field
        // (including the leading byte) are concatenated in big-endian order,
        // with the result being a negative number expressed in two's
        // complement form.
        if (raw[0] == 0xff) return error.TarNumericValueNegative;
        // If the leading byte is 0x80 (128), the non-leading bytes of the
        // field are concatenated in big-endian order.
        if (raw[0] == 0x80) {
            // Only the last 8 bytes fit into u64; anything above is too big.
            if (raw[1] != 0 or raw[2] != 0 or raw[3] != 0) return error.TarNumericValueTooBig;
            return std.mem.readInt(u64, raw[4..12], .big);
        }
        return try header.octal(start, len);
    }

    /// Value of the octal chksum field.
    pub fn chksum(header: Header) !u64 {
        return header.octal(148, 8);
    }

    /// True when the magic field starts with "ustar" followed by null or space.
    pub fn is_ustar(header: Header) bool {
        const magic = header.bytes[257..][0..6];
        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
    }

    /// Content of the prefix field, up to the first null byte.
    pub fn prefix(header: Header) []const u8 {
        return header.str(345, 155);
    }

    /// Entry type from the typeflag byte; `normal_alias` is reported as `normal`.
    pub fn kind(header: Header) Kind {
        const result: Kind = @enumFromInt(header.bytes[156]);
        if (result == .normal_alias) return .normal;
        return result;
    }

    // Null terminated string stored in the field at `start` with width `len`.
    fn str(header: Header, start: usize, len: usize) []const u8 {
        return nullStr(header.bytes[start .. start + len]);
    }

    // Parses the octal number stored in the field at `start` with width `len`.
    // An empty field (only zeros, spaces or nulls) is parsed as 0.
    fn octal(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        // Zero-filled octal number in ASCII. Each numeric field of width w
        // contains w minus 1 digits, and a null
        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
        if (rtrimmed.len == 0) return 0;
        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
    }

    const Chksums = struct {
        unsigned: u64,
        signed: i64,
    };

    // Sum of all bytes in the header block. The chksum field is treated as if
    // it were filled with spaces (ASCII 32).
    // Both the unsigned and the signed byte interpretation are summed.
    fn computeChksum(header: Header) Chksums {
        var cs: Chksums = .{ .signed = 0, .unsigned = 0 };
        for (header.bytes, 0..) |v, i| {
            const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces.
            cs.unsigned += b;
            cs.signed += @as(i8, @bitCast(b));
        }
        return cs;
    }

    // Checks calculated chksum with value of chksum field.
    // Returns error or valid chksum value.
    // Zero value indicates empty block.
    pub fn checkChksum(header: Header) !u64 {
        const field = try header.chksum();
        const cs = header.computeChksum();
        // All-zero block: only the 8 chksum bytes counted as spaces, 8 * 32 = 256.
        if (field == 0 and cs.unsigned == 256) return 0;
        if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum;
        return field;
    }
};

// Breaks string on first null character.
fn nullStr(str: []const u8) []const u8 {
    for (str, 0..) |c, i| {
        if (c == 0) return str[0..i];
    }
    return str;
}

/// Options for iterator.
/// Buffers should be provided by the caller.
pub const IteratorOptions = struct {
    /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
    file_name_buffer: []u8,
    /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
    link_name_buffer: []u8,
    /// Collects error messages during unpacking
    diagnostics: ?*Diagnostics = null,
};

/// Iterates over files in tar archive.
/// `next` returns each file in tar archive.
pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
    return .{
        .reader = reader,
        .diagnostics = options.diagnostics,
        .file_name_buffer = options.file_name_buffer,
        .link_name_buffer = options.link_name_buffer,
    };
}

/// Type of the file returned by iterator `next` method.
pub const FileKind = enum {
    directory,
    sym_link,
    file,
};

/// Iterator over entries in the tar file represented by reader.
pub fn Iterator(comptime ReaderType: type) type {
    return struct {
        reader: ReaderType,
        diagnostics: ?*Diagnostics = null,

        // buffers for header and file attributes
        header_buffer: [Header.SIZE]u8 = undefined,
        file_name_buffer: []u8,
        link_name_buffer: []u8,

        // bytes of padding to the end of the block
        padding: usize = 0,
        // not consumed bytes of file from last next iteration
        unread_file_bytes: u64 = 0,

        pub const File = struct {
            name: []const u8, // name of file, symlink or directory
            link_name: []const u8, // target name of symlink
            size: u64 = 0, // size of the file in bytes
            mode: u32 = 0,
            kind: FileKind = .file,

            // Points at the iterator's `unread_file_bytes` counter.
            unread_bytes: *u64,
            parent_reader: ReaderType,

            pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);

            pub fn reader(self: File) Reader {
                return .{ .context = self };
            }

            /// Reads at most the remaining unread bytes of this file into `dest`.
            pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
                const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
                const n = try self.parent_reader.read(buf);
                self.unread_bytes.* -= n;
                return n;
            }

            // Writes file content to writer.
            pub fn writeAll(self: File, writer: anytype) !void {
                var buffer: [4096]u8 = undefined;

                while (self.unread_bytes.* > 0) {
                    const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
                    try self.parent_reader.readNoEof(buf);
                    try writer.writeAll(buf);
                    self.unread_bytes.* -= buf.len;
                }
            }
        };

        const Self = @This();

        // Skips padding of the previous entry and reads the next header block.
        // Returns null at the end of the stream or on an empty (all zero) block.
        fn readHeader(self: *Self) !?Header {
            if (self.padding > 0) {
                try self.reader.skipBytes(self.padding, .{});
            }
            const n = try self.reader.readAll(&self.header_buffer);
            if (n == 0) return null;
            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
            if (try header.checkChksum() == 0) return null;
            return header;
        }

        // Reads `size` bytes of entry content into `buffer` and returns them
        // up to the first null byte.
        fn readString(self: *Self, size: u64, buffer: []u8) ![]const u8 {
            // Compared as u64 so that an oversized value is rejected here
            // instead of being truncated by a cast on 32-bit targets.
            if (size > buffer.len) return error.TarInsufficientBuffer;
            const buf = buffer[0..@intCast(size)];
            try self.reader.readNoEof(buf);
            return nullStr(buf);
        }

        // File with empty name and link name, bound to this iterator's reader.
        fn newFile(self: *Self) File {
            return .{
                .name = self.file_name_buffer[0..0],
                .link_name = self.link_name_buffer[0..0],
                .parent_reader = self.reader,
                .unread_bytes = &self.unread_file_bytes,
            };
        }

        // Number of padding bytes in the last file block.
        fn blockPadding(size: u64) usize {
            // Modulo arithmetic instead of rounding `size` up, so that sizes
            // close to maxInt(u64) can not overflow.
            const remainder: usize = @intCast(size % Header.SIZE);
            return if (remainder == 0) 0 else Header.SIZE - remainder;
        }

        /// Iterates through the tar archive as if it is a series of files.
        /// Internally, the tar format often uses entries (header with optional
        /// content) to add meta data that describes the next file. These
        /// entries should not normally be visible to the outside. As such, this
        /// loop iterates through one or more entries until it collects all
        /// file attributes.
        pub fn next(self: *Self) !?File {
            if (self.unread_file_bytes > 0) {
                // If file content was not consumed by caller
                try self.reader.skipBytes(self.unread_file_bytes, .{});
                self.unread_file_bytes = 0;
            }
            var file: File = self.newFile();

            while (try self.readHeader()) |header| {
                const kind = header.kind();
                const size: u64 = try header.size();
                self.padding = blockPadding(size);

                switch (kind) {
                    // File types to return upstream
                    .directory, .normal, .symbolic_link => {
                        file.kind = switch (kind) {
                            .directory => .directory,
                            .normal => .file,
                            .symbolic_link => .sym_link,
                            else => unreachable,
                        };
                        file.mode = try header.mode();

                        // set file attributes if not already set by prefix/extended headers
                        if (file.size == 0) {
                            file.size = size;
                        }
                        if (file.link_name.len == 0) {
                            file.link_name = try header.linkName(self.link_name_buffer);
                        }
                        if (file.name.len == 0) {
                            file.name = try header.fullName(self.file_name_buffer);
                        }

                        self.padding = blockPadding(file.size);
                        self.unread_file_bytes = file.size;
                        return file;
                    },
                    // Prefix header types
                    .gnu_long_name => {
                        file.name = try self.readString(size, self.file_name_buffer);
                    },
                    .gnu_long_link => {
                        file.link_name = try self.readString(size, self.link_name_buffer);
                    },
                    .extended_header => {
                        // Use just attributes from last extended header.
                        file = self.newFile();

                        const pax_size = std.math.cast(usize, size) orelse return error.TarHeadersTooBig;
                        var rdr = paxIterator(self.reader, pax_size);
                        while (try rdr.next()) |attr| {
                            switch (attr.kind) {
                                .path => {
                                    file.name = try attr.value(self.file_name_buffer);
                                },
                                .linkpath => {
                                    file.link_name = try attr.value(self.link_name_buffer);
                                },
                                .size => {
                                    var buf: [pax_max_size_attr_len]u8 = undefined;
                                    file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                                },
                            }
                        }
                    },
                    // Ignored header type
                    .global_extended_header => {
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                    // All other are unsupported header types
                    else => {
                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                        {
                            // The errdefer is limited to this scope: once the
                            // name is stored in `d.errors` it is owned by the
                            // diagnostics and must not be freed here.
                            const file_name = try d.allocator.dupe(u8, header.name());
                            errdefer d.allocator.free(file_name);
                            try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                                .file_name = file_name,
                                .file_type = kind,
                            } });
                        }
                        if (kind == .gnu_sparse) {
                            try self.skipGnuSparseExtendedHeaders(header);
                        }
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                }
            }
            return null;
        }

        // Reads and discards the extension blocks that follow a GNU sparse
        // header, for as long as the current block's "is extended" byte is set.
        fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
            var is_extended = header.bytes[482] > 0;
            while (is_extended) {
                var buf: [Header.SIZE]u8 = undefined;
                const n = try self.reader.readAll(&buf);
                if (n < Header.SIZE) return error.UnexpectedEndOfStream;
                is_extended = buf[504] > 0;
            }
        }
    };
}

/// Pax attributes iterator.
/// Size is length of pax extended header in reader.
fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
    return .{ .size = size, .reader = reader };
}

/// Pax keywords which are reported by the attribute iterator.
const PaxAttributeKind = enum {
    path,
    linkpath,
    size,
};

// Upper bound for the length of the pax `size` attribute value.
// maxInt(u64) needs only 20 decimal digits, so 64 leaves plenty of room.
const pax_max_size_attr_len = 64;

/// Iterator over the "%d %s=%s\n" records of a pax extended header.
fn PaxIterator(comptime ReaderType: type) type {
    return struct {
        /// Remaining bytes of the pax extended header, all records together.
        size: usize,
        reader: ReaderType,
        /// Holds the record length digits and then the keyword while parsing.
        scratch: [128]u8 = undefined,

        const Self = @This();

        const Attribute = struct {
            kind: PaxAttributeKind,
            /// Number of bytes in the attribute value.
            len: usize,
            /// Reader positioned at the first byte of the value.
            reader: ReaderType,

            /// Reads the attribute value into `dst` and consumes the record
            /// terminator. Fails with `error.TarInsufficientBuffer` when `dst`
            /// is shorter than `len`.
            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
                if (dst.len < self.len) return error.TarInsufficientBuffer;
                const out = dst[0..self.len];
                if (try self.reader.readAll(out) != out.len) return error.UnexpectedEndOfStream;
                try validateAttributeEnding(self.reader);
                if (hasNull(out)) return error.PaxNullInValue;
                return out;
            }
        };

        /// Advances to the next attribute with a known keyword; records with
        /// other keywords are skipped. The caller has to call `value` on the
        /// returned attribute to move the reader past the value.
        pub fn next(self: *Self) !?Attribute {
            // Every record has the form: "<length> <keyword>=<value>\n",
            // where <length> is the decimal length of the whole record.
            while (self.size > 0) {
                const digits = try self.readUntil(' ');
                const record_len = try std.fmt.parseInt(usize, digits, 10);
                // The scratch buffer is reused for the keyword; only the
                // number of digits is needed from here on.
                const digits_len = digits.len;

                const keyword = try self.readUntil('=');
                if (hasNull(keyword)) return error.PaxNullInKeyword;

                // Bytes in front of the value: digits, ' ', keyword and '='.
                const value_offset = digits_len + keyword.len + 2;
                if (record_len < value_offset + 1 or record_len > self.size) {
                    return error.UnexpectedEndOfStream;
                }
                // The record is closed by a single '\n'.
                const value_len = record_len - value_offset - 1;
                self.size -= record_len;

                const kind = std.meta.stringToEnum(PaxAttributeKind, keyword) orelse {
                    // Unknown keyword, step over value and terminator.
                    try self.reader.skipBytes(value_len, .{});
                    try validateAttributeEnding(self.reader);
                    continue;
                };
                if (kind == .size and value_len > pax_max_size_attr_len) {
                    return error.PaxSizeAttrOverflow;
                }
                return .{
                    .kind = kind,
                    .len = value_len,
                    .reader = self.reader,
                };
            }

            return null;
        }

        // Reads into the scratch buffer up to, not including, `delimiter`.
        fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
            var scratch_stream = std.io.fixedBufferStream(&self.scratch);
            try self.reader.streamUntilDelimiter(scratch_stream.writer(), delimiter, null);
            return scratch_stream.getWritten();
        }

        fn hasNull(str: []const u8) bool {
            for (str) |c| {
                if (c == 0) return true;
            }
            return false;
        }

        // Each record has to be terminated by a new line.
        fn validateAttributeEnding(reader: ReaderType) !void {
            const last = try reader.readByte();
            if (last != '\n') return error.PaxInvalidAttributeEnd;
        }
    };
}

/// Saves tar file content to the file system.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
    var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var iter = iterator(reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
        .diagnostics = options.diagnostics,
    });
    while (try iter.next()) |file| {
        switch (file.kind) {
            .directory => {
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len != 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .file => {
                if (file.size == 0 and file.name.len == 0) return;
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len == 0) return error.BadFileName;

                if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| {
                    defer fs_file.close();
                    try file.writeAll(fs_file);
                } else |err| {
                    const d = options.diagnostics orelse return err;
                    // Release the copy again if it can not be stored.
                    const file_name_copy = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(file_name_copy);
                    try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                        .code = err,
                        .file_name = file_name_copy,
                    } });
                }
            },
            .sym_link => {
                // The file system path of the symbolic link.
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len == 0) return error.BadFileName;
                // The data inside the symbolic link.
                const link_name = file.link_name;

                createDirAndSymlink(dir, link_name, file_name) catch |err| {
                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
                    // Release the copies again if a later allocation fails.
                    const file_name_copy = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(file_name_copy);
                    const link_name_copy = try d.allocator.dupe(u8, link_name);
                    errdefer d.allocator.free(link_name_copy);
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = err,
                        .file_name = file_name_copy,
                        .link_name = link_name_copy,
                    } });
                };
            },
        }
    }
}

// Creates the file `file_name` in `dir`. When creation fails with
// FileNotFound the parent path is created and the creation is retried once.
fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8, mode: std.fs.File.Mode) !std.fs.File {
    const fs_file = dir.createFile(file_name, .{ .exclusive = true, .mode = mode }) catch |err| {
        if (err == error.FileNotFound) {
            if (std.fs.path.dirname(file_name)) |dir_name| {
                try dir.makePath(dir_name);
                return try dir.createFile(file_name, .{ .exclusive = true, .mode = mode });
            }
        }
        return err;
    };
    return fs_file;
}

// Creates a symbolic link at path `file_name` which points to `link_name`.
// When creation fails with FileNotFound the parent path of `file_name` is
// created and the creation is retried once.
fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
    dir.symLink(link_name, file_name, .{}) catch |err| {
        if (err == error.FileNotFound) {
            if (std.fs.path.dirname(file_name)) |dir_name| {
                try dir.makePath(dir_name);
                return try dir.symLink(link_name, file_name, .{});
            }
        }
        return err;
    };
}

// Removes the first `count` '/' separated components from `path`.
// Returns an empty slice when `path` has no more than `count` components.
fn stripComponents(path: []const u8, count: u32) []const u8 {
    var i: usize = 0;
    var c = count;
    while (c > 0) : (c -= 1) {
        if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| {
            i = pos + 1;
        } else {
            i = path.len;
            break;
        }
    }
    return path[i..];
}

test stripComponents {
    const expectEqualStrings = testing.expectEqualStrings;
    try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
    try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
    try expectEqualStrings("c", stripComponents("a/b/c", 2));
    try expectEqualStrings("", stripComponents("a/b/c", 3));
    try expectEqualStrings("", stripComponents("a/b/c", 4));
}

test PaxIterator {
    const Attr = struct {
        kind: PaxAttributeKind,
        value: []const u8 = undefined,
        err: ?anyerror = null,
    };
    const cases = [_]struct {
        data: []const u8,
        attrs: []const Attr,
        err: ?anyerror = null,
    }{
        .{ // valid but unknown keys
            .data =
            \\30 mtime=1350244992.023960108
            \\6 k=1
            \\13 key1=val1
            \\10 a=name
            \\9 a=name
            \\
            ,
            .attrs = &[_]Attr{},
        },
        .{ // mix of known and unknown keys
            .data =
            \\6 k=1
            \\13 path=name
            \\17 linkpath=link
            \\13 key1=val1
            \\12 size=123
            \\13 key2=val2
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .value = "link" },
                .{ .kind = .size, .value = "123" },
            },
        },
        .{ // too short size of the second key-value pair
            .data =
            \\13 path=name
            \\10 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the second key-value pair
            .data =
            \\13 path=name
            \\6 k=1
            \\19 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },

        .{ // too long size of the second key-value pair
            .data =
            \\13 path=name
            \\19 linkpath=value
            \\6 k=1
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
            },
        },
        .{ // null in keyword is not valid
            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.PaxNullInKeyword,
        },
        .{ // null in value is not valid
            .data = "23 path=name\x00with null\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .err = error.PaxNullInValue },
            },
        },
        .{ // 1000 characters path
            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "0123456789" ** 100 },
            },
        },
    };
    var buffer: [1024]u8 = undefined;

    outer: for (cases) |case| {
        var stream = std.io.fixedBufferStream(case.data);
        var iter = paxIterator(stream.reader(), case.data.len);

        var i: usize = 0;
        while (iter.next() catch |err| {
            if (case.err) |e| {
                try testing.expectEqual(e, err);
                continue;
            }
            return err;
        }) |attr| : (i += 1) {
            const exp = case.attrs[i];
            try testing.expectEqual(exp.kind, attr.kind);
            const value = attr.value(&buffer) catch |err| {
                if (exp.err) |e| {
                    try testing.expectEqual(e, err);
                    break :outer;
                }
                return err;
            };
            try testing.expectEqualStrings(exp.value, value);
        }
        try testing.expectEqual(case.attrs.len, i);
        try testing.expect(case.err == null);
    }
}

test {
    _ = @import("tar/test.zig");
}

test "header parse size" {
    const cases = [_]struct {
        in: []const u8,
        want: u64 = 0,
        err: ?anyerror = null,
    }{
        // Test base-256 (binary) encoded values.
        .{ .in = "", .want = 0 },
        .{ .in = "\x80", .want = 0 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
        .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
        .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
        .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
        .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },

        // Test base-8 (octal) encoded values.
        .{ .in = "00000000227\x00", .want = 0o227 },
        .{ .in = " 000000227\x00", .want = 0o227 },
        .{ .in = "00000000228\x00", .err = error.TarHeader },
        .{ .in = "11111111111\x00", .want = 0o11111111111 },
    };

    for (cases) |case| {
        var bytes = [_]u8{0} ** Header.SIZE;
        @memcpy(bytes[124 .. 124 + case.in.len], case.in);
        const header = Header{ .bytes = &bytes };
        if (case.err) |err| {
            try testing.expectError(err, header.size());
        } else {
            try testing.expectEqual(case.want, try header.size());
        }
    }
}

test "header parse mode" {
    const cases = [_]struct {
        in: []const u8,
        want: u64 = 0,
        err: ?anyerror = null,
    }{
        .{ .in = "0000644\x00", .want = 0o644 },
        .{ .in = "0000777\x00", .want = 0o777 },
        .{ .in = "7777777\x00", .want = 0o7777777 },
        .{ .in = "7777778\x00", .err = error.TarHeader },
        .{ .in = "77777777", .want = 0o77777777 },
        // Only the 8 bytes of the mode field are parsed.
        .{ .in = "777777777777", .want = 0o77777777 },
    };
    for (cases) |case| {
        var bytes = [_]u8{0} ** Header.SIZE;
        @memcpy(bytes[100 .. 100 + case.in.len], case.in);
        const header = Header{ .bytes = &bytes };
        if (case.err) |err| {
            try testing.expectError(err, header.mode());
        } else {
            try testing.expectEqual(case.want, try header.mode());
        }
    }
}

test "create file and symlink" {
    var root = testing.tmpDir(.{});
    defer root.cleanup();

    var file = try createDirAndFile(root.dir, "file1", default_mode);
    file.close();
    file = try createDirAndFile(root.dir, "a/b/c/file2", default_mode);
    file.close();

    createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| {
        // On Windows when developer mode is not enabled
        if (err == error.AccessDenied) return error.SkipZigTest;
        return err;
    };
    try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");

    // Dangling symlink, file created later
    try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
    file = try createDirAndFile(root.dir, "g/h/i/file4", default_mode);
    file.close();
}

test iterator {
    // Example tar file is created from this tree structure:
    // $ tree example
    //    example
    //    ├── a
    //    │   └── file
    //    ├── b
    //    │   └── symlink -> ../a/file
    //    └── empty
    // $ cat example/a/file
    //   content
    // $ tar -cf example.tar example
    // $ tar -tvf example.tar
    //    example/
    //    example/b/
    //    example/b/symlink -> ../a/file
    //    example/a/
    //    example/a/file
    //    example/empty/

    const data = @embedFile("tar/testdata/example.tar");
    var fbs = std.io.fixedBufferStream(data);

    // User provided buffers to the iterator
    var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    // Create iterator
    var iter = iterator(fbs.reader(), .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });
    // Iterate over files in example.tar
    var file_no: usize = 0;
    while (try iter.next()) |file| : (file_no += 1) {
        switch (file.kind) {
            .directory => {
                switch (file_no) {
                    0 => try testing.expectEqualStrings("example/", file.name),
                    1 => try testing.expectEqualStrings("example/b/", file.name),
                    3 => try testing.expectEqualStrings("example/a/", file.name),
                    5 => try testing.expectEqualStrings("example/empty/", file.name),
                    else => unreachable,
                }
            },
            .file => {
                try testing.expectEqualStrings("example/a/file", file.name);
                // Read file content
                var buf: [16]u8 = undefined;
                const n = try file.reader().readAll(&buf);
                try testing.expectEqualStrings("content\n", buf[0..n]);
            },
            .sym_link => {
                try testing.expectEqualStrings("example/b/symlink", file.name);
                try testing.expectEqualStrings("../a/file", file.link_name);
            },
        }
    }
}

test pipeToFileSystem {
    // Example tar file is created from this tree structure:
    // $ tree example
    //    example
    //    ├── a
    //    │   └── file
    //    ├── b
    //    │   └── symlink -> ../a/file
    //    └── empty
    // $ cat example/a/file
    //   content
    // $ tar -cf example.tar example
    // $ tar -tvf example.tar
    //    example/
    //    example/b/
    //    example/b/symlink -> ../a/file
    //    example/a/
    //    example/a/file
    //    example/empty/

    const data = @embedFile("tar/testdata/example.tar");
    var fbs = std.io.fixedBufferStream(data);
    const reader = fbs.reader();

    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    const dir = tmp.dir;

    // Save tar from `reader` to the file system `dir`
    pipeToFileSystem(dir, reader, .{
        .mode_mode = .ignore,
        .strip_components = 1,
        .exclude_empty_directories = true,
    }) catch |err| {
        // Skip on platforms which don't support symlinks
        if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
        return err;
    };

    try testing.expectError(error.FileNotFound, dir.statFile("empty"));
    try testing.expect((try dir.statFile("a/file")).kind == .file);
    try testing.expect((try dir.statFile("b/symlink")).kind == .file); // statFile follows symlink

    var buf: [32]u8 = undefined;
    try testing.expectEqualSlices(
        u8,
        "../a/file",
        normalizePath(try dir.readLink("b/symlink", &buf)),
    );
}

// Replaces the native path separator in `bytes` with the posix one, in place.
fn normalizePath(bytes: []u8) []u8 {
    const canonical_sep = std.fs.path.sep_posix;
    if (std.fs.path.sep == canonical_sep) return bytes;
    std.mem.replaceScalar(u8, bytes, std.fs.path.sep, canonical_sep);
    return bytes;
}

const default_mode = std.fs.File.default_mode;

// File system mode based on tar header mode and mode_mode options.
fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode {
    if (!std.fs.has_executable_bit or options.mode_mode == .ignore)
        return default_mode;

    const S = std.posix.S;

    // The mode from the tar file is inspected for the owner executable bit.
    if (mode & S.IXUSR == 0)
        return default_mode;

    // This bit is copied to the group and other executable bits.
    // Other bits of the mode are left as the default when creating files.
    return default_mode | S.IXUSR | S.IXGRP | S.IXOTH;
}

test fileMode {
    if (!std.fs.has_executable_bit) return error.SkipZigTest;
    try testing.expectEqual(default_mode, fileMode(0o744, PipeOptions{ .mode_mode = .ignore }));
    try testing.expectEqual(0o777, fileMode(0o744, PipeOptions{}));
    try testing.expectEqual(0o666, fileMode(0o644, PipeOptions{}));
    try testing.expectEqual(0o666, fileMode(0o655, PipeOptions{}));
}

test "executable bit" {
    if (!std.fs.has_executable_bit) return error.SkipZigTest;

    const S = std.posix.S;
    const data = @embedFile("tar/testdata/example.tar");

    for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| {
        var fbs = std.io.fixedBufferStream(data);
        const reader = fbs.reader();

        var tmp = testing.tmpDir(.{ .no_follow = true });
        // Runs at the end of every iteration, so no temporary directory
        // is left behind.
        defer tmp.cleanup();

        pipeToFileSystem(tmp.dir, reader, .{
            .strip_components = 1,
            .exclude_empty_directories = true,
            .mode_mode = opt,
        }) catch |err| {
            // Skip on platforms which don't support symlinks
            if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
            return err;
        };

        const fs = try tmp.dir.statFile("a/file");
        try testing.expect(fs.kind == .file);

        if (opt == .executable_bit_only) {
            // Executable bit is set for user, group and others
            try testing.expect(fs.mode & S.IXUSR > 0);
            try testing.expect(fs.mode & S.IXGRP > 0);
            try testing.expect(fs.mode & S.IXOTH > 0);
        }
        if (opt == .ignore) {
            try testing.expect(fs.mode & S.IXUSR == 0);
            try testing.expect(fs.mode & S.IXGRP == 0);
            try testing.expect(fs.mode & S.IXOTH == 0);
        }
    }
}