commit f60c24c73cc5c5894fbfb7060a70bc683c4a4ba5 (tree)
parent 26e895e3dc4ff1b7ac235414a356840bccb4fb1e
Author: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 11 Mar 2024 17:03:44 -0700
Merge pull request #19155 from ianic/tar_max_file_size
std.tar: error on insufficient buffers provided to iterator
Diffstat:
6 files changed, 633 insertions(+), 449 deletions(-)
diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig
@@ -767,7 +767,7 @@ fn unpack_inner(tar_bytes: []u8) !void {
});
while (try it.next()) |tar_file| {
switch (tar_file.kind) {
- .normal => {
+ .file => {
if (tar_file.size == 0 and tar_file.name.len == 0) break;
if (std.mem.endsWith(u8, tar_file.name, ".zig")) {
log.debug("found file: '{s}'", .{tar_file.name});
@@ -790,7 +790,6 @@ fn unpack_inner(tar_bytes: []u8) !void {
tar_file.name,
});
}
- try tar_file.skip();
},
else => continue,
}
diff --git a/lib/std/tar.zig b/lib/std/tar.zig
@@ -15,22 +15,65 @@
//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
-const std = @import("std.zig");
+const std = @import("std");
const assert = std.debug.assert;
+const testing = std.testing;
pub const output = @import("tar/output.zig");
-pub const Options = struct {
+/// Provide this to receive detailed error messages.
+/// When this is provided, some errors which would otherwise be returned
+/// immediately will instead be added to this structure. The API user must check
+/// the errors in diagnostics to know whether the operation succeeded or failed.
+pub const Diagnostics = struct {
+ allocator: std.mem.Allocator, // used by `deinit` to free the stored name strings and the `errors` list
+ errors: std.ArrayListUnmanaged(Error) = .{}, // collected errors; starts out empty
+
+ pub const Error = union(enum) {
+ unable_to_create_sym_link: struct {
+ code: anyerror,
+ file_name: []const u8, // freed with `allocator` in `deinit`
+ link_name: []const u8, // freed with `allocator` in `deinit`
+ },
+ unable_to_create_file: struct {
+ code: anyerror,
+ file_name: []const u8, // freed with `allocator` in `deinit`
+ },
+ unsupported_file_type: struct {
+ file_name: []const u8, // freed with `allocator` in `deinit`
+ file_type: Header.Kind,
+ },
+ };
+
+ pub fn deinit(d: *Diagnostics) void { // frees every stored name string, then the list itself
+ for (d.errors.items) |item| {
+ switch (item) {
+ .unable_to_create_sym_link => |info| {
+ d.allocator.free(info.file_name);
+ d.allocator.free(info.link_name);
+ },
+ .unable_to_create_file => |info| {
+ d.allocator.free(info.file_name);
+ },
+ .unsupported_file_type => |info| {
+ d.allocator.free(info.file_name);
+ },
+ }
+ }
+ d.errors.deinit(d.allocator);
+ d.* = undefined; // leave the whole struct undefined after cleanup
+ }
+};
+
+/// pipeToFileSystem options
+pub const PipeOptions = struct {
/// Number of directory levels to skip when extracting files.
strip_components: u32 = 0,
/// How to handle the "mode" property of files from within the tar file.
mode_mode: ModeMode = .executable_bit_only,
/// Prevents creation of empty directories.
exclude_empty_directories: bool = false,
- /// Provide this to receive detailed error messages.
- /// When this is provided, some errors which would otherwise be returned immediately
- /// will instead be added to this structure. The API user must check the errors
- /// in diagnostics to know whether the operation succeeded or failed.
+ /// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
pub const ModeMode = enum {
@@ -42,56 +85,16 @@ pub const Options = struct {
/// Other bits of the mode are left as the default when creating files.
executable_bit_only,
};
-
- pub const Diagnostics = struct {
- allocator: std.mem.Allocator,
- errors: std.ArrayListUnmanaged(Error) = .{},
-
- pub const Error = union(enum) {
- unable_to_create_sym_link: struct {
- code: anyerror,
- file_name: []const u8,
- link_name: []const u8,
- },
- unable_to_create_file: struct {
- code: anyerror,
- file_name: []const u8,
- },
- unsupported_file_type: struct {
- file_name: []const u8,
- file_type: Header.Kind,
- },
- };
-
- pub fn deinit(d: *Diagnostics) void {
- for (d.errors.items) |item| {
- switch (item) {
- .unable_to_create_sym_link => |info| {
- d.allocator.free(info.file_name);
- d.allocator.free(info.link_name);
- },
- .unable_to_create_file => |info| {
- d.allocator.free(info.file_name);
- },
- .unsupported_file_type => |info| {
- d.allocator.free(info.file_name);
- },
- }
- }
- d.errors.deinit(d.allocator);
- d.* = undefined;
- }
- };
};
-pub const Header = struct {
+const Header = struct {
const SIZE = 512;
const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
const LINK_NAME_SIZE = 100;
bytes: *const [SIZE]u8,
- pub const Kind = enum(u8) {
+ const Kind = enum(u8) {
normal_alias = 0,
normal = '0',
hard_link = '1',
@@ -114,9 +117,10 @@ pub const Header = struct {
/// Includes prefix concatenated, if any.
/// TODO: check against "../" and other nefarious things
- pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 {
+ pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
const n = name(header);
const p = prefix(header);
+ if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer;
if (!is_ustar(header) or p.len == 0) {
@memcpy(buffer[0..n.len], n);
return buffer[0..n.len];
@@ -127,11 +131,14 @@ pub const Header = struct {
return buffer[0 .. p.len + 1 + n.len];
}
- pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 {
+ /// When kind is symbolic_link, the linked-to name (target path) is
+ /// specified in the linkname field.
+ pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
const link_name = header.str(157, 100);
if (link_name.len == 0) {
return buffer[0..0];
}
+ if (buffer.len < link_name.len) return error.TarInsufficientBuffer;
const buf = buffer[0..link_name.len];
@memcpy(buf, link_name);
return buf;
@@ -233,71 +240,85 @@ fn nullStr(str: []const u8) []const u8 {
return str;
}
+/// Options for iterator.
+/// Buffers should be provided by the caller.
pub const IteratorOptions = struct {
/// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
file_name_buffer: []u8,
/// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
link_name_buffer: []u8,
+ /// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
-
- pub const Diagnostics = Options.Diagnostics;
};
/// Iterates over files in tar archive.
-/// `next` returns each file in `reader` tar archive.
+/// `next` returns each file in tar archive.
pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
return .{
.reader = reader,
.diagnostics = options.diagnostics,
- .header_buffer = undefined,
.file_name_buffer = options.file_name_buffer,
.link_name_buffer = options.link_name_buffer,
- .padding = 0,
- .file = undefined,
};
}
-fn Iterator(comptime ReaderType: type) type {
+/// Kind of the entry returned by the iterator `next` method.
+pub const FileKind = enum {
+ directory, // produced for tar header kind `.directory`
+ sym_link, // produced for tar header kind `.symbolic_link`
+ file, // produced for tar header kind `.normal`
+};
+
+/// Iterator over entries in the tar file represented by reader.
+pub fn Iterator(comptime ReaderType: type) type {
return struct {
reader: ReaderType,
- diagnostics: ?*Options.Diagnostics,
+ diagnostics: ?*Diagnostics = null,
// buffers for heeader and file attributes
- header_buffer: [Header.SIZE]u8,
+ header_buffer: [Header.SIZE]u8 = undefined,
file_name_buffer: []u8,
link_name_buffer: []u8,
// bytes of padding to the end of the block
- padding: usize,
- // current tar file
- file: File,
+ padding: usize = 0,
+ // not consumed bytes of file from last next iteration
+ unread_file_bytes: u64 = 0,
pub const File = struct {
name: []const u8, // name of file, symlink or directory
link_name: []const u8, // target name of symlink
- size: u64, // size of the file in bytes
- mode: u32,
- kind: Header.Kind,
+ size: u64 = 0, // size of the file in bytes
+ mode: u32 = 0,
+ kind: FileKind = .file,
+
+ unread_bytes: *u64,
+ parent_reader: ReaderType,
+
+ pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);
- reader: ReaderType,
+ pub fn reader(self: File) Reader {
+ return .{ .context = self };
+ }
+
+ pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
+ const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
+ const n = try self.parent_reader.read(buf);
+ self.unread_bytes.* -= n;
+ return n;
+ }
// Writes file content to writer.
- pub fn write(self: File, writer: anytype) !void {
+ pub fn writeAll(self: File, writer: anytype) !void {
var buffer: [4096]u8 = undefined;
- var n: u64 = 0;
- while (n < self.size) {
- const buf = buffer[0..@min(buffer.len, self.size - n)];
- try self.reader.readNoEof(buf);
+ while (self.unread_bytes.* > 0) {
+ const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
+ try self.parent_reader.readNoEof(buf);
try writer.writeAll(buf);
- n += buf.len;
+ self.unread_bytes.* -= buf.len;
}
}
-
- // Skips file content. Advances reader.
- pub fn skip(self: File) !void {
- try self.reader.skipBytes(self.size, .{});
- }
};
const Self = @This();
@@ -315,20 +336,18 @@ fn Iterator(comptime ReaderType: type) type {
}
fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
- if (size > buffer.len) return error.TarCorruptInput;
+ if (size > buffer.len) return error.TarInsufficientBuffer;
const buf = buffer[0..size];
try self.reader.readNoEof(buf);
return nullStr(buf);
}
- fn initFile(self: *Self) void {
- self.file = .{
+ fn newFile(self: *Self) File {
+ return .{
.name = self.file_name_buffer[0..0],
.link_name = self.link_name_buffer[0..0],
- .size = 0,
- .kind = .normal,
- .mode = 0,
- .reader = self.reader,
+ .parent_reader = self.reader,
+ .unread_bytes = &self.unread_file_bytes,
};
}
@@ -345,7 +364,12 @@ fn Iterator(comptime ReaderType: type) type {
/// loop iterates through one or more entries until it collects a all
/// file attributes.
pub fn next(self: *Self) !?File {
- self.initFile();
+ if (self.unread_file_bytes > 0) {
+ // If file content was not consumed by caller
+ try self.reader.skipBytes(self.unread_file_bytes, .{});
+ self.unread_file_bytes = 0;
+ }
+ var file: File = self.newFile();
while (try self.readHeader()) |header| {
const kind = header.kind();
@@ -355,46 +379,52 @@ fn Iterator(comptime ReaderType: type) type {
switch (kind) {
// File types to retrun upstream
.directory, .normal, .symbolic_link => {
- self.file.kind = kind;
- self.file.mode = try header.mode();
+ file.kind = switch (kind) {
+ .directory => .directory,
+ .normal => .file,
+ .symbolic_link => .sym_link,
+ else => unreachable,
+ };
+ file.mode = try header.mode();
// set file attributes if not already set by prefix/extended headers
- if (self.file.size == 0) {
- self.file.size = size;
+ if (file.size == 0) {
+ file.size = size;
}
- if (self.file.link_name.len == 0) {
- self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
+ if (file.link_name.len == 0) {
+ file.link_name = try header.linkName(self.link_name_buffer);
}
- if (self.file.name.len == 0) {
- self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
+ if (file.name.len == 0) {
+ file.name = try header.fullName(self.file_name_buffer);
}
- self.padding = blockPadding(self.file.size);
- return self.file;
+ self.padding = blockPadding(file.size);
+ self.unread_file_bytes = file.size;
+ return file;
},
// Prefix header types
.gnu_long_name => {
- self.file.name = try self.readString(@intCast(size), self.file_name_buffer);
+ file.name = try self.readString(@intCast(size), self.file_name_buffer);
},
.gnu_long_link => {
- self.file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
+ file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
},
.extended_header => {
// Use just attributes from last extended header.
- self.initFile();
+ file = self.newFile();
var rdr = paxIterator(self.reader, @intCast(size));
while (try rdr.next()) |attr| {
switch (attr.kind) {
.path => {
- self.file.name = try attr.value(self.file_name_buffer);
+ file.name = try attr.value(self.file_name_buffer);
},
.linkpath => {
- self.file.link_name = try attr.value(self.link_name_buffer);
+ file.link_name = try attr.value(self.link_name_buffer);
},
.size => {
var buf: [pax_max_size_attr_len]u8 = undefined;
- self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
+ file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
}
@@ -467,7 +497,8 @@ fn PaxIterator(comptime ReaderType: type) type {
// Copies pax attribute value into destination buffer.
// Must be called with destination buffer of size at least Attribute.len.
pub fn value(self: Attribute, dst: []u8) ![]const u8 {
- assert(self.len <= dst.len);
+ if (self.len > dst.len) return error.TarInsufficientBuffer;
+ // assert(self.len <= dst.len);
const buf = dst[0..self.len];
const n = try self.reader.readAll(buf);
if (n < self.len) return error.UnexpectedEndOfStream;
@@ -540,7 +571,8 @@ fn PaxIterator(comptime ReaderType: type) type {
};
}
-pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
+/// Saves tar file content to the file system.
+pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
switch (options.mode_mode) {
.ignore => {},
.executable_bit_only => {
@@ -568,24 +600,23 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
try dir.makePath(file_name);
}
},
- .normal => {
+ .file => {
if (file.size == 0 and file.name.len == 0) return;
const file_name = stripComponents(file.name, options.strip_components);
if (file_name.len == 0) return error.BadFileName;
if (createDirAndFile(dir, file_name)) |fs_file| {
defer fs_file.close();
- try file.write(fs_file);
+ try file.writeAll(fs_file);
} else |err| {
const d = options.diagnostics orelse return err;
try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
.code = err,
.file_name = try d.allocator.dupe(u8, file_name),
} });
- try file.skip();
}
},
- .symbolic_link => {
+ .sym_link => {
// The file system path of the symbolic link.
const file_name = stripComponents(file.name, options.strip_components);
if (file_name.len == 0) return error.BadFileName;
@@ -601,7 +632,6 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
} });
};
},
- else => unreachable,
}
}
}
@@ -619,6 +649,7 @@ fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File {
return fs_file;
}
+// Creates a symbolic link at path `file_name` which points to `link_name`.
fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
dir.symLink(link_name, file_name, .{}) catch |err| {
if (err == error.FileNotFound) {
@@ -645,8 +676,8 @@ fn stripComponents(path: []const u8, count: u32) []const u8 {
return path[i..];
}
-test "tar stripComponents" {
- const expectEqualStrings = std.testing.expectEqualStrings;
+test "stripComponents" {
+ const expectEqualStrings = testing.expectEqualStrings;
try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
try expectEqualStrings("c", stripComponents("a/b/c", 2));
@@ -654,7 +685,7 @@ test "tar stripComponents" {
try expectEqualStrings("", stripComponents("a/b/c", 4));
}
-test "tar PaxIterator" {
+test "PaxIterator" {
const Attr = struct {
kind: PaxAttributeKind,
value: []const u8 = undefined,
@@ -757,24 +788,24 @@ test "tar PaxIterator" {
var i: usize = 0;
while (iter.next() catch |err| {
if (case.err) |e| {
- try std.testing.expectEqual(e, err);
+ try testing.expectEqual(e, err);
continue;
}
return err;
}) |attr| : (i += 1) {
const exp = case.attrs[i];
- try std.testing.expectEqual(exp.kind, attr.kind);
+ try testing.expectEqual(exp.kind, attr.kind);
const value = attr.value(&buffer) catch |err| {
if (exp.err) |e| {
- try std.testing.expectEqual(e, err);
+ try testing.expectEqual(e, err);
break :outer;
}
return err;
};
- try std.testing.expectEqualStrings(exp.value, value);
+ try testing.expectEqualStrings(exp.value, value);
}
- try std.testing.expectEqual(case.attrs.len, i);
- try std.testing.expect(case.err == null);
+ try testing.expectEqual(case.attrs.len, i);
+ try testing.expect(case.err == null);
}
}
@@ -782,7 +813,7 @@ test {
_ = @import("tar/test.zig");
}
-test "tar header parse size" {
+test "header parse size" {
const cases = [_]struct {
in: []const u8,
want: u64 = 0,
@@ -810,14 +841,14 @@ test "tar header parse size" {
@memcpy(bytes[124 .. 124 + case.in.len], case.in);
var header = Header{ .bytes = &bytes };
if (case.err) |err| {
- try std.testing.expectError(err, header.size());
+ try testing.expectError(err, header.size());
} else {
- try std.testing.expectEqual(case.want, try header.size());
+ try testing.expectEqual(case.want, try header.size());
}
}
}
-test "tar header parse mode" {
+test "header parse mode" {
const cases = [_]struct {
in: []const u8,
want: u64 = 0,
@@ -835,9 +866,148 @@ test "tar header parse mode" {
@memcpy(bytes[100 .. 100 + case.in.len], case.in);
var header = Header{ .bytes = &bytes };
if (case.err) |err| {
- try std.testing.expectError(err, header.mode());
+ try testing.expectError(err, header.mode());
} else {
- try std.testing.expectEqual(case.want, try header.mode());
+ try testing.expectEqual(case.want, try header.mode());
+ }
+ }
+}
+
+test "create file and symlink" {
+ var root = testing.tmpDir(.{});
+ defer root.cleanup();
+
+ var file = try createDirAndFile(root.dir, "file1");
+ file.close();
+ file = try createDirAndFile(root.dir, "a/b/c/file2");
+ file.close();
+
+ createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| {
+ // On Windows when developer mode is not enabled
+ if (err == error.AccessDenied) return error.SkipZigTest;
+ return err;
+ };
+ try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");
+
+ // Dangling symlink, file created later
+ try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
+ file = try createDirAndFile(root.dir, "g/h/i/file4");
+ file.close();
+}
+
+test iterator {
+ // Example tar file is created from this tree structure:
+ // $ tree example
+ // example
+ // ├── a
+ // │ └── file
+ // ├── b
+ // │ └── symlink -> ../a/file
+ // └── empty
+ // $ cat example/a/file
+ // content
+ // $ tar -cf example.tar example
+ // $ tar -tvf example.tar
+ // example/
+ // example/b/
+ // example/b/symlink -> ../a/file
+ // example/a/
+ // example/a/file
+ // example/empty/
+
+ const data = @embedFile("tar/testdata/example.tar");
+ var fbs = std.io.fixedBufferStream(data);
+
+ // User provided buffers to the iterator
+ var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+ var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+ // Create iterator
+ var iter = iterator(fbs.reader(), .{
+ .file_name_buffer = &file_name_buffer,
+ .link_name_buffer = &link_name_buffer,
+ });
+ // Iterate over files in example.tar
+ var file_no: usize = 0;
+ while (try iter.next()) |file| : (file_no += 1) {
+ switch (file.kind) {
+ .directory => {
+ switch (file_no) {
+ 0 => try testing.expectEqualStrings("example/", file.name),
+ 1 => try testing.expectEqualStrings("example/b/", file.name),
+ 3 => try testing.expectEqualStrings("example/a/", file.name),
+ 5 => try testing.expectEqualStrings("example/empty/", file.name),
+ else => unreachable,
+ }
+ },
+ .file => {
+ try testing.expectEqualStrings("example/a/file", file.name);
+ // Read file content
+ var buf: [16]u8 = undefined;
+ const n = try file.reader().readAll(&buf);
+ try testing.expectEqualStrings("content\n", buf[0..n]);
+ },
+ .sym_link => {
+ try testing.expectEqualStrings("example/b/symlink", file.name);
+ try testing.expectEqualStrings("../a/file", file.link_name);
+ },
}
}
}
+
+test pipeToFileSystem {
+ // Example tar file is created from this tree structure:
+ // $ tree example
+ // example
+ // ├── a
+ // │ └── file
+ // ├── b
+ // │ └── symlink -> ../a/file
+ // └── empty
+ // $ cat example/a/file
+ // content
+ // $ tar -cf example.tar example
+ // $ tar -tvf example.tar
+ // example/
+ // example/b/
+ // example/b/symlink -> ../a/file
+ // example/a/
+ // example/a/file
+ // example/empty/
+
+ const data = @embedFile("tar/testdata/example.tar");
+ var fbs = std.io.fixedBufferStream(data);
+ const reader = fbs.reader();
+
+ var tmp = testing.tmpDir(.{ .no_follow = true });
+ defer tmp.cleanup();
+ const dir = tmp.dir;
+
+ // Save tar from `reader` to the file system `dir`
+ pipeToFileSystem(dir, reader, .{
+ .mode_mode = .ignore,
+ .strip_components = 1,
+ .exclude_empty_directories = true,
+ }) catch |err| {
+ // Skip on platforms which don't support symlinks
+ if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
+ return err;
+ };
+
+ try testing.expectError(error.FileNotFound, dir.statFile("empty"));
+ try testing.expect((try dir.statFile("a/file")).kind == .file);
+ try testing.expect((try dir.statFile("b/symlink")).kind == .file); // statFile follows symlink
+
+ var buf: [32]u8 = undefined;
+ try testing.expectEqualSlices(
+ u8,
+ "../a/file",
+ normalizePath(try dir.readLink("b/symlink", &buf)),
+ );
+}
+
+fn normalizePath(bytes: []u8) []u8 { // test helper: rewrites native path separators in `bytes` to the posix separator, in place
+ const canonical_sep = std.fs.path.sep_posix;
+ if (std.fs.path.sep == canonical_sep) return bytes; // native separator is already the canonical one; nothing to rewrite
+ std.mem.replaceScalar(u8, bytes, std.fs.path.sep, canonical_sep);
+ return bytes; // same slice that was passed in, now modified
+}
diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig
@@ -1,328 +1,347 @@
-const std = @import("../std.zig");
-const tar = std.tar;
+const std = @import("std");
+const tar = @import("../tar.zig");
const testing = std.testing;
-test "tar run Go test cases" {
- const Case = struct {
- const File = struct {
- name: []const u8,
- size: u64 = 0,
- mode: u32 = 0,
- link_name: []const u8 = &[0]u8{},
- kind: tar.Header.Kind = .normal,
- truncated: bool = false, // when there is no file body, just header, usefull for huge files
- };
-
- data: []const u8, // testdata file content
- files: []const File = &[_]@This().File{}, // expected files to found in archive
- chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content
- err: ?anyerror = null, // parsing should fail with this error
+const Case = struct {
+ const File = struct {
+ name: []const u8,
+ size: u64 = 0,
+ mode: u32 = 0,
+ link_name: []const u8 = &[0]u8{},
+ kind: tar.FileKind = .file,
+ truncated: bool = false, // when there is no file body, just header, useful for huge files
};
- const cases = [_]Case{
- .{
- .data = @embedFile("testdata/gnu.tar"),
- .files = &[_]Case.File{
- .{
- .name = "small.txt",
- .size = 5,
- .mode = 0o640,
- },
- .{
- .name = "small2.txt",
- .size = 11,
- .mode = 0o640,
- },
+ data: []const u8, // testdata file content
+ files: []const File = &[_]@This().File{}, // expected files to found in archive
+ chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content
+ err: ?anyerror = null, // parsing should fail with this error
+};
+
+const cases = [_]Case{
+ .{
+ .data = @embedFile("testdata/gnu.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "small.txt",
+ .size = 5,
+ .mode = 0o640,
},
- .chksums = &[_][]const u8{
- "e38b27eaccb4391bdec553a7f3ae6b2f",
- "c65bd2e50a56a2138bf1716f2fd56fe9",
+ .{
+ .name = "small2.txt",
+ .size = 11,
+ .mode = 0o640,
},
},
- .{
- .data = @embedFile("testdata/sparse-formats.tar"),
- .err = error.TarUnsupportedHeader,
+ .chksums = &[_][]const u8{
+ "e38b27eaccb4391bdec553a7f3ae6b2f",
+ "c65bd2e50a56a2138bf1716f2fd56fe9",
},
- .{
- .data = @embedFile("testdata/star.tar"),
- .files = &[_]Case.File{
- .{
- .name = "small.txt",
- .size = 5,
- .mode = 0o640,
- },
- .{
- .name = "small2.txt",
- .size = 11,
- .mode = 0o640,
- },
+ },
+ .{
+ .data = @embedFile("testdata/sparse-formats.tar"),
+ .err = error.TarUnsupportedHeader,
+ },
+ .{
+ .data = @embedFile("testdata/star.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "small.txt",
+ .size = 5,
+ .mode = 0o640,
},
- .chksums = &[_][]const u8{
- "e38b27eaccb4391bdec553a7f3ae6b2f",
- "c65bd2e50a56a2138bf1716f2fd56fe9",
+ .{
+ .name = "small2.txt",
+ .size = 11,
+ .mode = 0o640,
},
},
- .{
- .data = @embedFile("testdata/v7.tar"),
- .files = &[_]Case.File{
- .{
- .name = "small.txt",
- .size = 5,
- .mode = 0o444,
- },
- .{
- .name = "small2.txt",
- .size = 11,
- .mode = 0o444,
- },
- },
- .chksums = &[_][]const u8{
- "e38b27eaccb4391bdec553a7f3ae6b2f",
- "c65bd2e50a56a2138bf1716f2fd56fe9",
- },
+ .chksums = &[_][]const u8{
+ "e38b27eaccb4391bdec553a7f3ae6b2f",
+ "c65bd2e50a56a2138bf1716f2fd56fe9",
},
- .{
- .data = @embedFile("testdata/pax.tar"),
- .files = &[_]Case.File{
- .{
- .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
- .size = 7,
- .mode = 0o664,
- },
- .{
- .name = "a/b",
- .size = 0,
- .kind = .symbolic_link,
- .mode = 0o777,
- .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
- },
+ },
+ .{
+ .data = @embedFile("testdata/v7.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "small.txt",
+ .size = 5,
+ .mode = 0o444,
},
- .chksums = &[_][]const u8{
- "3c382e8f5b6631aa2db52643912ffd4a",
+ .{
+ .name = "small2.txt",
+ .size = 11,
+ .mode = 0o444,
},
},
- .{
- // pax attribute don't end with \n
- .data = @embedFile("testdata/pax-bad-hdr-file.tar"),
- .err = error.PaxInvalidAttributeEnd,
+ .chksums = &[_][]const u8{
+ "e38b27eaccb4391bdec553a7f3ae6b2f",
+ "c65bd2e50a56a2138bf1716f2fd56fe9",
},
- .{
- // size is in pax attribute
- .data = @embedFile("testdata/pax-pos-size-file.tar"),
- .files = &[_]Case.File{
- .{
- .name = "foo",
- .size = 999,
- .kind = .normal,
- .mode = 0o640,
- },
+ },
+ .{
+ .data = @embedFile("testdata/pax.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
+ .size = 7,
+ .mode = 0o664,
},
- .chksums = &[_][]const u8{
- "0afb597b283fe61b5d4879669a350556",
+ .{
+ .name = "a/b",
+ .size = 0,
+ .kind = .sym_link,
+ .mode = 0o777,
+ .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
},
},
- .{
- // has pax records which we are not interested in
- .data = @embedFile("testdata/pax-records.tar"),
- .files = &[_]Case.File{
- .{
- .name = "file",
- },
+ .chksums = &[_][]const u8{
+ "3c382e8f5b6631aa2db52643912ffd4a",
+ },
+ },
+ .{
+ // pax attribute don't end with \n
+ .data = @embedFile("testdata/pax-bad-hdr-file.tar"),
+ .err = error.PaxInvalidAttributeEnd,
+ },
+ .{
+ // size is in pax attribute
+ .data = @embedFile("testdata/pax-pos-size-file.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "foo",
+ .size = 999,
+ .kind = .file,
+ .mode = 0o640,
},
},
- .{
- // has global records which we are ignoring
- .data = @embedFile("testdata/pax-global-records.tar"),
- .files = &[_]Case.File{
- .{
- .name = "file1",
- },
- .{
- .name = "file2",
- },
- .{
- .name = "file3",
- },
- .{
- .name = "file4",
- },
+ .chksums = &[_][]const u8{
+ "0afb597b283fe61b5d4879669a350556",
+ },
+ },
+ .{
+ // has pax records which we are not interested in
+ .data = @embedFile("testdata/pax-records.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "file",
},
},
- .{
- .data = @embedFile("testdata/nil-uid.tar"),
- .files = &[_]Case.File{
- .{
- .name = "P1050238.JPG.log",
- .size = 14,
- .kind = .normal,
- .mode = 0o664,
- },
+ },
+ .{
+ // has global records which we are ignoring
+ .data = @embedFile("testdata/pax-global-records.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "file1",
},
- .chksums = &[_][]const u8{
- "08d504674115e77a67244beac19668f5",
+ .{
+ .name = "file2",
},
- },
- .{
- // has xattrs and pax records which we are ignoring
- .data = @embedFile("testdata/xattrs.tar"),
- .files = &[_]Case.File{
- .{
- .name = "small.txt",
- .size = 5,
- .kind = .normal,
- .mode = 0o644,
- },
- .{
- .name = "small2.txt",
- .size = 11,
- .kind = .normal,
- .mode = 0o644,
- },
+ .{
+ .name = "file3",
},
- .chksums = &[_][]const u8{
- "e38b27eaccb4391bdec553a7f3ae6b2f",
- "c65bd2e50a56a2138bf1716f2fd56fe9",
+ .{
+ .name = "file4",
},
},
- .{
- .data = @embedFile("testdata/gnu-multi-hdrs.tar"),
- .files = &[_]Case.File{
- .{
- .name = "GNU2/GNU2/long-path-name",
- .link_name = "GNU4/GNU4/long-linkpath-name",
- .kind = .symbolic_link,
- },
+ },
+ .{
+ .data = @embedFile("testdata/nil-uid.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "P1050238.JPG.log",
+ .size = 14,
+ .kind = .file,
+ .mode = 0o664,
},
},
- .{
- // has gnu type D (directory) and S (sparse) blocks
- .data = @embedFile("testdata/gnu-incremental.tar"),
- .err = error.TarUnsupportedHeader,
+ .chksums = &[_][]const u8{
+ "08d504674115e77a67244beac19668f5",
},
- .{
- // should use values only from last pax header
- .data = @embedFile("testdata/pax-multi-hdrs.tar"),
- .files = &[_]Case.File{
- .{
- .name = "bar",
- .link_name = "PAX4/PAX4/long-linkpath-name",
- .kind = .symbolic_link,
- },
+ },
+ .{
+ // has xattrs and pax records which we are ignoring
+ .data = @embedFile("testdata/xattrs.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "small.txt",
+ .size = 5,
+ .kind = .file,
+ .mode = 0o644,
},
- },
- .{
- .data = @embedFile("testdata/gnu-long-nul.tar"),
- .files = &[_]Case.File{
- .{
- .name = "0123456789",
- .mode = 0o644,
- },
+ .{
+ .name = "small2.txt",
+ .size = 11,
+ .kind = .file,
+ .mode = 0o644,
},
},
- .{
- .data = @embedFile("testdata/gnu-utf8.tar"),
- .files = &[_]Case.File{
- .{
- .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
- .mode = 0o644,
- },
- },
+ .chksums = &[_][]const u8{
+ "e38b27eaccb4391bdec553a7f3ae6b2f",
+ "c65bd2e50a56a2138bf1716f2fd56fe9",
},
- .{
- .data = @embedFile("testdata/gnu-not-utf8.tar"),
- .files = &[_]Case.File{
- .{
- .name = "hi\x80\x81\x82\x83bye",
- .mode = 0o644,
- },
+ },
+ .{
+ .data = @embedFile("testdata/gnu-multi-hdrs.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "GNU2/GNU2/long-path-name",
+ .link_name = "GNU4/GNU4/long-linkpath-name",
+ .kind = .sym_link,
},
},
- .{
- // null in pax key
- .data = @embedFile("testdata/pax-nul-xattrs.tar"),
- .err = error.PaxNullInKeyword,
- },
- .{
- .data = @embedFile("testdata/pax-nul-path.tar"),
- .err = error.PaxNullInValue,
- },
- .{
- .data = @embedFile("testdata/neg-size.tar"),
- .err = error.TarHeader,
- },
- .{
- .data = @embedFile("testdata/issue10968.tar"),
- .err = error.TarHeader,
- },
- .{
- .data = @embedFile("testdata/issue11169.tar"),
- .err = error.TarHeader,
+ },
+ .{
+ // has gnu type D (directory) and S (sparse) blocks
+ .data = @embedFile("testdata/gnu-incremental.tar"),
+ .err = error.TarUnsupportedHeader,
+ },
+ .{
+ // should use values only from last pax header
+ .data = @embedFile("testdata/pax-multi-hdrs.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "bar",
+ .link_name = "PAX4/PAX4/long-linkpath-name",
+ .kind = .sym_link,
+ },
},
- .{
- .data = @embedFile("testdata/issue12435.tar"),
- .err = error.TarHeaderChksum,
+ },
+ .{
+ .data = @embedFile("testdata/gnu-long-nul.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "0123456789",
+ .mode = 0o644,
+ },
},
- .{
- // has magic with space at end instead of null
- .data = @embedFile("testdata/invalid-go17.tar"),
- .files = &[_]Case.File{
- .{
- .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
- },
+ },
+ .{
+ .data = @embedFile("testdata/gnu-utf8.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
+ .mode = 0o644,
},
},
- .{
- .data = @embedFile("testdata/ustar-file-devs.tar"),
- .files = &[_]Case.File{
- .{
- .name = "file",
- .mode = 0o644,
- },
+ },
+ .{
+ .data = @embedFile("testdata/gnu-not-utf8.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "hi\x80\x81\x82\x83bye",
+ .mode = 0o644,
},
},
- .{
- .data = @embedFile("testdata/trailing-slash.tar"),
- .files = &[_]Case.File{
- .{
- .name = "123456789/" ** 30,
- .kind = .directory,
- },
+ },
+ .{
+ // null in pax key
+ .data = @embedFile("testdata/pax-nul-xattrs.tar"),
+ .err = error.PaxNullInKeyword,
+ },
+ .{
+ .data = @embedFile("testdata/pax-nul-path.tar"),
+ .err = error.PaxNullInValue,
+ },
+ .{
+ .data = @embedFile("testdata/neg-size.tar"),
+ .err = error.TarHeader,
+ },
+ .{
+ .data = @embedFile("testdata/issue10968.tar"),
+ .err = error.TarHeader,
+ },
+ .{
+ .data = @embedFile("testdata/issue11169.tar"),
+ .err = error.TarHeader,
+ },
+ .{
+ .data = @embedFile("testdata/issue12435.tar"),
+ .err = error.TarHeaderChksum,
+ },
+ .{
+ // has magic with space at end instead of null
+ .data = @embedFile("testdata/invalid-go17.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
},
},
- .{
- // Has size in gnu extended format. To represent size bigger than 8 GB.
- .data = @embedFile("testdata/writer-big.tar"),
- .files = &[_]Case.File{
- .{
- .name = "tmp/16gig.txt",
- .size = 16 * 1024 * 1024 * 1024,
- .truncated = true,
- .mode = 0o640,
- },
+ },
+ .{
+ .data = @embedFile("testdata/ustar-file-devs.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "file",
+ .mode = 0o644,
},
},
- .{
- // Size in gnu extended format, and name in pax attribute.
- .data = @embedFile("testdata/writer-big-long.tar"),
- .files = &[_]Case.File{
- .{
- .name = "longname/" ** 15 ++ "16gig.txt",
- .size = 16 * 1024 * 1024 * 1024,
- .mode = 0o644,
- .truncated = true,
- },
+ },
+ .{
+ .data = @embedFile("testdata/trailing-slash.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "123456789/" ** 30,
+ .kind = .directory,
},
},
- .{
- .data = @embedFile("testdata/fuzz1.tar"),
- .err = error.TarCorruptInput,
+ },
+ .{
+ // Has size in gnu extended format. To represent size bigger than 8 GB.
+ .data = @embedFile("testdata/writer-big.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "tmp/16gig.txt",
+ .size = 16 * 1024 * 1024 * 1024,
+ .truncated = true,
+ .mode = 0o640,
+ },
},
- .{
- .data = @embedFile("testdata/fuzz2.tar"),
- .err = error.PaxSizeAttrOverflow,
+ },
+ .{
+ // Size in gnu extended format, and name in pax attribute.
+ .data = @embedFile("testdata/writer-big-long.tar"),
+ .files = &[_]Case.File{
+ .{
+ .name = "longname/" ** 15 ++ "16gig.txt",
+ .size = 16 * 1024 * 1024 * 1024,
+ .mode = 0o644,
+ .truncated = true,
+ },
},
- };
+ },
+ .{
+ .data = @embedFile("testdata/fuzz1.tar"),
+ .err = error.TarInsufficientBuffer,
+ },
+ .{
+ .data = @embedFile("testdata/fuzz2.tar"),
+ .err = error.PaxSizeAttrOverflow,
+ },
+};
+
+// Test helper: writer-like sink that feeds everything written to it into an MD5 hash.
+const Md5Writer = struct {
+ h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), // running hash state
+
+ pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
+ self.h.update(buf); // hash the whole buffer
+ }
+
+ pub fn writeByte(self: *Md5Writer, byte: u8) !void {
+ self.h.update(&[_]u8{byte}); // hash a single byte via a one-element array
+ }
+ pub fn chksum(self: *Md5Writer) [32]u8 {
+ var s = [_]u8{0} ** 16; // receives the 16-byte digest
+ self.h.final(&s);
+ return std.fmt.bytesToHex(s, .lower); // 16 digest bytes rendered as 32 lowercase hex characters
+ }
+};
+
+test "run test cases" {
var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
@@ -350,37 +369,65 @@ test "tar run Go test cases" {
if (case.chksums.len > i) {
var md5writer = Md5Writer{};
- try actual.write(&md5writer);
+ try actual.writeAll(&md5writer);
const chksum = md5writer.chksum();
try testing.expectEqualStrings(case.chksums[i], &chksum);
} else {
- if (!expected.truncated) try actual.skip(); // skip file content
+ if (expected.truncated) {
+ iter.unread_file_bytes = 0;
+ }
}
}
try testing.expectEqual(case.files.len, i);
}
}
-// used in test to calculate file chksum
-const Md5Writer = struct {
- h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
+test "pax/gnu long names with small buffer" {
+ // should fail with insufficient buffer error
- pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
- self.h.update(buf);
- }
+ var min_file_name_buffer: [256]u8 = undefined;
+ var min_link_name_buffer: [100]u8 = undefined;
+ const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] };
- pub fn writeByte(self: *Md5Writer, byte: u8) !void {
- self.h.update(&[_]u8{byte});
- }
+ for (long_name_cases) |case| {
+ var fsb = std.io.fixedBufferStream(case.data);
+ var iter = tar.iterator(fsb.reader(), .{
+ .file_name_buffer = &min_file_name_buffer,
+ .link_name_buffer = &min_link_name_buffer,
+ });
- pub fn chksum(self: *Md5Writer) [32]u8 {
- var s = [_]u8{0} ** 16;
- self.h.final(&s);
- return std.fmt.bytesToHex(s, .lower);
+ var iter_err: ?anyerror = null;
+ while (iter.next() catch |err| brk: {
+ iter_err = err;
+ break :brk null;
+ }) |_| {}
+
+ try testing.expect(iter_err != null);
+ try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?);
}
-};
+}
+
+test "insufficient buffer in Header name filed" {
+ var min_file_name_buffer: [9]u8 = undefined;
+ var min_link_name_buffer: [100]u8 = undefined;
+
+ var fsb = std.io.fixedBufferStream(cases[0].data);
+ var iter = tar.iterator(fsb.reader(), .{
+ .file_name_buffer = &min_file_name_buffer,
+ .link_name_buffer = &min_link_name_buffer,
+ });
+
+ var iter_err: ?anyerror = null;
+ while (iter.next() catch |err| brk: {
+ iter_err = err;
+ break :brk null;
+ }) |_| {}
+
+ try testing.expect(iter_err != null);
+ try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?);
+}
-test "tar should not overwrite existing file" {
+test "should not overwrite existing file" {
// Starting from this folder structure:
// $ tree root
// root
@@ -436,7 +483,7 @@ test "tar should not overwrite existing file" {
try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 });
}
-test "tar case sensitivity" {
+test "case sensitivity" {
// Mimicking issue #18089, this tar contains, same file name in two case
// sensitive name version. Should fail on case insensitive file systems.
//
@@ -462,35 +509,3 @@ test "tar case sensitivity" {
try testing.expect((try root.dir.statFile("alacritty/darkermatrix.yml")).kind == .file);
try testing.expect((try root.dir.statFile("alacritty/Darkermatrix.yml")).kind == .file);
}
-
-test "tar pipeToFileSystem" {
- // $ tar tvf
- // pipe_to_file_system_test/
- // pipe_to_file_system_test/b/
- // pipe_to_file_system_test/b/symlink -> ../a/file
- // pipe_to_file_system_test/a/
- // pipe_to_file_system_test/a/file
- // pipe_to_file_system_test/empty/
- const data = @embedFile("testdata/pipe_to_file_system_test.tar");
- var fsb = std.io.fixedBufferStream(data);
-
- var root = std.testing.tmpDir(.{ .no_follow = true });
- defer root.cleanup();
-
- tar.pipeToFileSystem(root.dir, fsb.reader(), .{
- .mode_mode = .ignore,
- .strip_components = 1,
- .exclude_empty_directories = true,
- }) catch |err| {
- // Skip on platform which don't support symlinks
- if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
- return err;
- };
-
- try testing.expectError(error.FileNotFound, root.dir.statFile("empty"));
- try testing.expect((try root.dir.statFile("a/file")).kind == .file);
- // TODO is there better way to test symlink
- try testing.expect((try root.dir.statFile("b/symlink")).kind == .file); // statFile follows symlink
- var buf: [32]u8 = undefined;
- _ = try root.dir.readLink("b/symlink", &buf);
-}
diff --git a/lib/std/tar/testdata/example.tar b/lib/std/tar/testdata/example.tar
Binary files differ.
diff --git a/lib/std/tar/testdata/pipe_to_file_system_test.tar b/lib/std/tar/testdata/pipe_to_file_system_test.tar
Binary files differ.
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
@@ -1147,7 +1147,7 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void {
const eb = &f.error_bundle;
const gpa = f.arena.child_allocator;
- var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = gpa };
+ var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa };
defer diagnostics.deinit();
std.tar.pipeToFileSystem(out_dir, reader, .{