commit f107d654e073a7cc0c2e920ee6fac9b2b34dba0c (tree)
parent 4c8b937fb016a54b09d7447ca634b7cf1af78fce
Author: Andrew Kelley <andrew@ziglang.org>
Date: Fri, 29 May 2020 18:41:40 -0400
Merge branch 'gereeter-reduced-path-max'
closes #4837
Diffstat:
5 files changed, 98 insertions(+), 37 deletions(-)
diff --git a/lib/std/debug.zig b/lib/std/debug.zig
@@ -670,10 +670,12 @@ pub fn openSelfDebugInfo(allocator: *mem.Allocator) anyerror!DebugInfo {
}
}
+/// This takes ownership of coff_file: users of this function should not close
+/// it themselves, even on error.
/// TODO resources https://github.com/ziglang/zig/issues/4353
-fn openCoffDebugInfo(allocator: *mem.Allocator, coff_file_path: [:0]const u16) !ModuleDebugInfo {
+/// TODO it's weird to take ownership even on error, rework this code.
+fn readCoffDebugInfo(allocator: *mem.Allocator, coff_file: File) !ModuleDebugInfo {
nosuspend {
- const coff_file = try std.fs.openFileAbsoluteW(coff_file_path, .{ .intended_io_mode = .blocking });
errdefer coff_file.close();
const coff_obj = try allocator.create(coff.Coff);
@@ -851,10 +853,13 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) ![]const u8 {
return ptr[start..end];
}
+/// This takes ownership of elf_file: users of this function should not close
+/// it themselves, even on error.
/// TODO resources https://github.com/ziglang/zig/issues/4353
-pub fn openElfDebugInfo(allocator: *mem.Allocator, elf_file_path: []const u8) !ModuleDebugInfo {
+/// TODO it's weird to take ownership even on error, rework this code.
+pub fn readElfDebugInfo(allocator: *mem.Allocator, elf_file: File) !ModuleDebugInfo {
nosuspend {
- const mapped_mem = try mapWholeFile(elf_file_path);
+ const mapped_mem = try mapWholeFile(elf_file);
const hdr = @ptrCast(*const elf.Ehdr, &mapped_mem[0]);
if (!mem.eql(u8, hdr.e_ident[0..4], "\x7fELF")) return error.InvalidElfMagic;
if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;
@@ -921,8 +926,11 @@ pub fn openElfDebugInfo(allocator: *mem.Allocator, elf_file_path: []const u8) !M
}
/// TODO resources https://github.com/ziglang/zig/issues/4353
-fn openMachODebugInfo(allocator: *mem.Allocator, macho_file_path: []const u8) !ModuleDebugInfo {
- const mapped_mem = try mapWholeFile(macho_file_path);
+/// This takes ownership of macho_file: users of this function should not close
+/// it themselves, even on error.
+/// TODO it's weird to take ownership even on error, rework this code.
+fn readMachODebugInfo(allocator: *mem.Allocator, macho_file: File) !ModuleDebugInfo {
+ const mapped_mem = try mapWholeFile(macho_file);
const hdr = @ptrCast(
*const macho.mach_header_64,
@@ -1055,9 +1063,11 @@ const MachoSymbol = struct {
}
};
-fn mapWholeFile(path: []const u8) ![]align(mem.page_size) const u8 {
+/// `file` is expected to have been opened with .intended_io_mode == .blocking.
+/// Takes ownership of file, even on error.
+/// TODO it's weird to take ownership even on error, rework this code.
+fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 {
nosuspend {
- const file = try fs.cwd().openFile(path, .{ .intended_io_mode = .blocking });
defer file.close();
const file_len = try math.cast(usize, try file.getEndPos());
@@ -1140,10 +1150,11 @@ pub const DebugInfo = struct {
errdefer self.allocator.destroy(obj_di);
const macho_path = mem.spanZ(std.c._dyld_get_image_name(i));
- obj_di.* = openMachODebugInfo(self.allocator, macho_path) catch |err| switch (err) {
+ const macho_file = fs.cwd().openFile(macho_path, .{ .intended_io_mode = .blocking }) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
+ obj_di.* = try readMachODebugInfo(self.allocator, macho_file);
obj_di.base_address = base_address;
try self.address_map.putNoClobber(base_address, obj_di);
@@ -1221,10 +1232,11 @@ pub const DebugInfo = struct {
const obj_di = try self.allocator.create(ModuleDebugInfo);
errdefer self.allocator.destroy(obj_di);
- obj_di.* = openCoffDebugInfo(self.allocator, name_buffer[0 .. len + 4 :0]) catch |err| switch (err) {
+ // Open in blocking mode: readCoffDebugInfo uses the file inside a
+ // `nosuspend` block, and openCoffDebugInfo (which this replaces) opened
+ // the file with the same flag. A missing file means there is no debug
+ // info for this module; any other open error is propagated unchanged.
+ const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{ .intended_io_mode = .blocking }) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
+ // readCoffDebugInfo takes ownership of coff_file, even on error, so it
+ // must not be closed here.
+ obj_di.* = try readCoffDebugInfo(self.allocator, coff_file);
obj_di.base_address = seg_start;
try self.address_map.putNoClobber(seg_start, obj_di);
@@ -1280,20 +1292,18 @@ pub const DebugInfo = struct {
return obj_di;
}
- const elf_path = if (ctx.name.len > 0)
- ctx.name
- else blk: {
- var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
- break :blk try fs.selfExePath(&buf);
- };
-
const obj_di = try self.allocator.create(ModuleDebugInfo);
errdefer self.allocator.destroy(obj_di);
- obj_di.* = openElfDebugInfo(self.allocator, elf_path) catch |err| switch (err) {
+ const elf_file = (if (ctx.name.len > 0)
+ fs.cwd().openFile(ctx.name, .{ .intended_io_mode = .blocking })
+ else
+ fs.openSelfExe(.{ .intended_io_mode = .blocking })) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
+
+ obj_di.* = try readElfDebugInfo(self.allocator, elf_file);
obj_di.base_address = ctx.base_address;
try self.address_map.putNoClobber(ctx.base_address, obj_di);
@@ -1329,7 +1339,8 @@ pub const ModuleDebugInfo = switch (builtin.os.tag) {
}
fn loadOFile(self: *@This(), o_file_path: []const u8) !DW.DwarfInfo {
- const mapped_mem = try mapWholeFile(o_file_path);
+ const o_file = try fs.cwd().openFile(o_file_path, .{ .intended_io_mode = .blocking });
+ const mapped_mem = try mapWholeFile(o_file);
const hdr = @ptrCast(
*const macho.mach_header_64,
diff --git a/lib/std/fs.zig b/lib/std/fs.zig
@@ -33,8 +33,11 @@ pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirE
pub const Watch = @import("fs/watch.zig").Watch;
-/// This represents the maximum size of a UTF-8 encoded file path.
-/// All file system operations which return a path are guaranteed to
+/// This represents the maximum size of a UTF-8 encoded file path that the
+/// operating system will accept. Paths, including those returned from file
+/// system operations, may be longer than this length, but such paths cannot
+/// be successfully passed back in other file system operations. However,
+/// all path components returned by file system operations are assumed to
/// fit into a UTF-8 encoded array of this length.
/// The byte count includes room for a null sentinel byte.
pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
@@ -1194,7 +1197,7 @@ pub const Dir = struct {
/// Read value of a symbolic link.
/// The return value is a slice of `buffer`, from index `0`.
/// Asserts that the path parameter has no null bytes.
- pub fn readLink(self: Dir, sub_path: []const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {
+ pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
const sub_path_c = try os.toPosixPath(sub_path);
return self.readLinkZ(&sub_path_c, buffer);
}
@@ -1202,7 +1205,7 @@ pub const Dir = struct {
pub const readLinkC = @compileError("deprecated: renamed to readLinkZ");
/// Same as `readLink`, except the `pathname` parameter is null-terminated.
- pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {
+ pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
return os.readlinkatZ(self.fd, sub_path_c, buffer);
}
@@ -1320,6 +1323,9 @@ pub const Dir = struct {
var cleanup_dir = true;
defer if (cleanup_dir) dir.close();
+ // Valid use of MAX_PATH_BYTES because dir_name_buf will only
+ // ever store a single path component that was returned from the
+ // filesystem.
var dir_name_buf: [MAX_PATH_BYTES]u8 = undefined;
var dir_name: []const u8 = sub_path;
@@ -1772,19 +1778,21 @@ pub fn walkPath(allocator: *Allocator, dir_path: []const u8) !Walker {
pub const OpenSelfExeError = os.OpenError || os.windows.CreateFileError || SelfExePathError || os.FlockError;
-pub fn openSelfExe() OpenSelfExeError!File {
+pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {
if (builtin.os.tag == .linux) {
- return openFileAbsoluteZ("/proc/self/exe", .{});
+ return openFileAbsoluteZ("/proc/self/exe", flags);
}
if (builtin.os.tag == .windows) {
const wide_slice = selfExePathW();
const prefixed_path_w = try os.windows.wToPrefixedFileW(wide_slice);
- return cwd().openFileW(prefixed_path_w.span(), .{});
+ return cwd().openFileW(prefixed_path_w.span(), flags);
}
+ // Use of MAX_PATH_BYTES here is valid as the resulting path is immediately
+ // opened with no modification.
var buf: [MAX_PATH_BYTES]u8 = undefined;
const self_exe_path = try selfExePath(&buf);
buf[self_exe_path.len] = 0;
- return openFileAbsoluteZ(buf[0..self_exe_path.len :0].ptr, .{});
+ return openFileAbsoluteZ(buf[0..self_exe_path.len :0].ptr, flags);
}
pub const SelfExePathError = os.ReadLinkError || os.SysCtlError;
@@ -1792,6 +1800,13 @@ pub const SelfExePathError = os.ReadLinkError || os.SysCtlError;
/// `selfExePath` except allocates the result on the heap.
/// Caller owns returned memory.
pub fn selfExePathAlloc(allocator: *Allocator) ![]u8 {
+ // Use of MAX_PATH_BYTES here is justified as, at least on one tested Linux
+ // system, readlink will completely fail to return a result larger than
+ // PATH_MAX even if given a sufficiently large buffer. This makes it
+ // fundamentally impossible to get the selfExePath of a program running in
+ // a very deeply nested directory chain in this way.
+ // TODO(#4812): Investigate other systems and whether it is possible to get
+ // this path by trying larger and larger buffers until one succeeds.
var buf: [MAX_PATH_BYTES]u8 = undefined;
return mem.dupe(allocator, u8, try selfExePath(&buf));
}
@@ -1806,10 +1821,10 @@ pub fn selfExePathAlloc(allocator: *Allocator) ![]u8 {
/// On Linux, depends on procfs being mounted. If the currently executing binary has
/// been deleted, the file path looks something like `/a/b/c/exe (deleted)`.
/// TODO make the return type of this a null terminated pointer
-pub fn selfExePath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]u8 {
+pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
if (is_darwin) {
- var u32_len: u32 = out_buffer.len;
- const rc = std.c._NSGetExecutablePath(out_buffer, &u32_len);
+ var u32_len: u32 = @intCast(u32, math.min(out_buffer.len, math.maxInt(u32)));
+ const rc = std.c._NSGetExecutablePath(out_buffer.ptr, &u32_len);
if (rc != 0) return error.NameTooLong;
return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
}
@@ -1818,14 +1833,14 @@ pub fn selfExePath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]u8 {
.freebsd, .dragonfly => {
var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC, os.KERN_PROC_PATHNAME, -1 };
var out_len: usize = out_buffer.len;
- try os.sysctl(&mib, out_buffer, &out_len, null, 0);
+ try os.sysctl(&mib, out_buffer.ptr, &out_len, null, 0);
// TODO could this slice from 0 to out_len instead?
return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
},
.netbsd => {
var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC_ARGS, -1, os.KERN_PROC_PATHNAME };
var out_len: usize = out_buffer.len;
- try os.sysctl(&mib, out_buffer, &out_len, null, 0);
+ try os.sysctl(&mib, out_buffer.ptr, &out_len, null, 0);
// TODO could this slice from 0 to out_len instead?
return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
},
@@ -1848,13 +1863,20 @@ pub fn selfExePathW() [:0]const u16 {
/// `selfExeDirPath` except allocates the result on the heap.
/// Caller owns returned memory.
pub fn selfExeDirPathAlloc(allocator: *Allocator) ![]u8 {
+ // Use of MAX_PATH_BYTES here is justified as, at least on one tested Linux
+ // system, readlink will completely fail to return a result larger than
+ // PATH_MAX even if given a sufficiently large buffer. This makes it
+ // fundamentally impossible to get the selfExeDirPath of a program running
+ // in a very deeply nested directory chain in this way.
+ // TODO(#4812): Investigate other systems and whether it is possible to get
+ // this path by trying larger and larger buffers until one succeeds.
var buf: [MAX_PATH_BYTES]u8 = undefined;
return mem.dupe(allocator, u8, try selfExeDirPath(&buf));
}
/// Get the directory path that contains the current executable.
/// Returned value is a slice of out_buffer.
-pub fn selfExeDirPath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]const u8 {
+pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
const self_exe_path = try selfExePath(out_buffer);
// Assume that the OS APIs return absolute paths, and therefore dirname
// will not return null.
@@ -1864,6 +1886,12 @@ pub fn selfExeDirPath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]const
/// `realpath`, except caller must free the returned memory.
/// TODO integrate with `Dir`
pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 {
+ // Use of MAX_PATH_BYTES here is valid as the realpath function does not
+ // have a variant that takes an arbitrary-size buffer.
+ // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
+ // NULL out parameter (GNU's canonicalize_file_name) to handle overlong
+ // paths. musl supports passing NULL but restricts the output to PATH_MAX
+ // anyway.
var buf: [MAX_PATH_BYTES]u8 = undefined;
return mem.dupe(allocator, u8, try os.realpath(pathname, &buf));
}
diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig
@@ -6,7 +6,7 @@ const File = std.fs.File;
test "openSelfExe" {
if (builtin.os.tag == .wasi) return error.SkipZigTest;
- const self_exe_file = try std.fs.openSelfExe();
+ const self_exe_file = try std.fs.openSelfExe(.{});
self_exe_file.close();
}
diff --git a/lib/std/os.zig b/lib/std/os.zig
@@ -1236,6 +1236,8 @@ pub fn execvpeZ_expandArg0(
if (mem.indexOfScalar(u8, file_slice, '/') != null) return execveZ(file, child_argv, envp);
const PATH = getenvZ("PATH") orelse "/usr/local/bin:/bin/:/usr/bin";
+ // Use of MAX_PATH_BYTES here is valid as the path_buf will be passed
+ // directly to the operating system in execveZ.
var path_buf: [MAX_PATH_BYTES]u8 = undefined;
var it = mem.tokenize(PATH, ":");
var seen_eacces = false;
diff --git a/lib/std/process.zig b/lib/std/process.zig
@@ -15,14 +15,34 @@ pub const changeCurDir = os.chdir;
pub const changeCurDirC = os.chdirC;
/// The result is a slice of `out_buffer`, from index `0`.
-pub fn getCwd(out_buffer: *[fs.MAX_PATH_BYTES]u8) ![]u8 {
+pub fn getCwd(out_buffer: []u8) ![]u8 {
return os.getcwd(out_buffer);
}
/// Caller must free the returned memory.
+/// The result is not limited to `fs.MAX_PATH_BYTES`: when `os.getcwd` reports
+/// `error.NameTooLong`, it is retried with progressively larger heap buffers.
pub fn getCwdAlloc(allocator: *Allocator) ![]u8 {
- var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
- return mem.dupe(allocator, u8, try os.getcwd(&buf));
+ // The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit
+ // in stack_buf, avoiding an extra allocation in the common case.
+ var stack_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+ // Scratch buffer used only once stack_buf has proven too small. It is never
+ // handed to the caller; the returned slice is always a separate copy.
+ var heap_buf: ?[]u8 = null;
+ defer if (heap_buf) |buf| allocator.free(buf);
+
+ var current_buf: []u8 = &stack_buf;
+ while (true) {
+ if (os.getcwd(current_buf)) |slice| {
+ return mem.dupe(allocator, u8, slice);
+ } else |err| switch (err) {
+ error.NameTooLong => {
+ // The path is too long to fit in current_buf. Allocate geometrically
+ // increasing buffers until we find one that works.
+ const new_capacity = current_buf.len * 2;
+ if (heap_buf) |buf| {
+ allocator.free(buf);
+ // Forget the freed buffer before allocating again: if the
+ // allocation below fails, the deferred cleanup above must
+ // not free it a second time.
+ heap_buf = null;
+ }
+ current_buf = try allocator.alloc(u8, new_capacity);
+ heap_buf = current_buf;
+ },
+ else => |e| return e,
+ }
+ }
}
test "getCwdAlloc" {