Merge pull request #13843 from ziglang/dwarf-abs-paths

debug info: resolve relative paths to source files into absolute paths
This commit is contained in:
Jakub Konka
2022-12-09 19:18:06 +01:00
committed by GitHub
8 changed files with 55 additions and 82 deletions

View File

@@ -1035,9 +1035,6 @@ pub const InitOptions = struct {
/// (Darwin) remove dylibs that are unreachable by the entry point or exported symbols
dead_strip_dylibs: bool = false,
libcxx_abi_version: libcxx.AbiVersion = libcxx.AbiVersion.default,
/// (Windows) PDB source path prefix to instruct the linker how to resolve relative
/// paths when consolidating CodeView streams into a single PDB file.
pdb_source_path: ?[]const u8 = null,
};
fn addPackageTableToCacheHash(
@@ -1740,27 +1737,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
};
};
const pdb_source_path: ?[]const u8 = options.pdb_source_path orelse blk: {
if (builtin.target.os.tag == .windows) {
// PDB requires all file paths to be fully resolved, and it is really the
// linker's responsibility to canonicalize any path extracted from the CodeView
// in the object file. However, LLD-link has some very questionable defaults, and
// in particular, it purposely bakes in path separator of the host system it was
// built on rather than the targets, or just throw an error. Thankfully, they have
// left a backdoor we can use via -PDBSOURCEPATH.
const mod = module orelse break :blk null;
var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const resolved_path = if (mod.main_pkg.root_src_directory.path) |base_path| p: {
if (std.fs.path.isAbsolute(base_path)) break :blk base_path;
const resolved_path = std.os.realpath(base_path, &buffer) catch break :blk null;
const pos = std.mem.lastIndexOfLinear(u8, resolved_path, base_path) orelse resolved_path.len;
break :p resolved_path[0..pos];
} else std.os.realpath(".", &buffer) catch break :blk null;
break :blk try arena.dupe(u8, resolved_path);
}
break :blk null;
};
const implib_emit: ?link.Emit = blk: {
const emit_implib = options.emit_implib orelse break :blk null;
@@ -1906,7 +1882,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
.headerpad_max_install_names = options.headerpad_max_install_names,
.dead_strip_dylibs = options.dead_strip_dylibs,
.force_undefined_symbols = .{},
.pdb_source_path = pdb_source_path,
});
errdefer bin_file.destroy();
comp.* = .{

View File

@@ -443,20 +443,19 @@ pub const Object = struct {
});
defer gpa.free(producer);
// For macOS stack traces, we want to avoid having to parse the compilation unit debug
// info. As long as each debug info file has a path independent of the compilation unit
// directory (DW_AT_comp_dir), then we never have to look at the compilation unit debug
// info. If we provide an absolute path to LLVM here for the compilation unit debug
// info, LLVM will emit DWARF info that depends on DW_AT_comp_dir. To avoid this, we
// pass "." for the compilation unit directory. This forces each debug file to have a
// directory rather than be relative to DW_AT_comp_dir. According to DWARF 5, debug
// files will no longer reference DW_AT_comp_dir, for the purpose of being able to
// support the common practice of stripping all but the line number sections from an
// executable.
const compile_unit_dir = d: {
if (options.target.isDarwin()) break :d ".";
const mod = options.module orelse break :d ".";
break :d mod.root_pkg.root_src_directory.path orelse ".";
// We fully resolve all paths at this point to avoid lack of source line info in stack
// traces or lack of debugging information which, if relative paths were used, would
// be very location dependent.
// TODO: the only concern I have with this is WASI as either host or target, should
// we leave the paths as relative then?
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const compile_unit_dir = blk: {
const path = d: {
const mod = options.module orelse break :d ".";
break :d mod.root_pkg.root_src_directory.path orelse ".";
};
if (std.fs.path.isAbsolute(path)) break :blk path;
break :blk std.os.realpath(path, &buf) catch path; // If realpath fails, fallback to whatever path was
};
const compile_unit_dir_z = try gpa.dupeZ(u8, compile_unit_dir);
defer gpa.free(compile_unit_dir_z);
@@ -1389,13 +1388,20 @@ pub const Object = struct {
if (gop.found_existing) {
return @ptrCast(*llvm.DIFile, gop.value_ptr.*);
}
const dir_path = file.pkg.root_src_directory.path orelse ".";
const dir_path_z = d: {
var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = file.pkg.root_src_directory.path orelse ".";
const resolved_dir_path = if (std.fs.path.isAbsolute(dir_path))
dir_path
else
std.os.realpath(dir_path, &buffer) catch dir_path; // If realpath fails, fallback to whatever dir_path was
break :d try std.fs.path.joinZ(gpa, &.{
resolved_dir_path, std.fs.path.dirname(file.sub_file_path) orelse "",
});
};
defer gpa.free(dir_path_z);
const sub_file_path_z = try gpa.dupeZ(u8, std.fs.path.basename(file.sub_file_path));
defer gpa.free(sub_file_path_z);
const dir_path_z = try std.fs.path.joinZ(gpa, &.{
dir_path, std.fs.path.dirname(file.sub_file_path) orelse "",
});
defer gpa.free(dir_path_z);
const di_file = o.di_builder.?.createFile(sub_file_path_z, dir_path_z);
gop.value_ptr.* = di_file.toNode();
return di_file;

View File

@@ -217,10 +217,6 @@ pub const Options = struct {
/// (Darwin) remove dylibs that are unreachable by the entry point or exported symbols
dead_strip_dylibs: bool = false,
/// (Windows) PDB source path prefix to instruct the linker how to resolve relative
/// paths when consolidating CodeView streams into a single PDB file.
pdb_source_path: ?[]const u8 = null,
pub fn effectiveOutputMode(options: Options) std.builtin.OutputMode {
return if (options.use_lld) .Obj else options.output_mode;
}

View File

@@ -171,10 +171,6 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
});
try argv.append(try allocPrint(arena, "-PDB:{s}", .{out_pdb}));
try argv.append(try allocPrint(arena, "-PDBALTPATH:{s}", .{out_pdb}));
if (self.base.options.pdb_source_path) |path| {
try argv.append(try std.fmt.allocPrint(arena, "-PDBSOURCEPATH:{s}", .{path}));
}
}
if (self.base.options.lto) {
switch (self.base.options.optimize_mode) {

View File

@@ -1771,7 +1771,8 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
}
// Write the form for the compile unit, which must match the abbrev table above.
const name_strp = try self.makeString(module.root_pkg.root_src_path);
const compile_unit_dir = self.getCompDir(module);
var compile_unit_dir_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const compile_unit_dir = resolveCompilationDir(module, &compile_unit_dir_buffer);
const comp_dir_strp = try self.makeString(compile_unit_dir);
const producer_strp = try self.makeString(link.producer_string);
@@ -1823,19 +1824,15 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
}
}
fn getCompDir(self: Dwarf, module: *Module) []const u8 {
// For macOS stack traces, we want to avoid having to parse the compilation unit debug
// info. As long as each debug info file has a path independent of the compilation unit
// directory (DW_AT_comp_dir), then we never have to look at the compilation unit debug
// info. If we provide an absolute path to LLVM here for the compilation unit debug
// info, LLVM will emit DWARF info that depends on DW_AT_comp_dir. To avoid this, we
// pass "." for the compilation unit directory. This forces each debug file to have a
// directory rather than be relative to DW_AT_comp_dir. According to DWARF 5, debug
// files will no longer reference DW_AT_comp_dir, for the purpose of being able to
// support the common practice of stripping all but the line number sections from an
// executable.
if (self.bin_file.tag == .macho) return ".";
return module.root_pkg.root_src_directory.path orelse ".";
fn resolveCompilationDir(module: *Module, buffer: *[std.fs.MAX_PATH_BYTES]u8) []const u8 {
// We fully resolve all paths at this point to avoid lack of source line info in stack
// traces or lack of debugging information which, if relative paths were used, would
// be very location dependent.
// TODO: the only concern I have with this is WASI as either host or target, should
// we leave the paths as relative then?
const comp_dir_path = module.root_pkg.root_src_directory.path orelse ".";
if (std.fs.path.isAbsolute(comp_dir_path)) return comp_dir_path;
return std.os.realpath(comp_dir_path, buffer) catch comp_dir_path; // If realpath fails, fallback to whatever comp_dir_path was
}
fn writeAddrAssumeCapacity(self: *Dwarf, buf: *std.ArrayList(u8), addr: u64) void {
@@ -2149,7 +2146,7 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void {
}
}
pub fn writeDbgLineHeader(self: *Dwarf, module: *Module) !void {
pub fn writeDbgLineHeader(self: *Dwarf) !void {
const gpa = self.allocator;
const ptr_width_bytes: u8 = self.ptrWidthBytes();
@@ -2167,7 +2164,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, module: *Module) !void {
// Convert all input DI files into a set of include dirs and file names.
var arena = std.heap.ArenaAllocator.init(gpa);
defer arena.deinit();
const paths = try self.genIncludeDirsAndFileNames(arena.allocator(), module);
const paths = try self.genIncludeDirsAndFileNames(arena.allocator());
// The size of this header is variable, depending on the number of directories,
// files, and padding. We have a function to compute the upper bound size, however,
@@ -2551,7 +2548,7 @@ fn addDIFile(self: *Dwarf, mod: *Module, decl_index: Module.Decl.Index) !u28 {
return @intCast(u28, gop.index + 1);
}
fn genIncludeDirsAndFileNames(self: *Dwarf, arena: Allocator, module: *Module) !struct {
fn genIncludeDirsAndFileNames(self: *Dwarf, arena: Allocator) !struct {
dirs: []const []const u8,
files: []const []const u8,
files_dirs_indexes: []u28,
@@ -2565,19 +2562,22 @@ fn genIncludeDirsAndFileNames(self: *Dwarf, arena: Allocator, module: *Module) !
var files_dir_indexes = std.ArrayList(u28).init(arena);
try files_dir_indexes.ensureTotalCapacity(self.di_files.count());
const comp_dir = self.getCompDir(module);
for (self.di_files.keys()) |dif| {
const full_path = try dif.fullPath(arena);
const dir_path = std.fs.path.dirname(full_path) orelse ".";
const sub_file_path = std.fs.path.basename(full_path);
const dir_path = d: {
var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = dif.pkg.root_src_directory.path orelse ".";
const abs_dir_path = if (std.fs.path.isAbsolute(dir_path))
dir_path
else
std.os.realpath(dir_path, &buffer) catch dir_path; // If realpath fails, fallback to whatever dir_path was
break :d try std.fs.path.join(arena, &.{
abs_dir_path, std.fs.path.dirname(dif.sub_file_path) orelse "",
});
};
const sub_file_path = try arena.dupe(u8, std.fs.path.basename(dif.sub_file_path));
const dir_index: u28 = blk: {
const actual_dir_path = if (mem.indexOf(u8, dir_path, comp_dir)) |_| inner: {
if (comp_dir.len == dir_path.len) break :blk 0;
break :inner dir_path[comp_dir.len + 1 ..];
} else dir_path;
const dirs_gop = dirs.getOrPutAssumeCapacity(actual_dir_path);
const dirs_gop = dirs.getOrPutAssumeCapacity(dir_path);
break :blk @intCast(u28, dirs_gop.index + 1);
};

View File

@@ -1149,7 +1149,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
}
if (self.debug_line_header_dirty) {
try dw.writeDbgLineHeader(module);
try dw.writeDbgLineHeader();
self.debug_line_header_dirty = false;
}
}

View File

@@ -281,7 +281,7 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void {
}
if (self.debug_line_header_dirty) {
try self.dwarf.writeDbgLineHeader(module);
try self.dwarf.writeDbgLineHeader();
self.debug_line_header_dirty = false;
}

View File

@@ -2672,7 +2672,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
// as locations are always offsets relative to 'code' section.
try dwarf.writeDbgInfoHeader(mod, 0, code_section_size);
try dwarf.writeDbgAranges(0, code_section_size);
try dwarf.writeDbgLineHeader(mod);
try dwarf.writeDbgLineHeader();
}
var debug_bytes = std.ArrayList(u8).init(wasm.base.allocator);