//! Cross-platform abstraction for debug information. const builtin = @import("builtin"); const native_os = builtin.os.tag; const native_endian = native_arch.endian(); const native_arch = builtin.cpu.arch; const std = @import("../std.zig"); const mem = std.mem; const Allocator = std.mem.Allocator; const windows = std.os.windows; const macho = std.macho; const fs = std.fs; const coff = std.coff; const pdb = std.pdb; const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; const Dwarf = std.debug.Dwarf; const File = std.fs.File; const math = std.math; const testing = std.testing; const Info = @This(); const root = @import("root"); allocator: Allocator, address_map: std.AutoHashMap(usize, *Module), modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, pub const OpenSelfError = error{ MissingDebugInfo, UnsupportedOperatingSystem, } || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; pub fn openSelf(allocator: Allocator) OpenSelfError!Info { nosuspend { if (builtin.strip_debug_info) return error.MissingDebugInfo; if (@hasDecl(root, "os") and @hasDecl(root.os, "debug") and @hasDecl(root.os.debug, "openSelfDebugInfo")) { return root.os.debug.openSelfDebugInfo(allocator); } switch (native_os) { .linux, .freebsd, .netbsd, .dragonfly, .openbsd, .macos, .solaris, .illumos, .windows, => return try Info.init(allocator), else => return error.UnsupportedOperatingSystem, } } } pub fn init(allocator: Allocator) !Info { var debug_info: Info = .{ .allocator = allocator, .address_map = std.AutoHashMap(usize, *Module).init(allocator), .modules = if (native_os == .windows) .{} else {}, }; if (native_os == .windows) { errdefer debug_info.modules.deinit(allocator); const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); if (handle == windows.INVALID_HANDLE_VALUE) { switch (windows.GetLastError()) { else => |err| return windows.unexpectedError(err), } } defer windows.CloseHandle(handle); var module_entry: windows.MODULEENTRY32 = undefined; module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); if (windows.kernel32.Module32First(handle, &module_entry) == 0) { return error.MissingDebugInfo; } var module_valid = true; while (module_valid) { const module_info = try debug_info.modules.addOne(allocator); const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; errdefer allocator.free(name); module_info.* = .{ .base_address = @intFromPtr(module_entry.modBaseAddr), .size = module_entry.modBaseSize, .name = name, .handle = module_entry.hModule, }; module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; } } return debug_info; } pub fn deinit(self: *Info) void { var it = self.address_map.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; mdi.deinit(self.allocator); self.allocator.destroy(mdi); } self.address_map.deinit(); if (native_os == .windows) { for (self.modules.items) |module| { self.allocator.free(module.name); if (module.mapped_file) |mapped_file| mapped_file.deinit(); } self.modules.deinit(self.allocator); } } pub fn getModuleForAddress(self: *Info, address: usize) !*Module { if (comptime builtin.target.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { return self.lookupModuleWin32(address); } else if (native_os == .haiku) { return self.lookupModuleHaiku(address); } else if (comptime builtin.target.isWasm()) { return self.lookupModuleWasm(address); } else { return self.lookupModuleDl(address); } } // Returns the module name for a given address. // This can be called when getModuleForAddress fails, so implementations should provide // a path that doesn't rely on any side-effects of a prior successful module lookup. pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { if (comptime builtin.target.isDarwin()) { return self.lookupModuleNameDyld(address); } else if (native_os == .windows) { return self.lookupModuleNameWin32(address); } else if (native_os == .haiku) { return null; } else if (comptime builtin.target.isWasm()) { return null; } else { return self.lookupModuleNameDl(address); } } fn lookupModuleDyld(self: *Info, address: usize) !*Module { const image_count = std.c._dyld_image_count(); var i: u32 = 0; while (i < image_count) : (i += 1) { const header = std.c._dyld_get_image_header(i) orelse continue; const base_address = @intFromPtr(header); if (address < base_address) continue; const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); var it = macho.LoadCommandIterator{ .ncmds = header.ncmds, .buffer = @alignCast(@as( [*]u8, @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), )[0..header.sizeofcmds]), }; var unwind_info: ?[]const u8 = null; var eh_frame: ?[]const u8 = null; while (it.next()) |cmd| switch (cmd.cmd()) { .SEGMENT_64 => { const segment_cmd = cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; const seg_start = segment_cmd.vmaddr + vmaddr_slide; const seg_end = seg_start + segment_cmd.vmsize; if (address >= seg_start and address < seg_end) { if (self.address_map.get(base_address)) |obj_di| { return obj_di; } for (cmd.getSections()) |sect| { if (mem.eql(u8, "__unwind_info", sect.sectName())) { unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; } } const obj_di = try self.allocator.create(Module); errdefer self.allocator.destroy(obj_di); const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { error.FileNotFound => return error.MissingDebugInfo, else => return err, }; obj_di.* = try readMachODebugInfo(self.allocator, macho_file); obj_di.base_address = base_address; obj_di.vmaddr_slide = vmaddr_slide; obj_di.unwind_info = unwind_info; obj_di.eh_frame = eh_frame; try self.address_map.putNoClobber(base_address, obj_di); return obj_di; } }, else => {}, }; } return error.MissingDebugInfo; } fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { _ = self; const image_count = std.c._dyld_image_count(); var i: u32 = 0; while (i < image_count) : (i += 1) { const header = std.c._dyld_get_image_header(i) orelse continue; const base_address = @intFromPtr(header); if (address < base_address) continue; const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); var it = macho.LoadCommandIterator{ .ncmds = header.ncmds, .buffer = @alignCast(@as( [*]u8, @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), )[0..header.sizeofcmds]), }; while (it.next()) |cmd| switch (cmd.cmd()) { .SEGMENT_64 => { const segment_cmd = cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; const original_address = address - vmaddr_slide; const seg_start = segment_cmd.vmaddr; const seg_end = seg_start + segment_cmd.vmsize; if (original_address >= seg_start and original_address < seg_end) { return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); } }, else => {}, }; } return null; } fn lookupModuleWin32(self: *Info, address: usize) !*Module { for (self.modules.items) |*module| { if (address >= module.base_address and address < module.base_address + module.size) { if (self.address_map.get(module.base_address)) |obj_di| { return obj_di; } const obj_di = try self.allocator.create(Module); errdefer self.allocator.destroy(obj_di); const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; var coff_obj = try coff.Coff.init(mapped_module, true); // The string table is not mapped into memory by the loader, so if a section name is in the // string table then we have to map the full image file from disk. This can happen when // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. if (coff_obj.strtabRequired()) { var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; // openFileAbsoluteW requires the prefix to be present @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); const process_handle = windows.GetCurrentProcess(); const len = windows.kernel32.GetModuleFileNameExW( process_handle, module.handle, @ptrCast(&name_buffer[4]), windows.PATH_MAX_WIDE, ); if (len == 0) return error.MissingDebugInfo; const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { error.FileNotFound => return error.MissingDebugInfo, else => return err, }; errdefer coff_file.close(); var section_handle: windows.HANDLE = undefined; const create_section_rc = windows.ntdll.NtCreateSection( §ion_handle, windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, null, null, windows.PAGE_READONLY, // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. windows.SEC_COMMIT, coff_file.handle, ); if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; errdefer windows.CloseHandle(section_handle); var coff_len: usize = 0; var base_ptr: usize = 0; const map_section_rc = windows.ntdll.NtMapViewOfSection( section_handle, process_handle, @ptrCast(&base_ptr), null, 0, null, &coff_len, .ViewUnmap, 0, windows.PAGE_READONLY, ); if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; coff_obj = try coff.Coff.init(section_view, false); module.mapped_file = .{ .file = coff_file, .section_handle = section_handle, .section_view = section_view, }; } errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); obj_di.base_address = module.base_address; try self.address_map.putNoClobber(module.base_address, obj_di); return obj_di; } } return error.MissingDebugInfo; } fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { for (self.modules.items) |module| { if (address >= module.base_address and address < module.base_address + module.size) { return module.name; } } return null; } fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { _ = self; var ctx: struct { // Input address: usize, // Output name: []const u8 = "", } = .{ .address = address }; const CtxTy = @TypeOf(ctx); if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { _ = size; if (context.address < info.addr) return; const phdrs = info.phdr[0..info.phnum]; for (phdrs) |*phdr| { if (phdr.p_type != elf.PT_LOAD) continue; const seg_start = info.addr +% phdr.p_vaddr; const seg_end = seg_start + phdr.p_memsz; if (context.address >= seg_start and context.address < seg_end) { context.name = mem.sliceTo(info.name, 0) orelse ""; break; } } else return; return error.Found; } }.callback)) { return null; } else |err| switch (err) { error.Found => return fs.path.basename(ctx.name), } return null; } fn lookupModuleDl(self: *Info, address: usize) !*Module { var ctx: struct { // Input address: usize, // Output base_address: usize = undefined, name: []const u8 = undefined, build_id: ?[]const u8 = null, gnu_eh_frame: ?[]const u8 = null, } = .{ .address = address }; const CtxTy = @TypeOf(ctx); if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { _ = size; // The base address is too high if (context.address < info.addr) return; const phdrs = info.phdr[0..info.phnum]; for (phdrs) |*phdr| { if (phdr.p_type != elf.PT_LOAD) continue; // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 const seg_start = info.addr +% phdr.p_vaddr; const seg_end = seg_start + phdr.p_memsz; if (context.address >= seg_start and context.address < seg_end) { // Android libc uses NULL instead of an empty string to mark the // main program context.name = mem.sliceTo(info.name, 0) orelse ""; context.base_address = info.addr; break; } } else return; for (info.phdr[0..info.phnum]) |phdr| { switch (phdr.p_type) { elf.PT_NOTE => { // Look for .note.gnu.build-id const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); if (name_size != 4) continue; const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); if (note_type != elf.NT_GNU_BUILD_ID) continue; if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; context.build_id = note_bytes[16..][0..desc_size]; }, elf.PT_GNU_EH_FRAME => { context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; }, else => {}, } } // Stop the iteration return error.Found; } }.callback)) { return error.MissingDebugInfo; } else |err| switch (err) { error.Found => {}, } if (self.address_map.get(ctx.base_address)) |obj_di| { return obj_di; } const obj_di = try self.allocator.create(Module); errdefer self.allocator.destroy(obj_di); var sections: Dwarf.SectionArray = Dwarf.null_section_array; if (ctx.gnu_eh_frame) |eh_frame_hdr| { // This is a special case - pointer offsets inside .eh_frame_hdr // are encoded relative to its base address, so we must use the // version that is already memory mapped, and not the one that // will be mapped separately from the ELF file. sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ .data = eh_frame_hdr, .owned = false, }; } obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); obj_di.base_address = ctx.base_address; // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; try self.address_map.putNoClobber(ctx.base_address, obj_di); return obj_di; } fn lookupModuleHaiku(self: *Info, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Haiku"); } fn lookupModuleWasm(self: *Info, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Wasm"); } pub const Module = switch (native_os) { .macos, .ios, .watchos, .tvos, .visionos => struct { base_address: usize, vmaddr_slide: usize, mapped_memory: []align(mem.page_size) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, ofiles: OFileTable, // Backed by the in-memory sections mapped by the loader unwind_info: ?[]const u8 = null, eh_frame: ?[]const u8 = null, const OFileTable = std.StringHashMap(OFileInfo); const OFileInfo = struct { di: Dwarf, addr_table: std.StringHashMap(u64), }; pub fn deinit(self: *@This(), allocator: Allocator) void { var it = self.ofiles.iterator(); while (it.next()) |entry| { const ofile = entry.value_ptr; ofile.di.deinit(allocator); ofile.addr_table.deinit(); } self.ofiles.deinit(); allocator.free(self.symbols); posix.munmap(self.mapped_memory); } fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo { const o_file = try fs.cwd().openFile(o_file_path, .{}); const mapped_mem = try mapWholeFile(o_file); const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; var symtabcmd: ?macho.symtab_command = null; var it = macho.LoadCommandIterator{ .ncmds = hdr.ncmds, .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; while (it.next()) |cmd| switch (cmd.cmd()) { .SEGMENT_64 => segcmd = cmd, .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, else => {}, }; if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; // Parse symbols const strtab = @as( [*]const u8, @ptrCast(&mapped_mem[symtabcmd.?.stroff]), )[0 .. symtabcmd.?.strsize - 1 :0]; const symtab = @as( [*]const macho.nlist_64, @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), )[0..symtabcmd.?.nsyms]; // TODO handle tentative (common) symbols var addr_table = std.StringHashMap(u64).init(allocator); try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); for (symtab) |sym| { if (sym.n_strx == 0) continue; if (sym.undf() or sym.tentative() or sym.abs()) continue; const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); // TODO is it possible to have a symbol collision? addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); } var sections: Dwarf.SectionArray = Dwarf.null_section_array; if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ .data = eh_frame, .owned = false, }; for (segcmd.?.getSections()) |sect| { if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; var section_index: ?usize = null; inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; } if (section_index == null) continue; const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] = .{ .data = section_bytes, .virtual_address = sect.addr, .owned = false, }; } const missing_debug_info = sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; if (missing_debug_info) return error.MissingDebugInfo; var di = Dwarf{ .endian = .little, .sections = sections, .is_macho = true, }; try Dwarf.open(&di, allocator); const info = OFileInfo{ .di = di, .addr_table = addr_table, }; // Add the debug info to the cache const result = try self.ofiles.getOrPut(o_file_path); assert(!result.found_existing); result.value_ptr.* = info; return result.value_ptr; } pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { nosuspend { const result = try self.getOFileInfoForAddress(allocator, address); if (result.symbol == null) return .{}; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; // Translate again the address, this time into an address inside the // .o file const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ .symbol_name = "???", }; const addr_off = result.relocated_address - result.symbol.?.addr; const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { return SymbolInfo{ .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", .compile_unit_name = compile_unit.die.getAttrString( o_file_di, std.dwarf.AT.name, o_file_di.section(.debug_str), compile_unit.*, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, .line_info = o_file_di.getLineNumberInfo( allocator, compile_unit.*, relocated_address_o + addr_off, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, else => return err, }, }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { return SymbolInfo{ .symbol_name = stab_symbol }; }, else => return err, } } } pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { relocated_address: usize, symbol: ?*const MachoSymbol = null, o_file_info: ?*OFileInfo = null, } { nosuspend { // Translate the VA into an address into this object const relocated_address = address - self.vmaddr_slide; // Find the .o file where this symbol is defined const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ .relocated_address = relocated_address, }; // Check if its debug infos are already in the cache const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); const o_file_info = self.ofiles.getPtr(o_file_path) orelse (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { error.FileNotFound, error.MissingDebugInfo, error.InvalidDebugInfo, => return .{ .relocated_address = relocated_address, .symbol = symbol, }, else => return err, }); return .{ .relocated_address = relocated_address, .symbol = symbol, .o_file_info = o_file_info, }; } } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; } }, .uefi, .windows => struct { base_address: usize, pdb: ?pdb.Pdb = null, dwarf: ?Dwarf = null, coff_image_base: u64, /// Only used if pdb is non-null coff_section_headers: []coff.SectionHeader, pub fn deinit(self: *@This(), allocator: Allocator) void { if (self.dwarf) |*dwarf| { dwarf.deinit(allocator); } if (self.pdb) |*p| { p.deinit(); allocator.free(self.coff_section_headers); } } fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { var coff_section: *align(1) const coff.SectionHeader = undefined; const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { if (sect_contrib.Section > self.coff_section_headers.len) continue; // Remember that SectionContribEntry.Section is 1-based. coff_section = &self.coff_section_headers[sect_contrib.Section - 1]; const vaddr_start = coff_section.virtual_address + sect_contrib.Offset; const vaddr_end = vaddr_start + sect_contrib.Size; if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { break sect_contrib.ModuleIndex; } } else { // we have no information to add to the address return null; }; const module = (try self.pdb.?.getModule(mod_index)) orelse return error.InvalidDebugInfo; const obj_basename = fs.path.basename(module.obj_file_name); const symbol_name = self.pdb.?.getSymbolName( module, relocated_address - coff_section.virtual_address, ) orelse "???"; const opt_line_info = try self.pdb.?.getLineNumberInfo( module, relocated_address - coff_section.virtual_address, ); return SymbolInfo{ .symbol_name = symbol_name, .compile_unit_name = obj_basename, .line_info = opt_line_info, }; } pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { // Translate the VA into an address into this object const relocated_address = address - self.base_address; if (self.pdb != null) { if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; } if (self.dwarf) |*dwarf| { const dwarf_address = relocated_address + self.coff_image_base; return getSymbolFromDwarf(allocator, dwarf_address, dwarf); } return SymbolInfo{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { _ = allocator; _ = address; return switch (self.debug_data) { .dwarf => |*dwarf| dwarf, else => null, }; } }, .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { base_address: usize, dwarf: Dwarf, mapped_memory: []align(mem.page_size) const u8, external_mapped_memory: ?[]align(mem.page_size) const u8, pub fn deinit(self: *@This(), allocator: Allocator) void { self.dwarf.deinit(allocator); posix.munmap(self.mapped_memory); if (self.external_mapped_memory) |m| posix.munmap(m); } pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { // Translate the VA into an address into this object const relocated_address = address - self.base_address; return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { _ = allocator; _ = address; return &self.dwarf; } }, .wasi, .emscripten => struct { pub fn deinit(self: *@This(), allocator: Allocator) void { _ = self; _ = allocator; } pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { _ = self; _ = allocator; _ = address; return SymbolInfo{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { _ = self; _ = allocator; _ = address; return null; } }, else => Dwarf, }; pub const WindowsModuleInfo = struct { base_address: usize, size: u32, name: []const u8, handle: windows.HMODULE, // Set when the image file needed to be mapped from disk mapped_file: ?struct { file: File, section_handle: windows.HANDLE, section_view: []const u8, pub fn deinit(self: @This()) void { const process_handle = windows.GetCurrentProcess(); assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); windows.CloseHandle(self.section_handle); self.file.close(); } } = null, }; /// This takes ownership of macho_file: users of this function should not close /// it themselves, even on error. /// TODO it's weird to take ownership even on error, rework this code. fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { const mapped_mem = try mapWholeFile(macho_file); const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidDebugInfo; var it = macho.LoadCommandIterator{ .ncmds = hdr.ncmds, .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { .SYMTAB => break cmd.cast(macho.symtab_command).?, else => {}, } else return error.MissingDebugInfo; const syms = @as( [*]const macho.nlist_64, @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), )[0..symtab.nsyms]; const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); var ofile: u32 = undefined; var last_sym: MachoSymbol = undefined; var symbol_index: usize = 0; var state: enum { init, oso_open, oso_close, bnsym, fun_strx, fun_size, ensym, } = .init; for (syms) |*sym| { if (!sym.stab()) continue; // TODO handle globals N_GSYM, and statics N_STSYM switch (sym.n_type) { macho.N_OSO => { switch (state) { .init, .oso_close => { state = .oso_open; ofile = sym.n_strx; }, else => return error.InvalidDebugInfo, } }, macho.N_BNSYM => { switch (state) { .oso_open, .ensym => { state = .bnsym; last_sym = .{ .strx = 0, .addr = sym.n_value, .size = 0, .ofile = ofile, }; }, else => return error.InvalidDebugInfo, } }, macho.N_FUN => { switch (state) { .bnsym => { state = .fun_strx; last_sym.strx = sym.n_strx; }, .fun_strx => { state = .fun_size; last_sym.size = @as(u32, @intCast(sym.n_value)); }, else => return error.InvalidDebugInfo, } }, macho.N_ENSYM => { switch (state) { .fun_size => { state = .ensym; symbols_buf[symbol_index] = last_sym; symbol_index += 1; }, else => return error.InvalidDebugInfo, } }, macho.N_SO => { switch (state) { .init, .oso_close => {}, .oso_open, .ensym => { state = .oso_close; }, else => return error.InvalidDebugInfo, } }, else => {}, } } switch (state) { .init => return error.MissingDebugInfo, .oso_close => {}, else => return error.InvalidDebugInfo, } const symbols = try allocator.realloc(symbols_buf, symbol_index); // Even though lld emits symbols in ascending order, this debug code // should work for programs linked in any valid way. // This sort is so that we can binary search later. mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); return .{ .base_address = undefined, .vmaddr_slide = undefined, .mapped_memory = mapped_mem, .ofiles = Module.OFileTable.init(allocator), .symbols = symbols, .strings = strings, }; } fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { nosuspend { var di: Module = .{ .base_address = undefined, .coff_image_base = coff_obj.getImageBase(), .coff_section_headers = undefined, }; if (coff_obj.getSectionByName(".debug_info")) |_| { // This coff file has embedded DWARF debug info var sections: Dwarf.SectionArray = Dwarf.null_section_array; errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { break :blk .{ .data = try coff_obj.getSectionDataAlloc(section_header, allocator), .virtual_address = section_header.virtual_address, .owned = true, }; } else null; } var dwarf = Dwarf{ .endian = native_endian, .sections = sections, .is_macho = false, }; try Dwarf.open(&dwarf, allocator); di.dwarf = dwarf; } const raw_path = try coff_obj.getPdbPath() orelse return di; const path = blk: { if (fs.path.isAbsolute(raw_path)) { break :blk raw_path; } else { const self_dir = try fs.selfExeDirPathAlloc(allocator); defer allocator.free(self_dir); break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); } }; defer if (path.ptr != raw_path.ptr) allocator.free(path); di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { error.FileNotFound, error.IsDir => { if (di.dwarf == null) return error.MissingDebugInfo; return di; }, else => return err, }; try di.pdb.?.parseInfoStream(); try di.pdb.?.parseDbiStream(); if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) return error.InvalidDebugInfo; // Only used by the pdb path di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); errdefer allocator.free(di.coff_section_headers); return di; } } /// Reads debug info from an ELF file, or the current binary if none in specified. /// If the required sections aren't present but a reference to external debug info is, /// then this this function will recurse to attempt to load the debug sections from /// an external file. pub fn readElfDebugInfo( allocator: Allocator, elf_filename: ?[]const u8, build_id: ?[]const u8, expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(mem.page_size) const u8, ) !Module { nosuspend { const elf_file = (if (elf_filename) |filename| blk: { break :blk fs.cwd().openFile(filename, .{}); } else fs.openSelfExe(.{})) catch |err| switch (err) { error.FileNotFound => return error.MissingDebugInfo, else => return err, }; const mapped_mem = try mapWholeFile(elf_file); if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { elf.ELFDATA2LSB => .little, elf.ELFDATA2MSB => .big, else => return error.InvalidElfEndian, }; assert(endian == native_endian); // this is our own debug info const shoff = hdr.e_shoff; const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; const shdrs = @as( [*]const elf.Shdr, @ptrCast(@alignCast(&mapped_mem[shoff])), )[0..hdr.e_shnum]; var sections: Dwarf.SectionArray = Dwarf.null_section_array; // Combine section list. This takes ownership over any owned sections from the parent scope. for (parent_sections, §ions) |*parent, *section| { if (parent.*) |*p| { section.* = p.*; p.owned = false; } } errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); var separate_debug_filename: ?[]const u8 = null; var separate_debug_crc: ?u32 = null; for (shdrs) |*shdr| { if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); if (mem.eql(u8, name, ".gnu_debuglink")) { const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); const crc_bytes = gnu_debuglink[crc_offset..][0..4]; separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); separate_debug_filename = debug_filename; continue; } var section_index: ?usize = null; inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { if (mem.eql(u8, "." ++ section.name, name)) section_index = i; } if (section_index == null) continue; if (sections[section_index.?] != null) continue; const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { var section_stream = std.io.fixedBufferStream(section_bytes); var section_reader = section_stream.reader(); const chdr = section_reader.readStruct(elf.Chdr) catch continue; if (chdr.ch_type != .ZLIB) continue; var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); const decompressed_section = try allocator.alloc(u8, chdr.ch_size); errdefer allocator.free(decompressed_section); const read = zlib_stream.reader().readAll(decompressed_section) catch continue; assert(read == decompressed_section.len); break :blk .{ .data = decompressed_section, .virtual_address = shdr.sh_addr, .owned = true, }; } else .{ .data = section_bytes, .virtual_address = shdr.sh_addr, .owned = false, }; } const missing_debug_info = sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; // Attempt to load debug info from an external file // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html if (missing_debug_info) { // Only allow one level of debug info nesting if (parent_mapped_mem) |_| { return error.MissingDebugInfo; } const global_debug_directories = [_][]const u8{ "/usr/lib/debug", }; // /.build-id/<2-character id prefix>/.debug if (build_id) |id| blk: { if (id.len < 3) break :blk; // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice const extension = ".debug"; var id_prefix_buf: [2]u8 = undefined; var filename_buf: [38 + extension.len]u8 = undefined; _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; const filename = std.fmt.bufPrint( &filename_buf, "{s}" ++ extension, .{std.fmt.fmtSliceHexLower(id[1..])}, ) catch break :blk; for (global_debug_directories) |global_directory| { const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); defer allocator.free(path); return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; } } // use the path from .gnu_debuglink, in the same search order as gdb if (separate_debug_filename) |separate_filename| blk: { if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; // / if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} // /.debug/ { const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); defer allocator.free(path); if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} } var cwd_buf: [fs.max_path_bytes]u8 = undefined; const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; // // for (global_debug_directories) |global_directory| { const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); defer allocator.free(path); if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} } } return error.MissingDebugInfo; } var di = Dwarf{ .endian = endian, .sections = sections, .is_macho = false, }; try Dwarf.open(&di, allocator); return .{ .base_address = undefined, .dwarf = di, .mapped_memory = parent_mapped_mem orelse mapped_mem, .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, }; } } const MachoSymbol = struct { strx: u32, addr: u64, size: u32, ofile: u32, /// Returns the address from the macho file fn address(self: MachoSymbol) u64 { return self.addr; } fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { _ = context; return lhs.addr < rhs.addr; } }; /// Takes ownership of file, even on error. /// TODO it's weird to take ownership even on error, rework this code. fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 { nosuspend { defer file.close(); const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); const mapped_mem = try posix.mmap( null, file_len, posix.PROT.READ, .{ .TYPE = .SHARED }, file.handle, 0, ); errdefer posix.munmap(mapped_mem); return mapped_mem; } } fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { const start = math.cast(usize, offset) orelse return error.Overflow; const end = start + (math.cast(usize, size) orelse return error.Overflow); return ptr[start..end]; } pub const SymbolInfo = struct { symbol_name: []const u8 = "???", compile_unit_name: []const u8 = "???", line_info: ?SourceLocation = null, pub fn deinit(self: SymbolInfo, allocator: Allocator) void { if (self.line_info) |li| { li.deinit(allocator); } } }; pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, pub fn deinit(self: SourceLocation, allocator: Allocator) void { allocator.free(self.file_name); } }; fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { var min: usize = 0; var max: usize = symbols.len - 1; while (min < max) { const mid = min + (max - min) / 2; const curr = &symbols[mid]; const next = &symbols[mid + 1]; if (address >= next.address()) { min = mid + 1; } else if (address < curr.address()) { max = mid; } else { return curr; } } const max_sym = &symbols[symbols.len - 1]; if (address >= max_sym.address()) return max_sym; return null; } test machoSearchSymbols { const symbols = [_]MachoSymbol{ .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, }; try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); } fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { if (nosuspend di.findCompileUnit(address)) |compile_unit| { return SymbolInfo{ .symbol_name = nosuspend di.getSymbolName(address) orelse "???", .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, else => return err, }, }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { return SymbolInfo{}; }, else => return err, } }