Files
zig/src/link/MachO/Object.zig
Jakub Konka 5d548cc651 macho: move parsing logic for Object, Archive and Dylib into MachO
This way, the functionality is better segregated, and we finally do
not unnecessarily reparse dynamic libraries that were already visited
and parsed.
2021-08-11 19:38:00 +02:00

886 lines
35 KiB
Zig

const Object = @This();
const std = @import("std");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const fs = std.fs;
const io = std.io;
const log = std.log.scoped(.object);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const commands = @import("commands.zig");
const segmentName = commands.segmentName;
const sectionName = commands.sectionName;
const Allocator = mem.Allocator;
const LoadCommand = commands.LoadCommand;
const MachO = @import("../MachO.zig");
const TextBlock = @import("TextBlock.zig");
/// Handle of the file this object is read from.
file: fs.File,
/// Name of the object; freed with the allocator passed to `deinit`.
name: []const u8,
/// When set, `parse` seeks here before reading the header, and `readLoadCommands`
/// adds it to the file offsets stored in the load commands.
file_offset: ?u32 = null,

/// Mach-O header; populated by `parse`.
header: ?macho.mach_header_64 = null,

/// Load commands in the order they were read by `readLoadCommands`.
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},

// Indices into `load_commands`, recorded by `readLoadCommands` when the
// corresponding command is encountered.
segment_cmd_index: ?u16 = null,
symtab_cmd_index: ?u16 = null,
dysymtab_cmd_index: ?u16 = null,
build_version_cmd_index: ?u16 = null,
data_in_code_cmd_index: ?u16 = null,

// Indices into the sections of the segment command.
text_section_index: ?u16 = null,
mod_init_func_section_index: ?u16 = null,

// __DWARF segment sections
dwarf_debug_info_index: ?u16 = null,
dwarf_debug_abbrev_index: ?u16 = null,
dwarf_debug_str_index: ?u16 = null,
dwarf_debug_line_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,

/// Symbol table entries read by `parseSymtab`.
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
/// String table bytes read by `parseSymtab`; indexed by `getString`.
strtab: std.ArrayListUnmanaged(u8) = .{},
/// Data-in-code entries read by `parseDataInCode`.
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},

// Debug info
/// Populated by `parseDebugInfo` together with `tu_name` and `tu_comp_dir`.
debug_info: ?DebugInfo = null,
tu_name: ?[]const u8 = null,
tu_comp_dir: ?[]const u8 = null,
/// If still null when `parseDebugInfo` succeeds, it is filled in from the file's
/// modification time (converted to seconds), or 0 if `stat` fails.
mtime: ?u64 = null,

/// Blocks created for this object by `parseTextBlocks`. Only the list itself is
/// freed in `deinit`; the blocks are also appended to `MachO.managed_blocks`.
text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
/// Maps a section index to the index of the temporary local symbol created to
/// stand for that section.
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},

// TODO symbol mapping and its inverse can probably be simple arrays
// instead of hash maps.
/// Maps an index into `symtab` to an index into `MachO.locals`.
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
/// Inverse of `symbol_mapping`.
reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
/// DWARF debug information of an object file together with the raw section
/// buffers backing it. The buffers are owned by this struct and released in `deinit`.
const DebugInfo = struct {
    inner: dwarf.DwarfInfo,
    debug_info: []u8,
    debug_abbrev: []u8,
    debug_str: []u8,
    debug_line: []u8,
    debug_ranges: []u8,

    /// Reads the `__DWARF` sections recorded on `object` and opens them as DWARF info.
    ///
    /// Returns `null` when any of the mandatory sections (`__debug_info`,
    /// `__debug_abbrev`, `__debug_str`, `__debug_line`) is absent. `__debug_ranges`
    /// is optional; an empty buffer is used when it is missing.
    /// On success the caller owns the result and must call `deinit`.
    pub fn parseFromObject(allocator: *Allocator, object: *const Object) !?DebugInfo {
        // Resolve every mandatory section before reading anything, so that bailing
        // out with `null` can never strand a buffer that was already read.
        const info_index = object.dwarf_debug_info_index orelse return null;
        const abbrev_index = object.dwarf_debug_abbrev_index orelse return null;
        const str_index = object.dwarf_debug_str_index orelse return null;
        const line_index = object.dwarf_debug_line_index orelse return null;

        // Each buffer is released again if any later step fails.
        const debug_info = try object.readSection(allocator, info_index);
        errdefer allocator.free(debug_info);

        const debug_abbrev = try object.readSection(allocator, abbrev_index);
        errdefer allocator.free(debug_abbrev);

        const debug_str = try object.readSection(allocator, str_index);
        errdefer allocator.free(debug_str);

        const debug_line = try object.readSection(allocator, line_index);
        errdefer allocator.free(debug_line);

        const debug_ranges = if (object.dwarf_debug_ranges_index) |ranges_index|
            try object.readSection(allocator, ranges_index)
        else
            try allocator.alloc(u8, 0);
        errdefer allocator.free(debug_ranges);

        var inner: dwarf.DwarfInfo = .{
            .endian = .Little,
            .debug_info = debug_info,
            .debug_abbrev = debug_abbrev,
            .debug_str = debug_str,
            .debug_line = debug_line,
            .debug_ranges = debug_ranges,
        };
        try dwarf.openDwarfDebugInfo(&inner, allocator);

        return DebugInfo{
            .inner = inner,
            .debug_info = debug_info,
            .debug_abbrev = debug_abbrev,
            .debug_str = debug_str,
            .debug_line = debug_line,
            .debug_ranges = debug_ranges,
        };
    }

    /// Frees the section buffers and the lists held by the DWARF info.
    /// `allocator` must be the one that was passed to `parseFromObject`.
    pub fn deinit(self: *DebugInfo, allocator: *Allocator) void {
        allocator.free(self.debug_info);
        allocator.free(self.debug_abbrev);
        allocator.free(self.debug_str);
        allocator.free(self.debug_line);
        allocator.free(self.debug_ranges);
        self.inner.abbrev_table_list.deinit();
        self.inner.compile_unit_list.deinit();
        self.inner.func_list.deinit();
    }
};
/// Releases everything this object owns: the load commands, the parsed tables,
/// the symbol maps, the name, and (when present) the debug info and the
/// translation unit strings. The text blocks themselves are not freed here,
/// only the list that refers to them.
pub fn deinit(self: *Object, allocator: *Allocator) void {
    for (self.load_commands.items) |*command| command.deinit(allocator);
    self.load_commands.deinit(allocator);

    self.data_in_code_entries.deinit(allocator);
    self.symtab.deinit(allocator);
    self.strtab.deinit(allocator);
    self.text_blocks.deinit(allocator);
    self.sections_as_symbols.deinit(allocator);
    self.symbol_mapping.deinit(allocator);
    self.reverse_symbol_mapping.deinit(allocator);

    allocator.free(self.name);

    if (self.debug_info) |*info| info.deinit(allocator);
    if (self.tu_name) |tu_name| allocator.free(tu_name);
    if (self.tu_comp_dir) |comp_dir| allocator.free(comp_dir);
}
/// Reads and validates the Mach-O header, then parses the load commands, symbol
/// table, data-in-code entries and debug info.
///
/// Fails with `error.NotObject` if the file is not an `MH_OBJECT`, with
/// `error.UnsupportedCpuArchitecture` for a cpu type other than arm64 / x86_64,
/// and with `error.MismatchedCpuArchitecture` if the object's architecture differs
/// from `target`.
pub fn parse(self: *Object, allocator: *Allocator, target: std.Target) !void {
    const reader = self.file.reader();

    // When `file_offset` is set, the header starts there rather than at offset 0.
    if (self.file_offset) |start| try reader.context.seekTo(start);

    const hdr = try reader.readStruct(macho.mach_header_64);

    if (hdr.filetype != macho.MH_OBJECT) {
        log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{
            macho.MH_OBJECT,
            hdr.filetype,
        });
        return error.NotObject;
    }

    const object_arch: std.Target.Cpu.Arch = switch (hdr.cputype) {
        macho.CPU_TYPE_X86_64 => .x86_64,
        macho.CPU_TYPE_ARM64 => .aarch64,
        else => |cputype| {
            log.err("unsupported cpu architecture 0x{x}", .{cputype});
            return error.UnsupportedCpuArchitecture;
        },
    };

    const wanted_arch = target.cpu.arch;
    if (wanted_arch != object_arch) {
        log.err("mismatched cpu architecture: expected {s}, found {s}", .{ wanted_arch, object_arch });
        return error.MismatchedCpuArchitecture;
    }

    self.header = hdr;

    try self.readLoadCommands(allocator, reader);
    try self.parseSymtab(allocator);
    try self.parseDataInCode(allocator);
    try self.parseDebugInfo(allocator);
}
/// Reads `header.ncmds` load commands from `reader` and records the indices of the
/// commands and sections this linker cares about.
///
/// All file offsets found in the commands (section data, relocations, segment,
/// symtab, strtab, data-in-code) are shifted by `file_offset`, so that they can be
/// used directly against `self.file`.
///
/// `self.header` must already be set; `parse` does this before calling.
pub fn readLoadCommands(self: *Object, allocator: *Allocator, reader: anytype) !void {
    // The header is always populated by `parse` before we get here.
    const header = self.header orelse unreachable;
    const offset = self.file_offset orelse 0;

    try self.load_commands.ensureCapacity(allocator, header.ncmds);

    var i: u16 = 0;
    while (i < header.ncmds) : (i += 1) {
        var cmd = try LoadCommand.read(allocator, reader);
        switch (cmd.cmd()) {
            macho.LC_SEGMENT_64 => {
                self.segment_cmd_index = i;
                // Work through a pointer: operating on a copy of the segment would
                // silently drop the `fileoff` adjustment below once `cmd` is stored.
                const seg = &cmd.Segment;
                for (seg.sections.items) |*sect, j| {
                    const index = @intCast(u16, j);
                    const segname = segmentName(sect.*);
                    const sectname = sectionName(sect.*);
                    if (mem.eql(u8, segname, "__DWARF")) {
                        if (mem.eql(u8, sectname, "__debug_info")) {
                            self.dwarf_debug_info_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
                            self.dwarf_debug_abbrev_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_str")) {
                            self.dwarf_debug_str_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_line")) {
                            self.dwarf_debug_line_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_ranges")) {
                            self.dwarf_debug_ranges_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__TEXT")) {
                        if (mem.eql(u8, sectname, "__text")) {
                            self.text_section_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__DATA")) {
                        if (mem.eql(u8, sectname, "__mod_init_func")) {
                            self.mod_init_func_section_index = index;
                        }
                    }

                    sect.offset += offset;
                    // A relocation offset of zero is left untouched.
                    if (sect.reloff > 0) {
                        sect.reloff += offset;
                    }
                }

                seg.inner.fileoff += offset;
            },
            macho.LC_SYMTAB => {
                self.symtab_cmd_index = i;
                cmd.Symtab.symoff += offset;
                cmd.Symtab.stroff += offset;
            },
            macho.LC_DYSYMTAB => {
                self.dysymtab_cmd_index = i;
            },
            macho.LC_BUILD_VERSION => {
                self.build_version_cmd_index = i;
            },
            macho.LC_DATA_IN_CODE => {
                self.data_in_code_cmd_index = i;
                cmd.LinkeditData.dataoff += offset;
            },
            else => {
                log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
            },
        }
        self.load_commands.appendAssumeCapacity(cmd);
    }
}
/// A symbol table entry paired with its position in the object's symbol table.
const NlistWithIndex = struct {
    nlist: macho.nlist_64,
    index: u32,

    /// Sort predicate: section-defined symbols come before all others and are
    /// ordered by address among themselves. Normally dysymtab would be enough to
    /// guarantee this order, but not every compiler emits the symbols sorted.
    fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
        // Anything that is not defined in a section never sorts first.
        if (!MachO.symbolIsSect(lhs.nlist)) return false;
        // A section-defined symbol precedes any other kind.
        if (!MachO.symbolIsSect(rhs.nlist)) return true;
        // Both are section-defined: order by address.
        return lhs.nlist.n_value < rhs.nlist.n_value;
    }

    /// Returns the sub-slice of `symbols` delimited by the first symbol at or above
    /// `sect.addr` and the first symbol at or above `sect.addr + sect.size`.
    fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex {
        const AtOrAbove = struct {
            addr: u64,

            pub fn predicate(self: @This(), symbol: NlistWithIndex) bool {
                return symbol.nlist.n_value >= self.addr;
            }
        };

        const sect_start = sect.addr;
        const sect_end = sect.addr + sect.size;
        const first = MachO.findFirst(NlistWithIndex, symbols, 0, AtOrAbove{ .addr = sect_start });
        const past_last = MachO.findFirst(NlistWithIndex, symbols, first, AtOrAbove{ .addr = sect_end });

        return symbols[first..past_last];
    }
};
/// Returns the sub-slice of `dices` delimited by the first entry whose offset is at
/// or above `start_addr` and the first entry whose offset is at or above `end_addr`.
fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry {
    const OffsetAtOrAbove = struct {
        addr: u64,

        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
            return dice.offset >= self.addr;
        }
    };

    const Entry = macho.data_in_code_entry;
    const first = MachO.findFirst(Entry, dices, 0, OffsetAtOrAbove{ .addr = start_addr });
    const past_last = MachO.findFirst(Entry, dices, first, OffsetAtOrAbove{ .addr = end_addr });

    return dices[first..past_last];
}
/// State handed to `TextBlockParser.next` (and to its alias sort predicate).
const Context = struct {
    /// Used for every allocation made while building a block.
    allocator: *Allocator,
    /// The object whose section is being split into blocks.
    object: *Object,
    /// The linker state that receives the created blocks and owns the local symbols.
    macho_file: *MachO,
    /// The output section the blocks belong to; used to derive `n_sect` for symbols.
    match: MachO.MatchingSection,
};
/// Iterator that carves a section into `TextBlock`s, one per distinct symbol
/// address. `nlists` is expected to hold only the symbols of `section`, sorted by
/// address (see `NlistWithIndex.filterInSection`).
const TextBlockParser = struct {
    section: macho.section_64,
    /// Contents of the whole section.
    code: []u8,
    /// Relocations of the whole section.
    relocs: []macho.relocation_info,
    nlists: []NlistWithIndex,
    /// Position of the next symbol in `nlists` to be consumed.
    index: u32 = 0,

    /// Returns the symbol following the current one without advancing, or null
    /// if the current symbol is the last.
    fn peek(self: TextBlockParser) ?NlistWithIndex {
        return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null;
    }

    /// Sort predicate used to order aliases of the same address so that the most
    /// senior symbol ends up last, where `next` pops it off.
    fn lessThanBySeniority(context: Context, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
        if (!MachO.symbolIsExt(rhs.nlist)) {
            return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx));
        } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) {
            return !MachO.symbolIsExt(lhs.nlist);
        } else {
            return false;
        }
    }

    /// Creates the next block, or returns null once every symbol has been consumed.
    ///
    /// The block spans from the current symbol's address up to the next symbol with
    /// a different address (or the end of the section). The block is appended to
    /// `macho_file.managed_blocks`.
    pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock {
        if (self.index == self.nlists.len) return null;

        var aliases = std.ArrayList(NlistWithIndex).init(context.allocator);
        defer aliases.deinit();

        // Gather every symbol that shares the current address; stop at the first
        // symbol with a different address, which marks the end of this block.
        const next_nlist: ?NlistWithIndex = blk: while (true) {
            const curr_nlist = self.nlists[self.index];
            try aliases.append(curr_nlist);

            if (self.peek()) |next_nlist| {
                if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) {
                    self.index += 1;
                    continue;
                }
                break :blk next_nlist;
            }
            break :blk null;
        } else null;

        // From here on, `index` refers to the linker's local symbol table rather
        // than to the object's symbol table.
        for (aliases.items) |*nlist_with_index| {
            nlist_with_index.index = context.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
        }

        if (aliases.items.len > 1) {
            // Bubble-up senior symbol as the main link to the text block.
            sort.sort(
                NlistWithIndex,
                aliases.items,
                context,
                TextBlockParser.lessThanBySeniority,
            );
        }

        // The senior symbol is removed from `aliases`; what remains are true aliases.
        const senior_nlist = aliases.pop();
        const senior_sym = &context.macho_file.locals.items[senior_nlist.index];
        senior_sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);

        // Offsets of the block relative to the start of the section.
        const start_addr = senior_nlist.nlist.n_value - self.section.addr;
        const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size;

        const code = self.code[start_addr..end_addr];
        const size = code.len;

        // The alignment is capped by the section's alignment; for a non-zero address
        // it is further limited by the number of trailing zero bits of the address.
        const max_align = self.section.@"align";
        const actual_align = if (senior_nlist.nlist.n_value > 0)
            math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align)
        else
            max_align;

        // With debug info available, tag the block as a function (carrying the size
        // of its pc range) if the address falls into a known function, else as static.
        const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: {
            // TODO there has to be a better way to handle this.
            for (di.inner.func_list.items) |func| {
                if (func.pc_range) |range| {
                    if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
                        break :blk TextBlock.Stab{
                            .function = range.end - range.start,
                        };
                    }
                }
            }
            // TODO
            // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global;
            break :blk .static;
        } else null;

        const block = try context.allocator.create(TextBlock);
        block.* = TextBlock.empty;
        block.local_sym_index = senior_nlist.index;
        block.stab = stab;
        block.size = size;
        block.alignment = actual_align;
        try context.macho_file.managed_blocks.append(context.allocator, block);

        try block.code.appendSlice(context.allocator, code);

        // Record the remaining aliases on the block and move them to the same
        // output section as the senior symbol.
        try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
        for (aliases.items) |alias| {
            block.aliases.appendAssumeCapacity(alias.index);
            const sym = &context.macho_file.locals.items[alias.index];
            sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);
        }

        try block.parseRelocs(self.relocs, .{
            .base_addr = start_addr,
            .allocator = context.allocator,
            .object = context.object,
            .macho_file = context.macho_file,
        });

        if (context.macho_file.has_dices) {
            // Keep only the data-in-code entries inside this block and store their
            // offsets relative to the block's address.
            const dices = filterDice(
                context.object.data_in_code_entries.items,
                senior_nlist.nlist.n_value,
                senior_nlist.nlist.n_value + size,
            );
            try block.dices.ensureTotalCapacity(context.allocator, dices.len);

            for (dices) |dice| {
                block.dices.appendAssumeCapacity(.{
                    .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
                    .length = dice.length,
                    .kind = dice.kind,
                });
            }
        }

        self.index += 1;

        return block;
    }
};
/// Converts every section of this object that has a matching output section into
/// one or more `TextBlock`s and appends them to that output section's block list.
///
/// When the object is marked `MH_SUBSECTIONS_VIA_SYMBOLS` and the build is not a
/// Debug build, a section is split at symbol boundaries; otherwise the section
/// becomes a single block referred to by a temporary local symbol, and the symbols
/// it holds are recorded as `contained` entries.
///
/// `object_id` is this object's index in `macho_file.objects`; it is used to skip
/// external symbols whose definition was resolved to a different object.
pub fn parseTextBlocks(
    self: *Object,
    allocator: *Allocator,
    object_id: u16,
    macho_file: *MachO,
) !void {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;

    log.debug("analysing {s}", .{self.name});

    // You would expect that the symbol table is at least pre-sorted based on symbol's type:
    // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
    // the GO compiler does not necessarily respect that therefore we sort immediately by type
    // and address within.
    var sorted_all_nlists = std.ArrayList(NlistWithIndex).init(allocator);
    defer sorted_all_nlists.deinit();
    try sorted_all_nlists.ensureTotalCapacity(self.symtab.items.len);

    for (self.symtab.items) |nlist, index| {
        sorted_all_nlists.appendAssumeCapacity(.{
            .nlist = nlist,
            .index = @intCast(u32, index),
        });
    }

    sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan);

    // Sometimes compilers skip the dysymtab load command altogether, meaning we
    // have to infer the start of the undef section in the symtab ourselves.
    const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: {
        const dysymtab = self.load_commands.items[cmd_index].Dysymtab;
        break :blk dysymtab.iundefsym;
    } else blk: {
        // Walk back from the end until the last section-defined symbol is found.
        // `iundefsym` is a count (one past that symbol), so the entry to inspect is
        // `iundefsym - 1`; indexing with `iundefsym` itself would start out of bounds.
        var iundefsym: usize = sorted_all_nlists.items.len;
        while (iundefsym > 0) : (iundefsym -= 1) {
            const nlist = sorted_all_nlists.items[iundefsym - 1];
            if (MachO.symbolIsSect(nlist.nlist)) break;
        }
        break :blk iundefsym;
    };

    // We only care about defined symbols, so filter every other out.
    const sorted_nlists = sorted_all_nlists.items[0..iundefsym];

    for (seg.sections.items) |sect, id| {
        const sect_id = @intCast(u8, id);
        log.debug("putting section '{s},{s}' as a TextBlock", .{
            segmentName(sect),
            sectionName(sect),
        });

        // Get matching segment/section in the final artifact.
        const match = (try macho_file.getMatchingSection(sect)) orelse {
            log.debug("unhandled section", .{});
            continue;
        };

        // Read section's code
        var code = try allocator.alloc(u8, @intCast(usize, sect.size));
        defer allocator.free(code);
        _ = try self.file.preadAll(code, sect.offset);

        // Read section's list of relocations
        var raw_relocs = try allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
        defer allocator.free(raw_relocs);
        _ = try self.file.preadAll(raw_relocs, sect.reloff);
        const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);

        // Symbols within this section only.
        const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);

        // In release mode, if the object file was generated with dead code stripping optimisations,
        // note it now and parse sections as atoms.
        const is_splittable = blk: {
            if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
            break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
        };

        // Data-in-code entries are only considered for the __TEXT,__text section.
        macho_file.has_dices = blk: {
            if (self.text_section_index) |index| {
                if (index != id) break :blk false;
                if (self.data_in_code_entries.items.len == 0) break :blk false;
                break :blk true;
            }
            break :blk false;
        };
        macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;

        next: {
            if (is_splittable) blocks: {
                // Without any symbol there is nothing to split on; fall through and
                // treat the section as a single block.
                if (filtered_nlists.len == 0) break :blocks;

                // If the first nlist does not match the start of the section,
                // then we need to encapsulate the memory range [section start, first symbol)
                // as a temporary symbol and insert the matching TextBlock.
                const first_nlist = filtered_nlists[0].nlist;
                if (first_nlist.n_value > sect.addr) {
                    const block_local_sym_index = try self.getOrCreateSectionSymbol(
                        allocator,
                        macho_file,
                        match,
                        sect,
                        sect_id,
                    );

                    const block_code = code[0 .. first_nlist.n_value - sect.addr];
                    const block_size = block_code.len;

                    const block = try allocator.create(TextBlock);
                    block.* = TextBlock.empty;
                    block.local_sym_index = block_local_sym_index;
                    block.size = block_size;
                    block.alignment = sect.@"align";
                    try macho_file.managed_blocks.append(allocator, block);

                    try block.code.appendSlice(allocator, block_code);

                    try block.parseRelocs(relocs, .{
                        .base_addr = 0,
                        .allocator = allocator,
                        .object = self,
                        .macho_file = macho_file,
                    });

                    if (macho_file.has_dices) {
                        const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size);
                        try block.dices.ensureTotalCapacity(allocator, dices.len);

                        for (dices) |dice| {
                            block.dices.appendAssumeCapacity(.{
                                .offset = dice.offset - try math.cast(u32, sect.addr),
                                .length = dice.length,
                                .kind = dice.kind,
                            });
                        }
                    }

                    try self.commitBlock(allocator, macho_file, match, block);
                }

                var parser = TextBlockParser{
                    .section = sect,
                    .code = code,
                    .relocs = relocs,
                    .nlists = filtered_nlists,
                };

                while (try parser.next(.{
                    .allocator = allocator,
                    .object = self,
                    .macho_file = macho_file,
                    .match = match,
                })) |block| {
                    const sym = macho_file.locals.items[block.local_sym_index];
                    const is_ext = blk: {
                        const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable;
                        break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
                    };
                    if (is_ext) {
                        if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| {
                            assert(resolv.where == .global);
                            // The winning definition lives in another object, so this
                            // block is not added to the output section.
                            if (resolv.file != object_id) {
                                log.debug("deduping definition of {s} in {s}", .{
                                    macho_file.getString(sym.n_strx),
                                    self.name,
                                });
                                log.debug("  already defined in {s}", .{
                                    macho_file.objects.items[resolv.file].name,
                                });
                                continue;
                            }
                        }
                    }

                    if (sym.n_value == sect.addr) {
                        if (self.sections_as_symbols.get(sect_id)) |alias| {
                            // In x86_64 relocs, it can so happen that the compiler refers to the same
                            // atom by both the actual assigned symbol and the start of the section. In this
                            // case, we need to link the two together so add an alias.
                            try block.aliases.append(allocator, alias);
                        }
                    }

                    try self.commitBlock(allocator, macho_file, match, block);
                }

                break :next;
            }

            // Since there is no symbol to refer to this block, we create
            // a temp one, unless we already did that when working out the relocations
            // of other text blocks.
            const block_local_sym_index = try self.getOrCreateSectionSymbol(
                allocator,
                macho_file,
                match,
                sect,
                sect_id,
            );

            const block = try allocator.create(TextBlock);
            block.* = TextBlock.empty;
            block.local_sym_index = block_local_sym_index;
            block.size = sect.size;
            block.alignment = sect.@"align";
            try macho_file.managed_blocks.append(allocator, block);

            try block.code.appendSlice(allocator, code);

            try block.parseRelocs(relocs, .{
                .base_addr = 0,
                .allocator = allocator,
                .object = self,
                .macho_file = macho_file,
            });

            if (macho_file.has_dices) {
                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
                try block.dices.ensureTotalCapacity(allocator, dices.len);

                for (dices) |dice| {
                    block.dices.appendAssumeCapacity(.{
                        .offset = dice.offset - try math.cast(u32, sect.addr),
                        .length = dice.length,
                        .kind = dice.kind,
                    });
                }
            }

            // Since this block gets a helper local temporary symbol that didn't exist
            // in the object file which encompasses the entire section, we need to traverse
            // the filtered symbols and note which symbol is contained within so that
            // we can properly allocate addresses down the line.
            // While we're at it, we need to update segment,section mapping of each symbol too.
            try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len);

            for (filtered_nlists) |nlist_with_index| {
                const nlist = nlist_with_index.nlist;
                const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
                const local = &macho_file.locals.items[local_sym_index];
                local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);

                const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
                    // TODO there has to be a better way to handle this.
                    for (di.inner.func_list.items) |func| {
                        if (func.pc_range) |range| {
                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
                                break :blk TextBlock.Stab{
                                    .function = range.end - range.start,
                                };
                            }
                        }
                    }
                    // TODO
                    // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
                    break :blk .static;
                } else null;

                block.contained.appendAssumeCapacity(.{
                    .local_sym_index = local_sym_index,
                    .offset = nlist.n_value - sect.addr,
                    .stab = stab,
                });
            }

            try self.commitBlock(allocator, macho_file, match, block);
        }
    }
}

/// Returns the index of the temporary local symbol standing for section `sect_id`,
/// creating it (named `l_<object>_<segment>_<section>`) if it does not exist yet.
fn getOrCreateSectionSymbol(
    self: *Object,
    allocator: *Allocator,
    macho_file: *MachO,
    match: MachO.MatchingSection,
    sect: macho.section_64,
    sect_id: u16,
) !u32 {
    if (self.sections_as_symbols.get(sect_id)) |existing| return existing;

    const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
        self.name,
        segmentName(sect),
        sectionName(sect),
    });
    defer allocator.free(sym_name);

    const local_sym_index = @intCast(u32, macho_file.locals.items.len);
    try macho_file.locals.append(allocator, .{
        .n_strx = try macho_file.makeString(sym_name),
        .n_type = macho.N_SECT,
        .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
        .n_desc = 0,
        .n_value = sect.addr,
    });
    try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index);
    return local_sym_index;
}

/// Grows the output section `match` to make room for `block`, links `block` at the
/// end of that section's block list, and records it in `self.text_blocks`.
fn commitBlock(
    self: *Object,
    allocator: *Allocator,
    macho_file: *MachO,
    match: MachO.MatchingSection,
    block: *TextBlock,
) !void {
    // Update target section's metadata
    // TODO should we update segment's size here too?
    // How does it tie with incremental space allocs?
    const tseg = &macho_file.load_commands.items[match.seg].Segment;
    const tsect = &tseg.sections.items[match.sect];
    const new_alignment = math.max(tsect.@"align", block.alignment);
    const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
    const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
    tsect.size = new_size;
    tsect.@"align" = new_alignment;

    // `macho_file.blocks` maps a section to its most recently added block.
    if (macho_file.blocks.getPtr(match)) |last| {
        last.*.next = block;
        block.prev = last.*;
        last.* = block;
    } else {
        try macho_file.blocks.putNoClobber(allocator, match, block);
    }

    try self.text_blocks.append(allocator, block);
}
/// Reads the symbol table and the string table described by the LC_SYMTAB command
/// and appends them to `self.symtab` and `self.strtab`. Does nothing if the object
/// has no LC_SYMTAB command.
fn parseSymtab(self: *Object, allocator: *Allocator) !void {
    const index = self.symtab_cmd_index orelse return;
    const symtab_cmd = self.load_commands.items[index].Symtab;

    // Allocate the scratch buffer with the alignment of `nlist_64` so that viewing it
    // as a slice of entries is valid by construction rather than by allocator luck.
    const symtab_size = @as(usize, symtab_cmd.nsyms) * @sizeOf(macho.nlist_64);
    const raw_symtab = try allocator.alignedAlloc(u8, @alignOf(macho.nlist_64), symtab_size);
    defer allocator.free(raw_symtab);
    _ = try self.file.preadAll(raw_symtab, symtab_cmd.symoff);
    try self.symtab.appendSlice(allocator, mem.bytesAsSlice(macho.nlist_64, raw_symtab));

    const raw_strtab = try allocator.alloc(u8, symtab_cmd.strsize);
    defer allocator.free(raw_strtab);
    _ = try self.file.preadAll(raw_strtab, symtab_cmd.stroff);
    try self.strtab.appendSlice(allocator, raw_strtab);
}
/// Parses the object's DWARF info and, on success, stores it on `self` together
/// with the name and compilation directory of its compile unit. Also fills in
/// `mtime` from the file's modification time if it has not been set.
///
/// Leaves `self` untouched when the object has no (complete) set of DWARF sections
/// or when no compile unit can be found.
pub fn parseDebugInfo(self: *Object, allocator: *Allocator) !void {
    log.debug("parsing debug info in '{s}'", .{self.name});

    var debug_info = (try DebugInfo.parseFromObject(allocator, self)) orelse return;
    // Until ownership moves to `self` below, any failure must release the parsed info.
    errdefer debug_info.deinit(allocator);

    // We assume there is only one CU.
    const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
        error.MissingDebugInfo => {
            // TODO audit cases with missing debug info and audit our dwarf.zig module.
            log.debug("invalid or missing debug info in {s}; skipping", .{self.name});
            // Not an error, so `errdefer` does not fire: release explicitly.
            debug_info.deinit(allocator);
            return;
        },
        else => |e| return e,
    };
    const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name);
    const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir);

    // Duplicate both strings before touching `self`, so that a failure cannot
    // leave the object half-populated.
    const tu_name = try allocator.dupe(u8, name);
    errdefer allocator.free(tu_name);
    const tu_comp_dir = try allocator.dupe(u8, comp_dir);

    // Nothing below can fail; ownership passes to `self`.
    self.debug_info = debug_info;
    self.tu_name = tu_name;
    self.tu_comp_dir = tu_comp_dir;

    if (self.mtime == null) {
        self.mtime = mtime: {
            const stat = self.file.stat() catch break :mtime 0;
            // `stat.mtime` is divided by 1e9 to obtain seconds.
            break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
        };
    }
}
/// Reads the data-in-code entries referenced by the LC_DATA_IN_CODE command and
/// appends them to `self.data_in_code_entries`. Does nothing if the object has no
/// such command.
pub fn parseDataInCode(self: *Object, allocator: *Allocator) !void {
    const cmd_index = self.data_in_code_cmd_index orelse return;
    const linkedit = self.load_commands.items[cmd_index].LinkeditData;

    const raw = try allocator.alloc(u8, linkedit.datasize);
    defer allocator.free(raw);
    _ = try self.file.preadAll(raw, linkedit.dataoff);

    var stream = io.fixedBufferStream(raw);
    const reader = stream.reader();

    // Decode entries until the buffer is exhausted.
    while (reader.readStruct(macho.data_in_code_entry)) |dice| {
        try self.data_in_code_entries.append(allocator, dice);
    } else |err| switch (err) {
        error.EndOfStream => {},
        else => |e| return e,
    }
}
/// Reads the contents of section `index` of the object's segment command into a
/// freshly allocated buffer. The caller owns the returned slice.
fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
    const sect = seg.sections.items[index];
    const buffer = try allocator.alloc(u8, @intCast(usize, sect.size));
    // Do not leak the buffer if the read fails.
    errdefer allocator.free(buffer);
    _ = try self.file.preadAll(buffer, sect.offset);
    return buffer;
}
/// Returns the NUL-terminated string that starts at byte offset `off` of the
/// string table. Asserts that `off` lies within the table.
pub fn getString(self: Object, off: u32) []const u8 {
    const table = self.strtab.items;
    assert(off < table.len);
    const start = @ptrCast([*:0]const u8, table.ptr + off);
    return mem.spanZ(start);
}