Files
zig/src/link/MachO/Object.zig
Jakub Konka a4feb97cdf macho: assign and cache section ordinals upon creation
then, when sorting sections within segments, clear and redo the
ordinals since we re-apply them to symbols anyway. It is vital
to have the ordinals consistent with parsing and resolving relocs
however.
2021-07-22 23:13:13 +02:00

912 lines
36 KiB
Zig

const Object = @This();
const std = @import("std");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const fs = std.fs;
const io = std.io;
const log = std.log.scoped(.object);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
const MachO = @import("../MachO.zig");
const TextBlock = @import("TextBlock.zig");
usingnamespace @import("commands.zig");
// Allocator used for the containers and strings stored on this object; `deinit` releases
// them through the same allocator.
allocator: *Allocator,
// Architecture the caller expects; `parse` fails with error.MismatchedCpuArchitecture when the
// Mach-O header names a different one.
arch: ?Arch = null,
// Mach-O header as read by `parse`.
header: ?macho.mach_header_64 = null,
// Handle all section, symbol table and load command reads go through.
file: ?fs.File = null,
// Byte offset of the object within `file`. When set, `parse` seeks to it and
// `readLoadCommands` adds it to the file offsets stored in the load commands.
// NOTE(review): presumably non-null for objects embedded in a larger file such as an archive — confirm.
file_offset: ?u32 = null,
// Name of the object (a copy of the path in `createAndParseFromPath`); freed in `deinit`.
name: ?[]const u8 = null,
// Load commands in the order they were read.
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
// Indices into `load_commands`, filled in by `readLoadCommands`.
segment_cmd_index: ?u16 = null,
symtab_cmd_index: ?u16 = null,
dysymtab_cmd_index: ?u16 = null,
build_version_cmd_index: ?u16 = null,
data_in_code_cmd_index: ?u16 = null,
// Indices into the segment's section list, filled in by `readLoadCommands`:
// `__TEXT,__text` and `__DATA,__mod_init_func` respectively.
text_section_index: ?u16 = null,
mod_init_func_section_index: ?u16 = null,
// __DWARF segment sections
dwarf_debug_info_index: ?u16 = null,
dwarf_debug_abbrev_index: ?u16 = null,
dwarf_debug_str_index: ?u16 = null,
dwarf_debug_line_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,
// Symbol table and string table contents, copied from the file by `parseSymtab`.
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
strtab: std.ArrayListUnmanaged(u8) = .{},
// Data-in-code entries, copied from the file by `parseDataInCode`.
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
// Debug info
debug_info: ?DebugInfo = null,
// Name and compilation directory of the translation unit, copied out of the DWARF compile
// unit by `parseDebugInfo`.
tu_name: ?[]const u8 = null,
tu_comp_dir: ?[]const u8 = null,
// Modification time of `file` in seconds; `parseDebugInfo` fills it in (0 when it cannot be
// determined) unless it was already set.
mtime: ?u64 = null,
// Blocks created from this object by `parseTextBlocks`. `deinit` frees only the list itself.
text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
// Section index within this object -> index into `macho_file.locals` of the local symbol
// created to stand for that whole section.
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
// Index into `symtab` -> index into `macho_file.locals` (this is how `parseTextBlocks` uses it).
// NOTE(review): nothing in this file inserts into the map; it is presumably populated during
// symbol resolution elsewhere — confirm.
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
/// Raw contents of an object's DWARF sections together with the `std.dwarf` state opened on
/// top of them. The section buffers are owned by this struct and released by `deinit`.
const DebugInfo = struct {
    inner: dwarf.DwarfInfo,
    debug_info: []u8,
    debug_abbrev: []u8,
    debug_str: []u8,
    debug_line: []u8,
    debug_ranges: []u8,

    /// Reads the DWARF sections of `object` and opens them with `std.dwarf`.
    ///
    /// Returns null when any of `__debug_info`, `__debug_abbrev`, `__debug_str` or
    /// `__debug_line` is absent from the object; `__debug_ranges` is optional and is replaced
    /// by an empty buffer when missing. On success the caller owns the result and must call
    /// `deinit` with the same allocator. On failure nothing stays allocated.
    pub fn parseFromObject(allocator: *Allocator, object: *const Object) !?DebugInfo {
        // Resolve every mandatory section up front: bailing out with null after some buffers
        // have already been read would leak them, since errdefer does not run on a null return.
        const info_index = object.dwarf_debug_info_index orelse return null;
        const abbrev_index = object.dwarf_debug_abbrev_index orelse return null;
        const str_index = object.dwarf_debug_str_index orelse return null;
        const line_index = object.dwarf_debug_line_index orelse return null;

        const debug_info = try object.readSection(allocator, info_index);
        errdefer allocator.free(debug_info);

        const debug_abbrev = try object.readSection(allocator, abbrev_index);
        errdefer allocator.free(debug_abbrev);

        const debug_str = try object.readSection(allocator, str_index);
        errdefer allocator.free(debug_str);

        const debug_line = try object.readSection(allocator, line_index);
        errdefer allocator.free(debug_line);

        // Always an allocation (possibly zero-length) so that `deinit` can free it unconditionally.
        const debug_ranges = if (object.dwarf_debug_ranges_index) |ranges_index|
            try object.readSection(allocator, ranges_index)
        else
            try allocator.alloc(u8, 0);
        errdefer allocator.free(debug_ranges);

        var inner: dwarf.DwarfInfo = .{
            .endian = .Little,
            .debug_info = debug_info,
            .debug_abbrev = debug_abbrev,
            .debug_str = debug_str,
            .debug_line = debug_line,
            .debug_ranges = debug_ranges,
        };
        try dwarf.openDwarfDebugInfo(&inner, allocator);

        return DebugInfo{
            .inner = inner,
            .debug_info = debug_info,
            .debug_abbrev = debug_abbrev,
            .debug_str = debug_str,
            .debug_line = debug_line,
            .debug_ranges = debug_ranges,
        };
    }

    /// Frees the section buffers and the lists held by the `std.dwarf` state.
    /// `allocator` must be the one that was passed to `parseFromObject`.
    pub fn deinit(self: *DebugInfo, allocator: *Allocator) void {
        allocator.free(self.debug_info);
        allocator.free(self.debug_abbrev);
        allocator.free(self.debug_str);
        allocator.free(self.debug_line);
        allocator.free(self.debug_ranges);
        self.inner.abbrev_table_list.deinit();
        self.inner.compile_unit_list.deinit();
        self.inner.func_list.deinit();
    }
};
/// Opens `path`, allocates an `Object` for it and parses it.
///
/// Returns null when the file does not exist, or when parsing fails with error.EndOfStream or
/// error.NotObject (i.e. the file is not a Mach-O object). Any other failure is returned as an
/// error. In both the null and the error case the file handle is closed and nothing stays
/// allocated. On success the caller owns the returned object: release it with `deinit` plus
/// `allocator.destroy`, and close its file with `closeFile`.
pub fn createAndParseFromPath(allocator: *Allocator, arch: Arch, path: []const u8) !?*Object {
    const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
        error.FileNotFound => return null,
        else => |e| return e,
    };
    errdefer file.close();

    const object = try allocator.create(Object);
    errdefer allocator.destroy(object);

    const name = try allocator.dupe(u8, path);
    errdefer allocator.free(name);

    object.* = .{
        .allocator = allocator,
        .arch = arch,
        .name = name,
        .file = file,
    };

    object.parse() catch |err| switch (err) {
        error.EndOfStream, error.NotObject => {
            // Returning null is not an error return, so none of the errdefers above fire:
            // everything, including the file handle, has to be released by hand.
            object.deinit(); // also frees `name`
            allocator.destroy(object);
            file.close();
            return null;
        },
        else => |e| {
            // Release whatever parse() managed to allocate before failing. `name` is detached
            // first because the errdefer above frees it; the errdefers also destroy the object
            // and close the file.
            object.name = null;
            object.deinit();
            return e;
        },
    };

    return object;
}
/// Releases everything stored on this object: load commands, symbol/string tables,
/// data-in-code entries, the block list and symbol maps, debug info and the owned strings.
/// Neither closes `file` (see `closeFile`) nor destroys the blocks `text_blocks` points at.
pub fn deinit(self: *Object) void {
    const gpa = self.allocator;

    for (self.load_commands.items) |*command| command.deinit(gpa);
    self.load_commands.deinit(gpa);

    self.data_in_code_entries.deinit(gpa);
    self.symtab.deinit(gpa);
    self.strtab.deinit(gpa);
    self.text_blocks.deinit(gpa);
    self.sections_as_symbols.deinit(gpa);
    self.symbol_mapping.deinit(gpa);

    if (self.debug_info) |*parsed| parsed.deinit(gpa);

    if (self.tu_name) |tu_name| gpa.free(tu_name);
    if (self.tu_comp_dir) |tu_comp_dir| gpa.free(tu_comp_dir);
    if (self.name) |object_name| gpa.free(object_name);
}
/// Closes the underlying file handle, if there is one.
pub fn closeFile(self: Object) void {
    const handle = self.file orelse return;
    handle.close();
}
/// Parses the object found at `file_offset` (or at the current position when no offset is set).
///
/// Validates the Mach-O header, then reads the load commands, the symbol table, the
/// data-in-code entries and the debug info, in that order. Requires `file` and `arch` to be set.
///
/// Fails with error.NotObject when the file type is not MH_OBJECT, with
/// error.UnsupportedCpuArchitecture for CPU types other than arm64/x86_64, and with
/// error.MismatchedCpuArchitecture when the header disagrees with `arch`.
pub fn parse(self: *Object) !void {
    const file = self.file.?;
    if (self.file_offset) |start| try file.seekTo(start);
    const reader = file.reader();

    const header = try reader.readStruct(macho.mach_header_64);
    if (header.filetype != macho.MH_OBJECT) {
        log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, header.filetype });
        return error.NotObject;
    }

    const found_arch: Arch = switch (header.cputype) {
        macho.CPU_TYPE_ARM64 => .aarch64,
        macho.CPU_TYPE_X86_64 => .x86_64,
        else => |cputype| {
            log.err("unsupported cpu architecture 0x{x}", .{cputype});
            return error.UnsupportedCpuArchitecture;
        },
    };
    if (found_arch != self.arch.?) {
        log.err("mismatched cpu architecture: expected {s}, found {s}", .{ self.arch.?, found_arch });
        return error.MismatchedCpuArchitecture;
    }

    self.header = header;

    try self.readLoadCommands(reader);
    try self.parseSymtab();
    try self.parseDataInCode();
    try self.parseDebugInfo();
}
/// Reads `header.ncmds` load commands from `reader` into `load_commands`.
///
/// While doing so it records the index of the segment, symtab, dysymtab, build-version and
/// data-in-code commands as well as the indices of the sections this linker treats specially
/// (`__TEXT,__text`, `__DATA,__mod_init_func` and the `__DWARF` sections). Every file offset
/// stored in those commands is rebased by `file_offset`, so later reads can use the offsets
/// directly against `file`.
///
/// Requires `header` to be set. `reader` must be positioned right after the Mach-O header.
pub fn readLoadCommands(self: *Object, reader: anytype) !void {
    const offset = self.file_offset orelse 0;

    try self.load_commands.ensureCapacity(self.allocator, self.header.?.ncmds);

    var i: u16 = 0;
    while (i < self.header.?.ncmds) : (i += 1) {
        var cmd = try LoadCommand.read(self.allocator, reader);
        switch (cmd.cmd()) {
            macho.LC_SEGMENT_64 => {
                self.segment_cmd_index = i;
                // Work through a pointer: taking a copy of the segment would make the
                // `fileoff` adjustment below land on the copy and get lost once `cmd` is
                // appended to `load_commands`.
                const seg = &cmd.Segment;
                for (seg.sections.items) |*sect, j| {
                    const index = @intCast(u16, j);
                    const segname = segmentName(sect.*);
                    const sectname = sectionName(sect.*);
                    if (mem.eql(u8, segname, "__DWARF")) {
                        if (mem.eql(u8, sectname, "__debug_info")) {
                            self.dwarf_debug_info_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
                            self.dwarf_debug_abbrev_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_str")) {
                            self.dwarf_debug_str_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_line")) {
                            self.dwarf_debug_line_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_ranges")) {
                            self.dwarf_debug_ranges_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__TEXT")) {
                        if (mem.eql(u8, sectname, "__text")) {
                            self.text_section_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__DATA")) {
                        if (mem.eql(u8, sectname, "__mod_init_func")) {
                            self.mod_init_func_section_index = index;
                        }
                    }

                    sect.offset += offset;
                    // A zero `reloff` is left alone rather than rebased.
                    if (sect.reloff > 0) {
                        sect.reloff += offset;
                    }
                }

                seg.inner.fileoff += offset;
            },
            macho.LC_SYMTAB => {
                self.symtab_cmd_index = i;
                cmd.Symtab.symoff += offset;
                cmd.Symtab.stroff += offset;
            },
            macho.LC_DYSYMTAB => {
                self.dysymtab_cmd_index = i;
            },
            macho.LC_BUILD_VERSION => {
                self.build_version_cmd_index = i;
            },
            macho.LC_DATA_IN_CODE => {
                self.data_in_code_cmd_index = i;
                cmd.LinkeditData.dataoff += offset;
            },
            else => {
                log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
            },
        }
        self.load_commands.appendAssumeCapacity(cmd);
    }
}
/// A symbol table entry paired with its position in the object's original symbol table.
const NlistWithIndex = struct {
    nlist: macho.nlist_64,
    index: u32,

    /// Sort predicate: symbols for which `MachO.symbolIsSect` holds come first, ordered by
    /// ascending address; all remaining symbols follow in unspecified relative order.
    ///
    /// The dysymtab would normally guarantee this grouping, but not every compiler emits
    /// its symbols in that order, hence the explicit sort.
    fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
        // A symbol outside the section-defined group never sorts before anything.
        if (!MachO.symbolIsSect(lhs.nlist)) return false;
        // `lhs` is in the group; it precedes every symbol that is not.
        if (!MachO.symbolIsSect(rhs.nlist)) return true;
        // Both are in the group: order by address.
        return lhs.nlist.n_value < rhs.nlist.n_value;
    }

    /// Returns the sub-slice of `symbols` located between the two positions `MachO.findFirst`
    /// reports for the section's start address and for its end address (`addr + size`).
    /// NOTE(review): presumably `findFirst` yields the first index at which the predicate
    /// holds, which requires `symbols` to be sorted by address — confirm against MachO.zig.
    fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex {
        const AtOrAbove = struct {
            addr: u64,

            pub fn predicate(self: @This(), symbol: NlistWithIndex) bool {
                return symbol.nlist.n_value >= self.addr;
            }
        };

        const first = MachO.findFirst(NlistWithIndex, symbols, 0, AtOrAbove{ .addr = sect.addr });
        const past_last = MachO.findFirst(NlistWithIndex, symbols, first, AtOrAbove{ .addr = sect.addr + sect.size });

        return symbols[first..past_last];
    }
};
/// Returns the sub-slice of `dices` located between the two positions `MachO.findFirst`
/// reports for `start_addr` and for `end_addr`, comparing against each entry's `offset`.
/// NOTE(review): presumably `findFirst` yields the first index at which the predicate holds,
/// which requires `dices` to be sorted by offset — confirm against MachO.zig.
fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry {
    const AtOrAbove = struct {
        addr: u64,

        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
            return dice.offset >= self.addr;
        }
    };

    const first = MachO.findFirst(macho.data_in_code_entry, dices, 0, AtOrAbove{ .addr = start_addr });
    const past_last = MachO.findFirst(macho.data_in_code_entry, dices, first, AtOrAbove{ .addr = end_addr });

    return dices[first..past_last];
}
// Iterator that splits one section into a TextBlock per symbol: every call to `next` consumes
// the run of symbols sharing an address and returns a block spanning from that address up to
// the next symbol (or to the end of the section).
//
// NOTE(review): within this file the parser is only referenced from commented-out code in
// `parseTextBlocks`, and its body names things that are not visible here (see the notes in
// `next`). It looks like it has not been ported to the current data model yet — confirm
// before re-enabling it.
const TextBlockParser = struct {
allocator: *Allocator,
// Header of the section being split.
section: macho.section_64,
// Contents of the section; blocks copy their slice out of it.
code: []u8,
// Relocation entries of the section.
relocs: []macho.relocation_info,
object: *Object,
macho_file: *MachO,
// Symbols to walk. `next` treats consecutive entries with equal `n_value` as aliases, so
// the slice is expected to be ordered by address.
nlists: []NlistWithIndex,
// Position in `nlists` of the symbol the next block starts at.
index: u32 = 0,
// Segment/section in the output file that `section` maps to.
match: MachO.MatchingSection,
// Returns the symbol following the current one without advancing, or null at the end.
fn peek(self: *TextBlockParser) ?NlistWithIndex {
return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null;
}
const SeniorityContext = struct {
object: *Object,
};
// Sort predicate ranking aliases of one address; the decision is driven by what kind of
// symbol `rhs` is. `next` sorts with it and then pops the LAST element as the senior symbol.
fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
if (!MachO.symbolIsExt(rhs.nlist)) {
return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx));
} else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) {
return !MachO.symbolIsExt(lhs.nlist);
} else {
return true;
}
}
// Produces the next block, or null once every symbol has been consumed.
// The returned block is allocated with `self.allocator`.
pub fn next(self: *TextBlockParser) !?*TextBlock {
if (self.index == self.nlists.len) return null;
var aliases = std.ArrayList(NlistWithIndex).init(self.allocator);
defer aliases.deinit();
// Collect every symbol that shares the current address. `next_nlist` ends up as the first
// symbol at a different address, or null when the section has no further symbols.
const next_nlist: ?NlistWithIndex = blk: while (true) {
const curr_nlist = self.nlists[self.index];
try aliases.append(curr_nlist);
if (self.peek()) |next_nlist| {
if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) {
self.index += 1;
continue;
}
break :blk next_nlist;
}
break :blk null;
} else null;
// Rewrite each alias' index from "position in the object's symbol table" to "index of the
// resolved local symbol".
// NOTE(review): this struct declares no `symbol_mapping` field and the Object fields visible
// in this file include no `symbols`; `sym.payload` / `sym.strx` also do not match the
// nlist-based locals that `parseTextBlocks` works with. Looks stale — confirm.
for (aliases.items) |*nlist_with_index| {
nlist_with_index.index = self.symbol_mapping.get(nlist_with_index.index);
const sym = self.object.symbols.items[nlist_with_index.index];
if (sym.payload != .regular) {
log.err("expected a regular symbol, found {s}", .{sym.payload});
log.err(" when remapping {s}", .{self.macho_file.getString(sym.strx)});
return error.SymbolIsNotRegular;
}
assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved.
nlist_with_index.index = sym.payload.regular.local_sym_index;
}
if (aliases.items.len > 1) {
// Bubble-up senior symbol as the main link to the text block.
sort.sort(
NlistWithIndex,
aliases.items,
SeniorityContext{ .object = self.object },
@This().lessThanBySeniority,
);
}
// The senior symbol is removed from `aliases`; whatever remains is attached to the block as
// plain aliases further down.
const senior_nlist = aliases.pop();
const senior_sym = self.macho_file.locals.items[senior_nlist.index];
assert(senior_sym.payload == .regular);
senior_sym.payload.regular.segment_id = self.match.seg;
senior_sym.payload.regular.section_id = self.match.sect;
// Both addresses are made relative to the start of the section so they can index `code`.
const start_addr = senior_nlist.nlist.n_value - self.section.addr;
const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size;
const code = self.code[start_addr..end_addr];
const size = code.len;
// The alignment is the number of trailing zero bits of the symbol's address, capped at the
// section's own alignment; an address of 0 takes the section alignment as is.
const max_align = self.section.@"align";
const actual_align = if (senior_nlist.nlist.n_value > 0)
math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align)
else
max_align;
// With debug info present the block is tagged as a function (carrying the size of the
// function's PC range) when its address falls inside one, otherwise as global or static
// depending on whether the symbol's name is in `macho_file.globals`.
const stab: ?TextBlock.Stab = if (self.object.debug_info) |di| blk: {
// TODO there has to be a better to handle this.
for (di.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
break :blk TextBlock.Stab{
.function = range.end - range.start,
};
}
}
}
if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global;
break :blk .static;
} else null;
// NOTE(review): `parseTextBlocks` creates blocks from `TextBlock.empty` and passes an
// allocator to every container call, whereas this code uses `TextBlock.init` and
// allocator-less calls — confirm which API TextBlock currently exposes.
const block = try self.allocator.create(TextBlock);
errdefer self.allocator.destroy(block);
block.* = TextBlock.init(self.allocator);
block.local_sym_index = senior_nlist.index;
block.stab = stab;
block.code = try self.allocator.dupe(u8, code);
block.size = size;
block.alignment = actual_align;
if (aliases.items.len > 0) {
try block.aliases.ensureTotalCapacity(aliases.items.len);
for (aliases.items) |alias| {
block.aliases.appendAssumeCapacity(alias.index);
const sym = self.macho_file.locals.items[alias.index];
const reg = &sym.payload.regular;
reg.segment_id = self.match.seg;
reg.section_id = self.match.sect;
}
}
// NOTE(review): `relocs` and `object` are bare identifiers that nothing in this function
// defines; presumably `self.relocs` and `self.object` are meant — confirm.
try block.parseRelocsFromObject(self.allocator, relocs, object, .{
.base_addr = start_addr,
.macho_file = self.macho_file,
});
// Attach the data-in-code entries that fall inside the block, with their offsets made
// relative to the block's start address.
if (self.macho_file.has_dices) {
const dices = filterDice(
self.object.data_in_code_entries.items,
senior_nlist.nlist.n_value,
senior_nlist.nlist.n_value + size,
);
try block.dices.ensureTotalCapacity(dices.len);
for (dices) |dice| {
block.dices.appendAssumeCapacity(.{
.offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
.length = dice.length,
.kind = dice.kind,
});
}
}
self.index += 1;
return block;
}
};
/// Converts every section of this object that has a counterpart in the output file into one
/// `TextBlock` spanning the whole section, and links that block into `macho_file`.
///
/// For each handled section this reads the section's bytes and relocation entries from the
/// file, makes sure a local symbol standing for the section exists in `macho_file.locals`,
/// records the symbols located inside the section as `contained` entries of the block, grows
/// the output section's size and alignment, and appends the block to the per-section chain in
/// `macho_file.blocks`. Sections for which `getMatchingSection` yields null are skipped.
///
/// Requires `file`, `name`, `segment_cmd_index` and `dysymtab_cmd_index` to be set, and every
/// symbol located in a handled section to be present in `symbol_mapping`.
///
/// Everything that ends up owned by `macho_file` or by the block is allocated with
/// `macho_file.base.allocator`; only this object's own `text_blocks` / `sections_as_symbols`
/// and the scratch buffers use `self.allocator`.
pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;

    log.debug("analysing {s}", .{self.name.?});

    // You would expect that the symbol table is at least pre-sorted based on symbol's type:
    // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
    // the GO compiler does not necessarily respect that therefore we sort immediately by type
    // and address within.
    var sorted_all_nlists = std.ArrayList(NlistWithIndex).init(self.allocator);
    defer sorted_all_nlists.deinit();
    try sorted_all_nlists.ensureTotalCapacity(self.symtab.items.len);

    for (self.symtab.items) |nlist, index| {
        sorted_all_nlists.appendAssumeCapacity(.{
            .nlist = nlist,
            .index = @intCast(u32, index),
        });
    }

    sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan);

    const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;

    // We only care about defined symbols, so filter every other out.
    const sorted_nlists = sorted_all_nlists.items[dysymtab.ilocalsym..dysymtab.iundefsym];

    for (seg.sections.items) |sect, id| {
        const sect_id = @intCast(u8, id);
        log.debug("putting section '{s},{s}' as a TextBlock", .{
            segmentName(sect),
            sectionName(sect),
        });

        // Get matching segment/section in the final artifact.
        const match = (try macho_file.getMatchingSection(sect)) orelse {
            log.debug("unhandled section", .{});
            continue;
        };

        // Read section's code
        const code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
        defer self.allocator.free(code);
        _ = try self.file.?.preadAll(code, sect.offset);

        // Read section's list of relocations
        const raw_relocs = try self.allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
        defer self.allocator.free(raw_relocs);
        _ = try self.file.?.preadAll(raw_relocs, sect.reloff);
        const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);

        // Symbols within this section only.
        const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);

        // Is there any padding between symbols within the section?
        // const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
        // TODO is it perhaps worth skip parsing subsections in Debug mode and not worry about
        // duplicates at all? Need some benchmarks!
        // const is_splittable = false;

        // Data-in-code entries are only considered for the `__TEXT,__text` section, and only
        // when the object actually has any.
        macho_file.has_dices = blk: {
            if (self.text_section_index) |index| {
                if (index != id) break :blk false;
                if (self.data_in_code_entries.items.len == 0) break :blk false;
                break :blk true;
            }
            break :blk false;
        };
        macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;

        {
            // next: {
            // if (is_splittable) blocks: {
            // if (filtered_nlists.len == 0) break :blocks;
            // // If the first nlist does not match the start of the section,
            // // then we need encapsulate the memory range [section start, first symbol)
            // // as a temporary symbol and insert the matching TextBlock.
            // const first_nlist = filtered_nlists[0].nlist;
            // if (first_nlist.n_value > sect.addr) {
            // const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: {
            // const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{
            // self.name.?,
            // segmentName(sect),
            // sectionName(sect),
            // });
            // defer self.allocator.free(name);
            // const symbol = try zld.allocator.create(Symbol);
            // symbol.* = .{
            // .strx = try zld.makeString(name),
            // .payload = .{ .undef = .{} },
            // };
            // try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol);
            // break :symbol symbol;
            // };
            // const local_sym_index = @intCast(u32, zld.locals.items.len);
            // symbol.payload = .{
            // .regular = .{
            // .linkage = .translation_unit,
            // .address = sect.addr,
            // .segment_id = match.seg,
            // .section_id = match.sect,
            // .file = self,
            // .local_sym_index = local_sym_index,
            // },
            // };
            // try zld.locals.append(zld.allocator, symbol);
            // const block_code = code[0 .. first_nlist.n_value - sect.addr];
            // const block_size = block_code.len;
            // const block = try self.allocator.create(TextBlock);
            // errdefer self.allocator.destroy(block);
            // block.* = TextBlock.init(self.allocator);
            // block.local_sym_index = local_sym_index;
            // block.code = try self.allocator.dupe(u8, block_code);
            // block.size = block_size;
            // block.alignment = sect.@"align";
            // const block_relocs = filterRelocs(relocs, 0, block_size);
            // if (block_relocs.len > 0) {
            // try self.parseRelocs(zld, block_relocs, block, 0);
            // }
            // if (zld.has_dices) {
            // const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size);
            // try block.dices.ensureTotalCapacity(dices.len);
            // for (dices) |dice| {
            // block.dices.appendAssumeCapacity(.{
            // .offset = dice.offset - try math.cast(u32, sect.addr),
            // .length = dice.length,
            // .kind = dice.kind,
            // });
            // }
            // }
            // // Update target section's metadata
            // // TODO should we update segment's size here too?
            // // How does it tie with incremental space allocs?
            // const tseg = &zld.load_commands.items[match.seg].Segment;
            // const tsect = &tseg.sections.items[match.sect];
            // const new_alignment = math.max(tsect.@"align", block.alignment);
            // const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
            // const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
            // tsect.size = new_size;
            // tsect.@"align" = new_alignment;
            // if (zld.blocks.getPtr(match)) |last| {
            // last.*.next = block;
            // block.prev = last.*;
            // last.* = block;
            // } else {
            // try zld.blocks.putNoClobber(zld.allocator, match, block);
            // }
            // try self.text_blocks.append(self.allocator, block);
            // }
            // var parser = TextBlockParser{
            // .allocator = self.allocator,
            // .section = sect,
            // .code = code,
            // .relocs = relocs,
            // .object = self,
            // .zld = zld,
            // .nlists = filtered_nlists,
            // .match = match,
            // };
            // while (try parser.next()) |block| {
            // const sym = zld.locals.items[block.local_sym_index];
            // const reg = &sym.payload.regular;
            // if (reg.file) |file| {
            // if (file != self) {
            // log.debug("deduping definition of {s} in {s}", .{ zld.getString(sym.strx), self.name.? });
            // block.deinit();
            // self.allocator.destroy(block);
            // continue;
            // }
            // }
            // if (reg.address == sect.addr) {
            // if (self.sections_as_symbols.get(sect_id)) |alias| {
            // // Add alias.
            // const local_sym_index = @intCast(u32, zld.locals.items.len);
            // const reg_alias = &alias.payload.regular;
            // reg_alias.segment_id = match.seg;
            // reg_alias.section_id = match.sect;
            // reg_alias.local_sym_index = local_sym_index;
            // try block.aliases.append(local_sym_index);
            // try zld.locals.append(zld.allocator, alias);
            // }
            // }
            // // Update target section's metadata
            // // TODO should we update segment's size here too?
            // // How does it tie with incremental space allocs?
            // const tseg = &zld.load_commands.items[match.seg].Segment;
            // const tsect = &tseg.sections.items[match.sect];
            // const new_alignment = math.max(tsect.@"align", block.alignment);
            // const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
            // const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
            // tsect.size = new_size;
            // tsect.@"align" = new_alignment;
            // if (zld.blocks.getPtr(match)) |last| {
            // last.*.next = block;
            // block.prev = last.*;
            // last.* = block;
            // } else {
            // try zld.blocks.putNoClobber(zld.allocator, match, block);
            // }
            // try self.text_blocks.append(self.allocator, block);
            // }
            // break :next;
            // }

            // Since there is no symbol to refer to this block, we create
            // a temp one, unless we already did that when working out the relocations
            // of other text blocks.
            const sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{
                self.name.?,
                segmentName(sect),
                sectionName(sect),
            });
            defer self.allocator.free(sym_name);

            const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
                const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
                try macho_file.locals.append(macho_file.base.allocator, .{
                    .n_strx = try macho_file.makeString(sym_name),
                    .n_type = macho.N_SECT,
                    .n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable,
                    .n_desc = 0,
                    .n_value = sect.addr,
                });
                try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, block_local_sym_index);
                break :blk block_local_sym_index;
            };

            const block = blk: {
                const new_block = try macho_file.base.allocator.create(TextBlock);
                // Only active inside this scope: it covers the window in which the block has
                // been created but is not yet tracked by `managed_blocks`.
                errdefer macho_file.base.allocator.destroy(new_block);

                new_block.* = TextBlock.empty;
                new_block.local_sym_index = block_local_sym_index;
                new_block.size = sect.size;
                new_block.alignment = sect.@"align";
                try macho_file.managed_blocks.append(macho_file.base.allocator, new_block);
                break :blk new_block;
            };

            try block.code.appendSlice(macho_file.base.allocator, code);

            // NOTE(review): the allocator handed to parseRelocsFromObject is left as it was;
            // confirm whether that function stores allocations on the block, in which case it
            // should receive macho_file.base.allocator as well.
            try block.parseRelocsFromObject(self.allocator, relocs, self, .{
                .base_addr = 0,
                .macho_file = macho_file,
            });

            if (macho_file.has_dices) {
                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
                try block.dices.ensureTotalCapacity(macho_file.base.allocator, dices.len);

                for (dices) |dice| {
                    block.dices.appendAssumeCapacity(.{
                        .offset = dice.offset - try math.cast(u32, sect.addr),
                        .length = dice.length,
                        .kind = dice.kind,
                    });
                }
            }

            // Since this block gets a helper local temporary symbol that didn't exist
            // in the object file which encompasses the entire section, we need traverse
            // the filtered symbols and note which symbol is contained within so that
            // we can properly allocate addresses down the line.
            // While we're at it, we need to update segment,section mapping of each symbol too.
            try block.contained.ensureTotalCapacity(macho_file.base.allocator, filtered_nlists.len);

            for (filtered_nlists) |nlist_with_index| {
                const nlist = nlist_with_index.nlist;
                const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
                const local = &macho_file.locals.items[local_sym_index];
                local.n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable;

                const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
                    // TODO there has to be a better way to handle this.
                    for (di.inner.func_list.items) |func| {
                        if (func.pc_range) |range| {
                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
                                break :blk TextBlock.Stab{
                                    .function = range.end - range.start,
                                };
                            }
                        }
                    }
                    // TODO
                    // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
                    break :blk .static;
                } else null;

                block.contained.appendAssumeCapacity(.{
                    .local_sym_index = local_sym_index,
                    .offset = nlist.n_value - sect.addr,
                    .stab = stab,
                });
            }

            // Update target section's metadata
            // TODO should we update segment's size here too?
            // How does it tie with incremental space allocs?
            const tseg = &macho_file.load_commands.items[match.seg].Segment;
            const tsect = &tseg.sections.items[match.sect];
            // `align` is stored as a power-of-two exponent, hence the powi before aligning.
            const new_alignment = math.max(tsect.@"align", block.alignment);
            const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
            const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
            tsect.size = new_size;
            tsect.@"align" = new_alignment;

            // `macho_file.blocks` maps each output section to the LAST block of its chain.
            if (macho_file.blocks.getPtr(match)) |last| {
                last.*.next = block;
                block.prev = last.*;
                last.* = block;
            } else {
                try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block);
            }

            try self.text_blocks.append(self.allocator, block);
        }
    }
}
/// Copies the symbol table and the string table named by the LC_SYMTAB command out of the
/// file and appends them to `symtab` and `strtab`. Does nothing when the object has no
/// LC_SYMTAB command. Requires `file` to be set.
fn parseSymtab(self: *Object) !void {
    const index = self.symtab_cmd_index orelse return;
    const symtab_cmd = self.load_commands.items[index].Symtab;

    // Allocate the scratch buffer as nlist_64 entries and read into its byte view. A plain
    // byte buffer carries no alignment guarantee, so reinterpreting it as nlist_64 entries
    // would rely on the allocator happening to hand back suitably aligned memory.
    const nlists = try self.allocator.alloc(macho.nlist_64, symtab_cmd.nsyms);
    defer self.allocator.free(nlists);
    _ = try self.file.?.preadAll(mem.sliceAsBytes(nlists), symtab_cmd.symoff);
    try self.symtab.appendSlice(self.allocator, nlists);

    const strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
    defer self.allocator.free(strtab);
    _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
    try self.strtab.appendSlice(self.allocator, strtab);
}
/// Parses the object's DWARF sections and stores the result on `self`, together with the name
/// and compilation directory of its compile unit and, if not set yet, the file's mtime.
///
/// Returns without touching `self` when the object lacks the required DWARF sections or when
/// no compile unit can be found. On any failure `self` is left unchanged and nothing stays
/// allocated.
pub fn parseDebugInfo(self: *Object) !void {
    log.debug("parsing debug info in '{s}'", .{self.name.?});

    var debug_info = (try DebugInfo.parseFromObject(self.allocator, self)) orelse return;
    // Until it is stored on `self` at the bottom, the parsed info is owned by this function.
    errdefer debug_info.deinit(self.allocator);

    // We assume there is only one CU.
    const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
        error.MissingDebugInfo => {
            // TODO audit cases with missing debug info and audit our dwarf.zig module.
            log.debug("invalid or missing debug info in {s}; skipping", .{self.name.?});
            // A plain return does not trigger the errdefer above, so release explicitly.
            debug_info.deinit(self.allocator);
            return;
        },
        else => |e| return e,
    };
    const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name);
    const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir);

    // Duplicate both strings before publishing anything, so that a failed allocation cannot
    // leave `self` half-updated.
    const tu_name = try self.allocator.dupe(u8, name);
    errdefer self.allocator.free(tu_name);
    const tu_comp_dir = try self.allocator.dupe(u8, comp_dir);

    // Nothing below can fail; ownership moves to `self` (released by `deinit`).
    self.debug_info = debug_info;
    self.tu_name = tu_name;
    self.tu_comp_dir = tu_comp_dir;

    if (self.mtime == null) {
        self.mtime = mtime: {
            const file = self.file orelse break :mtime 0;
            const stat = file.stat() catch break :mtime 0;
            // `stat.mtime` is in nanoseconds; store whole seconds.
            break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
        };
    }
}
/// Reads the data-in-code entries named by the LC_DATA_IN_CODE command and appends them to
/// `data_in_code_entries`. Does nothing when the object has no such command.
/// Requires `file` to be set.
pub fn parseDataInCode(self: *Object) !void {
    const cmd_index = self.data_in_code_cmd_index orelse return;
    const linkedit = self.load_commands.items[cmd_index].LinkeditData;

    var raw = try self.allocator.alloc(u8, linkedit.datasize);
    defer self.allocator.free(raw);
    _ = try self.file.?.preadAll(raw, linkedit.dataoff);

    var stream = io.fixedBufferStream(raw);
    var reader = stream.reader();

    // Decode entries until the buffer cannot supply another whole one.
    while (reader.readStruct(macho.data_in_code_entry)) |entry| {
        try self.data_in_code_entries.append(self.allocator, entry);
    } else |err| switch (err) {
        error.EndOfStream => {},
        else => |e| return e,
    }
}
/// Reads the contents of the section at `index` (within the object's segment command) into a
/// freshly allocated buffer. The caller owns the returned slice and frees it with `allocator`.
/// Requires `file` and `segment_cmd_index` to be set.
fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
    const sect = seg.sections.items[index];
    const buffer = try allocator.alloc(u8, @intCast(usize, sect.size));
    // Without this the buffer would leak whenever the read below fails.
    errdefer allocator.free(buffer);
    _ = try self.file.?.preadAll(buffer, sect.offset);
    return buffer;
}
/// Returns the NUL-terminated string that starts at byte offset `off` of the string table.
/// The result points into `strtab` and is only valid while that list is left unmodified.
/// Asserts that `off` lies within the table.
pub fn getString(self: Object, off: u32) []const u8 {
    assert(off < self.strtab.items.len);
    const start = @ptrCast([*:0]const u8, self.strtab.items.ptr + off);
    return mem.spanZ(start);
}