/// Each Atom always gets a symbol with the fully qualified name.
/// The symbol can reside in the `symtab` array of any object file context
/// structure (see `Object`), or, if the symbol is a synthetic symbol such as
/// a GOT cell or a stub trampoline, in the linker's `locals` arraylist.
/// If this field is 0 and `file` is 0, the codegen size is 0 and there is no
/// symbol or offset table entry.
sym_index: u32 = 0,
/// 0 means this Atom is a synthetic Atom, such as a GOT cell, defined by the linker.
/// Otherwise, it is the 1-based index of the object file this Atom comes from.
/// Prefer using `getFile()` helper to get the file index out rather than using
/// the field directly.
file: u32 = 0,
/// If this Atom is not a synthetic Atom, i.e., it references a subsection in
/// an Object file, `inner_sym_index` and `inner_nsyms_trailing` describe
/// whether and where this Atom contains additional symbol references that
/// fall within this Atom's address range. These could, for example, be alias
/// symbols used internally by the relocation records; alternatively, if the
/// Object file couldn't be split into subsections, this Atom may encompass an
/// entire input section.
inner_sym_index: u32 = 0,
inner_nsyms_trailing: u32 = 0,
/// Size of this atom.
/// Unlike in ELF, we need to store the size of this symbol as part of
/// the atom since `macho.nlist_64` lacks this information.
size: u64 = 0,
/// Alignment of this atom as a power of 2.
/// For instance, an alignment of 0 should be read as 2^0 = 1 byte aligned.
alignment: u32 = 0,
/// Points to the previous and next neighbouring atoms.
/// TODO use the same trick as with symbols: reserve index 0 as null atom
next_index: ?Index = null,
prev_index: ?Index = null,
pub const Index = u32;
pub const Binding = struct {
target: SymbolWithLoc,
offset: u64,
};
/// Returns `null` if the Atom is a synthetic Atom.
/// Otherwise, returns an index into an array of Objects.
pub fn getFile(self: Atom) ?u32 {
if (self.file == 0) return null;
return self.file - 1;
}
pub fn getSymbolIndex(self: Atom) ?u32 {
if (self.getFile() == null and self.sym_index == 0) return null;
return self.sym_index;
}
/// Returns the symbol referencing this atom.
pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 {
return self.getSymbolPtr(macho_file).*;
}
/// Returns a pointer to the symbol referencing this atom.
pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 {
const sym_index = self.getSymbolIndex().?;
return macho_file.getSymbolPtr(.{ .sym_index = sym_index, .file = self.file });
}
pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc {
const sym_index = self.getSymbolIndex().?;
return .{ .sym_index = sym_index, .file = self.file };
}
/// Returns the name of this atom.
pub fn getName(self: Atom, macho_file: *MachO) []const u8 {
const sym_index = self.getSymbolIndex().?;
return macho_file.getSymbolName(.{ .sym_index = sym_index, .file = self.file });
}
/// Returns how much room there is to grow in virtual address space.
/// File offset relocation happens transparently, so it is not included in
/// this calculation.
pub fn capacity(self: Atom, macho_file: *MachO) u64 {
const self_sym = self.getSymbol(macho_file);
if (self.next_index) |next_index| {
const next = macho_file.getAtom(next_index);
const next_sym = next.getSymbol(macho_file);
return next_sym.n_value - self_sym.n_value;
} else {
// We are the last atom.
// The capacity is limited only by virtual address space.
return macho_file.allocatedVirtualSize(self_sym.n_value);
}
}
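// For example, if this atom's symbol sits at 0x1000 and the next atom's symbol
// at 0x1040, the capacity is 0x40 regardless of `size`, which may be smaller,
// leaving slack to grow in place without moving the atom.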
pub fn freeListEligible(self: Atom, macho_file: *MachO) bool {
// No need to keep a free list node for the last atom.
const next_index = self.next_index orelse return false;
const next = macho_file.getAtom(next_index);
const self_sym = self.getSymbol(macho_file);
const next_sym = next.getSymbol(macho_file);
const cap = next_sym.n_value - self_sym.n_value;
const ideal_cap = MachO.padToIdeal(self.size);
if (cap <= ideal_cap) return false;
const surplus = cap - ideal_cap;
return surplus >= MachO.min_text_capacity;
}
pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 {
const gpa = zld.gpa;
const segname = sect.segName();
const sectname = sect.sectName();
const res: ?u8 = blk: {
if (mem.eql(u8, "__LLVM", segname)) {
log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{
sect.flags, segname, sectname,
});
break :blk null;
}
// We handle unwind info separately.
if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) {
break :blk null;
}
if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) {
break :blk null;
}
if (sect.isCode()) {
if (zld.text_section_index == null) {
zld.text_section_index = try MachO.initSection(
gpa,
zld,
"__TEXT",
"__text",
.{
.flags = macho.S_REGULAR |
macho.S_ATTR_PURE_INSTRUCTIONS |
macho.S_ATTR_SOME_INSTRUCTIONS,
},
);
}
break :blk zld.text_section_index.?;
}
if (sect.isDebug()) {
break :blk null;
}
switch (sect.type()) {
macho.S_4BYTE_LITERALS,
macho.S_8BYTE_LITERALS,
macho.S_16BYTE_LITERALS,
=> {
break :blk zld.getSectionByName("__TEXT", "__const") orelse try MachO.initSection(
gpa,
zld,
"__TEXT",
"__const",
.{},
);
},
macho.S_CSTRING_LITERALS => {
if (mem.startsWith(u8, sectname, "__objc")) {
break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection(
gpa,
zld,
segname,
sectname,
.{},
);
}
break :blk zld.getSectionByName("__TEXT", "__cstring") orelse try MachO.initSection(
gpa,
zld,
"__TEXT",
"__cstring",
.{ .flags = macho.S_CSTRING_LITERALS },
);
},
macho.S_MOD_INIT_FUNC_POINTERS,
macho.S_MOD_TERM_FUNC_POINTERS,
=> {
break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try MachO.initSection(
gpa,
zld,
"__DATA_CONST",
sectname,
.{ .flags = sect.flags },
);
},
macho.S_LITERAL_POINTERS,
macho.S_ZEROFILL,
macho.S_THREAD_LOCAL_VARIABLES,
macho.S_THREAD_LOCAL_VARIABLE_POINTERS,
macho.S_THREAD_LOCAL_REGULAR,
macho.S_THREAD_LOCAL_ZEROFILL,
=> {
break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection(
gpa,
zld,
segname,
sectname,
.{ .flags = sect.flags },
);
},
macho.S_COALESCED => {
break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection(
gpa,
zld,
segname,
sectname,
.{},
);
},
macho.S_REGULAR => {
if (mem.eql(u8, segname, "__TEXT")) {
if (mem.eql(u8, sectname, "__rodata") or
mem.eql(u8, sectname, "__typelink") or
mem.eql(u8, sectname, "__itablink") or
mem.eql(u8, sectname, "__gosymtab") or
mem.eql(u8, sectname, "__gopclntab"))
{
break :blk zld.getSectionByName("__TEXT", sectname) orelse try MachO.initSection(
gpa,
zld,
"__TEXT",
sectname,
.{},
);
}
}
if (mem.eql(u8, segname, "__DATA")) {
if (mem.eql(u8, sectname, "__const") or
mem.eql(u8, sectname, "__cfstring") or
mem.eql(u8, sectname, "__objc_classlist") or
mem.eql(u8, sectname, "__objc_imageinfo"))
{
break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try MachO.initSection(
gpa,
zld,
"__DATA_CONST",
sectname,
.{},
);
} else if (mem.eql(u8, sectname, "__data")) {
if (zld.data_section_index == null) {
zld.data_section_index = try MachO.initSection(
gpa,
zld,
"__DATA",
"__data",
.{},
);
}
break :blk zld.data_section_index.?;
}
}
break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection(
gpa,
zld,
segname,
sectname,
.{},
);
},
else => break :blk null,
}
};
// TODO we can do this directly in the selection logic above.
// Or is it not worth it?
if (zld.data_const_section_index == null) {
if (zld.getSectionByName("__DATA_CONST", "__const")) |index| {
zld.data_const_section_index = index;
}
}
if (zld.thread_vars_section_index == null) {
if (zld.getSectionByName("__DATA", "__thread_vars")) |index| {
zld.thread_vars_section_index = index;
}
}
if (zld.thread_data_section_index == null) {
if (zld.getSectionByName("__DATA", "__thread_data")) |index| {
zld.thread_data_section_index = index;
}
}
if (zld.thread_bss_section_index == null) {
if (zld.getSectionByName("__DATA", "__thread_bss")) |index| {
zld.thread_bss_section_index = index;
}
}
if (zld.bss_section_index == null) {
if (zld.getSectionByName("__DATA", "__bss")) |index| {
zld.bss_section_index = index;
}
}
return res;
}
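// For example, an input section "__DATA,__const" of type S_REGULAR folds into
// the output section "__DATA_CONST,__const", an input "__TEXT,__cstring" of
// type S_CSTRING_LITERALS folds into "__TEXT,__cstring" (created on first
// use), and debug sections map to null since they are not copied into the
// output file.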
pub fn addRelocation(macho_file: *MachO, atom_index: Index, reloc: Relocation) !void {
return addRelocations(macho_file, atom_index, &[_]Relocation{reloc});
}
pub fn addRelocations(macho_file: *MachO, atom_index: Index, relocs: []const Relocation) !void {
const gpa = macho_file.base.allocator;
const gop = try macho_file.relocs.getOrPut(gpa, atom_index);
if (!gop.found_existing) {
gop.value_ptr.* = .{};
}
try gop.value_ptr.ensureUnusedCapacity(gpa, relocs.len);
for (relocs) |reloc| {
log.debug(" (adding reloc of type {s} to target %{d})", .{
@tagName(reloc.type),
reloc.target.sym_index,
});
gop.value_ptr.appendAssumeCapacity(reloc);
}
}
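// A self-contained sketch of the map-of-lists pattern used by addRelocations,
// addRebase, and addBinding above; the u32 key and element types here are
// placeholders for illustration only.
test "getOrPut map-of-lists pattern" {
const gpa = std.testing.allocator;
var map: std.AutoArrayHashMapUnmanaged(u32, std.ArrayListUnmanaged(u32)) = .{};
defer {
for (map.values()) |*list| list.deinit(gpa);
map.deinit(gpa);
}
const gop = try map.getOrPut(gpa, 1);
if (!gop.found_existing) {
gop.value_ptr.* = .{};
}
try gop.value_ptr.ensureUnusedCapacity(gpa, 2);
gop.value_ptr.appendAssumeCapacity(42);
gop.value_ptr.appendAssumeCapacity(43);
try std.testing.expectEqual(@as(usize, 2), map.get(1).?.items.len);
}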
pub fn addRebase(macho_file: *MachO, atom_index: Index, offset: u32) !void {
const gpa = macho_file.base.allocator;
const atom = macho_file.getAtom(atom_index);
log.debug(" (adding rebase at offset 0x{x} in %{?d})", .{ offset, atom.getSymbolIndex() });
const gop = try macho_file.rebases.getOrPut(gpa, atom_index);
if (!gop.found_existing) {
gop.value_ptr.* = .{};
}
try gop.value_ptr.append(gpa, offset);
}
pub fn addBinding(macho_file: *MachO, atom_index: Index, binding: Binding) !void {
const gpa = macho_file.base.allocator;
const atom = macho_file.getAtom(atom_index);
log.debug(" (adding binding to symbol {s} at offset 0x{x} in %{?d})", .{
macho_file.getSymbolName(binding.target),
binding.offset,
atom.getSymbolIndex(),
});
const gop = try macho_file.bindings.getOrPut(gpa, atom_index);
if (!gop.found_existing) {
gop.value_ptr.* = .{};
}
try gop.value_ptr.append(gpa, binding);
}
pub fn resolveRelocations(
macho_file: *MachO,
atom_index: Index,
relocs: []*const Relocation,
code: []u8,
) void {
log.debug("relocating '{s}'", .{macho_file.getAtom(atom_index).getName(macho_file)});
for (relocs) |reloc| {
reloc.resolve(macho_file, atom_index, code);
}
}
pub fn freeRelocations(macho_file: *MachO, atom_index: Index) void {
const gpa = macho_file.base.allocator;
var removed_relocs = macho_file.relocs.fetchOrderedRemove(atom_index);
if (removed_relocs) |*relocs| relocs.value.deinit(gpa);
var removed_rebases = macho_file.rebases.fetchOrderedRemove(atom_index);
if (removed_rebases) |*rebases| rebases.value.deinit(gpa);
var removed_bindings = macho_file.bindings.fetchOrderedRemove(atom_index);
if (removed_bindings) |*bindings| bindings.value.deinit(gpa);
}
const InnerSymIterator = struct {
sym_index: u32,
nsyms: u32,
file: u32,
pos: u32 = 0,
pub fn next(it: *@This()) ?SymbolWithLoc {
if (it.pos == it.nsyms) return null;
const res = SymbolWithLoc{ .sym_index = it.sym_index + it.pos, .file = it.file };
it.pos += 1;
return res;
}
};
/// Returns an iterator over potentially contained symbols.
/// Panics when called on a synthetic Atom.
pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: Index) InnerSymIterator {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null);
return .{
.sym_index = atom.inner_sym_index,
.nsyms = atom.inner_nsyms_trailing,
.file = atom.file,
};
}
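// The iterator logic itself is pure, so it can be exercised standalone:
test "InnerSymIterator" {
var it = InnerSymIterator{ .sym_index = 10, .nsyms = 2, .file = 1 };
try std.testing.expectEqual(@as(u32, 10), it.next().?.sym_index);
try std.testing.expectEqual(@as(u32, 11), it.next().?.sym_index);
try std.testing.expect(it.next() == null);
}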
/// Returns a section alias symbol if one is defined.
/// An alias symbol is used to represent the start of an input section
/// if there were no symbols defined within that range.
/// Alias symbols are only used on x86_64.
pub fn getSectionAlias(zld: *Zld, atom_index: Index) ?SymbolWithLoc {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null);
const object = zld.objects.items[atom.getFile().?];
const nbase = @as(u32, @intCast(object.in_symtab.?.len));
const ntotal = @as(u32, @intCast(object.symtab.len));
var sym_index: u32 = nbase;
while (sym_index < ntotal) : (sym_index += 1) {
if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| {
if (other_atom_index == atom_index) return SymbolWithLoc{
.sym_index = sym_index,
.file = atom.file,
};
}
}
return null;
}
/// Given the index of a symbol contained within this Atom, calculates its
/// offset from the start of this Atom.
pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: Index, sym_index: u32) u64 {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null);
if (atom.sym_index == sym_index) return 0;
const object = zld.objects.items[atom.getFile().?];
const source_sym = object.getSourceSymbol(sym_index).?;
const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym|
sym.n_value
else blk: {
const nbase = @as(u32, @intCast(object.in_symtab.?.len));
const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
const source_sect = object.getSourceSection(sect_id);
break :blk source_sect.addr;
};
return source_sym.n_value - base_addr;
}
pub fn scanAtomRelocs(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void {
const arch = zld.options.target.cpu.arch;
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // synthetic atoms do not have relocs
return switch (arch) {
.aarch64 => scanAtomRelocsArm64(zld, atom_index, relocs),
.x86_64 => scanAtomRelocsX86(zld, atom_index, relocs),
else => unreachable,
};
}
const RelocContext = struct {
base_addr: i64 = 0,
base_offset: i32 = 0,
};
pub fn getRelocContext(zld: *Zld, atom_index: Index) RelocContext {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // synthetic atoms do not have relocs
const object = zld.objects.items[atom.getFile().?];
if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
const source_sect = object.getSourceSection(source_sym.n_sect - 1);
return .{
.base_addr = @as(i64, @intCast(source_sect.addr)),
.base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)),
};
}
const nbase = @as(u32, @intCast(object.in_symtab.?.len));
const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
const source_sect = object.getSourceSection(sect_id);
return .{
.base_addr = @as(i64, @intCast(source_sect.addr)),
.base_offset = 0,
};
}
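// For example, an atom at offset 0x20 into a source section at address 0x100
// yields base_addr = 0x100 and base_offset = 0x20, so a relocation whose
// r_address is 0x24 applies at offset 0x4 within the atom's code.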
pub fn parseRelocTarget(zld: *Zld, ctx: struct {
object_id: u32,
rel: macho.relocation_info,
code: []const u8,
base_addr: i64 = 0,
base_offset: i32 = 0,
}) SymbolWithLoc {
const tracy = trace(@src());
defer tracy.end();
const object = &zld.objects.items[ctx.object_id];
log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name });
const sym_index = if (ctx.rel.r_extern == 0) sym_index: {
const sect_id = @as(u8, @intCast(ctx.rel.r_symbolnum - 1));
const rel_offset = @as(u32, @intCast(ctx.rel.r_address - ctx.base_offset));
const address_in_section = if (ctx.rel.r_pcrel == 0) blk: {
break :blk if (ctx.rel.r_length == 3)
mem.readIntLittle(u64, ctx.code[rel_offset..][0..8])
else
mem.readIntLittle(u32, ctx.code[rel_offset..][0..4]);
} else blk: {
assert(zld.options.target.cpu.arch == .x86_64);
const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) {
.X86_64_RELOC_SIGNED => 0,
.X86_64_RELOC_SIGNED_1 => 1,
.X86_64_RELOC_SIGNED_2 => 2,
.X86_64_RELOC_SIGNED_4 => 4,
else => unreachable,
};
const addend = mem.readIntLittle(i32, ctx.code[rel_offset..][0..4]);
const target_address = @as(i64, @intCast(ctx.base_addr)) + ctx.rel.r_address + 4 + correction + addend;
break :blk @as(u64, @intCast(target_address));
};
// Locate the symbol at the computed address within the target section.
log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id });
break :sym_index object.getSymbolByAddress(address_in_section, sect_id);
} else object.reverse_symtab_lookup[ctx.rel.r_symbolnum];
const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 };
const sym = zld.getSymbol(sym_loc);
const target = if (sym.sect() and !sym.ext())
sym_loc
else if (object.getGlobal(sym_index)) |global_index|
zld.globals.items[global_index]
else
sym_loc;
log.debug(" | target %{d} ('{s}') in object({?d})", .{
target.sym_index,
zld.getSymbolName(target),
target.getFile(),
});
return target;
}
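// Worked example for the local (r_extern == 0), pc-relative branch of
// parseRelocTarget: for an X86_64_RELOC_SIGNED_4 at r_address 0x10 whose code
// holds the addend -8, the referenced address is
// base_addr + 0x10 + 4 + 4 + (-8): the end of the 4-byte displacement field,
// plus the 4-byte correction for the trailing immediate, plus the addend.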
pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc) ?Index {
if (target.getFile() == null) {
const target_sym_name = zld.getSymbolName(target);
if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null;
if (mem.eql(u8, "___dso_handle", target_sym_name)) return null;
unreachable; // referenced symbol not found
}
const object = zld.objects.items[target.getFile().?];
return object.getAtomIndexForSymbol(target.sym_index);
}
fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void {
for (relocs) |rel| {
const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
switch (rel_type) {
.ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue,
else => {},
}
if (rel.r_extern == 0) continue;
const atom = zld.getAtom(atom_index);
const object = &zld.objects.items[atom.getFile().?];
const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum];
const sym_loc = SymbolWithLoc{
.sym_index = sym_index,
.file = atom.file,
};
const target = if (object.getGlobal(sym_index)) |global_index|
zld.globals.items[global_index]
else
sym_loc;
switch (rel_type) {
.ARM64_RELOC_BRANCH26 => {
// TODO rewrite relocation
const sym = zld.getSymbol(target);
if (sym.undf()) try zld.addStubEntry(target);
},
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> {
// TODO rewrite relocation
try zld.addGotEntry(target);
},
.ARM64_RELOC_TLVP_LOAD_PAGE21,
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
=> {
const sym = zld.getSymbol(target);
if (sym.undf()) try zld.addTlvPtrEntry(target);
},
else => {},
}
}
}
fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void {
for (relocs) |rel| {
const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type));
switch (rel_type) {
.X86_64_RELOC_SUBTRACTOR => continue,
else => {},
}
if (rel.r_extern == 0) continue;
const atom = zld.getAtom(atom_index);
const object = &zld.objects.items[atom.getFile().?];
const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum];
const sym_loc = SymbolWithLoc{
.sym_index = sym_index,
.file = atom.file,
};
const target = if (object.getGlobal(sym_index)) |global_index|
zld.globals.items[global_index]
else
sym_loc;
switch (rel_type) {
.X86_64_RELOC_BRANCH => {
// TODO rewrite relocation
const sym = zld.getSymbol(target);
if (sym.undf()) try zld.addStubEntry(target);
},
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => {
// TODO rewrite relocation
try zld.addGotEntry(target);
},
.X86_64_RELOC_TLV => {
const sym = zld.getSymbol(target);
if (sym.undf()) try zld.addTlvPtrEntry(target);
},
else => {},
}
}
}
pub fn resolveRelocs(
zld: *Zld,
atom_index: Index,
atom_code: []u8,
atom_relocs: []align(1) const macho.relocation_info,
) !void {
const arch = zld.options.target.cpu.arch;
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // synthetic atoms do not have relocs
log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{
atom.sym_index,
zld.getSymbolName(atom.getSymbolWithLoc()),
});
const ctx = getRelocContext(zld, atom_index);
return switch (arch) {
.aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, ctx),
.x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, ctx),
else => unreachable,
};
}
pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u64 {
const target_atom_index = getRelocTargetAtomIndex(zld, target) orelse {
// If there is no atom for target, we still need to check for special, atom-less
// symbols such as `___dso_handle`.
const target_name = zld.getSymbolName(target);
const atomless_sym = zld.getSymbol(target);
log.debug(" | atomless target '{s}'", .{target_name});
return atomless_sym.n_value;
};
const target_atom = zld.getAtom(target_atom_index);
log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{
target_atom.sym_index,
zld.getSymbolName(target_atom.getSymbolWithLoc()),
target_atom.getFile(),
});
const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc());
assert(target_sym.n_desc != MachO.N_DEAD);
// If `target` is contained within the target atom, pull its address value.
const offset = if (target_atom.getFile() != null) blk: {
const object = zld.objects.items[target_atom.getFile().?];
break :blk if (object.getSourceSymbol(target.sym_index)) |_|
Atom.calcInnerSymbolOffset(zld, target_atom_index, target.sym_index)
else
0; // section alias
} else 0;
const base_address: u64 = if (is_tlv) base_address: {
// For TLV relocations, the value specified as a relocation is the displacement from the
// TLV initializer (either a value in __thread_data or zero-init in __thread_bss) to the
// first defined TLV template init section, chosen in the following order:
// * __thread_data, if defined, then
// * __thread_bss
const sect_id: u16 = sect_id: {
if (zld.thread_data_section_index) |i| {
break :sect_id i;
} else if (zld.thread_bss_section_index) |i| {
break :sect_id i;
} else {
log.err("threadlocal variables present but no initializer sections found", .{});
log.err(" __thread_data not found", .{});
log.err(" __thread_bss not found", .{});
return error.FailedToResolveRelocationTarget;
}
};
break :base_address zld.sections.items(.header)[sect_id].addr;
} else 0;
return target_sym.n_value + offset - base_address;
}
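// For example, a TLV target defined at 0x4100 while __thread_data starts at
// 0x4000 resolves to the displacement 0x100 from the start of the TLV template
// sections rather than to an absolute address.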
fn resolveRelocsArm64(
zld: *Zld,
atom_index: Index,
atom_code: []u8,
atom_relocs: []align(1) const macho.relocation_info,
context: RelocContext,
) !void {
const atom = zld.getAtom(atom_index);
const object = zld.objects.items[atom.getFile().?];
var addend: ?i64 = null;
var subtractor: ?SymbolWithLoc = null;
for (atom_relocs) |rel| {
const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
switch (rel_type) {
.ARM64_RELOC_ADDEND => {
assert(addend == null);
log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum });
addend = rel.r_symbolnum;
continue;
},
.ARM64_RELOC_SUBTRACTOR => {
assert(subtractor == null);
log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{
@tagName(rel_type),
rel.r_address,
rel.r_symbolnum,
atom.getFile(),
});
subtractor = parseRelocTarget(zld, .{
.object_id = atom.getFile().?,
.rel = rel,
.code = atom_code,
.base_addr = context.base_addr,
.base_offset = context.base_offset,
});
continue;
},
else => {},
}
const target = parseRelocTarget(zld, .{
.object_id = atom.getFile().?,
.rel = rel,
.code = atom_code,
.base_addr = context.base_addr,
.base_offset = context.base_offset,
});
const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset));
log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{
@tagName(rel_type),
rel.r_address,
target.sym_index,
zld.getSymbolName(target),
target.getFile(),
});
const source_addr = blk: {
const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
break :blk source_sym.n_value + rel_offset;
};
const target_addr = blk: {
if (relocRequiresGot(zld, rel)) break :blk zld.getGotEntryAddress(target).?;
if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf())
break :blk zld.getTlvPtrEntryAddress(target).?;
if (relocIsStub(zld, rel) and zld.getSymbol(target).undf())
break :blk zld.getStubsEntryAddress(target).?;
const is_tlv = is_tlv: {
const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
const header = zld.sections.items(.header)[source_sym.n_sect - 1];
break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES;
};
break :blk try getRelocTargetAddress(zld, target, is_tlv);
};
log.debug(" | source_addr = 0x{x}", .{source_addr});
switch (rel_type) {
.ARM64_RELOC_BRANCH26 => {
log.debug(" source {s} (object({?})), target {s}", .{
zld.getSymbolName(atom.getSymbolWithLoc()),
atom.getFile(),
zld.getSymbolName(target),
});
const displacement = if (Relocation.calcPcRelativeDisplacementArm64(
source_addr,
target_addr,
)) |disp| blk: {
log.debug(" | target_addr = 0x{x}", .{target_addr});
break :blk disp;
} else |_| blk: {
const thunk_index = zld.thunk_table.get(atom_index).?;
const thunk = zld.thunks.items[thunk_index];
const thunk_sym_loc = if (zld.getSymbol(target).undf())
thunk.getTrampoline(zld, .stub, target).?
else
thunk.getTrampoline(zld, .atom, target).?;
const thunk_addr = zld.getSymbol(thunk_sym_loc).n_value;
log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_addr});
break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_addr);
};
const code = atom_code[rel_offset..][0..4];
var inst = aarch64.Instruction{
.unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.unconditional_branch_immediate,
), code),
};
inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2))));
mem.writeIntLittle(u32, code, inst.toU32());
},
.ARM64_RELOC_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_TLVP_LOAD_PAGE21,
=> {
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const pages = @as(u21, @bitCast(Relocation.calcNumberOfPages(source_addr, adjusted_target_addr)));
const code = atom_code[rel_offset..][0..4];
var inst = aarch64.Instruction{
.pc_relative_address = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.pc_relative_address,
), code),
};
inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2));
inst.pc_relative_address.immlo = @as(u2, @truncate(pages));
mem.writeIntLittle(u32, code, inst.toU32());
addend = null;
},
.ARM64_RELOC_PAGEOFF12 => {
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const code = atom_code[rel_offset..][0..4];
if (Relocation.isArithmeticOp(code)) {
const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic);
var inst = aarch64.Instruction{
.add_subtract_immediate = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.add_subtract_immediate,
), code),
};
inst.add_subtract_immediate.imm12 = off;
mem.writeIntLittle(u32, code, inst.toU32());
} else {
var inst = aarch64.Instruction{
.load_store_register = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.load_store_register,
), code),
};
const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) {
0 => if (inst.load_store_register.v == 1)
Relocation.PageOffsetInstKind.load_store_128
else
Relocation.PageOffsetInstKind.load_store_8,
1 => .load_store_16,
2 => .load_store_32,
3 => .load_store_64,
});
inst.load_store_register.offset = off;
mem.writeIntLittle(u32, code, inst.toU32());
}
addend = null;
},
.ARM64_RELOC_GOT_LOAD_PAGEOFF12 => {
const code = atom_code[rel_offset..][0..4];
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64);
var inst: aarch64.Instruction = .{
.load_store_register = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.load_store_register,
), code),
};
inst.load_store_register.offset = off;
mem.writeIntLittle(u32, code, inst.toU32());
addend = null;
},
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => {
const code = atom_code[rel_offset..][0..4];
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const RegInfo = struct {
rd: u5,
rn: u5,
size: u2,
};
const reg_info: RegInfo = blk: {
if (Relocation.isArithmeticOp(code)) {
const inst = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.add_subtract_immediate,
), code);
break :blk .{
.rd = inst.rd,
.rn = inst.rn,
.size = inst.sf,
};
} else {
const inst = mem.bytesToValue(meta.TagPayload(
aarch64.Instruction,
aarch64.Instruction.load_store_register,
), code);
break :blk .{
.rd = inst.rt,
.rn = inst.rn,
.size = inst.size,
};
}
};
var inst = if (zld.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{
.load_store_register = .{
.rt = reg_info.rd,
.rn = reg_info.rn,
.offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64),
.opc = 0b01,
.op1 = 0b01,
.v = 0,
.size = reg_info.size,
},
} else aarch64.Instruction{
.add_subtract_immediate = .{
.rd = reg_info.rd,
.rn = reg_info.rn,
.imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic),
.sh = 0,
.s = 0,
.op = 0,
.sf = @as(u1, @truncate(reg_info.size)),
},
};
mem.writeIntLittle(u32, code, inst.toU32());
addend = null;
},
.ARM64_RELOC_POINTER_TO_GOT => {
log.debug(" | target_addr = 0x{x}", .{target_addr});
const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse
return error.Overflow;
mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result)));
},
.ARM64_RELOC_UNSIGNED => {
var ptr_addend = if (rel.r_length == 3)
mem.readIntLittle(i64, atom_code[rel_offset..][0..8])
else
mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
if (rel.r_extern == 0) {
const base_addr = if (target.sym_index >= object.source_address_lookup.len)
@as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr))
else
object.source_address_lookup[target.sym_index];
ptr_addend -= base_addr;
}
const result = blk: {
if (subtractor) |sub| {
const sym = zld.getSymbol(sub);
break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend;
} else {
break :blk @as(i64, @intCast(target_addr)) + ptr_addend;
}
};
log.debug(" | target_addr = 0x{x}", .{result});
if (rel.r_length == 3) {
mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)));
} else {
mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))));
}
subtractor = null;
},
.ARM64_RELOC_ADDEND => unreachable,
.ARM64_RELOC_SUBTRACTOR => unreachable,
}
}
}
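// A standalone sketch of the ADRP page arithmetic behind the *_PAGE21 cases
// above, assuming the usual 4 KiB pages. `Relocation.calcNumberOfPages` is the
// authoritative implementation; `pageDelta` exists only for illustration.
fn pageDelta(source_addr: u64, target_addr: u64) i21 {
const source_page = @as(i32, @intCast(source_addr >> 12));
const target_page = @as(i32, @intCast(target_addr >> 12));
return @as(i21, @intCast(target_page - source_page));
}
test "ADRP page delta" {
try std.testing.expectEqual(@as(i21, 1), pageDelta(0x1000, 0x2fff));
try std.testing.expectEqual(@as(i21, -1), pageDelta(0x2000, 0x1fff));
}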
fn resolveRelocsX86(
zld: *Zld,
atom_index: Index,
atom_code: []u8,
atom_relocs: []align(1) const macho.relocation_info,
context: RelocContext,
) !void {
const atom = zld.getAtom(atom_index);
const object = zld.objects.items[atom.getFile().?];
var subtractor: ?SymbolWithLoc = null;
for (atom_relocs) |rel| {
const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type));
switch (rel_type) {
.X86_64_RELOC_SUBTRACTOR => {
assert(subtractor == null);
log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{
@tagName(rel_type),
rel.r_address,
rel.r_symbolnum,
atom.getFile(),
});
subtractor = parseRelocTarget(zld, .{
.object_id = atom.getFile().?,
.rel = rel,
.code = atom_code,
.base_addr = context.base_addr,
.base_offset = context.base_offset,
});
continue;
},
else => {},
}
const target = parseRelocTarget(zld, .{
.object_id = atom.getFile().?,
.rel = rel,
.code = atom_code,
.base_addr = context.base_addr,
.base_offset = context.base_offset,
});
const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset));
log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{
@tagName(rel_type),
rel.r_address,
target.sym_index,
zld.getSymbolName(target),
target.getFile(),
});
const source_addr = blk: {
const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
break :blk source_sym.n_value + rel_offset;
};
const target_addr = blk: {
if (relocRequiresGot(zld, rel)) break :blk zld.getGotEntryAddress(target).?;
if (relocIsStub(zld, rel) and zld.getSymbol(target).undf())
break :blk zld.getStubsEntryAddress(target).?;
if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf())
break :blk zld.getTlvPtrEntryAddress(target).?;
const is_tlv = is_tlv: {
const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
const header = zld.sections.items(.header)[source_sym.n_sect - 1];
break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES;
};
break :blk try getRelocTargetAddress(zld, target, is_tlv);
};
log.debug(" | source_addr = 0x{x}", .{source_addr});
switch (rel_type) {
.X86_64_RELOC_BRANCH => {
const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
},
.X86_64_RELOC_GOT,
.X86_64_RELOC_GOT_LOAD,
=> {
const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
},
.X86_64_RELOC_TLV => {
const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
if (zld.tlv_ptr_table.lookup.get(target) == null) {
// We need to rewrite the opcode from movq to leaq.
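// movq is opcode 0x8b and leaq is 0x8d; the ModRM byte and the trailing
// 4-byte displacement stay the same, so patching the single opcode byte
// at rel_offset - 2 suffices.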
atom_code[rel_offset - 2] = 0x8d;
}
mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
},
.X86_64_RELOC_SIGNED,
.X86_64_RELOC_SIGNED_1,
.X86_64_RELOC_SIGNED_2,
.X86_64_RELOC_SIGNED_4,
=> {
const correction: u3 = switch (rel_type) {
.X86_64_RELOC_SIGNED => 0,
.X86_64_RELOC_SIGNED_1 => 1,
.X86_64_RELOC_SIGNED_2 => 2,
.X86_64_RELOC_SIGNED_4 => 4,
else => unreachable,
};
var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction;
if (rel.r_extern == 0) {
const base_addr = if (target.sym_index >= object.source_address_lookup.len)
@as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr))
else
object.source_address_lookup[target.sym_index];
addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 -
@as(i64, @intCast(base_addr))));
}
const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction);
mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
},
.X86_64_RELOC_UNSIGNED => {
var addend = if (rel.r_length == 3)
mem.readIntLittle(i64, atom_code[rel_offset..][0..8])
else
mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
if (rel.r_extern == 0) {
const base_addr = if (target.sym_index >= object.source_address_lookup.len)
@as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr))
else
object.source_address_lookup[target.sym_index];
addend -= base_addr;
}
const result = blk: {
if (subtractor) |sub| {
const sym = zld.getSymbol(sub);
break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + addend;
} else {
break :blk @as(i64, @intCast(target_addr)) + addend;
}
};
log.debug(" | target_addr = 0x{x}", .{result});
if (rel.r_length == 3) {
mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)));
} else {
mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))));
}
subtractor = null;
},
.X86_64_RELOC_SUBTRACTOR => unreachable,
}
}
}
pub fn getAtomCode(zld: *Zld, atom_index: Index) []const u8 {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code.
const object = zld.objects.items[atom.getFile().?];
const source_sym = object.getSourceSymbol(atom.sym_index) orelse {
// If there was no matching symbol present in the source symtab, this means
// we are dealing with either an entire section, or a part of it that starts
// at the beginning of the section.
const nbase = @as(u32, @intCast(object.in_symtab.?.len));
const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
const source_sect = object.getSourceSection(sect_id);
assert(!source_sect.isZerofill());
const code = object.getSectionContents(source_sect);
const code_len = @as(usize, @intCast(atom.size));
return code[0..code_len];
};
const source_sect = object.getSourceSection(source_sym.n_sect - 1);
assert(!source_sect.isZerofill());
const code = object.getSectionContents(source_sect);
const offset = @as(usize, @intCast(source_sym.n_value - source_sect.addr));
const code_len = @as(usize, @intCast(atom.size));
return code[offset..][0..code_len];
}
pub fn getAtomRelocs(zld: *Zld, atom_index: Index) []const macho.relocation_info {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for relocs.
const object = zld.objects.items[atom.getFile().?];
const cache = object.relocs_lookup[atom.sym_index];
const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
break :blk source_sym.n_sect - 1;
} else blk: {
// If there was no matching symbol present in the source symtab, this means
// we are dealing with either an entire section, or a part of it that starts
// at the beginning of the section.
const nbase = @as(u32, @intCast(object.in_symtab.?.len));
const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
break :blk sect_id;
};
const source_sect = object.getSourceSection(source_sect_id);
assert(!source_sect.isZerofill());
const relocs = object.getRelocs(source_sect_id);
return relocs[cache.start..][0..cache.len];
}
pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool {
switch (zld.options.target.cpu.arch) {
.aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> return true,
else => return false,
},
.x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
.X86_64_RELOC_GOT,
.X86_64_RELOC_GOT_LOAD,
=> return true,
else => return false,
},
else => unreachable,
}
}
pub fn relocIsTlv(zld: *Zld, rel: macho.relocation_info) bool {
switch (zld.options.target.cpu.arch) {
.aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
.ARM64_RELOC_TLVP_LOAD_PAGE21,
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
=> return true,
else => return false,
},
.x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
.X86_64_RELOC_TLV => return true,
else => return false,
},
else => unreachable,
}
}
pub fn relocIsStub(zld: *Zld, rel: macho.relocation_info) bool {
switch (zld.options.target.cpu.arch) {
.aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
.ARM64_RELOC_BRANCH26 => return true,
else => return false,
},
.x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
.X86_64_RELOC_BRANCH => return true,
else => return false,
},
else => unreachable,
}
}
const Atom = @This();
const std = @import("std");
const build_options = @import("build_options");
const aarch64 = @import("../../arch/aarch64/bits.zig");
const assert = std.debug.assert;
const log = std.log.scoped(.link);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const meta = std.meta;
const trace = @import("../../tracy.zig").trace;
const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
const MachO = @import("../MachO.zig");
pub const Relocation = @import("Relocation.zig");
const SymbolWithLoc = MachO.SymbolWithLoc;
const Zld = @import("zld.zig").Zld;