macho: rework symbol handling to match zld/ELF

Each object file now stores a mutable table of the symbols it
defines. During symbol resolution across object files, each symbol
is updated with a globally allocated section ordinal and address in
virtual memory. If an object defines a globally visible symbol, only
its location (comprising the symbol index and object index) is stored
in the globals map for easy access when relocating, etc. This approach
cleans up symbol management significantly and matches the status quo
in zld/ELF.

Additionally, this makes scoping symbol stabs easier, as they are
now naturally contained within each object file.
Jakub Konka
2022-07-06 17:11:39 +02:00
parent 843701d0fe
commit 9eb7e5182b
13 changed files with 2154 additions and 2457 deletions
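
For orientation before the diff: a minimal sketch of the scheme the
message describes, in the era's Zig. The names mirror the hunks below,
but the exact MachO.zig layout is an assumption here, since that file's
diff is suppressed further down.

const std = @import("std");
const macho = std.macho;

/// The globally unique identity of a symbol after this rework: an index
/// into the owning symbol table plus the object file that owns it.
pub const SymbolWithLoc = struct {
    sym_index: u32,
    /// null means the symbol is defined by Zig source rather than by an
    /// input object file.
    file: ?u32,
};

pub const Object = struct {
    /// Mutable copy of the object's symbol table. During resolution each
    /// defined symbol gains a globally allocated section ordinal (n_sect)
    /// and a virtual memory address (n_value).
    symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
};

pub const MachO = struct {
    objects: std.ArrayListUnmanaged(Object) = .{},
    /// Symbols defined by Zig source live here (file == null).
    locals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
    /// Globally visible symbols, keyed by name and stored by location only.
    globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{},

    pub fn getSymbol(self: *MachO, loc: SymbolWithLoc) macho.nlist_64 {
        if (loc.file) |file| {
            return self.objects.items[file].symtab.items[loc.sym_index];
        }
        return self.locals.items[loc.sym_index];
    }
};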


@@ -3174,7 +3174,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
const func = func_payload.data;
const fn_owner_decl = mod.declPtr(func.owner_decl);
try self.genSetReg(Type.initTag(.u64), .x30, .{
.got_load = fn_owner_decl.link.macho.local_sym_index,
.got_load = fn_owner_decl.link.macho.sym_index,
});
// blr x30
_ = try self.addInst(.{
@@ -3190,14 +3190,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
lib_name,
});
}
const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
const global_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
_ = try self.addInst(.{
.tag = .call_extern,
.data = .{
.extern_fn = .{
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.sym_name = n_strx,
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.global_index = global_index,
},
},
});
@@ -4157,7 +4157,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
.data = .{
.payload = try self.addExtra(Mir.LoadMemoryPie{
.register = @enumToInt(src_reg),
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.sym_index = sym_index,
}),
},
@@ -4270,7 +4270,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
.data = .{
.payload = try self.addExtra(Mir.LoadMemoryPie{
.register = @enumToInt(reg),
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.sym_index = sym_index,
}),
},
@@ -4578,8 +4578,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// Because MachO is PIE-always-on, we defer memory address resolution until
// the linker has enough info to perform relocations.
assert(decl.link.macho.local_sym_index != 0);
return MCValue{ .got_load = decl.link.macho.local_sym_index };
assert(decl.link.macho.sym_index != 0);
return MCValue{ .got_load = decl.link.macho.sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };


@@ -660,9 +660,10 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
};
// Add relocation to the decl.
const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?;
const target = macho_file.globals.values()[extern_fn.global_index];
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset,
.target = .{ .global = extern_fn.sym_name },
.target = target,
.addend = 0,
.subtractor = null,
.pcrel = true,
@@ -864,7 +865,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void {
// Page reloc for adrp instruction.
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset,
.target = .{ .local = data.sym_index },
.target = .{ .sym_index = data.sym_index, .file = null },
.addend = 0,
.subtractor = null,
.pcrel = true,
@@ -882,7 +883,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void {
// Pageoff reloc for adrp instruction.
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset + 4,
.target = .{ .local = data.sym_index },
.target = .{ .sym_index = data.sym_index, .file = null },
.addend = 0,
.subtractor = null,
.pcrel = false,


@@ -232,7 +232,7 @@ pub const Inst = struct {
/// Index of the containing atom.
atom_index: u32,
/// Index into the linker's string table.
sym_name: u32,
global_index: u32,
},
/// A 16-bit immediate value.
///


@@ -2563,7 +2563,7 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// TODO I'm hacking my way through here by repurposing .memory for storing
// index to the GOT target symbol index.
return MCValue{ .memory = decl.link.macho.local_sym_index };
return MCValue{ .memory = decl.link.macho.sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };


@@ -2645,7 +2645,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue
}),
.data = .{
.load_reloc = .{
.atom_index = fn_owner_decl.link.macho.local_sym_index,
.atom_index = fn_owner_decl.link.macho.sym_index,
.sym_index = sym_index,
},
},
@@ -3977,7 +3977,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
const func = func_payload.data;
const fn_owner_decl = mod.declPtr(func.owner_decl);
try self.genSetReg(Type.initTag(.usize), .rax, .{
.got_load = fn_owner_decl.link.macho.local_sym_index,
.got_load = fn_owner_decl.link.macho.sym_index,
});
// callq *%rax
_ = try self.addInst(.{
@@ -3997,14 +3997,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
lib_name,
});
}
const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
const global_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
_ = try self.addInst(.{
.tag = .call_extern,
.ops = undefined,
.data = .{
.extern_fn = .{
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.sym_name = n_strx,
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.global_index = global_index,
},
},
});
@@ -6771,8 +6771,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// Because MachO is PIE-always-on, we defer memory address resolution until
// the linker has enough info to perform relocations.
assert(decl.link.macho.local_sym_index != 0);
return MCValue{ .got_load = decl.link.macho.local_sym_index };
assert(decl.link.macho.sym_index != 0);
return MCValue{ .got_load = decl.link.macho.sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };


@@ -1005,7 +1005,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, load_reloc.sym_index });
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = @intCast(u32, end_offset - 4),
.target = .{ .local = load_reloc.sym_index },
.target = .{ .sym_index = load_reloc.sym_index, .file = null },
.addend = 0,
.subtractor = null,
.pcrel = true,
@@ -1127,9 +1127,10 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
// Add relocation to the decl.
const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?;
const target = macho_file.globals.values()[extern_fn.global_index];
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset,
.target = .{ .global = extern_fn.sym_name },
.target = target,
.addend = 0,
.subtractor = null,
.pcrel = true,


@@ -443,8 +443,8 @@ pub const Inst = struct {
extern_fn: struct {
/// Index of the containing atom.
atom_index: u32,
/// Index into the linker's string table.
sym_name: u32,
/// Index into the linker's globals table.
global_index: u32,
},
/// PIE load relocation.
load_reloc: struct {


@@ -544,12 +544,7 @@ pub const File = struct {
switch (base.tag) {
.coff => return @fieldParentPtr(Coff, "base", base).allocateDeclIndexes(decl_index),
.elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl_index),
.macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index) catch |err| switch (err) {
// remap this error code because we are transitioning away from
// `allocateDeclIndexes`.
error.Overflow => return error.OutOfMemory,
error.OutOfMemory => return error.OutOfMemory,
},
.macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index),
.wasm => return @fieldParentPtr(Wasm, "base", base).allocateDeclIndexes(decl_index),
.plan9 => return @fieldParentPtr(Plan9, "base", base).allocateDeclIndexes(decl_index),
.c, .spirv, .nvptx => {},

File diff suppressed because it is too large.


@@ -16,7 +16,7 @@ const Arch = std.Target.Cpu.Arch;
const Dwarf = @import("../Dwarf.zig");
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const SymbolWithLoc = MachO.SymbolWithLoc;
/// Each decl always gets a local symbol with the fully qualified name.
/// The vaddr and size are found here directly.
@@ -24,7 +24,10 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter;
/// the symbol references, and adding that to the file offset of the section.
/// If this field is 0, it means the codegen size = 0 and there is no symbol or
/// offset table entry.
local_sym_index: u32,
sym_index: u32,
/// null means symbol defined by Zig source.
file: ?u32,
/// List of symbols contained within this atom
contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
@@ -45,15 +48,15 @@ alignment: u32,
relocs: std.ArrayListUnmanaged(Relocation) = .{},
/// List of offsets contained within this atom that need rebasing by the dynamic
/// loader in presence of ASLR.
/// loader for example in presence of ASLR.
rebases: std.ArrayListUnmanaged(u64) = .{},
/// List of offsets contained within this atom that will be dynamically bound
/// by the dynamic loader and contain pointers to resolved (at load time) extern
/// symbols (aka proxies aka imports)
/// symbols (aka proxies aka imports).
bindings: std.ArrayListUnmanaged(Binding) = .{},
/// List of lazy bindings
/// List of lazy bindings (cf bindings above).
lazy_bindings: std.ArrayListUnmanaged(Binding) = .{},
/// List of data-in-code entries. This is currently specific to x86_64 only.
@@ -68,12 +71,12 @@ dbg_info_atom: Dwarf.Atom,
dirty: bool = true,
pub const Binding = struct {
n_strx: u32,
global_index: u32,
offset: u64,
};
pub const SymbolAtOffset = struct {
local_sym_index: u32,
sym_index: u32,
offset: u64,
stab: ?Stab = null,
};
@@ -83,11 +86,14 @@ pub const Stab = union(enum) {
static,
global,
pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 {
var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator);
pub fn asNlists(stab: Stab, sym_loc: SymbolWithLoc, macho_file: *MachO) ![]macho.nlist_64 {
const gpa = macho_file.base.allocator;
var nlists = std.ArrayList(macho.nlist_64).init(gpa);
defer nlists.deinit();
const sym = macho_file.locals.items[local_sym_index];
const sym = macho_file.getSymbol(sym_loc);
const sym_name = macho_file.getSymbolName(sym_loc);
switch (stab) {
.function => |size| {
try nlists.ensureUnusedCapacity(4);
@@ -99,7 +105,7 @@ pub const Stab = union(enum) {
.n_value = sym.n_value,
});
nlists.appendAssumeCapacity(.{
.n_strx = sym.n_strx,
.n_strx = try macho_file.strtab.insert(gpa, sym_name),
.n_type = macho.N_FUN,
.n_sect = sym.n_sect,
.n_desc = 0,
@@ -122,7 +128,7 @@ pub const Stab = union(enum) {
},
.global => {
try nlists.append(.{
.n_strx = sym.n_strx,
.n_strx = try macho_file.strtab.insert(gpa, sym_name),
.n_type = macho.N_GSYM,
.n_sect = 0,
.n_desc = 0,
@@ -131,7 +137,7 @@ pub const Stab = union(enum) {
},
.static => {
try nlists.append(.{
.n_strx = sym.n_strx,
.n_strx = try macho_file.strtab.insert(gpa, sym_name),
.n_type = macho.N_STSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
@@ -145,30 +151,66 @@ pub const Stab = union(enum) {
};
pub const Relocation = struct {
pub const Target = union(enum) {
local: u32,
global: u32,
};
/// Offset within the atom's code buffer.
/// Note relocation size can be inferred by relocation's kind.
offset: u32,
target: Target,
target: MachO.SymbolWithLoc,
addend: i64,
subtractor: ?u32,
subtractor: ?MachO.SymbolWithLoc,
pcrel: bool,
length: u2,
@"type": u4,
pub fn getTargetAtom(self: Relocation, macho_file: *MachO) !?*Atom {
const is_via_got = got: {
switch (macho_file.base.options.target.cpu.arch) {
.aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) {
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> true,
else => false,
},
.x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) {
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
else => false,
},
else => unreachable,
}
};
const target_sym = macho_file.getSymbol(self.target);
if (is_via_got) {
const got_index = macho_file.got_entries_table.get(self.target) orelse {
log.err("expected GOT entry for symbol", .{});
if (target_sym.undf()) {
log.err(" import('{s}')", .{macho_file.getSymbolName(self.target)});
} else {
log.err(" local(%{d}) in object({d})", .{ self.target.sym_index, self.target.file });
}
log.err(" this is an internal linker error", .{});
return error.FailedToResolveRelocationTarget;
};
return macho_file.got_entries.items[got_index].atom;
}
if (macho_file.stubs_table.get(self.target)) |stub_index| {
return macho_file.stubs.items[stub_index].atom;
} else if (macho_file.tlv_ptr_entries_table.get(self.target)) |tlv_ptr_index| {
return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom;
} else return macho_file.getAtomForSymbol(self.target);
}
};
pub const empty = Atom{
.local_sym_index = 0,
.sym_index = 0,
.file = null,
.size = 0,
.alignment = 0,
.prev = null,
@@ -196,13 +238,45 @@ pub fn clearRetainingCapacity(self: *Atom) void {
self.code.clearRetainingCapacity();
}
/// Returns symbol referencing this atom.
pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 {
return self.getSymbolPtr(macho_file).*;
}
/// Returns pointer-to-symbol referencing this atom.
pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 {
return macho_file.getSymbolPtr(.{
.sym_index = self.sym_index,
.file = self.file,
});
}
/// Returns true if the symbol pointed at with `sym_loc` is contained within this atom.
/// WARNING this function assumes all atoms have been allocated in the virtual memory.
/// Calling it without allocating with `MachO.allocateSymbols` (or equivalent) will
/// give bogus results.
pub fn isSymbolContained(self: Atom, sym_loc: SymbolWithLoc, macho_file: *MachO) bool {
const sym = macho_file.getSymbol(sym_loc);
if (!sym.sect()) return false;
const self_sym = self.getSymbol(macho_file);
return sym.n_value >= self_sym.n_value and sym.n_value < self_sym.n_value + self.size;
}
/// Returns the name of this atom.
pub fn getName(self: Atom, macho_file: *MachO) []const u8 {
return macho_file.getSymbolName(.{
.sym_index = self.sym_index,
.file = self.file,
});
}
/// Returns how much room there is to grow in virtual address space.
/// File offset relocation happens transparently, so it is not included in
/// this calculation.
pub fn capacity(self: Atom, macho_file: MachO) u64 {
const self_sym = macho_file.locals.items[self.local_sym_index];
pub fn capacity(self: Atom, macho_file: *MachO) u64 {
const self_sym = self.getSymbol(macho_file);
if (self.next) |next| {
const next_sym = macho_file.locals.items[next.local_sym_index];
const next_sym = next.getSymbol(macho_file);
return next_sym.n_value - self_sym.n_value;
} else {
// We are the last atom.
@@ -211,11 +285,11 @@ pub fn capacity(self: Atom, macho_file: MachO) u64 {
}
}
pub fn freeListEligible(self: Atom, macho_file: MachO) bool {
pub fn freeListEligible(self: Atom, macho_file: *MachO) bool {
// No need to keep a free list node for the last atom.
const next = self.next orelse return false;
const self_sym = macho_file.locals.items[self.local_sym_index];
const next_sym = macho_file.locals.items[next.local_sym_index];
const self_sym = self.getSymbol(macho_file);
const next_sym = next.getSymbol(macho_file);
const cap = next_sym.n_value - self_sym.n_value;
const ideal_cap = MachO.padToIdeal(self.size);
if (cap <= ideal_cap) return false;
@@ -224,20 +298,20 @@ pub fn freeListEligible(self: Atom, macho_file: MachO) bool {
}
const RelocContext = struct {
macho_file: *MachO,
base_addr: u64 = 0,
base_offset: i32 = 0,
allocator: Allocator,
object: *Object,
macho_file: *MachO,
};
pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: RelocContext) !void {
const tracy = trace(@src());
defer tracy.end();
const gpa = context.macho_file.base.allocator;
const arch = context.macho_file.base.options.target.cpu.arch;
var addend: i64 = 0;
var subtractor: ?u32 = null;
var subtractor: ?SymbolWithLoc = null;
for (relocs) |rel, i| {
blk: {
@@ -274,20 +348,16 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
}
assert(subtractor == null);
const sym = context.object.symtab[rel.r_symbolnum];
const sym_loc = MachO.SymbolWithLoc{
.sym_index = rel.r_symbolnum,
.file = self.file,
};
const sym = context.macho_file.getSymbol(sym_loc);
if (sym.sect() and !sym.ext()) {
subtractor = context.object.symbol_mapping.get(rel.r_symbolnum).?;
subtractor = sym_loc;
} else {
const sym_name = context.object.getString(sym.n_strx);
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(
@as([]const u8, sym_name),
StringIndexAdapter{
.bytes = &context.macho_file.strtab,
},
).?;
const resolv = context.macho_file.symbol_resolver.get(n_strx).?;
assert(resolv.where == .global);
subtractor = resolv.local_sym_index;
const sym_name = context.macho_file.getSymbolName(sym_loc);
subtractor = context.macho_file.globals.get(sym_name).?;
}
// Verify that *_SUBTRACTOR is followed by *_UNSIGNED.
if (relocs.len <= i + 1) {
@@ -318,43 +388,40 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
continue;
}
const object = &context.macho_file.objects.items[self.file.?];
const target = target: {
if (rel.r_extern == 0) {
const sect_id = @intCast(u16, rel.r_symbolnum - 1);
const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: {
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const sect = seg.sections.items[sect_id];
const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: {
const sect = object.getSection(sect_id);
const match = (try context.macho_file.getMatchingSection(sect)) orelse
unreachable;
const local_sym_index = @intCast(u32, context.macho_file.locals.items.len);
try context.macho_file.locals.append(context.allocator, .{
const sym_index = @intCast(u32, object.symtab.items.len);
try object.symtab.append(gpa, .{
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
.n_sect = context.macho_file.getSectionOrdinal(match),
.n_desc = 0,
.n_value = 0,
});
try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index);
break :blk local_sym_index;
try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index);
break :blk sym_index;
};
break :target Relocation.Target{ .local = local_sym_index };
break :target MachO.SymbolWithLoc{ .sym_index = sym_index, .file = self.file };
}
const sym = context.object.symtab[rel.r_symbolnum];
const sym_name = context.object.getString(sym.n_strx);
const sym_loc = MachO.SymbolWithLoc{
.sym_index = rel.r_symbolnum,
.file = self.file,
};
const sym = context.macho_file.getSymbol(sym_loc);
if (sym.sect() and !sym.ext()) {
const sym_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
break :target Relocation.Target{ .local = sym_index };
break :target sym_loc;
} else {
const sym_name = context.macho_file.getSymbolName(sym_loc);
break :target context.macho_file.globals.get(sym_name).?;
}
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(
@as([]const u8, sym_name),
StringIndexAdapter{
.bytes = &context.macho_file.strtab,
},
) orelse unreachable;
break :target Relocation.Target{ .global = n_strx };
};
const offset = @intCast(u32, rel.r_address - context.base_offset);
@@ -378,8 +445,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
else
mem.readIntLittle(i32, self.code.items[offset..][0..4]);
if (rel.r_extern == 0) {
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
addend -= @intCast(i64, target_sect_base_addr);
}
try self.addPtrBindingOrRebase(rel, target, context);
@@ -387,9 +453,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
.ARM64_RELOC_TLVP_LOAD_PAGE21,
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
=> {
if (target == .global) {
try addTlvPtrEntry(target, context);
}
try addTlvPtrEntry(target, context);
},
else => {},
}
@@ -413,8 +477,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
else
mem.readIntLittle(i32, self.code.items[offset..][0..4]);
if (rel.r_extern == 0) {
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
addend -= @intCast(i64, target_sect_base_addr);
}
try self.addPtrBindingOrRebase(rel, target, context);
@@ -435,16 +498,13 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
if (rel.r_extern == 0) {
// Note for the future self: when r_extern == 0, we should subtract correction from the
// addend.
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
addend += @intCast(i64, context.base_addr + offset + 4) -
@intCast(i64, target_sect_base_addr);
}
},
.X86_64_RELOC_TLV => {
if (target == .global) {
try addTlvPtrEntry(target, context);
}
try addTlvPtrEntry(target, context);
},
else => {},
}
@@ -452,7 +512,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
else => unreachable,
}
try self.relocs.append(context.allocator, .{
try self.relocs.append(gpa, .{
.offset = offset,
.target = target,
.addend = addend,
@@ -470,338 +530,181 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
fn addPtrBindingOrRebase(
self: *Atom,
rel: macho.relocation_info,
target: Relocation.Target,
target: MachO.SymbolWithLoc,
context: RelocContext,
) !void {
switch (target) {
.global => |n_strx| {
try self.bindings.append(context.allocator, .{
.n_strx = n_strx,
.offset = @intCast(u32, rel.r_address - context.base_offset),
});
},
.local => {
const source_sym = context.macho_file.locals.items[self.local_sym_index];
const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
const seg = context.macho_file.load_commands.items[match.seg].segment;
const sect = seg.sections.items[match.sect];
const sect_type = sect.type_();
const gpa = context.macho_file.base.allocator;
const sym = context.macho_file.getSymbol(target);
if (sym.undf()) {
const sym_name = context.macho_file.getSymbolName(target);
const global_index = @intCast(u32, context.macho_file.globals.getIndex(sym_name).?);
try self.bindings.append(gpa, .{
.global_index = global_index,
.offset = @intCast(u32, rel.r_address - context.base_offset),
});
} else {
const source_sym = self.getSymbol(context.macho_file);
const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect);
const sect = context.macho_file.getSection(match);
const sect_type = sect.type_();
const should_rebase = rebase: {
if (rel.r_length != 3) break :rebase false;
const should_rebase = rebase: {
if (rel.r_length != 3) break :rebase false;
// TODO actually, a check similar to what dyld is doing, that is, verifying
// that the segment is writable should be enough here.
const is_right_segment = blk: {
if (context.macho_file.data_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
// TODO actually, a check similar to what dyld is doing, that is, verifying
// that the segment is writable should be enough here.
const is_right_segment = blk: {
if (context.macho_file.data_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
if (context.macho_file.data_const_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
}
break :blk false;
};
if (!is_right_segment) break :rebase false;
if (sect_type != macho.S_LITERAL_POINTERS and
sect_type != macho.S_REGULAR and
sect_type != macho.S_MOD_INIT_FUNC_POINTERS and
sect_type != macho.S_MOD_TERM_FUNC_POINTERS)
{
break :rebase false;
}
break :rebase true;
if (context.macho_file.data_const_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
}
break :blk false;
};
if (should_rebase) {
try self.rebases.append(
context.allocator,
@intCast(u32, rel.r_address - context.base_offset),
);
if (!is_right_segment) break :rebase false;
if (sect_type != macho.S_LITERAL_POINTERS and
sect_type != macho.S_REGULAR and
sect_type != macho.S_MOD_INIT_FUNC_POINTERS and
sect_type != macho.S_MOD_TERM_FUNC_POINTERS)
{
break :rebase false;
}
},
break :rebase true;
};
if (should_rebase) {
try self.rebases.append(gpa, @intCast(u32, rel.r_address - context.base_offset));
}
}
}
fn addTlvPtrEntry(target: Relocation.Target, context: RelocContext) !void {
fn addTlvPtrEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void {
const target_sym = context.macho_file.getSymbol(target);
if (!target_sym.undf()) return;
if (context.macho_file.tlv_ptr_entries_table.contains(target)) return;
const index = try context.macho_file.allocateTlvPtrEntry(target);
const atom = try context.macho_file.createTlvPtrAtom(target);
context.macho_file.tlv_ptr_entries.items[index].atom = atom;
const match = (try context.macho_file.getMatchingSection(.{
.segname = MachO.makeStaticString("__DATA"),
.sectname = MachO.makeStaticString("__thread_ptrs"),
.flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS,
})).?;
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
}
fn addGotEntry(target: Relocation.Target, context: RelocContext) !void {
fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void {
if (context.macho_file.got_entries_table.contains(target)) return;
const index = try context.macho_file.allocateGotEntry(target);
const atom = try context.macho_file.createGotAtom(target);
context.macho_file.got_entries.items[index].atom = atom;
const match = MachO.MatchingSection{
.seg = context.macho_file.data_const_segment_cmd_index.?,
.sect = context.macho_file.got_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
}
fn addStub(target: Relocation.Target, context: RelocContext) !void {
if (target != .global) return;
if (context.macho_file.stubs_table.contains(target.global)) return;
// If the symbol has been resolved as defined globally elsewhere (in a different translation unit),
// then skip creating stub entry.
// TODO Is this the correct for the incremental?
if (context.macho_file.symbol_resolver.get(target.global).?.where == .global) return;
fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void {
const target_sym = context.macho_file.getSymbol(target);
if (!target_sym.undf()) return;
if (context.macho_file.stubs_table.contains(target)) return;
const stub_index = try context.macho_file.allocateStubEntry(target.global);
const stub_index = try context.macho_file.allocateStubEntry(target);
const stub_helper_atom = try context.macho_file.createStubHelperAtom();
const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target);
const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index);
// TODO clean this up!
const stub_helper_atom = atom: {
const atom = try context.macho_file.createStubHelperAtom();
const match = MachO.MatchingSection{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stub_helper_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
break :atom atom;
};
const laptr_atom = atom: {
const atom = try context.macho_file.createLazyPointerAtom(
stub_helper_atom.local_sym_index,
target.global,
);
const match = MachO.MatchingSection{
.seg = context.macho_file.data_segment_cmd_index.?,
.sect = context.macho_file.la_symbol_ptr_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
break :atom atom;
};
const atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
const match = MachO.MatchingSection{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stubs_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
context.macho_file.stubs.items[stub_index] = atom;
}
pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) !?*Atom {
const is_via_got = got: {
switch (macho_file.base.options.target.cpu.arch) {
.aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> true,
else => false,
},
.x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
else => false,
},
else => unreachable,
}
};
if (is_via_got) {
const got_index = macho_file.got_entries_table.get(rel.target) orelse {
log.err("expected GOT entry for symbol", .{});
switch (rel.target) {
.local => |sym_index| log.err(" local @{d}", .{sym_index}),
.global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}),
}
log.err(" this is an internal linker error", .{});
return error.FailedToResolveRelocationTarget;
};
return macho_file.got_entries.items[got_index].atom;
}
switch (rel.target) {
.local => |sym_index| {
return macho_file.atom_by_index_table.get(sym_index);
},
.global => |n_strx| {
const resolv = macho_file.symbol_resolver.get(n_strx).?;
switch (resolv.where) {
.global => return macho_file.atom_by_index_table.get(resolv.local_sym_index),
.undef => {
if (macho_file.stubs_table.get(n_strx)) |stub_index| {
return macho_file.stubs.items[stub_index];
} else {
if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| {
return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom;
}
return null;
}
},
}
},
}
context.macho_file.stubs.items[stub_index].atom = stub_atom;
}
pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
log.debug("ATOM(%{d}, '{s}')", .{ self.sym_index, self.getName(macho_file) });
for (self.relocs.items) |rel| {
log.debug("relocating {}", .{rel});
const arch = macho_file.base.options.target.cpu.arch;
switch (arch) {
.aarch64 => {
log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{
@tagName(@intToEnum(macho.reloc_type_arm64, rel.@"type")),
rel.offset,
rel.target.sym_index,
rel.target.file,
});
},
.x86_64 => {
log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{
@tagName(@intToEnum(macho.reloc_type_x86_64, rel.@"type")),
rel.offset,
rel.target.sym_index,
rel.target.file,
});
},
else => unreachable,
}
const source_addr = blk: {
const sym = macho_file.locals.items[self.local_sym_index];
break :blk sym.n_value + rel.offset;
const source_sym = self.getSymbol(macho_file);
break :blk source_sym.n_value + rel.offset;
};
const is_tlv = is_tlv: {
const source_sym = self.getSymbol(macho_file);
const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect);
const sect = macho_file.getSection(match);
break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES;
};
var is_via_thread_ptrs: bool = false;
const target_addr = blk: {
const is_via_got = got: {
switch (arch) {
.aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> true,
else => false,
},
.x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
else => false,
},
else => unreachable,
const target_atom = (try rel.getTargetAtom(macho_file)) orelse {
// If there is no atom for target, we still need to check for special, atom-less
// symbols such as `___dso_handle`.
const target_name = macho_file.getSymbolName(rel.target);
if (macho_file.globals.contains(target_name)) {
const atomless_sym = macho_file.getSymbol(rel.target);
log.debug(" | atomless target '{s}'", .{target_name});
break :blk atomless_sym.n_value;
}
log.debug(" | undef target '{s}'", .{target_name});
break :blk 0;
};
if (is_via_got) {
const got_index = macho_file.got_entries_table.get(rel.target) orelse {
log.err("expected GOT entry for symbol", .{});
switch (rel.target) {
.local => |sym_index| log.err(" local @{d}", .{sym_index}),
.global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}),
log.debug(" | target ATOM(%{d}, '{s}') in object({d})", .{
target_atom.sym_index,
target_atom.getName(macho_file),
target_atom.file,
});
// If `rel.target` is contained within the target atom, pull its address value.
const target_sym = if (target_atom.isSymbolContained(rel.target, macho_file))
macho_file.getSymbol(rel.target)
else
target_atom.getSymbol(macho_file);
const base_address: u64 = if (is_tlv) base_address: {
// For TLV relocations, the value specified as a relocation is the displacement from the
// TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first
// defined TLV template init section in the following order:
// * wrt to __thread_data if defined, then
// * wrt to __thread_bss
const sect_id: u16 = sect_id: {
if (macho_file.tlv_data_section_index) |i| {
break :sect_id i;
} else if (macho_file.tlv_bss_section_index) |i| {
break :sect_id i;
} else {
log.err("threadlocal variables present but no initializer sections found", .{});
log.err(" __thread_data not found", .{});
log.err(" __thread_bss not found", .{});
return error.FailedToResolveRelocationTarget;
}
log.err(" this is an internal linker error", .{});
return error.FailedToResolveRelocationTarget;
};
const atom = macho_file.got_entries.items[got_index].atom;
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
}
switch (rel.target) {
.local => |sym_index| {
const sym = macho_file.locals.items[sym_index];
const is_tlv = is_tlv: {
const source_sym = macho_file.locals.items[self.local_sym_index];
const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
const seg = macho_file.load_commands.items[match.seg].segment;
const sect = seg.sections.items[match.sect];
break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES;
};
if (is_tlv) {
// For TLV relocations, the value specified as a relocation is the displacement from the
// TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first
// defined TLV template init section in the following order:
// * wrt to __thread_data if defined, then
// * wrt to __thread_bss
const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].segment;
const base_address = inner: {
if (macho_file.tlv_data_section_index) |i| {
break :inner seg.sections.items[i].addr;
} else if (macho_file.tlv_bss_section_index) |i| {
break :inner seg.sections.items[i].addr;
} else {
log.err("threadlocal variables present but no initializer sections found", .{});
log.err(" __thread_data not found", .{});
log.err(" __thread_bss not found", .{});
return error.FailedToResolveRelocationTarget;
}
};
break :blk sym.n_value - base_address;
}
break :blk sym.n_value;
},
.global => |n_strx| {
// TODO Still trying to figure out how to possibly use stubs for local symbol indirection with
// branching instructions. If it is not possible, then the best course of action is to
// resurrect the former approach of defering creating synthethic atoms in __got and __la_symbol_ptr
// sections until we resolve the relocations.
const resolv = macho_file.symbol_resolver.get(n_strx).?;
switch (resolv.where) {
.global => break :blk macho_file.globals.items[resolv.where_index].n_value,
.undef => {
if (macho_file.stubs_table.get(n_strx)) |stub_index| {
const atom = macho_file.stubs.items[stub_index];
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
} else {
if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| {
is_via_thread_ptrs = true;
const atom = macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom;
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
}
break :blk 0;
}
},
}
},
}
break :base_address macho_file.getSection(.{
.seg = macho_file.data_segment_cmd_index.?,
.sect = sect_id,
}).addr;
} else 0;
break :blk target_sym.n_value - base_address;
};
log.debug(" | source_addr = 0x{x}", .{source_addr});
log.debug(" | target_addr = 0x{x}", .{target_addr});
log.debug(" | source_addr = 0x{x}", .{source_addr});
log.debug(" | target_addr = 0x{x}", .{target_addr});
switch (arch) {
.aarch64 => {
@@ -933,7 +836,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
}
};
const narrowed = @truncate(u12, @intCast(u64, actual_target_addr));
var inst = if (is_via_thread_ptrs) blk: {
var inst = if (macho_file.tlv_ptr_entries_table.contains(rel.target)) blk: {
const offset = try math.divExact(u12, narrowed, 8);
break :blk aarch64.Instruction{
.load_store_register = .{
@@ -966,7 +869,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.ARM64_RELOC_UNSIGNED => {
const result = blk: {
if (rel.subtractor) |subtractor| {
const sym = macho_file.locals.items[subtractor];
const sym = macho_file.getSymbol(subtractor);
break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend;
} else {
break :blk @intCast(i64, target_addr) + rel.addend;
@@ -1004,7 +907,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement));
},
.X86_64_RELOC_TLV => {
if (!is_via_thread_ptrs) {
if (!macho_file.tlv_ptr_entries_table.contains(rel.target)) {
// We need to rewrite the opcode from movq to leaq.
self.code.items[rel.offset - 2] = 0x8d;
}
@@ -1036,7 +939,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.X86_64_RELOC_UNSIGNED => {
const result = blk: {
if (rel.subtractor) |subtractor| {
const sym = macho_file.locals.items[subtractor];
const sym = macho_file.getSymbol(subtractor);
break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend;
} else {
break :blk @intCast(i64, target_addr) + rel.addend;
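
Net effect of the Relocation changes above: targets are plain
SymbolWithLoc values, and the synthetic-entry tables (GOT, stubs, TLV
pointers) consulted by getTargetAtom are keyed by them directly. A
reduced sketch of that lookup pattern follows; Linker, Entry, and the
body of allocateGotEntry are simplified stand-ins for the real
(suppressed) MachO.zig code.

const std = @import("std");

const Atom = opaque {};

const SymbolWithLoc = struct {
    sym_index: u32,
    file: ?u32,
};

const Linker = struct {
    got_entries: std.ArrayListUnmanaged(Entry) = .{},
    /// Keyed by symbol location, so a target resolves to the same GOT
    /// slot no matter which relocation references it.
    got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{},

    const Entry = struct {
        target: SymbolWithLoc,
        /// Filled in once the synthetic atom is created.
        atom: ?*Atom = null,
    };

    /// Idempotent: returns the existing GOT slot for `target`, or
    /// allocates a fresh one and records it in the table.
    fn allocateGotEntry(self: *Linker, gpa: std.mem.Allocator, target: SymbolWithLoc) !u32 {
        const gop = try self.got_entries_table.getOrPut(gpa, target);
        if (gop.found_existing) return gop.value_ptr.*;
        const index = @intCast(u32, self.got_entries.items.len);
        try self.got_entries.append(gpa, .{ .target = target });
        gop.value_ptr.* = index;
        return index;
    }
};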


@@ -17,6 +17,7 @@ const Allocator = mem.Allocator;
const Dwarf = @import("../Dwarf.zig");
const MachO = @import("../MachO.zig");
const Module = @import("../../Module.zig");
const StringTable = @import("../strtab.zig").StringTable;
const TextBlock = MachO.TextBlock;
const Type = @import("../../type.zig").Type;
@@ -59,6 +60,8 @@ debug_aranges_section_dirty: bool = false,
debug_info_header_dirty: bool = false,
debug_line_header_dirty: bool = false,
strtab: StringTable(.link) = .{},
relocs: std.ArrayListUnmanaged(Reloc) = .{},
pub const Reloc = struct {
@@ -93,6 +96,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void
.strsize = 0,
},
});
try self.strtab.buffer.append(allocator, 0);
self.load_commands_dirty = true;
}
@@ -269,22 +273,30 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti
for (self.relocs.items) |*reloc| {
const sym = switch (reloc.@"type") {
.direct_load => self.base.locals.items[reloc.target],
.direct_load => self.base.getSymbol(.{ .sym_index = reloc.target, .file = null }),
.got_load => blk: {
const got_index = self.base.got_entries_table.get(.{ .local = reloc.target }).?;
const got_entry = self.base.got_entries.items[got_index];
break :blk self.base.locals.items[got_entry.atom.local_sym_index];
const got_index = self.base.got_entries_table.get(.{ .sym_index = reloc.target, .file = null }).?;
const got_atom = self.base.got_entries.items[got_index].atom;
break :blk got_atom.getSymbol(self.base);
},
};
if (sym.n_value == reloc.prev_vaddr) continue;
const sym_name = switch (reloc.@"type") {
.direct_load => self.base.getSymbolName(.{ .sym_index = reloc.target, .file = null }),
.got_load => blk: {
const got_index = self.base.got_entries_table.get(.{ .sym_index = reloc.target, .file = null }).?;
const got_atom = self.base.got_entries.items[got_index].atom;
break :blk got_atom.getName(self.base);
},
};
const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment;
const sect = &seg.sections.items[self.debug_info_section_index.?];
const file_offset = sect.offset + reloc.offset;
log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{
reloc.target,
sym.n_value,
self.base.getString(sym.n_strx),
sym_name,
file_offset,
});
try self.file.pwriteAll(mem.asBytes(&sym.n_value), file_offset);
@@ -367,6 +379,7 @@ pub fn deinit(self: *DebugSymbols, allocator: Allocator) void {
}
self.load_commands.deinit(allocator);
self.dwarf.deinit();
self.strtab.deinit(allocator);
self.relocs.deinit(allocator);
}
@@ -582,21 +595,39 @@ fn writeSymbolTable(self: *DebugSymbols) !void {
const tracy = trace(@src());
defer tracy.end();
const gpa = self.base.base.allocator;
const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment;
const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab;
symtab.symoff = @intCast(u32, seg.inner.fileoff);
var locals = std.ArrayList(macho.nlist_64).init(self.base.base.allocator);
var locals = std.ArrayList(macho.nlist_64).init(gpa);
defer locals.deinit();
for (self.base.locals.items) |sym| {
if (sym.n_strx == 0) continue;
if (self.base.symbol_resolver.get(sym.n_strx)) |_| continue;
try locals.append(sym);
for (self.base.locals.items) |sym, sym_id| {
if (sym.n_strx == 0) continue; // no name, skip
if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
const sym_loc = MachO.SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null };
if (self.base.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip
if (self.base.globals.contains(self.base.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip
var out_sym = sym;
out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(sym_loc));
try locals.append(out_sym);
}
var exports = std.ArrayList(macho.nlist_64).init(gpa);
defer exports.deinit();
for (self.base.globals.values()) |global| {
const sym = self.base.getSymbol(global);
if (sym.undf()) continue; // import, skip
if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
var out_sym = sym;
out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(global));
try exports.append(out_sym);
}
const nlocals = locals.items.len;
const nexports = self.base.globals.items.len;
const nexports = exports.items.len;
const locals_off = symtab.symoff;
const locals_size = nlocals * @sizeOf(macho.nlist_64);
const exports_off = locals_off + locals_size;
@@ -641,7 +672,7 @@ fn writeSymbolTable(self: *DebugSymbols) !void {
try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off);
log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off });
try self.file.pwriteAll(mem.sliceAsBytes(self.base.globals.items), exports_off);
try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off);
self.load_commands_dirty = true;
}
@@ -655,7 +686,7 @@ fn writeStringTable(self: *DebugSymbols) !void {
const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64));
symtab.stroff = symtab.symoff + symtab_size;
const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.items.len, @alignOf(u64));
const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64));
symtab.strsize = @intCast(u32, needed_size);
if (symtab_size + needed_size > seg.inner.filesize) {
@@ -692,7 +723,7 @@ fn writeStringTable(self: *DebugSymbols) !void {
log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
try self.file.pwriteAll(self.base.strtab.items, symtab.stroff);
try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff);
self.load_commands_dirty = true;
}
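
The DebugSymbols changes above give the dSYM bundle its own
deduplicating string table: strtab.insert returns the offset of a name,
interning it on first use, and writeStringTable flushes strtab.buffer
verbatim. A minimal sketch of that behavior, with internals assumed
(the real implementation is the generic StringTable imported from
../strtab.zig above):

const std = @import("std");

pub const StringTable = struct {
    buffer: std.ArrayListUnmanaged(u8) = .{},
    table: std.StringHashMapUnmanaged(u32) = .{},

    /// Returns the byte offset of `string` inside `buffer`, appending a
    /// NUL-terminated copy on first insertion. Offset 0 is the empty
    /// string, reserved by the 0 byte appended in populateMissingMetadata.
    pub fn insert(self: *StringTable, gpa: std.mem.Allocator, string: []const u8) !u32 {
        if (self.table.get(string)) |off| return off;
        const off = @intCast(u32, self.buffer.items.len);
        try self.buffer.ensureUnusedCapacity(gpa, string.len + 1);
        self.buffer.appendSliceAssumeCapacity(string);
        self.buffer.appendAssumeCapacity(0);
        // Simplification: a production table keys the map by the copy that
        // lives in `buffer`, not by the caller's (possibly temporary) slice.
        try self.table.put(gpa, string, off);
        return off;
    }

    pub fn deinit(self: *StringTable, gpa: std.mem.Allocator) void {
        self.buffer.deinit(gpa);
        self.table.deinit(gpa);
    }
};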


@@ -47,7 +47,7 @@ dwarf_debug_line_index: ?u16 = null,
dwarf_debug_line_str_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,
symtab: []const macho.nlist_64 = &.{},
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
strtab: []const u8 = &.{},
data_in_code_entries: []const macho.data_in_code_entry = &.{},
@@ -57,17 +57,13 @@ tu_name: ?[]const u8 = null,
tu_comp_dir: ?[]const u8 = null,
mtime: ?u64 = null,
contained_atoms: std.ArrayListUnmanaged(*Atom) = .{},
start_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
end_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
// TODO symbol mapping and its inverse can probably be simple arrays
// instead of hash maps.
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
/// List of atoms that map to the symbols parsed from this object file.
managed_atoms: std.ArrayListUnmanaged(*Atom) = .{},
analyzed: bool = false,
/// Table of atoms belonging to this object file indexed by the symbol index.
atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{},
const DebugInfo = struct {
inner: dwarf.DwarfInfo,
@@ -135,97 +131,25 @@ const DebugInfo = struct {
}
};
pub fn deinit(self: *Object, allocator: Allocator) void {
pub fn deinit(self: *Object, gpa: Allocator) void {
for (self.load_commands.items) |*lc| {
lc.deinit(allocator);
lc.deinit(gpa);
}
self.load_commands.deinit(allocator);
allocator.free(self.contents);
self.sections_as_symbols.deinit(allocator);
self.symbol_mapping.deinit(allocator);
self.reverse_symbol_mapping.deinit(allocator);
allocator.free(self.name);
self.load_commands.deinit(gpa);
gpa.free(self.contents);
self.sections_as_symbols.deinit(gpa);
self.atom_by_index_table.deinit(gpa);
self.contained_atoms.deinit(allocator);
self.start_atoms.deinit(allocator);
self.end_atoms.deinit(allocator);
for (self.managed_atoms.items) |atom| {
atom.deinit(gpa);
gpa.destroy(atom);
}
self.managed_atoms.deinit(gpa);
gpa.free(self.name);
if (self.debug_info) |*db| {
db.deinit(allocator);
}
}
pub fn free(self: *Object, allocator: Allocator, macho_file: *MachO) void {
log.debug("freeObject {*}", .{self});
var it = self.end_atoms.iterator();
while (it.next()) |entry| {
const match = entry.key_ptr.*;
const first_atom = self.start_atoms.get(match).?;
const last_atom = entry.value_ptr.*;
var atom = first_atom;
while (true) {
if (atom.local_sym_index != 0) {
macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {};
const local = &macho_file.locals.items[atom.local_sym_index];
local.* = .{
.n_strx = 0,
.n_type = 0,
.n_sect = 0,
.n_desc = 0,
.n_value = 0,
};
_ = macho_file.atom_by_index_table.remove(atom.local_sym_index);
_ = macho_file.gc_roots.remove(atom);
for (atom.contained.items) |sym_off| {
_ = macho_file.atom_by_index_table.remove(sym_off.local_sym_index);
}
atom.local_sym_index = 0;
}
if (atom == last_atom) {
break;
}
if (atom.next) |next| {
atom = next;
} else break;
}
}
self.freeAtoms(macho_file);
}
fn freeAtoms(self: *Object, macho_file: *MachO) void {
var it = self.end_atoms.iterator();
while (it.next()) |entry| {
const match = entry.key_ptr.*;
var first_atom: *Atom = self.start_atoms.get(match).?;
var last_atom: *Atom = entry.value_ptr.*;
if (macho_file.atoms.getPtr(match)) |atom_ptr| {
if (atom_ptr.* == last_atom) {
if (first_atom.prev) |prev| {
// TODO shrink the section size here
atom_ptr.* = prev;
} else {
_ = macho_file.atoms.fetchRemove(match);
}
}
}
if (first_atom.prev) |prev| {
prev.next = last_atom.next;
} else {
first_atom.prev = null;
}
if (last_atom.next) |next| {
next.prev = last_atom.prev;
} else {
last_atom.next = null;
}
db.deinit(gpa);
}
}
@@ -327,24 +251,40 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void {
self.load_commands.appendAssumeCapacity(cmd);
}
self.parseSymtab();
try self.parseSymtab(allocator);
self.parseDataInCode();
try self.parseDebugInfo(allocator);
}
const NlistWithIndex = struct {
nlist: macho.nlist_64,
const Context = struct {
symtab: []const macho.nlist_64,
strtab: []const u8,
};
const SymbolAtIndex = struct {
index: u32,
fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 {
return ctx.symtab[self.index];
}
fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 {
const sym = self.getSymbol(ctx);
if (sym.n_strx == 0) return "";
return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0);
}
fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
// We sort by type: defined < undefined, and
// afterwards by address in each group. Normally, dysymtab should
// be enough to guarantee the sort, but turns out not every compiler
// is kind enough to specify the symbols in the correct order.
if (lhs.nlist.sect()) {
if (rhs.nlist.sect()) {
const lhs = lhs_index.getSymbol(ctx);
const rhs = rhs_index.getSymbol(ctx);
if (lhs.sect()) {
if (rhs.sect()) {
// Same group, sort by address.
return lhs.nlist.n_value < rhs.nlist.n_value;
return lhs.n_value < rhs.n_value;
} else {
return true;
}
@@ -352,27 +292,35 @@ const NlistWithIndex = struct {
return false;
}
}
fn filterByAddress(symbols: []NlistWithIndex, start_addr: u64, end_addr: u64) []NlistWithIndex {
const Predicate = struct {
addr: u64,
pub fn predicate(self: @This(), symbol: NlistWithIndex) bool {
return symbol.nlist.n_value >= self.addr;
}
};
const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{
.addr = start_addr,
});
const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{
.addr = end_addr,
});
return symbols[start..end];
}
};
fn filterSymbolsByAddress(
indexes: []SymbolAtIndex,
start_addr: u64,
end_addr: u64,
ctx: Context,
) []SymbolAtIndex {
const Predicate = struct {
addr: u64,
ctx: Context,
pub fn predicate(pred: @This(), index: SymbolAtIndex) bool {
return index.getSymbol(pred.ctx).n_value >= pred.addr;
}
};
const start = MachO.findFirst(SymbolAtIndex, indexes, 0, Predicate{
.addr = start_addr,
.ctx = ctx,
});
const end = MachO.findFirst(SymbolAtIndex, indexes, start, Predicate{
.addr = end_addr,
.ctx = ctx,
});
return indexes[start..end];
}
fn filterRelocs(
relocs: []const macho.relocation_info,
start_addr: u64,
@@ -411,29 +359,32 @@ fn filterDice(
return dices[start..end];
}
pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !void {
/// Splits object into atoms assuming whole cache mode aka traditional linking mode.
pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !void {
const tracy = trace(@src());
defer tracy.end();
const gpa = macho_file.base.allocator;
const seg = self.load_commands.items[self.segment_cmd_index.?].segment;
log.debug("analysing {s}", .{self.name});
log.debug("splitting object({d}, {s}) into atoms: whole cache mode", .{ object_id, self.name });
// You would expect that the symbol table is at least pre-sorted based on symbol's type:
// local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
// the GO compiler does not necessarily respect that therefore we sort immediately by type
// and address within.
var sorted_all_nlists = try std.ArrayList(NlistWithIndex).initCapacity(allocator, self.symtab.len);
defer sorted_all_nlists.deinit();
const context = Context{
.symtab = self.getSourceSymtab(),
.strtab = self.strtab,
};
var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len);
defer sorted_all_syms.deinit();
for (self.symtab) |nlist, index| {
sorted_all_nlists.appendAssumeCapacity(.{
.nlist = nlist,
.index = @intCast(u32, index),
});
for (context.symtab) |_, index| {
sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) });
}
sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan);
sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan);
// Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
// have to infer the start of undef section in the symtab ourselves.
@@ -441,30 +392,36 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !
const dysymtab = self.load_commands.items[cmd_index].dysymtab;
break :blk dysymtab.iundefsym;
} else blk: {
var iundefsym: usize = sorted_all_nlists.items.len;
var iundefsym: usize = sorted_all_syms.items.len;
while (iundefsym > 0) : (iundefsym -= 1) {
const nlist = sorted_all_nlists.items[iundefsym - 1];
if (nlist.nlist.sect()) break;
const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context);
if (sym.sect()) break;
}
break :blk iundefsym;
};
// We only care about defined symbols, so filter every other out.
const sorted_nlists = sorted_all_nlists.items[0..iundefsym];
const sorted_syms = sorted_all_syms.items[0..iundefsym];
const dead_strip = macho_file.base.options.gc_sections orelse false;
const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0 and
(macho_file.base.options.optimize_mode != .Debug or dead_strip);
// const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
for (seg.sections.items) |sect, id| {
const sect_id = @intCast(u8, id);
log.debug("parsing section '{s},{s}' into Atoms", .{ sect.segName(), sect.sectName() });
log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() });
// Get matching segment/section in the final artifact.
const match = (try macho_file.getMatchingSection(sect)) orelse {
log.debug("unhandled section", .{});
log.debug(" unhandled section", .{});
continue;
};
const target_sect = macho_file.getSection(match);
log.debug(" output sect({d}, '{s},{s}')", .{
macho_file.getSectionOrdinal(match),
target_sect.segName(),
target_sect.sectName(),
});
const is_zerofill = blk: {
const section_type = sect.type_();
@@ -482,10 +439,11 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !
);
// Symbols within this section only.
const filtered_nlists = NlistWithIndex.filterByAddress(
sorted_nlists,
const filtered_syms = filterSymbolsByAddress(
sorted_syms,
sect.addr,
sect.addr + sect.size,
context,
);
macho_file.has_dices = macho_file.has_dices or blk: {
@@ -498,32 +456,33 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !
};
macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
if (subsections_via_symbols and filtered_nlists.len > 0) {
if (subsections_via_symbols and filtered_syms.len > 0) {
// If the first symbol does not coincide with the start of the section,
// we need to cover the memory range [section start, first symbol)
// with a synthetic symbol and insert the matching atom.
const first_nlist = filtered_nlists[0].nlist;
if (first_nlist.n_value > sect.addr) {
const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
const first_sym = filtered_syms[0].getSymbol(context);
if (first_sym.n_value > sect.addr) {
const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const sym_index = @intCast(u32, self.symtab.items.len);
try self.symtab.append(gpa, .{
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_sect = macho_file.getSectionOrdinal(match),
.n_desc = 0,
.n_value = sect.addr,
});
try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index);
break :blk local_sym_index;
try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index);
break :blk sym_index;
};
const atom_size = first_nlist.n_value - sect.addr;
const atom_size = first_sym.n_value - sect.addr;
const atom_code: ?[]const u8 = if (code) |cc|
cc[0..atom_size]
else
null;
try self.parseIntoAtom(
allocator,
local_sym_index,
const atom = try self.createAtomFromSubsection(
macho_file,
object_id,
sym_index,
atom_size,
sect.@"align",
atom_code,
@@ -531,33 +490,27 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !
&.{},
match,
sect,
macho_file,
);
try macho_file.addAtomToSection(atom, match);
}
var next_nlist_count: usize = 0;
while (next_nlist_count < filtered_nlists.len) {
const next_nlist = filtered_nlists[next_nlist_count];
const addr = next_nlist.nlist.n_value;
const atom_nlists = NlistWithIndex.filterByAddress(
filtered_nlists[next_nlist_count..],
var next_sym_count: usize = 0;
while (next_sym_count < filtered_syms.len) {
const next_sym = filtered_syms[next_sym_count].getSymbol(context);
const addr = next_sym.n_value;
const atom_syms = filterSymbolsByAddress(
filtered_syms[next_sym_count..],
addr,
addr + 1,
context,
);
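// The half-open range [addr, addr + 1) groups all symbols aliasing the same
// address (e.g. several labels at a function entry) into a single atom.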
next_nlist_count += atom_nlists.len;
const local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,
.n_value = addr,
});
next_sym_count += atom_syms.len;
assert(atom_syms.len > 0);
const sym_index = atom_syms[0].index;
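// The atom extends from this symbol's address up to the next filtered
// symbol, or to the end of the section for the last one.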
const atom_size = blk: {
const end_addr = if (next_nlist_count < filtered_nlists.len)
filtered_nlists[next_nlist_count].nlist.n_value
const end_addr = if (next_sym_count < filtered_syms.len)
filtered_syms[next_sym_count].getSymbol(context).n_value
else
sect.addr + sect.size;
break :blk end_addr - addr;
@@ -570,86 +523,91 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !
math.min(@ctz(u64, addr), sect.@"align")
else
sect.@"align";
try self.parseIntoAtom(
allocator,
local_sym_index,
const atom = try self.createAtomFromSubsection(
macho_file,
object_id,
sym_index,
atom_size,
atom_align,
atom_code,
relocs,
atom_nlists,
atom_syms[1..],
match,
sect,
macho_file,
);
try macho_file.addAtomToSection(atom, match);
}
} else {
// If there is no symbol to refer to this atom, we create
// a temp one, unless we already did that when working out the relocations
// of other atoms.
const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const sym_index = @intCast(u32, self.symtab.items.len);
try self.symtab.append(gpa, .{
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_sect = macho_file.getSectionOrdinal(match),
.n_desc = 0,
.n_value = sect.addr,
});
try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index);
break :blk local_sym_index;
try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index);
break :blk sym_index;
};
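// Without subsections-via-symbols the entire section becomes one atom,
// anchored by the section symbol synthesized (or reused) above.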
try self.parseIntoAtom(
allocator,
local_sym_index,
const atom = try self.createAtomFromSubsection(
macho_file,
object_id,
sym_index,
sect.size,
sect.@"align",
code,
relocs,
filtered_nlists,
filtered_syms,
match,
sect,
macho_file,
);
try macho_file.addAtomToSection(atom, match);
}
}
}
fn parseIntoAtom(
fn createAtomFromSubsection(
self: *Object,
allocator: Allocator,
local_sym_index: u32,
macho_file: *MachO,
object_id: u32,
sym_index: u32,
size: u64,
alignment: u32,
code: ?[]const u8,
relocs: []const macho.relocation_info,
nlists: []const NlistWithIndex,
indexes: []const SymbolAtIndex,
match: MatchingSection,
sect: macho.section_64,
macho_file: *MachO,
) !void {
const sym = macho_file.locals.items[local_sym_index];
const align_pow_2 = try math.powi(u32, 2, alignment);
const aligned_size = mem.alignForwardGeneric(u64, size, align_pow_2);
const atom = try macho_file.createEmptyAtom(local_sym_index, aligned_size, alignment);
) !*Atom {
const gpa = macho_file.base.allocator;
const sym = &self.symtab.items[sym_index];
const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment);
atom.file = object_id;
sym.n_sect = macho_file.getSectionOrdinal(match);
try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom);
try self.managed_atoms.append(gpa, atom);
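// The atom is owned by this object file from now on: kept alive in
// managed_atoms and reachable by symbol index during relocation.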
if (code) |cc| {
assert(size == cc.len);
mem.copy(u8, atom.code.items, cc);
}
const base_offset = sym.n_value - sect.addr;
const filtered_relocs = filterRelocs(relocs, base_offset, base_offset + size);
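// Keep only the relocations whose offsets land within this atom's slice of
// the section, i.e. [base_offset, base_offset + size).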
try atom.parseRelocs(filtered_relocs, .{
.macho_file = macho_file,
.base_addr = sect.addr,
.base_offset = @intCast(i32, base_offset),
.allocator = allocator,
.object = self,
.macho_file = macho_file,
});
if (macho_file.has_dices) {
const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size);
try atom.dices.ensureTotalCapacity(allocator, dices.len);
try atom.dices.ensureTotalCapacity(gpa, dices.len);
for (dices) |dice| {
atom.dices.appendAssumeCapacity(.{
@@ -665,19 +623,14 @@ fn parseIntoAtom(
// the filtered symbols and note which symbol is contained within so that
// we can properly allocate addresses down the line.
// While we're at it, we need to update the segment,section mapping of each symbol too.
try atom.contained.ensureTotalCapacity(allocator, nlists.len);
for (nlists) |nlist_with_index| {
const nlist = nlist_with_index.nlist;
const sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
const this_sym = &macho_file.locals.items[sym_index];
this_sym.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1);
{
const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
// TODO there has to be a better way to handle this.
for (di.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (nlist.n_value >= range.start and nlist.n_value < range.end) {
if (sym.n_value >= range.start and sym.n_value < range.end) {
break :blk Atom.Stab{
.function = range.end - range.start,
};
@@ -690,12 +643,39 @@ fn parseIntoAtom(
} else null;
atom.contained.appendAssumeCapacity(.{
.local_sym_index = sym_index,
.offset = nlist.n_value - sym.n_value,
.sym_index = sym_index,
.offset = 0,
.stab = stab,
});
}
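// Symbols aliasing into this atom are recorded below with offsets relative
// to the atom's start symbol.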
for (indexes) |inner_sym_index| {
const inner_sym = &self.symtab.items[inner_sym_index.index];
inner_sym.n_sect = macho_file.getSectionOrdinal(match);
const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
// TODO there has to be a better way to handle this.
for (di.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (inner_sym.n_value >= range.start and inner_sym.n_value < range.end) {
break :blk Atom.Stab{
.function = range.end - range.start,
};
}
}
}
// TODO
// if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
break :blk .static;
} else null;
atom.contained.appendAssumeCapacity(.{
.sym_index = inner_sym_index.index,
.offset = inner_sym.n_value - sym.n_value,
.stab = stab,
});
try macho_file.atom_by_index_table.putNoClobber(allocator, sym_index, atom);
try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom);
}
const is_gc_root = blk: {
@@ -714,30 +694,28 @@ fn parseIntoAtom(
}
};
if (is_gc_root) {
try macho_file.gc_roots.putNoClobber(allocator, atom, {});
try macho_file.gc_roots.putNoClobber(gpa, atom, {});
}
if (!self.start_atoms.contains(match)) {
try self.start_atoms.putNoClobber(allocator, match, atom);
}
if (self.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try self.end_atoms.putNoClobber(allocator, match, atom);
}
try self.contained_atoms.append(allocator, atom);
return atom;
}
fn parseSymtab(self: *Object) void {
fn parseSymtab(self: *Object, allocator: Allocator) !void {
const index = self.symtab_cmd_index orelse return;
const symtab = self.load_commands.items[index].symtab;
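// Copy the source symtab into an object-owned, mutable table; symbol
// resolution will later patch section ordinals and addresses in place.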
try self.symtab.appendSlice(allocator, self.getSourceSymtab());
self.strtab = self.contents[symtab.stroff..][0..symtab.strsize];
}
fn getSourceSymtab(self: *Object) []const macho.nlist_64 {
const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{};
const symtab = self.load_commands.items[index].symtab;
const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms;
const raw_symtab = self.contents[symtab.symoff..][0..symtab_size];
self.symtab = mem.bytesAsSlice(macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), raw_symtab));
self.strtab = self.contents[symtab.stroff..][0..symtab.strsize];
return mem.bytesAsSlice(
macho.nlist_64,
@alignCast(@alignOf(macho.nlist_64), raw_symtab),
);
}
fn parseDebugInfo(self: *Object, allocator: Allocator) !void {
@@ -783,8 +761,7 @@ fn parseDataInCode(self: *Object) void {
}
fn getSectionContents(self: Object, sect_id: u16) []const u8 {
const seg = self.load_commands.items[self.segment_cmd_index.?].segment;
const sect = seg.sections.items[sect_id];
const sect = self.getSection(sect_id);
log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{
sect.segName(),
sect.sectName(),
@@ -798,3 +775,9 @@ pub fn getString(self: Object, off: u32) []const u8 {
assert(off < self.strtab.len);
return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0);
}
pub fn getSection(self: Object, n_sect: u16) macho.section_64 {
const seg = self.load_commands.items[self.segment_cmd_index.?].segment;
assert(n_sect < seg.sections.items.len);
return seg.sections.items[n_sect];
}

src/link/strtab.zig Normal file

@@ -0,0 +1,113 @@
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type {
return struct {
const Self = @This();
const log = std.log.scoped(log_scope);
buffer: std.ArrayListUnmanaged(u8) = .{},
table: std.HashMapUnmanaged(u32, bool, StringIndexContext, std.hash_map.default_max_load_percentage) = .{},
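// Maps string offset -> keep flag: delete() only marks an entry dead, and
// toPrunedResult() drops dead entries when rebuilding the buffer.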
pub fn deinit(self: *Self, gpa: Allocator) void {
self.buffer.deinit(gpa);
self.table.deinit(gpa);
}
pub fn toOwnedSlice(self: *Self, gpa: Allocator) []const u8 {
const result = self.buffer.toOwnedSlice(gpa);
self.table.clearRetainingCapacity();
return result;
}
pub const PrunedResult = struct {
buffer: []const u8,
idx_map: std.AutoHashMap(u32, u32),
};
pub fn toPrunedResult(self: *Self, gpa: Allocator) !PrunedResult {
var buffer = std.ArrayList(u8).init(gpa);
defer buffer.deinit();
// +1 for the leading null byte: if nothing was deleted, the kept strings plus
// their sentinels exactly fill the original length.
try buffer.ensureTotalCapacity(self.buffer.items.len + 1);
buffer.appendAssumeCapacity(0);
var idx_map = std.AutoHashMap(u32, u32).init(gpa);
errdefer idx_map.deinit();
try idx_map.ensureTotalCapacity(self.table.count());
var it = self.table.iterator();
while (it.next()) |entry| {
const off = entry.key_ptr.*;
const save = entry.value_ptr.*;
if (!save) continue;
const new_off = @intCast(u32, buffer.items.len);
buffer.appendSliceAssumeCapacity(self.getAssumeExists(off));
// Re-append the null terminator; getAssumeExists returns the string
// without its sentinel, and lookups rely on it being present.
buffer.appendAssumeCapacity(0);
idx_map.putAssumeCapacityNoClobber(off, new_off);
}
self.buffer.clearRetainingCapacity();
self.table.clearRetainingCapacity();
return PrunedResult{
.buffer = buffer.toOwnedSlice(),
.idx_map = idx_map,
};
}
pub fn insert(self: *Self, gpa: Allocator, string: []const u8) !u32 {
const gop = try self.table.getOrPutContextAdapted(gpa, @as([]const u8, string), StringIndexAdapter{
.bytes = &self.buffer,
}, StringIndexContext{
.bytes = &self.buffer,
});
if (gop.found_existing) {
const off = gop.key_ptr.*;
gop.value_ptr.* = true;
log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
return off;
}
try self.buffer.ensureUnusedCapacity(gpa, string.len + 1);
const new_off = @intCast(u32, self.buffer.items.len);
log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off });
self.buffer.appendSliceAssumeCapacity(string);
self.buffer.appendAssumeCapacity(0);
gop.key_ptr.* = new_off;
gop.value_ptr.* = true;
return new_off;
}
pub fn delete(self: *Self, string: []const u8) void {
const value_ptr = self.table.getPtrAdapted(@as([]const u8, string), StringIndexAdapter{
.bytes = &self.buffer,
}) orelse return;
value_ptr.* = false;
log.debug("marked '{s}' for deletion", .{string});
}
pub fn getOffset(self: *Self, string: []const u8) ?u32 {
return self.table.getKeyAdapted(string, StringIndexAdapter{
.bytes = &self.buffer,
});
}
pub fn get(self: Self, off: u32) ?[]const u8 {
log.debug("getting string at 0x{x}", .{off});
if (off >= self.buffer.items.len) return null;
return mem.sliceTo(@ptrCast([*:0]const u8, self.buffer.items.ptr + off), 0);
}
pub fn getAssumeExists(self: Self, off: u32) []const u8 {
return self.get(off) orelse unreachable;
}
};
}
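// A minimal usage sketch (hypothetical test, not part of this commit): intern
// a string, look it up by content and by offset, and check that re-inserting
// reuses the existing offset.
test "StringTable round-trips interned strings" {
    const gpa = std.testing.allocator;
    var strtab = StringTable(.strtab){};
    defer strtab.deinit(gpa);

    const off = try strtab.insert(gpa, "_main");
    try std.testing.expectEqual(off, strtab.getOffset("_main").?);
    try std.testing.expectEqualStrings("_main", strtab.getAssumeExists(off));
    // Inserting the same string again returns the same offset.
    try std.testing.expectEqual(off, try strtab.insert(gpa, "_main"));
}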