zld: rewrite Object to include pointers to Symbols

This commit is contained in:
Jakub Konka
2021-05-02 23:40:08 +02:00
parent b6be28ddcc
commit 86ab6ca56c
5 changed files with 186 additions and 145 deletions

View File

@@ -43,17 +43,13 @@ dwarf_debug_str_index: ?u16 = null,
dwarf_debug_line_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
strtab: std.ArrayListUnmanaged(u8) = .{},
symbols: std.ArrayListUnmanaged(*Symbol) = .{},
initializers: std.ArrayListUnmanaged(*Symbol) = .{},
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
locals: std.StringArrayHashMapUnmanaged(Symbol) = .{},
stabs: std.ArrayListUnmanaged(Stab) = .{},
tu_path: ?[]const u8 = null,
tu_mtime: ?u64 = null,
initializers: std.ArrayListUnmanaged(CppStatic) = .{},
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
pub const Section = struct {
inner: macho.section_64,
code: []u8,
@@ -71,23 +67,6 @@ pub const Section = struct {
}
};
const CppStatic = struct {
symbol: u32,
target_addr: u64,
};
const Stab = struct {
tag: Tag,
symbol: u32,
size: ?u64 = null,
const Tag = enum {
function,
global,
static,
};
};
const DebugInfo = struct {
inner: dwarf.DwarfInfo,
debug_info: []u8,
@@ -169,14 +148,12 @@ pub fn deinit(self: *Object) void {
}
self.sections.deinit(self.allocator);
for (self.locals.items()) |*entry| {
entry.value.deinit(self.allocator);
for (self.symbols.items) |sym| {
sym.deinit(self.allocator);
self.allocator.destroy(sym);
}
self.locals.deinit(self.allocator);
self.symbols.deinit(self.allocator);
self.symtab.deinit(self.allocator);
self.strtab.deinit(self.allocator);
self.stabs.deinit(self.allocator);
self.data_in_code_entries.deinit(self.allocator);
self.initializers.deinit(self.allocator);
@@ -222,9 +199,9 @@ pub fn parse(self: *Object) !void {
}
try self.readLoadCommands(reader);
try self.parseSymbols();
try self.parseSections();
if (self.symtab_cmd_index != null) try self.parseSymtab();
if (self.data_in_code_cmd_index != null) try self.readDataInCode();
try self.parseDataInCode();
try self.parseInitializers();
try self.parseDebugInfo();
}
@@ -298,9 +275,10 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
}
pub fn parseSections(self: *Object) !void {
log.debug("parsing sections in {s}", .{self.name.?});
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
log.debug("parsing sections in {s}", .{self.name.?});
try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
for (seg.sections.items) |sect| {
@@ -327,6 +305,7 @@ pub fn parseSections(self: *Object) !void {
self.arch.?,
section.code,
mem.bytesAsSlice(macho.relocation_info, raw_relocs),
self.symbols.items,
);
}
@@ -344,60 +323,70 @@ pub fn parseInitializers(self: *Object) !void {
const relocs = section.relocs orelse unreachable;
try self.initializers.ensureCapacity(self.allocator, relocs.len);
for (relocs) |rel| {
self.initializers.appendAssumeCapacity(.{
.symbol = rel.target.symbol,
.target_addr = undefined,
});
self.initializers.appendAssumeCapacity(rel.target.symbol);
}
mem.reverse(CppStatic, self.initializers.items);
for (self.initializers.items) |initializer| {
const sym = self.symtab.items[initializer.symbol];
const sym_name = self.getString(sym.n_strx);
log.debug(" | {s}", .{sym_name});
}
mem.reverse(*Symbol, self.initializers.items);
}
pub fn parseSymtab(self: *Object) !void {
const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
pub fn parseSymbols(self: *Object) !void {
const index = self.symtab_cmd_index orelse return;
const symtab_cmd = self.load_commands.items[index].Symtab;
var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
defer self.allocator.free(symtab);
_ = try self.file.?.preadAll(symtab, symtab_cmd.symoff);
const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab));
try self.symtab.appendSlice(self.allocator, slice);
var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
defer self.allocator.free(strtab);
_ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
try self.strtab.appendSlice(self.allocator, strtab);
for (self.symtab.items) |sym, sym_id| {
if (Symbol.isStab(sym) or Symbol.isUndef(sym)) continue;
for (slice) |sym| {
if (Symbol.isStab(sym)) {
log.err("TODO handle stabs embedded within object files", .{});
return error.HandleStabsInObjects;
}
const sym_name = self.getString(sym.n_strx);
const tag: Symbol.Tag = tag: {
if (Symbol.isLocal(sym)) {
if (self.arch.? == .aarch64 and mem.startsWith(u8, sym_name, "l")) continue;
break :tag .local;
}
if (Symbol.isWeakDef(sym)) {
break :tag .weak;
}
break :tag .strong;
};
const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx));
const name = try self.allocator.dupe(u8, sym_name);
try self.locals.putNoClobber(self.allocator, name, .{
.tag = tag,
.name = name,
.address = 0,
.section = 0,
.index = @intCast(u32, sym_id),
});
// Build the heap-allocated symbol for this nlist entry: a `Symbol.Regular` when the entry
// is defined in a section, otherwise a `Symbol.Unresolved`.
const symbol: *Symbol = symbol: {
    // `name` was duplicated just above and is handed over to the symbol being built.
    // Release it if allocating the symbol fails, so the copy does not leak.
    errdefer self.allocator.free(name);

    if (Symbol.isSect(sym)) {
        // Classify visibility: non-external symbols are private to this translation unit;
        // weak definitions and private externs are visible within the linkage unit;
        // everything else is global.
        const linkage: Symbol.Regular.Linkage = linkage: {
            if (!Symbol.isExt(sym)) break :linkage .translation_unit;
            if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit;
            break :linkage .global;
        };
        const regular = try self.allocator.create(Symbol.Regular);
        errdefer self.allocator.destroy(regular);
        regular.* = .{
            .base = .{
                .@"type" = .regular,
                .name = name,
            },
            // Use the linkage computed above. It was previously hard-coded to
            // `.translation_unit`, which made every defined symbol look file-local
            // (and made `parseDebugInfo` always pick the `.static` stab kind).
            .linkage = linkage,
            .address = sym.n_value,
            // `n_sect` is 1-based in the nlist entry; store it 0-based.
            .section = sym.n_sect - 1,
            .weak_ref = Symbol.isWeakRef(sym),
            .file = self,
        };
        break :symbol &regular.base;
    }

    const undef = try self.allocator.create(Symbol.Unresolved);
    errdefer self.allocator.destroy(undef);
    undef.* = .{
        .base = .{
            .@"type" = .unresolved,
            .name = name,
        },
        .file = self,
    };
    break :symbol &undef.base;
};
try self.symbols.append(self.allocator, symbol);
}
}
@@ -429,38 +418,31 @@ pub fn parseDebugInfo(self: *Object) !void {
break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
};
for (self.locals.items()) |entry, index| {
const local = entry.value;
const source_sym = self.symtab.items[local.index.?];
const size = blk: for (debug_info.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
break :blk range.end - range.start;
for (self.symbols.items) |sym| {
if (sym.cast(Symbol.Regular)) |reg| {
const size: u64 = blk: for (debug_info.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (reg.address >= range.start and reg.address < range.end) {
break :blk range.end - range.start;
}
}
}
} else null;
const tag: Stab.Tag = tag: {
if (size != null) break :tag .function;
switch (local.tag) {
.weak, .strong => break :tag .global,
else => break :tag .static,
}
};
} else 0;
try self.stabs.append(self.allocator, .{
.tag = tag,
.size = size,
.symbol = @intCast(u32, index),
});
reg.stab = .{
.kind = kind: {
if (size > 0) break :kind .function;
switch (reg.linkage) {
.translation_unit => break :kind .static,
else => break :kind .global,
}
},
.size = size,
};
}
}
}
pub fn getString(self: *const Object, str_off: u32) []const u8 {
assert(str_off < self.strtab.items.len);
return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off));
}
pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
const sect = seg.sections.items[index];
var buffer = try allocator.alloc(u8, sect.size);
@@ -468,7 +450,7 @@ pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
return buffer;
}
pub fn readDataInCode(self: *Object) !void {
pub fn parseDataInCode(self: *Object) !void {
const index = self.data_in_code_cmd_index orelse return;
const data_in_code = self.load_commands.items[index].LinkeditData;

View File

@@ -2,31 +2,93 @@ const Symbol = @This();
const std = @import("std");
const macho = std.macho;
const mem = std.mem;
const Allocator = std.mem.Allocator;
const Allocator = mem.Allocator;
const Object = @import("Object.zig");
pub const Tag = enum {
local,
weak,
strong,
import,
undef,
pub const Type = enum {
regular,
proxy,
unresolved,
};
tag: Tag,
/// Symbol type.
@"type": Type,
/// Symbol name. Owned slice.
name: []u8,
address: u64,
section: u8,
/// Index of file where to locate this symbol.
/// Depending on context, this is either an object file, or a dylib.
file: ?u16 = null,
pub const Regular = struct {
base: Symbol,
/// Index of this symbol within the file's symbol table.
index: ?u32 = null,
/// Linkage type.
linkage: Linkage,
pub fn deinit(self: *Symbol, allocator: *Allocator) void {
allocator.free(self.name);
/// Symbol address.
address: u64,
/// Section ID where the symbol resides.
section: u8,
/// Whether the symbol is a weak ref.
weak_ref: bool,
/// File where to locate this symbol.
file: *Object,
/// Debug stab if defined.
stab: ?struct {
/// Stab kind
kind: enum {
function,
global,
static,
},
/// Size of the stab.
size: u64,
} = null,
pub const base_type: Symbol.Type = .regular;
pub const Linkage = enum {
translation_unit,
linkage_unit,
global,
};
};
/// Symbol variant tagged `.proxy`; it carries a dylib ordinal.
pub const Proxy = struct {
    /// Tag that `Symbol.cast` compares against `base.@"type"` to recognise this variant.
    pub const base_type: Symbol.Type = .proxy;

    /// Common symbol header (type tag and name).
    base: Symbol,

    /// Dylib ordinal.
    dylib: u16,
};
/// Symbol variant tagged `.unresolved`; created for symbol table entries that are not
/// defined in a section.
pub const Unresolved = struct {
    /// Tag that `Symbol.cast` compares against `base.@"type"` to recognise this variant.
    pub const base_type: Symbol.Type = .unresolved;

    /// Common symbol header (type tag and name).
    base: Symbol,

    /// Symbol this one is an alias of, if any.
    alias: ?*Symbol = null,

    /// Object file in which this symbol was referenced.
    file: *Object,
};
/// Frees the symbol's owned `name` slice with `allocator`.
/// The storage of the symbol itself is not released here.
pub fn deinit(base: *Symbol, allocator: *Allocator) void {
    const owned_name = base.name;
    allocator.free(owned_name);
}
/// Downcasts `base` to the concrete symbol variant `T`.
/// Returns a pointer to the `T` that embeds `base` as its `base` field when the type tag
/// equals `T.base_type`, and `null` otherwise.
pub fn cast(base: *Symbol, comptime T: type) ?*T {
    return if (base.@"type" == T.base_type)
        @fieldParentPtr(T, "base", base)
    else
        null;
}
pub fn isStab(sym: macho.nlist_64) bool {
@@ -55,17 +117,6 @@ pub fn isWeakDef(sym: macho.nlist_64) bool {
return (sym.n_desc & macho.N_WEAK_DEF) != 0;
}
/// Symbol is local if it is defined and not an extern.
pub fn isLocal(sym: macho.nlist_64) bool {
return isSect(sym) and !isExt(sym);
}
/// Symbol is global if it is defined and an extern.
pub fn isGlobal(sym: macho.nlist_64) bool {
return isSect(sym) and isExt(sym);
}
/// Symbol is undefined if it is not defined and an extern.
pub fn isUndef(sym: macho.nlist_64) bool {
return isUndf(sym) and isExt(sym);
/// Returns true when the `N_WEAK_REF` bit is set in the entry's `n_desc` field.
pub fn isWeakRef(sym: macho.nlist_64) bool {
    const weak_ref_bits = sym.n_desc & macho.N_WEAK_REF;
    return weak_ref_bits != 0;
}

View File

@@ -10,6 +10,7 @@ const aarch64 = @import("reloc/aarch64.zig");
const x86_64 = @import("reloc/x86_64.zig");
const Allocator = mem.Allocator;
const Symbol = @import("Symbol.zig");
pub const Relocation = struct {
@"type": Type,
@@ -75,12 +76,12 @@ pub const Relocation = struct {
};
pub const Target = union(enum) {
symbol: u32,
symbol: *Symbol,
section: u16,
pub fn from_reloc(reloc: macho.relocation_info) Target {
pub fn from_reloc(reloc: macho.relocation_info, symbols: []*Symbol) Target {
return if (reloc.r_extern == 1) .{
.symbol = reloc.r_symbolnum,
.symbol = symbols[reloc.r_symbolnum],
} else .{
.section = @intCast(u16, reloc.r_symbolnum - 1),
};
@@ -136,6 +137,7 @@ pub fn parse(
arch: std.Target.Cpu.Arch,
code: []u8,
relocs: []const macho.relocation_info,
symbols: []*Symbol,
) ![]*Relocation {
var it = RelocIterator{
.buffer = relocs,
@@ -148,6 +150,7 @@ pub fn parse(
.it = &it,
.code = code,
.parsed = std.ArrayList(*Relocation).init(allocator),
.symbols = symbols,
};
defer parser.deinit();
try parser.parse();
@@ -160,6 +163,7 @@ pub fn parse(
.it = &it,
.code = code,
.parsed = std.ArrayList(*Relocation).init(allocator),
.symbols = symbols,
};
defer parser.deinit();
try parser.parse();

View File

@@ -10,6 +10,7 @@ const reloc = @import("../reloc.zig");
const Allocator = mem.Allocator;
const Relocation = reloc.Relocation;
const Symbol = @import("../Symbol.zig");
pub const Branch = struct {
base: Relocation,
@@ -188,6 +189,7 @@ pub const Parser = struct {
it: *reloc.RelocIterator,
code: []u8,
parsed: std.ArrayList(*Relocation),
symbols: []*Symbol,
addend: ?u32 = null,
subtractor: ?Relocation.Target = null,
@@ -273,7 +275,7 @@ pub const Parser = struct {
var branch = try parser.allocator.create(Branch);
errdefer parser.allocator.destroy(branch);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
branch.* = .{
.base = .{
@@ -294,7 +296,7 @@ pub const Parser = struct {
assert(rel.r_length == 2);
const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
@@ -400,7 +402,7 @@ pub const Parser = struct {
aarch64.Instruction.load_store_register,
), inst) };
}
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
var page_off = try parser.allocator.create(PageOff);
errdefer parser.allocator.destroy(page_off);
@@ -437,7 +439,7 @@ pub const Parser = struct {
), inst);
assert(parsed_inst.size == 3);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
var page_off = try parser.allocator.create(GotPageOff);
errdefer parser.allocator.destroy(page_off);
@@ -496,7 +498,7 @@ pub const Parser = struct {
}
};
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
var page_off = try parser.allocator.create(TlvpPageOff);
errdefer parser.allocator.destroy(page_off);
@@ -531,7 +533,7 @@ pub const Parser = struct {
assert(rel.r_pcrel == 0);
assert(parser.subtractor == null);
parser.subtractor = Relocation.Target.from_reloc(rel);
parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols);
// Verify SUBTRACTOR is followed by UNSIGNED.
const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type);
@@ -554,7 +556,7 @@ pub const Parser = struct {
var unsigned = try parser.allocator.create(reloc.Unsigned);
errdefer parser.allocator.destroy(unsigned);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
const is_64bit: bool = switch (rel.r_length) {
3 => true,
2 => false,

View File

@@ -9,6 +9,7 @@ const reloc = @import("../reloc.zig");
const Allocator = mem.Allocator;
const Relocation = reloc.Relocation;
const Symbol = @import("../Symbol.zig");
pub const Branch = struct {
base: Relocation,
@@ -95,6 +96,7 @@ pub const Parser = struct {
it: *reloc.RelocIterator,
code: []u8,
parsed: std.ArrayList(*Relocation),
symbols: []*Symbol,
subtractor: ?Relocation.Target = null,
pub fn deinit(parser: *Parser) void {
@@ -145,7 +147,7 @@ pub const Parser = struct {
var branch = try parser.allocator.create(Branch);
errdefer parser.allocator.destroy(branch);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
branch.* = .{
.base = .{
@@ -165,7 +167,7 @@ pub const Parser = struct {
assert(rel.r_length == 2);
const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
const is_extern = rel.r_extern == 1;
const offset = @intCast(u32, rel.r_address);
@@ -211,7 +213,7 @@ pub const Parser = struct {
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
var got_load = try parser.allocator.create(GotLoad);
errdefer parser.allocator.destroy(got_load);
@@ -237,7 +239,7 @@ pub const Parser = struct {
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
var got = try parser.allocator.create(Got);
errdefer parser.allocator.destroy(got);
@@ -263,7 +265,7 @@ pub const Parser = struct {
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
var tlv = try parser.allocator.create(Tlv);
errdefer parser.allocator.destroy(tlv);
@@ -288,7 +290,7 @@ pub const Parser = struct {
assert(rel.r_pcrel == 0);
assert(parser.subtractor == null);
parser.subtractor = Relocation.Target.from_reloc(rel);
parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols);
// Verify SUBTRACTOR is followed by UNSIGNED.
const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type);
@@ -311,7 +313,7 @@ pub const Parser = struct {
var unsigned = try parser.allocator.create(reloc.Unsigned);
errdefer parser.allocator.destroy(unsigned);
const target = Relocation.Target.from_reloc(rel);
const target = Relocation.Target.from_reloc(rel, parser.symbols);
const is_64bit: bool = switch (rel.r_length) {
3 => true,
2 => false,