zld: put relocs in a TextBlock

This commit is contained in:
Jakub Konka
2021-07-04 12:56:14 +02:00
parent 453c16d8ac
commit 5b3c4691e6
3 changed files with 190 additions and 131 deletions

View File

@@ -9,13 +9,13 @@ const log = std.log.scoped(.object);
const macho = std.macho;
const mem = std.mem;
const reloc = @import("reloc.zig");
const parseName = @import("Zld.zig").parseName;
const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
const Relocation = reloc.Relocation;
const Symbol = @import("Symbol.zig");
const TextBlock = @import("Zld.zig").TextBlock;
const TextBlock = Zld.TextBlock;
const Zld = @import("Zld.zig");
usingnamespace @import("commands.zig");
@@ -74,43 +74,6 @@ pub const Section = struct {
allocator.free(relocs);
}
}
/// Returns the segment name of this section, i.e. the `segname` field of the
/// wrapped section header with everything from the first NUL byte on dropped.
///
/// NOTE(review): the returned slice borrows from the by-value `self`
/// parameter; confirm callers never keep it past the lifetime of the argument.
pub fn segname(self: Section) []const u8 {
    const raw_name = &self.inner.segname;
    return parseName(raw_name);
}
/// Returns the section name of this section, i.e. the `sectname` field of the
/// wrapped section header with everything from the first NUL byte on dropped.
///
/// NOTE(review): the returned slice borrows from the by-value `self`
/// parameter; confirm callers never keep it past the lifetime of the argument.
pub fn sectname(self: Section) []const u8 {
    const raw_name = &self.inner.sectname;
    return parseName(raw_name);
}
/// Returns the raw, unmasked `flags` word of the wrapped section header.
pub fn flags(self: Section) u32 {
    const header = self.inner;
    return header.flags;
}
/// Returns the low 8 bits of the section's `flags` word.
pub fn sectionType(self: Section) u8 {
    const all_flags = self.flags();
    // Truncation to u8 keeps exactly the bits that `& 0xff` would select.
    return @truncate(u8, all_flags);
}
/// Returns the upper 24 bits of the section's `flags` word, with the low
/// 8 bits cleared.
pub fn sectionAttrs(self: Section) u32 {
    const type_mask: u32 = 0xff;
    return self.flags() & ~type_mask;
}
/// Reports whether the section carries either of the instruction attributes
/// (`S_ATTR_PURE_INSTRUCTIONS` or `S_ATTR_SOME_INSTRUCTIONS`).
pub fn isCode(self: Section) bool {
    const attrs = self.sectionAttrs();
    if (attrs & macho.S_ATTR_PURE_INSTRUCTIONS != 0) return true;
    return attrs & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
}
/// Reports whether the `S_ATTR_DEBUG` attribute bit is set on the section.
pub fn isDebug(self: Section) bool {
    const attrs = self.sectionAttrs();
    return (attrs & macho.S_ATTR_DEBUG) != 0;
}
/// Reports whether the `S_ATTR_NO_DEAD_STRIP` attribute bit is set on the
/// section.
pub fn dontDeadStrip(self: Section) bool {
    const attrs = self.sectionAttrs();
    return (attrs & macho.S_ATTR_NO_DEAD_STRIP) != 0;
}
/// Reports whether the `S_ATTR_LIVE_SUPPORT` attribute bit is set on the
/// section.
pub fn dontDeadStripIfReferencesLive(self: Section) bool {
    const attrs = self.sectionAttrs();
    return (attrs & macho.S_ATTR_LIVE_SUPPORT) != 0;
}
};
const DebugInfo = struct {
@@ -272,7 +235,6 @@ pub fn parse(self: *Object) !void {
try self.parseSymtab();
try self.parseDataInCode();
try self.parseInitializers();
try self.parseDummy();
}
pub fn readLoadCommands(self: *Object, reader: anytype) !void {
@@ -288,8 +250,8 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
var seg = cmd.Segment;
for (seg.sections.items) |*sect, j| {
const index = @intCast(u16, j);
const segname = parseName(&sect.segname);
const sectname = parseName(&sect.sectname);
const segname = segmentName(sect.*);
const sectname = sectionName(sect.*);
if (mem.eql(u8, segname, "__DWARF")) {
if (mem.eql(u8, sectname, "__debug_info")) {
self.dwarf_debug_info_index = index;
@@ -351,7 +313,7 @@ pub fn parseSections(self: *Object) !void {
try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
for (seg.sections.items) |sect| {
log.debug("parsing section '{s},{s}'", .{ parseName(&sect.segname), parseName(&sect.sectname) });
log.debug("parsing section '{s},{s}'", .{ segmentName(sect), sectionName(sect) });
// Read sections' code
var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
_ = try self.file.?.preadAll(code, sect.offset);
@@ -381,47 +343,91 @@ pub fn parseSections(self: *Object) !void {
}
}
/// "Less than" predicate for sorting symbol table entries by ascending
/// `n_value`. The first parameter is the unused sort context.
fn cmpNlist(_: void, lhs: macho.nlist_64, rhs: macho.nlist_64) bool {
    return rhs.n_value > lhs.n_value;
}
/// Trims `symbols` from both ends until the first and the last remaining
/// entry both have `n_sect == sect_id`, and returns that sub-slice.
///
/// Entries in the middle of the returned range are not inspected, so the
/// result is only a pure "symbols of this section" view when those symbols
/// are contiguous in `symbols`.
///
/// Returns an empty slice when `symbols` is empty or when no entry belongs to
/// `sect_id`. Each end is trimmed by its own bounded loop so the cursors can
/// never cross or run past the slice.
fn filterSymsInSection(symbols: []macho.nlist_64, sect_id: u8) []macho.nlist_64 {
    // Skip leading entries that belong to other sections.
    var start: usize = 0;
    while (start < symbols.len and symbols[start].n_sect != sect_id) : (start += 1) {}

    // Drop trailing entries that belong to other sections, never going below `start`.
    var end: usize = symbols.len;
    while (end > start and symbols[end - 1].n_sect != sect_id) : (end -= 1) {}

    return symbols[start..end];
}
pub fn parseDummy(self: *Object) !void {
pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
log.warn("analysing {s}", .{self.name.?});
const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
var sorted_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
defer sorted_syms.deinit();
try sorted_syms.appendSlice(self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]);
const SymWithIndex = struct {
nlist: macho.nlist_64,
index: u32,
std.sort.sort(macho.nlist_64, sorted_syms.items, {}, cmpNlist);
pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool {
return lhs.nlist.n_value < rhs.nlist.n_value;
}
fn filterSymsInSection(symbols: []@This(), sect_id: u8) []@This() {
var start: usize = 0;
var end: usize = symbols.len;
while (true) {
var change = false;
if (symbols[start].nlist.n_sect != sect_id) {
start += 1;
change = true;
}
if (symbols[end - 1].nlist.n_sect != sect_id) {
end -= 1;
change = true;
}
if (start == end) break;
if (!change) break;
}
return symbols[start..end];
}
fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info {
if (relocs.len == 0) return relocs;
var start_id: usize = 0;
var end_id: usize = relocs.len;
while (true) {
var change = false;
if (relocs[start_id].r_address > end) {
start_id += 1;
change = true;
}
if (relocs[end_id - 1].r_address < start) {
end_id -= 1;
change = true;
}
if (start_id == end_id) break;
if (!change) break;
}
return relocs[start_id..end_id];
}
};
const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym];
var sorted_syms = std.ArrayList(SymWithIndex).init(self.allocator);
defer sorted_syms.deinit();
try sorted_syms.ensureTotalCapacity(nlists.len);
for (nlists) |nlist, index| {
sorted_syms.appendAssumeCapacity(.{
.nlist = nlist,
.index = @intCast(u32, index + dysymtab.ilocalsym),
});
}
std.sort.sort(SymWithIndex, sorted_syms.items, {}, SymWithIndex.cmp);
for (seg.sections.items) |sect, sect_id| {
log.warn("section {s},{s}", .{ parseName(&sect.segname), parseName(&sect.sectname) });
log.warn("section {s},{s}", .{ segmentName(sect), sectionName(sect) });
const match = (try zld.getMatchingSection(sect)) orelse {
log.warn("unhandled section", .{});
continue;
};
// Read code
var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
defer self.allocator.free(code);
@@ -431,16 +437,25 @@ pub fn parseDummy(self: *Object) !void {
const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc);
defer self.allocator.free(raw_relocs);
_ = try self.file.?.preadAll(raw_relocs, sect.reloff);
const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
const relocs = try reloc.parse(
self.allocator,
self.arch.?,
code,
mem.bytesAsSlice(macho.relocation_info, raw_relocs),
);
const alignment = sect.@"align";
if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) {
const syms = filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
const syms = SymWithIndex.filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
if (syms.len == 0) {
// One large text block referenced by section offsets only
log.warn("TextBlock", .{});
log.warn(" | referenced by section offsets", .{});
log.warn(" | start_addr = {}", .{sect.addr});
log.warn(" | end_addr = {}", .{sect.size});
log.warn(" | size = {}", .{sect.size});
log.warn(" | alignment = 0x{x}", .{alignment});
log.warn(" | segment_id = {}", .{match.seg});
log.warn(" | section_id = {}", .{match.sect});
log.warn(" | relocs: {any}", .{relocs});
}
var indices = std.ArrayList(u32).init(self.allocator);
defer indices.deinit();
@@ -450,32 +465,35 @@ pub fn parseDummy(self: *Object) !void {
const curr = syms[i];
try indices.append(i);
const next: ?macho.nlist_64 = if (i + 1 < syms.len)
const next: ?SymWithIndex = if (i + 1 < syms.len)
syms[i + 1]
else
null;
if (next) |n| {
if (curr.n_value == n.n_value) {
if (curr.nlist.n_value == n.nlist.n_value) {
continue;
}
}
const start_addr = curr.n_value - sect.addr;
const end_addr = if (next) |n| n.n_value - sect.addr else sect.size;
const alignment = sect.@"align";
const start_addr = curr.nlist.n_value - sect.addr;
const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size;
const tb_code = code[start_addr..end_addr];
const size = tb_code.len;
log.warn("TextBlock", .{});
for (indices.items) |id| {
log.warn(" | symbol {s}", .{self.getString(syms[id].n_strx)});
const sym = self.symbols.items[syms[id].index];
log.warn(" | symbol = {s}", .{sym.name});
}
log.warn(" | start_addr = 0x{x}", .{start_addr});
log.warn(" | end_addr = 0x{x}", .{end_addr});
log.warn(" | start_addr = {}", .{start_addr});
log.warn(" | end_addr = {}", .{end_addr});
log.warn(" | size = {}", .{size});
log.warn(" | alignment = 0x{x}", .{alignment});
log.warn(" | segment_id = {}", .{match.seg});
log.warn(" | section_id = {}", .{match.sect});
log.warn(" | relocs: {any}", .{SymWithIndex.filterRelocs(relocs, start_addr, end_addr)});
indices.clearRetainingCapacity();
}

View File

@@ -234,6 +234,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg
try self.parseInputFiles(files, args.syslibroot);
try self.parseLibs(args.libs, args.syslibroot);
try self.resolveSymbols();
try self.parseTextBlocks();
try self.resolveStubsAndGotEntries();
try self.updateMetadata();
try self.sortSections();
@@ -322,10 +323,10 @@ fn mapAndUpdateSections(
log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{
object.name.?,
parseName(&source_sect.inner.segname),
parseName(&source_sect.inner.sectname),
parseName(&target_sect.segname),
parseName(&target_sect.sectname),
segmentName(source_sect.inner),
sectionName(source_sect.inner),
segmentName(target_sect.*),
sectionName(target_sect.*),
offset,
offset + size,
});
@@ -343,12 +344,12 @@ fn updateMetadata(self: *Zld) !void {
for (self.objects.items) |object| {
// Find ideal section alignment and update section mappings
for (object.sections.items) |sect, sect_id| {
const match = (try self.getMatchingSection(sect)) orelse {
const match = (try self.getMatchingSection(sect.inner)) orelse {
log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{
object.name.?,
sect.flags(),
sect.segname(),
sect.sectname(),
sect.inner.flags,
segmentName(sect.inner),
sectionName(sect.inner),
});
continue;
};
@@ -441,15 +442,15 @@ const MatchingSection = struct {
sect: u16,
};
fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection {
const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
const segname = sect.segname();
const sectname = sect.sectname();
const segname = segmentName(sect);
const sectname = sectionName(sect);
const res: ?MatchingSection = blk: {
switch (sect.sectionType()) {
switch (sectionType(sect)) {
macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
if (self.text_const_section_index == null) {
self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
@@ -649,7 +650,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
};
},
macho.S_REGULAR => {
if (sect.isCode()) {
if (sectionIsCode(sect)) {
if (self.text_section_index == null) {
self.text_section_index = @intCast(u16, text_seg.sections.items.len);
try text_seg.addSection(self.allocator, "__text", .{
@@ -662,11 +663,11 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
.sect = self.text_section_index.?,
};
}
if (sect.isDebug()) {
if (sectionIsDebug(sect)) {
// TODO debug attributes
if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) {
log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{
sect.flags(), segname, sectname,
sect.flags, segname, sectname,
});
}
break :blk null;
@@ -829,7 +830,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) {
log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{
sect.flags(), segname, sectname,
sect.flags, segname, sectname,
});
}
@@ -956,8 +957,8 @@ fn sortSections(self: *Zld) !void {
log.debug("remapping in {s}: '{s},{s}': {} => {}", .{
object.name.?,
parseName(&sect.inner.segname),
parseName(&sect.inner.sectname),
segmentName(sect.inner),
sectionName(sect.inner),
target_map.section_id,
new_index,
});
@@ -1086,8 +1087,8 @@ fn allocateSymbol(self: *Zld, symbol: *Symbol) !void {
const source_sect = &object.sections.items[reg.section];
const target_map = source_sect.target_map orelse {
log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{
parseName(&source_sect.inner.segname),
parseName(&source_sect.inner.sectname),
segmentName(source_sect.inner),
sectionName(source_sect.inner),
symbol.name,
});
return;
@@ -1464,7 +1465,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
log.debug("resolving symbols in '{s}'", .{object.name});
for (object.symtab.items) |sym| {
for (object.symtab.items) |sym, sym_id| {
const sym_name = object.getString(sym.n_strx);
if (Symbol.isStab(sym)) {
@@ -1497,6 +1498,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
.file = object,
},
};
const index = @intCast(u32, self.locals.items.len);
try self.locals.append(self.allocator, symbol);
try object.symbols.append(self.allocator, symbol);
continue;
@@ -1665,6 +1667,12 @@ fn resolveSymbols(self: *Zld) !void {
if (has_undefined) return error.UndefinedSymbolReference;
}
/// Runs `parseTextBlocks` on every object in `self.objects`, in order,
/// passing this linker instance along. The first error aborts the loop and is
/// returned to the caller.
fn parseTextBlocks(self: *Zld) !void {
    const objects = self.objects.items;
    var i: usize = 0;
    while (i < objects.len) : (i += 1) {
        try objects[i].parseTextBlocks(self);
    }
}
fn resolveStubsAndGotEntries(self: *Zld) !void {
for (self.objects.items) |object| {
log.debug("resolving stubs and got entries from {s}", .{object.name});
@@ -1718,11 +1726,11 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
log.debug("relocating object {s}", .{object.name});
for (object.sections.items) |sect| {
if (sect.inner.flags == macho.S_MOD_INIT_FUNC_POINTERS or
sect.inner.flags == macho.S_MOD_TERM_FUNC_POINTERS) continue;
if (sectionType(sect.inner) == macho.S_MOD_INIT_FUNC_POINTERS or
sectionType(sect.inner) == macho.S_MOD_TERM_FUNC_POINTERS) continue;
const segname = parseName(&sect.inner.segname);
const sectname = parseName(&sect.inner.sectname);
const segname = segmentName(sect.inner);
const sectname = sectionName(sect.inner);
log.debug("relocating section '{s},{s}'", .{ segname, sectname });
@@ -1759,7 +1767,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
args.source_target_sect_addr = source_sect.inner.addr;
}
const flags = @truncate(u8, target_sect.flags & 0xff);
const sect_type = sectionType(target_sect);
const should_rebase = rebase: {
if (!unsigned.is_64bit) break :rebase false;
@@ -1780,8 +1788,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
};
if (!is_right_segment) break :rebase false;
if (flags != macho.S_LITERAL_POINTERS and
flags != macho.S_REGULAR)
if (sect_type != macho.S_LITERAL_POINTERS and
sect_type != macho.S_REGULAR)
{
break :rebase false;
}
@@ -1804,7 +1812,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
// TLV is handled via a separate offset mechanism.
// Calculate the offset to the initializer.
if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
// TODO we don't want to save offset to tlv_bootstrap
if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv;
@@ -1858,13 +1866,13 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
target_sect_off + sect.code.len,
});
if (target_sect.flags == macho.S_ZEROFILL or
target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or
target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES)
if (sectionType(target_sect) == macho.S_ZEROFILL or
sectionType(target_sect) == macho.S_THREAD_LOCAL_ZEROFILL or
sectionType(target_sect) == macho.S_THREAD_LOCAL_VARIABLES)
{
log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{
parseName(&target_sect.segname),
parseName(&target_sect.sectname),
segmentName(target_sect),
sectionName(target_sect),
target_sect_off,
target_sect_off + sect.code.len,
});
@@ -1926,8 +1934,8 @@ fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.T
log.debug(" | section offset", .{});
const source_sect = object.sections.items[sect_id];
log.debug(" | section '{s},{s}'", .{
parseName(&source_sect.inner.segname),
parseName(&source_sect.inner.sectname),
segmentName(source_sect.inner),
sectionName(source_sect.inner),
});
const target_map = source_sect.target_map orelse unreachable;
const target_seg = self.load_commands.items[target_map.segment_id].Segment;
@@ -2999,8 +3007,3 @@ fn writeHeader(self: *Zld) !void {
try self.file.?.pwriteAll(mem.asBytes(&header), 0);
}
/// Returns the portion of the fixed 16-byte `name` field that precedes the
/// first NUL byte, or all 16 bytes when the field contains no NUL.
/// The result borrows from `name`; nothing is copied.
pub fn parseName(name: *const [16]u8) []const u8 {
    var len: usize = 0;
    while (len < name.len and name[len] != 0) : (len += 1) {}
    return name[0..len];
}

View File

@@ -425,6 +425,44 @@ fn makeStaticString(bytes: []const u8) [16]u8 {
return buf;
}
/// Returns the portion of the fixed 16-byte `name` field that precedes the
/// first NUL byte, or all 16 bytes when the field contains no NUL.
/// The result borrows from `name`; nothing is copied.
fn parseName(name: *const [16]u8) []const u8 {
    var end: usize = 0;
    while (end < name.len) : (end += 1) {
        if (name[end] == 0) break;
    }
    return name[0..end];
}
/// Returns the segment name recorded in `sect`, i.e. its `segname` field with
/// everything from the first NUL byte on dropped.
///
/// NOTE(review): the returned slice borrows from the by-value `sect`
/// parameter; confirm callers never keep it past the lifetime of the argument.
pub fn segmentName(sect: macho.section_64) []const u8 {
    const raw_name = &sect.segname;
    return parseName(raw_name);
}
/// Returns the section name recorded in `sect`, i.e. its `sectname` field
/// with everything from the first NUL byte on dropped.
///
/// NOTE(review): the returned slice borrows from the by-value `sect`
/// parameter; confirm callers never keep it past the lifetime of the argument.
pub fn sectionName(sect: macho.section_64) []const u8 {
    const raw_name = &sect.sectname;
    return parseName(raw_name);
}
/// Returns the low 8 bits of `sect.flags`.
pub fn sectionType(sect: macho.section_64) u8 {
    const all_flags = sect.flags;
    // Truncation to u8 keeps exactly the bits that `& 0xff` would select.
    return @truncate(u8, all_flags);
}
/// Returns the upper 24 bits of `sect.flags`, with the low 8 bits cleared.
pub fn sectionAttrs(sect: macho.section_64) u32 {
    const type_mask: u32 = 0xff;
    return sect.flags & ~type_mask;
}
/// Reports whether `sect` carries either of the instruction attributes
/// (`S_ATTR_PURE_INSTRUCTIONS` or `S_ATTR_SOME_INSTRUCTIONS`).
pub fn sectionIsCode(sect: macho.section_64) bool {
    const attrs = sectionAttrs(sect);
    if (attrs & macho.S_ATTR_PURE_INSTRUCTIONS != 0) return true;
    return attrs & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
}
/// Reports whether the `S_ATTR_DEBUG` attribute bit is set on `sect`.
pub fn sectionIsDebug(sect: macho.section_64) bool {
    const attrs = sectionAttrs(sect);
    return (attrs & macho.S_ATTR_DEBUG) != 0;
}
/// Reports whether the `S_ATTR_NO_DEAD_STRIP` attribute bit is set on `sect`.
pub fn sectionIsDontDeadStrip(sect: macho.section_64) bool {
    const attrs = sectionAttrs(sect);
    return (attrs & macho.S_ATTR_NO_DEAD_STRIP) != 0;
}
/// Reports whether the `S_ATTR_LIVE_SUPPORT` attribute bit is set on `sect`.
pub fn sectionIsDontDeadStripIfReferencesLive(sect: macho.section_64) bool {
    const attrs = sectionAttrs(sect);
    return (attrs & macho.S_ATTR_LIVE_SUPPORT) != 0;
}
fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
var stream = io.fixedBufferStream(buffer);
var given = try LoadCommand.read(allocator, stream.reader());