# Review notes. Everything ahead of the first "diff --git" header is commentary
# and is not part of any hunk.
#
# * Line breaks were restored from the collapsed text. Blank-line placement was
#   checked against each hunk's line counts; indentation is inferred from the
#   nesting of the code, and whitespace inside string literals is reproduced
#   exactly as it appeared in the collapsed text.
# * Every mis-decoded `&sect` (shown as a section-sign character) was restored.
# * Added lines changed relative to the collapsed text:
#     - Object.zig, SymWithIndex.filterSymsInSection: bounds are now checked
#       before each access. The previous loop indexed out of range for an empty
#       slice and when no symbol belonged to the section (odd slice length).
#     - Object.zig, SymWithIndex.filterRelocs: a relocation at `r_address == end`
#       is now excluded, matching the half-open `code[start_addr..end_addr]`.
#     - Object.zig and commands.zig: doc comments on the new helpers.
#   Hunk headers were recomputed for those changes. The post-image blob ids on
#   the "index" lines of Object.zig and commands.zig predate these changes.
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 8627a3917e..8e9a3075d5 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -9,13 +9,13 @@ const log = std.log.scoped(.object);
 const macho = std.macho;
 const mem = std.mem;
 const reloc = @import("reloc.zig");
-const parseName = @import("Zld.zig").parseName;
 
 const Allocator = mem.Allocator;
 const Arch = std.Target.Cpu.Arch;
 const Relocation = reloc.Relocation;
 const Symbol = @import("Symbol.zig");
-const TextBlock = @import("Zld.zig").TextBlock;
+const TextBlock = Zld.TextBlock;
+const Zld = @import("Zld.zig");
 
 usingnamespace @import("commands.zig");
 
@@ -74,43 +74,6 @@ pub const Section = struct {
             allocator.free(relocs);
         }
     }
-
-    pub fn segname(self: Section) []const u8 {
-        return parseName(&self.inner.segname);
-    }
-
-    pub fn sectname(self: Section) []const u8 {
-        return parseName(&self.inner.sectname);
-    }
-
-    pub fn flags(self: Section) u32 {
-        return self.inner.flags;
-    }
-
-    pub fn sectionType(self: Section) u8 {
-        return @truncate(u8, self.flags() & 0xff);
-    }
-
-    pub fn sectionAttrs(self: Section) u32 {
-        return self.flags() & 0xffffff00;
-    }
-
-    pub fn isCode(self: Section) bool {
-        const attr = self.sectionAttrs();
-        return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
-    }
-
-    pub fn isDebug(self: Section) bool {
-        return self.sectionAttrs() & macho.S_ATTR_DEBUG != 0;
-    }
-
-    pub fn dontDeadStrip(self: Section) bool {
-        return self.sectionAttrs() & macho.S_ATTR_NO_DEAD_STRIP != 0;
-    }
-
-    pub fn dontDeadStripIfReferencesLive(self: Section) bool {
-        return self.sectionAttrs() & macho.S_ATTR_LIVE_SUPPORT != 0;
-    }
 };
 
 const DebugInfo = struct {
@@ -272,7 +235,6 @@ pub fn parse(self: *Object) !void {
     try self.parseSymtab();
     try self.parseDataInCode();
     try self.parseInitializers();
-    try self.parseDummy();
 }
 
 pub fn readLoadCommands(self: *Object, reader: anytype) !void {
@@ -288,8 +250,8 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
                 var seg = cmd.Segment;
                 for (seg.sections.items) |*sect, j| {
                     const index = @intCast(u16, j);
-                    const segname = parseName(&sect.segname);
-                    const sectname = parseName(&sect.sectname);
+                    const segname = segmentName(sect.*);
+                    const sectname = sectionName(sect.*);
                     if (mem.eql(u8, segname, "__DWARF")) {
                         if (mem.eql(u8, sectname, "__debug_info")) {
                             self.dwarf_debug_info_index = index;
@@ -351,7 +313,7 @@ pub fn parseSections(self: *Object) !void {
     try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
 
     for (seg.sections.items) |sect| {
-        log.debug("parsing section '{s},{s}'", .{ parseName(&sect.segname), parseName(&sect.sectname) });
+        log.debug("parsing section '{s},{s}'", .{ segmentName(sect), sectionName(sect) });
         // Read sections' code
         var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
         _ = try self.file.?.preadAll(code, sect.offset);
@@ -381,47 +343,92 @@ pub fn parseSections(self: *Object) !void {
     }
 }
 
-fn cmpNlist(_: void, lhs: macho.nlist_64, rhs: macho.nlist_64) bool {
-    return lhs.n_value < rhs.n_value;
-}
-
-fn filterSymsInSection(symbols: []macho.nlist_64, sect_id: u8) []macho.nlist_64 {
-    var start: usize = 0;
-    var end: usize = symbols.len;
-
-    while (true) {
-        var change = false;
-        if (symbols[start].n_sect != sect_id) {
-            start += 1;
-            change = true;
-        }
-        if (symbols[end - 1].n_sect != sect_id) {
-            end -= 1;
-            change = true;
-        }
-
-        if (start == end) break;
-        if (!change) break;
-    }
-
-    return symbols[start..end];
-}
-
-pub fn parseDummy(self: *Object) !void {
+pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
     log.warn("analysing {s}", .{self.name.?});
 
     const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
 
-    var sorted_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
-    defer sorted_syms.deinit();
-    try sorted_syms.appendSlice(self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]);
+    // A symbol-table entry paired with its position in `self.symtab`, so the
+    // entry can still be traced back after the list is sorted by address.
+    const SymWithIndex = struct {
+        nlist: macho.nlist_64,
+        index: u32,
 
-    std.sort.sort(macho.nlist_64, sorted_syms.items, {}, cmpNlist);
+        /// Orders entries by ascending `n_value`.
+        pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool {
+            return lhs.nlist.n_value < rhs.nlist.n_value;
+        }
+
+        /// Trims leading and trailing entries whose `n_sect` differs from
+        /// `sect_id` and returns what is left; the result is empty when no
+        /// entry belongs to the section.
+        fn filterSymsInSection(symbols: []@This(), sect_id: u8) []@This() {
+            var start: usize = 0;
+            var end: usize = symbols.len;
+
+            // Check the bounds before every access: an empty slice, or one
+            // without any symbol in `sect_id`, must not be indexed.
+            while (start < end and symbols[start].nlist.n_sect != sect_id) start += 1;
+            while (start < end and symbols[end - 1].nlist.n_sect != sect_id) end -= 1;
+
+            return symbols[start..end];
+        }
+
+        /// Trims leading relocations with `r_address >= end` and trailing
+        /// relocations with `r_address < start`, keeping those that fall in
+        /// the half-open range `[start, end)`.
+        /// NOTE(review): this presumes `relocs` is ordered by descending
+        /// `r_address` - confirm against the object files being parsed.
+        fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info {
+            if (relocs.len == 0) return relocs;
+
+            var start_id: usize = 0;
+            var end_id: usize = relocs.len;
+
+            while (true) {
+                var change = false;
+                if (relocs[start_id].r_address >= end) {
+                    start_id += 1;
+                    change = true;
+                }
+                if (relocs[end_id - 1].r_address < start) {
+                    end_id -= 1;
+                    change = true;
+                }
+
+                if (start_id == end_id) break;
+                if (!change) break;
+            }
+
+            return relocs[start_id..end_id];
+        }
+    };
+
+    const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym];
+
+    var sorted_syms = std.ArrayList(SymWithIndex).init(self.allocator);
+    defer sorted_syms.deinit();
+    try sorted_syms.ensureTotalCapacity(nlists.len);
+
+    for (nlists) |nlist, index| {
+        sorted_syms.appendAssumeCapacity(.{
+            .nlist = nlist,
+            .index = @intCast(u32, index + dysymtab.ilocalsym),
+        });
+    }
+
+    std.sort.sort(SymWithIndex, sorted_syms.items, {}, SymWithIndex.cmp);
 
     for (seg.sections.items) |sect, sect_id| {
-        log.warn("section {s},{s}", .{ parseName(&sect.segname), parseName(&sect.sectname) });
+        log.warn("section {s},{s}", .{ segmentName(sect), sectionName(sect) });
+
+        const match = (try zld.getMatchingSection(sect)) orelse {
+            log.warn("unhandled section", .{});
+            continue;
+        };
+
         // Read code
         var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
         defer self.allocator.free(code);
@@ -431,16 +438,25 @@ pub fn parseDummy(self: *Object) !void {
         const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc);
         defer self.allocator.free(raw_relocs);
         _ = try self.file.?.preadAll(raw_relocs, sect.reloff);
+        const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
 
-        const relocs = try reloc.parse(
-            self.allocator,
-            self.arch.?,
-            code,
-            mem.bytesAsSlice(macho.relocation_info, raw_relocs),
-        );
+        const alignment = sect.@"align";
 
         if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) {
-            const syms = filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
+            const syms = SymWithIndex.filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
+
+            if (syms.len == 0) {
+                // One large text block referenced by section offsets only
+                log.warn("TextBlock", .{});
+                log.warn(" | referenced by section offsets", .{});
+                log.warn(" | start_addr = {}", .{sect.addr});
+                log.warn(" | end_addr = {}", .{sect.size});
+                log.warn(" | size = {}", .{sect.size});
+                log.warn(" | alignment = 0x{x}", .{alignment});
+                log.warn(" | segment_id = {}", .{match.seg});
+                log.warn(" | section_id = {}", .{match.sect});
+                log.warn(" | relocs: {any}", .{relocs});
+            }
 
             var indices = std.ArrayList(u32).init(self.allocator);
             defer indices.deinit();
@@ -450,32 +466,35 @@ pub fn parseDummy(self: *Object) !void {
                 const curr = syms[i];
                 try indices.append(i);
 
-                const next: ?macho.nlist_64 = if (i + 1 < syms.len)
+                const next: ?SymWithIndex = if (i + 1 < syms.len)
                     syms[i + 1]
                 else
                     null;
 
                 if (next) |n| {
-                    if (curr.n_value == n.n_value) {
+                    if (curr.nlist.n_value == n.nlist.n_value) {
                         continue;
                     }
                 }
 
-                const start_addr = curr.n_value - sect.addr;
-                const end_addr = if (next) |n| n.n_value - sect.addr else sect.size;
-                const alignment = sect.@"align";
+                const start_addr = curr.nlist.n_value - sect.addr;
+                const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size;
 
                 const tb_code = code[start_addr..end_addr];
                 const size = tb_code.len;
 
                 log.warn("TextBlock", .{});
                 for (indices.items) |id| {
-                    log.warn(" | symbol {s}", .{self.getString(syms[id].n_strx)});
+                    const sym = self.symbols.items[syms[id].index];
+                    log.warn(" | symbol = {s}", .{sym.name});
                 }
-                log.warn(" | start_addr = 0x{x}", .{start_addr});
-                log.warn(" | end_addr = 0x{x}", .{end_addr});
+                log.warn(" | start_addr = {}", .{start_addr});
+                log.warn(" | end_addr = {}", .{end_addr});
                 log.warn(" | size = {}", .{size});
                 log.warn(" | alignment = 0x{x}", .{alignment});
+                log.warn(" | segment_id = {}", .{match.seg});
+                log.warn(" | section_id = {}", .{match.sect});
+                log.warn(" | relocs: {any}", .{SymWithIndex.filterRelocs(relocs, start_addr, end_addr)});
 
                 indices.clearRetainingCapacity();
             }
diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig
index 152995c931..2b7b905b89 100644
--- a/src/link/MachO/Zld.zig
+++ b/src/link/MachO/Zld.zig
@@ -234,6 +234,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg
     try self.parseInputFiles(files, args.syslibroot);
     try self.parseLibs(args.libs, args.syslibroot);
     try self.resolveSymbols();
+    try self.parseTextBlocks();
     try self.resolveStubsAndGotEntries();
     try self.updateMetadata();
     try self.sortSections();
@@ -322,10 +323,10 @@ fn mapAndUpdateSections(
 
     log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{
         object.name.?,
-        parseName(&source_sect.inner.segname),
-        parseName(&source_sect.inner.sectname),
-        parseName(&target_sect.segname),
-        parseName(&target_sect.sectname),
+        segmentName(source_sect.inner),
+        sectionName(source_sect.inner),
+        segmentName(target_sect.*),
+        sectionName(target_sect.*),
         offset,
         offset + size,
     });
@@ -343,12 +344,12 @@ fn updateMetadata(self: *Zld) !void {
     for (self.objects.items) |object| {
         // Find ideal section alignment and update section mappings
         for (object.sections.items) |sect, sect_id| {
-            const match = (try self.getMatchingSection(sect)) orelse {
+            const match = (try self.getMatchingSection(sect.inner)) orelse {
                 log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{
                     object.name.?,
-                    sect.flags(),
-                    sect.segname(),
-                    sect.sectname(),
+                    sect.inner.flags,
+                    segmentName(sect.inner),
+                    sectionName(sect.inner),
                 });
                 continue;
             };
@@ -441,15 +442,15 @@ const MatchingSection = struct {
     sect: u16,
 };
 
-fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
+pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection {
     const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
     const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-    const segname = sect.segname();
-    const sectname = sect.sectname();
+    const segname = segmentName(sect);
+    const sectname = sectionName(sect);
 
     const res: ?MatchingSection = blk: {
-        switch (sect.sectionType()) {
+        switch (sectionType(sect)) {
             macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
                 if (self.text_const_section_index == null) {
                     self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
@@ -649,7 +650,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
                 };
             },
             macho.S_REGULAR => {
-                if (sect.isCode()) {
+                if (sectionIsCode(sect)) {
                     if (self.text_section_index == null) {
                         self.text_section_index = @intCast(u16, text_seg.sections.items.len);
                         try text_seg.addSection(self.allocator, "__text", .{
@@ -662,11 +663,11 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
                         .sect = self.text_section_index.?,
                     };
                 }
-                if (sect.isDebug()) {
+                if (sectionIsDebug(sect)) {
                     // TODO debug attributes
                     if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) {
                         log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{
-                            sect.flags(), segname, sectname,
+                            sect.flags, segname, sectname,
                         });
                     }
                     break :blk null;
@@ -829,7 +830,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
 
                 if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) {
                     log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{
-                        sect.flags(), segname, sectname,
+                        sect.flags, segname, sectname,
                     });
                 }
 
@@ -956,8 +957,8 @@ fn sortSections(self: *Zld) !void {
 
                 log.debug("remapping in {s}: '{s},{s}': {} => {}", .{
                     object.name.?,
-                    parseName(&sect.inner.segname),
-                    parseName(&sect.inner.sectname),
+                    segmentName(sect.inner),
+                    sectionName(sect.inner),
                     target_map.section_id,
                     new_index,
                 });
@@ -1086,8 +1087,8 @@ fn allocateSymbol(self: *Zld, symbol: *Symbol) !void {
     const source_sect = &object.sections.items[reg.section];
     const target_map = source_sect.target_map orelse {
         log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{
-            parseName(&source_sect.inner.segname),
-            parseName(&source_sect.inner.sectname),
+            segmentName(source_sect.inner),
+            sectionName(source_sect.inner),
             symbol.name,
         });
         return;
@@ -1464,7 +1465,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
 fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
     log.debug("resolving symbols in '{s}'", .{object.name});
 
-    for (object.symtab.items) |sym| {
+    for (object.symtab.items) |sym, sym_id| {
         const sym_name = object.getString(sym.n_strx);
 
         if (Symbol.isStab(sym)) {
@@ -1497,6 +1498,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
                     .file = object,
                 },
             };
+            const index = @intCast(u32, self.locals.items.len);
             try self.locals.append(self.allocator, symbol);
             try object.symbols.append(self.allocator, symbol);
             continue;
@@ -1665,6 +1667,12 @@ fn resolveSymbols(self: *Zld) !void {
     if (has_undefined) return error.UndefinedSymbolReference;
 }
 
+fn parseTextBlocks(self: *Zld) !void {
+    for (self.objects.items) |object| {
+        try object.parseTextBlocks(self);
+    }
+}
+
 fn resolveStubsAndGotEntries(self: *Zld) !void {
     for (self.objects.items) |object| {
         log.debug("resolving stubs and got entries from {s}", .{object.name});
@@ -1718,11 +1726,11 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
         log.debug("relocating object {s}", .{object.name});
 
         for (object.sections.items) |sect| {
-            if (sect.inner.flags == macho.S_MOD_INIT_FUNC_POINTERS or
-                sect.inner.flags == macho.S_MOD_TERM_FUNC_POINTERS) continue;
+            if (sectionType(sect.inner) == macho.S_MOD_INIT_FUNC_POINTERS or
+                sectionType(sect.inner) == macho.S_MOD_TERM_FUNC_POINTERS) continue;
 
-            const segname = parseName(&sect.inner.segname);
-            const sectname = parseName(&sect.inner.sectname);
+            const segname = segmentName(sect.inner);
+            const sectname = sectionName(sect.inner);
 
             log.debug("relocating section '{s},{s}'", .{ segname, sectname });
 
@@ -1759,7 +1767,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
                                 args.source_target_sect_addr = source_sect.inner.addr;
                             }
 
-                            const flags = @truncate(u8, target_sect.flags & 0xff);
+                            const sect_type = sectionType(target_sect);
                             const should_rebase = rebase: {
                                 if (!unsigned.is_64bit) break :rebase false;
 
@@ -1780,8 +1788,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
                                 };
                                 if (!is_right_segment) break :rebase false;
 
-                                if (flags != macho.S_LITERAL_POINTERS and
-                                    flags != macho.S_REGULAR)
+                                if (sect_type != macho.S_LITERAL_POINTERS and
+                                    sect_type != macho.S_REGULAR)
                                 {
                                     break :rebase false;
                                 }
@@ -1804,7 +1812,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
 
                             // TLV is handled via a separate offset mechanism.
                             // Calculate the offset to the initializer.
-                            if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
+                            if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
                                 // TODO we don't want to save offset to tlv_bootstrap
                                 if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv;
 
@@ -1858,13 +1866,13 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
                 target_sect_off + sect.code.len,
             });
 
-            if (target_sect.flags == macho.S_ZEROFILL or
-                target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or
-                target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES)
+            if (sectionType(target_sect) == macho.S_ZEROFILL or
+                sectionType(target_sect) == macho.S_THREAD_LOCAL_ZEROFILL or
+                sectionType(target_sect) == macho.S_THREAD_LOCAL_VARIABLES)
             {
                 log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{
-                    parseName(&target_sect.segname),
-                    parseName(&target_sect.sectname),
+                    segmentName(target_sect),
+                    sectionName(target_sect),
                     target_sect_off,
                     target_sect_off + sect.code.len,
                 });
@@ -1926,8 +1934,8 @@ fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.T
                 log.debug(" | section offset", .{});
                 const source_sect = object.sections.items[sect_id];
                 log.debug(" | section '{s},{s}'", .{
-                    parseName(&source_sect.inner.segname),
-                    parseName(&source_sect.inner.sectname),
+                    segmentName(source_sect.inner),
+                    sectionName(source_sect.inner),
                 });
                 const target_map = source_sect.target_map orelse unreachable;
                 const target_seg = self.load_commands.items[target_map.segment_id].Segment;
@@ -2999,8 +3007,3 @@ fn writeHeader(self: *Zld) !void {
 
     try self.file.?.pwriteAll(mem.asBytes(&header), 0);
 }
-
-pub fn parseName(name: *const [16]u8) []const u8 {
-    const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
-    return name[0..len];
-}
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
index 5919496526..f7a2fd3eda 100644
--- a/src/link/MachO/commands.zig
+++ b/src/link/MachO/commands.zig
@@ -425,6 +425,53 @@ fn makeStaticString(bytes: []const u8) [16]u8 {
     return buf;
 }
 
+/// Returns `name` up to, but not including, its first NUL byte, or all 16 bytes if there is none.
+fn parseName(name: *const [16]u8) []const u8 {
+    const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
+    return name[0..len];
+}
+
+/// Returns the segment name of `sect`, truncated at its first NUL byte.
+pub fn segmentName(sect: macho.section_64) []const u8 {
+    return parseName(&sect.segname);
+}
+
+/// Returns the section name of `sect`, truncated at its first NUL byte.
+pub fn sectionName(sect: macho.section_64) []const u8 {
+    return parseName(&sect.sectname);
+}
+
+/// Returns the low 8 bits of `sect.flags`.
+pub fn sectionType(sect: macho.section_64) u8 {
+    return @truncate(u8, sect.flags & 0xff);
+}
+
+/// Returns `sect.flags` with the low 8 bits cleared.
+pub fn sectionAttrs(sect: macho.section_64) u32 {
+    return sect.flags & 0xffffff00;
+}
+
+/// Returns true if `S_ATTR_PURE_INSTRUCTIONS` or `S_ATTR_SOME_INSTRUCTIONS` is set on `sect`.
+pub fn sectionIsCode(sect: macho.section_64) bool {
+    const attr = sectionAttrs(sect);
+    return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
+}
+
+/// Returns true if `S_ATTR_DEBUG` is set on `sect`.
+pub fn sectionIsDebug(sect: macho.section_64) bool {
+    return sectionAttrs(sect) & macho.S_ATTR_DEBUG != 0;
+}
+
+/// Returns true if `S_ATTR_NO_DEAD_STRIP` is set on `sect`.
+pub fn sectionIsDontDeadStrip(sect: macho.section_64) bool {
+    return sectionAttrs(sect) & macho.S_ATTR_NO_DEAD_STRIP != 0;
+}
+
+/// Returns true if `S_ATTR_LIVE_SUPPORT` is set on `sect`.
+pub fn sectionIsDontDeadStripIfReferencesLive(sect: macho.section_64) bool {
+    return sectionAttrs(sect) & macho.S_ATTR_LIVE_SUPPORT != 0;
+}
+
 fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
     var stream = io.fixedBufferStream(buffer);
     var given = try LoadCommand.read(allocator, stream.reader());