From 26af8d254af5277e36cc78e1ab99241dee199c37 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sat, 10 Sep 2022 13:47:05 +0200
Subject: [PATCH 01/17] macho: separate in structure and logic incremental
 relocs and file relocs

---
 src/link/MachO.zig            |  86 ++++++++++-
 src/link/MachO/Atom.zig       |  46 ++++++
 src/link/MachO/Relocation.zig | 277 ++++++++++++++++++++++++++++++++++
 3 files changed, 408 insertions(+), 1 deletion(-)
 create mode 100644 src/link/MachO/Relocation.zig

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index a6720f8dd3..b6195562f3 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -38,6 +38,8 @@ const LibStub = @import("tapi.zig").LibStub;
 const Liveness = @import("../Liveness.zig");
 const LlvmObject = @import("../codegen/llvm.zig").Object;
 const Module = @import("../Module.zig");
+const Relocation = @import("MachO/Relocation.zig");
+const RelocationTable = Relocation.Table;
 const StringTable = @import("strtab.zig").StringTable;
 const Trie = @import("MachO/Trie.zig");
 const Type = @import("../type.zig").Type;
@@ -193,6 +195,11 @@ atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{},
 /// with `Decl` `main`, and lives as long as that `Decl`.
 unnamed_const_atoms: UnnamedConstTable = .{},
 
+/// A table of relocations indexed by the `Atom` that owns them.
+/// Note that once we refactor `Atom`'s lifetime and ownership rules,
+/// this will be a table indexed by the atom's index in the list of Atoms.
+relocs: RelocationTable = .{},
+
 /// Table of Decls that are currently alive.
 /// We store them here so that we can properly dispose of any allocated
 /// memory within the atom in the incremental linker.
@@ -1854,11 +1861,80 @@ pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void {
     const section = self.sections.get(sect_id);
     const sym = atom.getSymbol(self);
     const file_offset = section.header.offset + sym.n_value - section.header.addr;
-    try atom.resolveRelocs(self);
     log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset });
     try self.base.file.?.pwriteAll(atom.code.items, file_offset);
 }
 
+// fn markRelocsDirtyByTarget(self: *MachO, target: SymbolWithLoc) void {
+//     // TODO: reverse-lookup might come in handy here
+//     var it = self.relocs.valueIterator();
+//     while (it.next()) |relocs| {
+//         for (relocs.items) |*reloc| {
+//             if (!reloc.target.eql(target)) continue;
+//             reloc.dirty = true;
+//         }
+//     }
+// }
+
+// fn markRelocsDirtyByAddress(self: *MachO, addr: u32) void {
+//     var it = self.relocs.valueIterator();
+//     while (it.next()) |relocs| {
+//         for (relocs.items) |*reloc| {
+//             const target_atom = reloc.getTargetAtom(self) orelse continue;
+//             const target_sym = target_atom.getSymbol(self);
+//             if (target_sym.n_value < addr) continue;
+//             reloc.dirty = true;
+//         }
+//     }
+// }
+
+// fn resolveRelocs(self: *MachO, atom: *Atom) !void {
+//     const relocs = self.relocs.get(atom) orelse return;
+//     const source_sym = atom.getSymbol(self);
+//     const source_section = self.sections.get(source_sym.n_sect - 1).header;
+//     const file_offset = source_section.offset + source_sym.n_value - source_section.addr;
+
+//     log.debug("relocating '{s}'", .{atom.getName(self)});
+
+//     for (relocs.items) |*reloc| {
+//         if (!reloc.dirty) continue;
+
+//         const target_atom = reloc.getTargetAtom(self) orelse continue;
+//         const target_vaddr = target_atom.getSymbol(self).n_value;
+//         const target_vaddr_with_addend = target_vaddr + reloc.addend;
+
+//         log.debug("  ({x}: [() => 0x{x} ({s})) ({s}) (in file at 0x{x})", .{
+//             source_sym.n_value + reloc.offset,
+//             target_vaddr_with_addend,
+//             self.getSymbolName(reloc.target),
+//             @tagName(reloc.@"type"),
+//             file_offset + reloc.offset,
+//         });
+
+//         reloc.dirty = false;
+
+//         if (reloc.pcrel) {
+//             const source_vaddr = source_sym.n_value + reloc.offset;
+//             const disp =
+//                 @intCast(i32, target_vaddr_with_addend) - @intCast(i32, source_vaddr) - 4;
+//             try self.base.file.?.pwriteAll(mem.asBytes(&disp), file_offset + reloc.offset);
+//             continue;
+//         }
+
+//         switch (reloc.length) {
+//             2 => try self.base.file.?.pwriteAll(
+//                 mem.asBytes(&@truncate(u32, target_vaddr_with_addend)),
+//                 file_offset + reloc.offset,
+//             ),
+//             3 => try self.base.file.?.pwriteAll(
+//                 mem.asBytes(&target_vaddr_with_addend),
+//                 file_offset + reloc.offset,
+//             ),
+//             else => unreachable,
+//         }
+//     }
+// }
+
 fn allocateSymbols(self: *MachO) !void {
     const slice = self.sections.slice();
     for (slice.items(.last_atom)) |last_atom, sect_id| {
@@ -3069,6 +3145,14 @@ pub fn deinit(self: *MachO) void {
     }
 
     self.atom_by_index_table.deinit(gpa);
+
+    {
+        var it = self.relocs.valueIterator();
+        while (it.next()) |relocs| {
+            relocs.deinit(gpa);
+        }
+        self.relocs.deinit(gpa);
+    }
 }
 
 fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void {
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index 5b242a2013..d5758cfb74 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -16,6 +16,7 @@ const Arch = std.Target.Cpu.Arch;
 const Dwarf = @import("../Dwarf.zig");
 const MachO = @import("../MachO.zig");
 const Object = @import("Object.zig");
+const RelocationIncr = @import("Relocation.zig"); // temporary name until we clean up object-file relocation scanning
 const SymbolWithLoc = MachO.SymbolWithLoc;
 
 /// Each decl always gets a local symbol with the fully qualified name.
@@ -894,3 +895,48 @@ inline fn isArithmeticOp(inst: *const [4]u8) bool {
     const group_decode = @truncate(u5, inst[3]);
     return ((group_decode >> 2) == 4);
 }
+
+pub fn addRelocation(self: *Atom, macho_file: *MachO, reloc: RelocationIncr) !void {
+    const gpa = macho_file.base.allocator;
+    log.debug("  (adding reloc of type {d} to target %{d})", .{ reloc.@"type", reloc.target.sym_index });
+    const gop = try macho_file.relocs.getOrPut(gpa, self);
+    if (!gop.found_existing) {
+        gop.value_ptr.* = .{};
+    }
+    try gop.value_ptr.append(gpa, reloc);
+}
+
+pub fn resolveRelocationsInCodeBuffer(self: *Atom, macho_file: *MachO, code: []u8) !void {
+    const relocs = macho_file.relocs.get(self) orelse return;
+
+    log.debug("relocating '{s}'", .{self.getName(macho_file)});
+
+    for (relocs.items) |*reloc| {
+        // We don't check the dirty flag here: we are resolving in memory,
+        // so re-resolving a clean relocation is effectively free.
+        try reloc.resolve(self, macho_file, code);
+        reloc.dirty = false;
+    }
+}
+
+pub fn resolveRelocationsInFile(self: *Atom, macho_file: *MachO) !void {
+    const relocs = macho_file.relocs.get(self) orelse return;
+    const gpa = macho_file.base.allocator;
+
+    // No code available in a buffer; we need to read it in from the binary.
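+    // This is a read-modify-write cycle: pread the atom's bytes from the
+    // output file, patch only the relocations still marked dirty, and
+    // pwrite the patched bytes back to the same file offset.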
+    const source_sym = self.getSymbol(macho_file);
+    const source_section = macho_file.sections.get(source_sym.n_sect - 1).header;
+    const file_offset = source_section.offset + source_sym.n_value - source_section.addr;
+    const code = try gpa.alloc(u8, @intCast(usize, self.size));
+    defer gpa.free(code);
+    const amt = try macho_file.base.file.?.preadAll(code, file_offset);
+    if (amt != code.len) return error.InputOutput;
+
+    log.debug("relocating '{s}'", .{self.getName(macho_file)});
+
+    for (relocs.items) |*reloc| {
+        if (!reloc.dirty) continue;
+        try reloc.resolve(self, macho_file, code);
+        reloc.dirty = false;
+    }
+
+    try macho_file.base.file.?.pwriteAll(code, file_offset);
+}
diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig
new file mode 100644
index 0000000000..945def7302
--- /dev/null
+++ b/src/link/MachO/Relocation.zig
@@ -0,0 +1,277 @@
+const Relocation = @This();
+
+const std = @import("std");
+const aarch64 = @import("../../arch/aarch64/bits.zig");
+const assert = std.debug.assert;
+const log = std.log.scoped(.link);
+const macho = std.macho;
+const math = std.math;
+const mem = std.mem;
+const meta = std.meta;
+
+const Atom = @import("Atom.zig");
+const MachO = @import("../MachO.zig");
+const SymbolWithLoc = MachO.SymbolWithLoc;
+
+pub const Table = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Relocation));
+
+/// Offset within the atom's code buffer.
+/// Note that the relocation's size can be inferred from its kind.
+offset: u32,
+target: SymbolWithLoc,
+addend: i64,
+pcrel: bool,
+length: u2,
+@"type": u4,
+dirty: bool = true,
+
+pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom {
+    switch (macho_file.base.options.target.cpu.arch) {
+        .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) {
+            .ARM64_RELOC_GOT_LOAD_PAGE21,
+            .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
+            .ARM64_RELOC_POINTER_TO_GOT,
+            => return macho_file.getGotAtomForSymbol(self.target).?,
+            else => {},
+        },
+        .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) {
+            .X86_64_RELOC_GOT,
+            .X86_64_RELOC_GOT_LOAD,
+            => return macho_file.getGotAtomForSymbol(self.target).?,
+            else => {},
+        },
+        else => unreachable,
+    }
+    if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom;
+    if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom;
+    return macho_file.getAtomForSymbol(self.target);
+}
+
+pub fn resolve(self: Relocation, atom: *Atom, macho_file: *MachO, code: []u8) !void {
+    const arch = macho_file.base.options.target.cpu.arch;
+    const source_sym = atom.getSymbol(macho_file);
+    const source_addr = source_sym.n_value + self.offset;
+
+    const target_atom = self.getTargetAtom(macho_file) orelse return;
+    const target_addr = @bitCast(u64, @intCast(i64, target_atom.getSymbol(macho_file).n_value) + self.addend);
+
+    log.debug("  ({x}: [() => 0x{x} ({s})) ({s})", .{
+        source_addr,
+        target_addr,
+        macho_file.getSymbolName(self.target),
+        switch (arch) {
+            .aarch64 => @tagName(@intToEnum(macho.reloc_type_arm64, self.@"type")),
+            .x86_64 => @tagName(@intToEnum(macho.reloc_type_x86_64, self.@"type")),
+            else => unreachable,
+        },
+    });
+
+    // The resolvers below expect the code slice positioned at the relocation site.
+    switch (arch) {
+        .aarch64 => return self.resolveAarch64(source_addr, target_addr, macho_file, code[self.offset..]),
+        .x86_64 => return self.resolveX8664(source_addr, target_addr, code[self.offset..]),
+        else => unreachable,
+    }
+}
+
+fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_file: *MachO, code: []u8) !void {
+    const rel_type = @intToEnum(macho.reloc_type_arm64, self.@"type");
+    switch (rel_type) {
+        .ARM64_RELOC_BRANCH26 => {
+            const displacement = math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse {
+                log.err("jump too big to encode as i28 displacement value", .{});
+                log.err("  (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{
+                    target_addr,
+                    source_addr,
+                    @intCast(i64, target_addr) - @intCast(i64, source_addr),
+                });
+                log.err("  TODO implement branch islands to extend jump distance for arm64", .{});
+                return error.TODOImplementBranchIslands;
+            };
+            var inst = aarch64.Instruction{
+                .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload(
+                    aarch64.Instruction,
+                    aarch64.Instruction.unconditional_branch_immediate,
+                ), code[0..4]),
+            };
+            inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2));
+            mem.writeIntLittle(u32, code[0..4], inst.toU32());
+        },
+        .ARM64_RELOC_PAGE21,
+        .ARM64_RELOC_GOT_LOAD_PAGE21,
+        .ARM64_RELOC_TLVP_LOAD_PAGE21,
+        => {
+            const source_page = @intCast(i32, source_addr >> 12);
+            const target_page = @intCast(i32, target_addr >> 12);
+            const pages = @bitCast(u21, @intCast(i21, target_page - source_page));
+            var inst = aarch64.Instruction{
+                .pc_relative_address = mem.bytesToValue(meta.TagPayload(
+                    aarch64.Instruction,
+                    aarch64.Instruction.pc_relative_address,
+                ), code[0..4]),
+            };
+            inst.pc_relative_address.immhi = @truncate(u19, pages >> 2);
+            inst.pc_relative_address.immlo = @truncate(u2, pages);
+            mem.writeIntLittle(u32, code[0..4], inst.toU32());
+        },
+        .ARM64_RELOC_PAGEOFF12 => {
+            const narrowed = @truncate(u12, target_addr);
+            if (isArithmeticOp(code[0..4])) {
+                var inst = aarch64.Instruction{
+                    .add_subtract_immediate = mem.bytesToValue(meta.TagPayload(
+                        aarch64.Instruction,
+                        aarch64.Instruction.add_subtract_immediate,
+                    ), code[0..4]),
+                };
+                inst.add_subtract_immediate.imm12 = narrowed;
+                mem.writeIntLittle(u32, code[0..4], inst.toU32());
+            } else {
+                var inst = aarch64.Instruction{
+                    .load_store_register = mem.bytesToValue(meta.TagPayload(
+                        aarch64.Instruction,
+                        aarch64.Instruction.load_store_register,
+                    ), code[0..4]),
+                };
+                const offset: u12 = blk: {
+                    if (inst.load_store_register.size == 0) {
+                        if (inst.load_store_register.v == 1) {
+                            // 128-bit SIMD is scaled by 16.
+                            break :blk try math.divExact(u12, narrowed, 16);
+                        }
+                        // Otherwise, 8-bit SIMD or ldrb.
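+                        // A byte-sized access is unscaled, so the page offset
+                        // is encoded as-is; the wider cases below divide by
+                        // the access size because the hardware scales the
+                        // 12-bit immediate by 2^size bytes.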
+                        break :blk narrowed;
+                    } else {
+                        const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size);
+                        break :blk try math.divExact(u12, narrowed, denom);
+                    }
+                };
+                inst.load_store_register.offset = offset;
+                mem.writeIntLittle(u32, code[0..4], inst.toU32());
+            }
+        },
+        .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => {
+            const narrowed = @truncate(u12, target_addr);
+            var inst: aarch64.Instruction = .{
+                .load_store_register = mem.bytesToValue(meta.TagPayload(
+                    aarch64.Instruction,
+                    aarch64.Instruction.load_store_register,
+                ), code[0..4]),
+            };
+            const offset = try math.divExact(u12, narrowed, 8);
+            inst.load_store_register.offset = offset;
+            mem.writeIntLittle(u32, code[0..4], inst.toU32());
+        },
+        .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => {
+            const RegInfo = struct {
+                rd: u5,
+                rn: u5,
+                size: u2,
+            };
+            const reg_info: RegInfo = blk: {
+                if (isArithmeticOp(code[0..4])) {
+                    const inst = mem.bytesToValue(meta.TagPayload(
+                        aarch64.Instruction,
+                        aarch64.Instruction.add_subtract_immediate,
+                    ), code[0..4]);
+                    break :blk .{
+                        .rd = inst.rd,
+                        .rn = inst.rn,
+                        .size = inst.sf,
+                    };
+                } else {
+                    const inst = mem.bytesToValue(meta.TagPayload(
+                        aarch64.Instruction,
+                        aarch64.Instruction.load_store_register,
+                    ), code[0..4]);
+                    break :blk .{
+                        .rd = inst.rt,
+                        .rn = inst.rn,
+                        .size = inst.size,
+                    };
+                }
+            };
+            const narrowed = @truncate(u12, target_addr);
+            var inst = if (macho_file.tlv_ptr_entries_table.contains(self.target)) blk: {
+                const offset = try math.divExact(u12, narrowed, 8);
+                break :blk aarch64.Instruction{
+                    .load_store_register = .{
+                        .rt = reg_info.rd,
+                        .rn = reg_info.rn,
+                        .offset = offset,
+                        .opc = 0b01,
+                        .op1 = 0b01,
+                        .v = 0,
+                        .size = reg_info.size,
+                    },
+                };
+            } else aarch64.Instruction{
+                .add_subtract_immediate = .{
+                    .rd = reg_info.rd,
+                    .rn = reg_info.rn,
+                    .imm12 = narrowed,
+                    .sh = 0,
+                    .s = 0,
+                    .op = 0,
+                    .sf = @truncate(u1, reg_info.size),
+                },
+            };
+            mem.writeIntLittle(u32, code[0..4], inst.toU32());
+        },
+        .ARM64_RELOC_POINTER_TO_GOT => {
+            const result = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse
+                return error.Overflow;
+            mem.writeIntLittle(u32, code[0..4], @bitCast(u32, result));
+        },
+        .ARM64_RELOC_UNSIGNED => {
+            switch (self.length) {
+                2 => mem.writeIntLittle(u32, code[0..4], @truncate(u32, target_addr)),
+                3 => mem.writeIntLittle(u64, code[0..8], target_addr),
+                else => unreachable,
+            }
+        },
+        .ARM64_RELOC_SUBTRACTOR => unreachable,
+        .ARM64_RELOC_ADDEND => unreachable,
+    }
+}
+
+fn resolveX8664(self: Relocation, source_addr: u64, target_addr: u64, code: []u8) !void {
+    const rel_type = @intToEnum(macho.reloc_type_x86_64, self.@"type");
+    switch (rel_type) {
+        .X86_64_RELOC_BRANCH,
+        .X86_64_RELOC_GOT,
+        .X86_64_RELOC_GOT_LOAD,
+        .X86_64_RELOC_TLV,
+        => {
+            const displacement = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4) orelse
+                return error.Overflow;
+            mem.writeIntLittle(u32, code[0..4], @bitCast(u32, displacement));
+        },
+        .X86_64_RELOC_SIGNED,
+        .X86_64_RELOC_SIGNED_1,
+        .X86_64_RELOC_SIGNED_2,
+        .X86_64_RELOC_SIGNED_4,
+        => {
+            const correction: u3 = switch (rel_type) {
+                .X86_64_RELOC_SIGNED => 0,
+                .X86_64_RELOC_SIGNED_1 => 1,
+                .X86_64_RELOC_SIGNED_2 => 2,
+                .X86_64_RELOC_SIGNED_4 => 4,
+                else => unreachable,
+            };
+            const displacement = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr + correction + 4)) orelse
+                return error.Overflow;
+            mem.writeIntLittle(u32, code[0..4], @bitCast(u32, displacement));
+        },
+        .X86_64_RELOC_UNSIGNED => {
+            switch (self.length) {
+                2 => mem.writeIntLittle(u32, code[0..4],
@truncate(u32, @bitCast(u64, target_addr))), + 3 => mem.writeIntLittle(u64, code, target_addr), + } + }, + .X86_64_RELOC_SUBTRACTOR => unreachable, + } +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} From 05d0c42894b40c530819b1ac15f8133bfd34cf47 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Sep 2022 15:04:16 +0200 Subject: [PATCH 02/17] macho: move main driver loop for one-shot into standalone zld module --- src/link/MachO.zig | 955 ++------------------------------------ src/link/MachO/Object.zig | 2 +- src/link/MachO/zld.zig | 844 +++++++++++++++++++++++++++++++++ 3 files changed, 889 insertions(+), 912 deletions(-) create mode 100644 src/link/MachO/zld.zig diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b6195562f3..863ba4c2f5 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -22,6 +22,7 @@ const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); const trace = @import("../tracy.zig").trace; +const zld = @import("MachO/zld.zig"); const Air = @import("../Air.zig"); const Allocator = mem.Allocator; @@ -57,11 +58,6 @@ pub const SearchStrategy = enum { pub const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); -const SystemLib = struct { - needed: bool = false, - weak: bool = false, -}; - const Section = struct { header: macho.section_64, segment_index: u8, @@ -412,7 +408,7 @@ pub fn flush(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !v } switch (self.mode) { - .one_shot => return self.linkOneShot(comp, prog_node), + .one_shot => return zld.linkWithZld(self, comp, prog_node), .incremental => return self.flushModule(comp, prog_node), } } @@ -441,7 +437,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try d_sym.dwarf.flushModule(&self.base, module); } - var libs = std.StringArrayHashMap(SystemLib).init(arena); + var libs = std.StringArrayHashMap(link.SystemLib).init(arena); try self.resolveLibSystem(arena, comp, &.{}, &libs); const id_symlink_basename = "zld.id"; @@ -531,7 +527,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.logAtoms(); } - try self.writeAtomsIncremental(); + try self.writeAtoms(); var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); @@ -631,635 +627,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.cold_start = false; } -fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.allocator; - var arena_allocator = std.heap.ArenaAllocator.init(gpa); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); - - // If there is no Zig code to compile, then we should skip flushing the output file because it - // will not be part of the linker line anyway. 
- const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { - if (self.base.options.use_stage1) { - const obj_basename = try std.zig.binNameAlloc(arena, .{ - .root_name = self.base.options.root_name, - .target = self.base.options.target, - .output_mode = .Obj, - }); - switch (self.base.options.cache_mode) { - .incremental => break :blk try module.zig_cache_artifact_directory.join( - arena, - &[_][]const u8{obj_basename}, - ), - .whole => break :blk try fs.path.join(arena, &.{ - fs.path.dirname(full_out_path).?, obj_basename, - }), - } - } - - try self.flushModule(comp, prog_node); - - if (fs.path.dirname(full_out_path)) |dirname| { - break :blk try fs.path.join(arena, &.{ dirname, self.base.intermediary_basename.? }); - } else { - break :blk self.base.intermediary_basename.?; - } - } else null; - - var sub_prog_node = prog_node.start("MachO Flush", 0); - sub_prog_node.activate(); - sub_prog_node.context.refresh(); - defer sub_prog_node.end(); - - const cpu_arch = self.base.options.target.cpu.arch; - const os_tag = self.base.options.target.os.tag; - const abi = self.base.options.target.abi; - const is_lib = self.base.options.output_mode == .Lib; - const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; - const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; - const stack_size = self.base.options.stack_size_override orelse 0; - const is_debug_build = self.base.options.optimize_mode == .Debug; - const gc_sections = self.base.options.gc_sections orelse !is_debug_build; - - const id_symlink_basename = "zld.id"; - - var man: Cache.Manifest = undefined; - defer if (!self.base.options.disable_lld_caching) man.deinit(); - - var digest: [Cache.hex_digest_len]u8 = undefined; - - if (!self.base.options.disable_lld_caching) { - man = comp.cache_parent.obtain(); - - // We are about to obtain this lock, so here we give other processes a chance first. - self.base.releaseLock(); - - comptime assert(Compilation.link_hash_implementation_version == 7); - - for (self.base.options.objects) |obj| { - _ = try man.addFile(obj.path, null); - man.hash.add(obj.must_link); - } - for (comp.c_object_table.keys()) |key| { - _ = try man.addFile(key.status.success.object_path, null); - } - try man.addOptionalFile(module_obj_path); - // We can skip hashing libc and libc++ components that we are in charge of building from Zig - // installation sources because they are always a product of the compiler version + target information. - man.hash.add(stack_size); - man.hash.addOptional(self.base.options.pagezero_size); - man.hash.addOptional(self.base.options.search_strategy); - man.hash.addOptional(self.base.options.headerpad_size); - man.hash.add(self.base.options.headerpad_max_install_names); - man.hash.add(gc_sections); - man.hash.add(self.base.options.dead_strip_dylibs); - man.hash.add(self.base.options.strip); - man.hash.addListOfBytes(self.base.options.lib_dirs); - man.hash.addListOfBytes(self.base.options.framework_dirs); - link.hashAddSystemLibs(&man.hash, self.base.options.frameworks); - man.hash.addListOfBytes(self.base.options.rpath_list); - if (is_dyn_lib) { - man.hash.addOptionalBytes(self.base.options.install_name); - man.hash.addOptional(self.base.options.version); - } - link.hashAddSystemLibs(&man.hash, self.base.options.system_libs); - man.hash.addOptionalBytes(self.base.options.sysroot); - try man.addOptionalFile(self.base.options.entitlements); - - // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. 
- _ = try man.hit(); - digest = man.final(); - - var prev_digest_buf: [digest.len]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - directory.handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("MachO Zld new_digest={s} error: {s}", .{ - std.fmt.fmtSliceHexLower(&digest), - @errorName(err), - }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; - if (mem.eql(u8, prev_digest, &digest)) { - // Hot diggity dog! The output binary is already there. - log.debug("MachO Zld digest={s} match - skipping invocation", .{ - std.fmt.fmtSliceHexLower(&digest), - }); - self.base.lock = man.toOwnedLock(); - return; - } - log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ - std.fmt.fmtSliceHexLower(prev_digest), - std.fmt.fmtSliceHexLower(&digest), - }); - - // We are about to change the output file to be different, so we invalidate the build hash now. - directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - } - - if (self.base.options.output_mode == .Obj) { - // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy - // here. TODO: think carefully about how we can avoid this redundant operation when doing - // build-obj. See also the corresponding TODO in linkAsArchive. - const the_object_path = blk: { - if (self.base.options.objects.len != 0) { - break :blk self.base.options.objects[0].path; - } - - if (comp.c_object_table.count() != 0) - break :blk comp.c_object_table.keys()[0].status.success.object_path; - - if (module_obj_path) |p| - break :blk p; - - // TODO I think this is unreachable. Audit this situation when solving the above TODO - // regarding eliding redundant object -> object transformations. - return error.NoObjectsToLink; - }; - // This can happen when using --enable-cache and using the stage1 backend. In this case - // we can skip the file copy. - if (!mem.eql(u8, the_object_path, full_out_path)) { - try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); - } - } else { - const sub_path = self.base.options.emit.?.sub_path; - if (self.base.file == null) { - self.base.file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); - } - // Index 0 is always a null symbol. - try self.locals.append(gpa, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.buffer.append(gpa, 0); - try self.populateMissingMetadata(); - - var lib_not_found = false; - var framework_not_found = false; - - // Positional arguments to the linker such as object files and static archives. 
- var positionals = std.ArrayList([]const u8).init(arena); - try positionals.ensureUnusedCapacity(self.base.options.objects.len); - - var must_link_archives = std.StringArrayHashMap(void).init(arena); - try must_link_archives.ensureUnusedCapacity(self.base.options.objects.len); - - for (self.base.options.objects) |obj| { - if (must_link_archives.contains(obj.path)) continue; - if (obj.must_link) { - _ = must_link_archives.getOrPutAssumeCapacity(obj.path); - } else { - _ = positionals.appendAssumeCapacity(obj.path); - } - } - - for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); - } - - if (module_obj_path) |p| { - try positionals.append(p); - } - - if (comp.compiler_rt_lib) |lib| { - try positionals.append(lib.full_object_path); - } - - // libc++ dep - if (self.base.options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); - } - - // Shared and static libraries passed via `-l` flag. - var candidate_libs = std.StringArrayHashMap(SystemLib).init(arena); - - const system_lib_names = self.base.options.system_libs.keys(); - for (system_lib_names) |system_lib_name| { - // By this time, we depend on these libs being dynamically linked libraries and not static libraries - // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which - // case we want to avoid prepending "-l". - if (Compilation.classifyFileExt(system_lib_name) == .shared_library) { - try positionals.append(system_lib_name); - continue; - } - - const system_lib_info = self.base.options.system_libs.get(system_lib_name).?; - try candidate_libs.put(system_lib_name, .{ - .needed = system_lib_info.needed, - .weak = system_lib_info.weak, - }); - } - - var lib_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.lib_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try lib_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); - } - } - - var libs = std.StringArrayHashMap(SystemLib).init(arena); - - // Assume ld64 default -search_paths_first if no strategy specified. 
- const search_strategy = self.base.options.search_strategy orelse .paths_first; - outer: for (candidate_libs.keys()) |lib_name| { - switch (search_strategy) { - .paths_first => { - // Look in each directory for a dylib (stub first), and then for archive - for (lib_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { - if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - continue :outer; - } - } - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; - } - }, - .dylibs_first => { - // First, look for a dylib in each search dir - for (lib_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { - if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - continue :outer; - } - } - } else for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, lib_name, ".a")) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; - } - } - }, - } - } - - if (lib_not_found) { - log.warn("Library search paths:", .{}); - for (lib_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - try self.resolveLibSystem(arena, comp, lib_dirs.items, &libs); - - // frameworks - var framework_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.framework_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try framework_dirs.append(search_dir); - } else { - log.warn("directory not found for '-F{s}'", .{dir}); - } - } - - outer: for (self.base.options.frameworks.keys()) |f_name| { - for (framework_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { - if (try resolveFramework(arena, dir, f_name, ext)) |full_path| { - const info = self.base.options.frameworks.get(f_name).?; - try libs.put(full_path, .{ - .needed = info.needed, - .weak = info.weak, - }); - continue :outer; - } - } - } else { - log.warn("framework not found for '-framework {s}'", .{f_name}); - framework_not_found = true; - } - } - - if (framework_not_found) { - log.warn("Framework search paths:", .{}); - for (framework_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - if (self.base.options.verbose_link) { - var argv = std.ArrayList([]const u8).init(arena); - - try argv.append("zig"); - try argv.append("ld"); - - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } - - if (is_dyn_lib) { - try argv.append("-dylib"); - - if (self.base.options.install_name) |install_name| { - try argv.append("-install_name"); - try argv.append(install_name); - } - } - - if (self.base.options.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } - - for (self.base.options.rpath_list) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); - } - - if (self.base.options.pagezero_size) |pagezero_size| { - try argv.append("-pagezero_size"); - try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); - } - - if (self.base.options.search_strategy) |strat| switch (strat) { - .paths_first => try argv.append("-search_paths_first"), - .dylibs_first => try argv.append("-search_dylibs_first"), - }; - - if (self.base.options.headerpad_size) |headerpad_size| { - try argv.append("-headerpad_size"); - try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); - } - - if 
(self.base.options.headerpad_max_install_names) { - try argv.append("-headerpad_max_install_names"); - } - - if (gc_sections) { - try argv.append("-dead_strip"); - } - - if (self.base.options.dead_strip_dylibs) { - try argv.append("-dead_strip_dylibs"); - } - - if (self.base.options.entry) |entry| { - try argv.append("-e"); - try argv.append(entry); - } - - for (self.base.options.objects) |obj| { - try argv.append(obj.path); - } - - for (comp.c_object_table.keys()) |key| { - try argv.append(key.status.success.object_path); - } - - if (module_obj_path) |p| { - try argv.append(p); - } - - if (comp.compiler_rt_lib) |lib| { - try argv.append(lib.full_object_path); - } - - if (self.base.options.link_libcpp) { - try argv.append(comp.libcxxabi_static_lib.?.full_object_path); - try argv.append(comp.libcxx_static_lib.?.full_object_path); - } - - try argv.append("-o"); - try argv.append(full_out_path); - - try argv.append("-lSystem"); - try argv.append("-lc"); - - for (self.base.options.system_libs.keys()) |l_name| { - const info = self.base.options.system_libs.get(l_name).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) - else - try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); - try argv.append(arg); - } - - for (self.base.options.lib_dirs) |lib_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); - } - - for (self.base.options.frameworks.keys()) |framework| { - const info = self.base.options.frameworks.get(framework).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak_framework {s}", .{framework}) - else - try std.fmt.allocPrint(arena, "-framework {s}", .{framework}); - try argv.append(arg); - } - - for (self.base.options.framework_dirs) |framework_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); - } - - if (is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false)) { - try argv.append("-undefined"); - try argv.append("dynamic_lookup"); - } - - for (must_link_archives.keys()) |lib| { - try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); - } - - Compilation.dump_argv(argv.items); - } - - var dependent_libs = std.fifo.LinearFifo(struct { - id: Dylib.Id, - parent: u16, - }, .Dynamic).init(arena); - - try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); - try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); - try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); - try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); - - for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id)); - } - - try self.resolveSymbolsInArchives(); - try self.resolveDyldStubBinder(); - try self.resolveSymbolsInDylibs(); - try self.createMhExecuteHeaderSymbol(); - try self.createDsoHandleSymbol(); - try self.resolveSymbolsAtLoading(); - - if (self.unresolved.count() > 0) { - return error.UndefinedSymbolReference; - } - if (lib_not_found) { - return error.LibraryNotFound; - } - if (framework_not_found) { - return error.FrameworkNotFound; - } - - for (self.objects.items) |*object| { - try object.scanInputSections(self); - } - - try self.createDyldPrivateAtom(); - try self.createTentativeDefAtoms(); - try self.createStubHelperPreambleAtom(); - - for 
(self.objects.items) |*object, object_id| { - try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); - } - - if (gc_sections) { - try dead_strip.gcAtoms(self); - } - - try self.allocateSegments(); - try self.allocateSymbols(); - - try self.allocateSpecialSymbols(); - - if (build_options.enable_logging or true) { - self.logSymtab(); - self.logSections(); - self.logAtoms(); - } - - try self.writeAtomsOneShot(); - - var lc_buffer = std.ArrayList(u8).init(arena); - const lc_writer = lc_buffer.writer(); - var ncmds: u32 = 0; - - try self.writeLinkeditSegmentData(&ncmds, lc_writer); - - // If the last section of __DATA segment is zerofill section, we need to ensure - // that the free space between the end of the last non-zerofill section of __DATA - // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will - // copy-paste this space into memory for quicker zerofill operation. - if (self.data_segment_cmd_index) |data_seg_id| blk: { - var physical_zerofill_start: u64 = 0; - const section_indexes = self.getSectionIndexes(data_seg_id); - for (self.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { - if (header.isZerofill() and header.size > 0) break; - physical_zerofill_start = header.offset + header.size; - } else break :blk; - const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; - const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse - return error.Overflow; - if (physical_zerofill_size > 0) { - var padding = try self.base.allocator.alloc(u8, physical_zerofill_size); - defer self.base.allocator.free(padding); - mem.set(u8, padding, 0); - try self.base.file.?.pwriteAll(padding, physical_zerofill_start); - } - } - - try writeDylinkerLC(&ncmds, lc_writer); - try self.writeMainLC(&ncmds, lc_writer); - try self.writeDylibIdLC(&ncmds, lc_writer); - try self.writeRpathLCs(&ncmds, lc_writer); - - { - try lc_writer.writeStruct(macho.source_version_command{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }); - ncmds += 1; - } - - try self.writeBuildVersionLC(&ncmds, lc_writer); - - { - var uuid_lc = macho.uuid_command{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_lc.uuid); - try lc_writer.writeStruct(uuid_lc); - ncmds += 1; - } - - try self.writeLoadDylibLCs(&ncmds, lc_writer); - - const requires_codesig = blk: { - if (self.base.options.entitlements) |_| break :blk true; - if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; - break :blk false; - }; - var codesig_offset: ?u32 = null; - var codesig: ?CodeSignature = if (requires_codesig) blk: { - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. 
- var codesig = CodeSignature.init(self.page_size); - codesig.code_directory.ident = self.base.options.emit.?.sub_path; - if (self.base.options.entitlements) |path| { - try codesig.addEntitlements(arena, path); - } - codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); - break :blk codesig; - } else null; - - var headers_buf = std.ArrayList(u8).init(arena); - try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); - - try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); - try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - - try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - - if (codesig) |*csig| { - try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last - } - } - - if (!self.base.options.disable_lld_caching) { - // Update the file with the digest. If it fails we can continue; it only - // means that the next invocation will have an unnecessary cache miss. - Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); - }; - // Again failure here only means an unnecessary cache miss. - man.writeManifest() catch |err| { - log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); - }; - // We hang on to this lock so that the output file path can be used without - // other processes clobbering it. - self.base.lock = man.toOwnedLock(); - } -} - -fn resolveLibSystem( +pub fn resolveLibSystem( self: *MachO, arena: Allocator, comp: *Compilation, @@ -1302,7 +670,7 @@ fn resolveLibSystem( } } -fn resolveSearchDir( +pub fn resolveSearchDir( arena: Allocator, dir: []const u8, syslibroot: ?[]const u8, @@ -1344,17 +712,7 @@ fn resolveSearchDir( return null; } -fn resolveSearchDirs(arena: Allocator, dirs: []const []const u8, syslibroot: ?[]const u8, out_dirs: anytype) !void { - for (dirs) |dir| { - if (try resolveSearchDir(arena, dir, syslibroot)) |search_dir| { - try out_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); - } - } -} - -fn resolveLib( +pub fn resolveLib( arena: Allocator, search_dir: []const u8, name: []const u8, @@ -1373,7 +731,7 @@ fn resolveLib( return full_path; } -fn resolveFramework( +pub fn resolveFramework( arena: Allocator, search_dir: []const u8, name: []const u8, @@ -1583,7 +941,7 @@ pub fn parseDylib( return true; } -fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8, dependent_libs: anytype) !void { +pub fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8, dependent_libs: anytype) !void { for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; @@ -1601,7 +959,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const } } -fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !void { +pub fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !void { for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; @@ -1614,10 +972,10 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi } } -fn parseLibs( +pub fn parseLibs( self: *MachO, lib_names: []const []const u8, - lib_infos: []const SystemLib, + lib_infos: []const link.SystemLib, syslibroot: ?[]const u8, dependent_libs: 
anytype, ) !void { @@ -1635,7 +993,7 @@ fn parseLibs( } } -fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: anytype) !void { +pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: anytype) !void { // At this point, we can now parse dependents of dylibs preserving the inclusion order of: // 1) anything on the linker line is parsed first // 2) afterwards, we parse dependents of the included dylibs @@ -1935,55 +1293,7 @@ pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void { // } // } -fn allocateSymbols(self: *MachO) !void { - const slice = self.sections.slice(); - for (slice.items(.last_atom)) |last_atom, sect_id| { - const header = slice.items(.header)[sect_id]; - var atom = last_atom orelse continue; - - while (atom.prev) |prev| { - atom = prev; - } - - const n_sect = @intCast(u8, sect_id + 1); - var base_vaddr = header.addr; - - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ - n_sect, - header.segName(), - header.sectName(), - }); - - while (true) { - const alignment = try math.powi(u32, 2, atom.alignment); - base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); - - const sym = atom.getSymbolPtr(self); - sym.n_value = base_vaddr; - sym.n_sect = n_sect; - - log.debug(" ATOM(%{d}, '{s}') @{x}", .{ atom.sym_index, atom.getName(self), base_vaddr }); - - // Update each symbol contained within the atom - for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }); - contained_sym.n_value = base_vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_vaddr += atom.size; - - if (atom.next) |next| { - atom = next; - } else break; - } - } -} - -fn allocateSpecialSymbols(self: *MachO) !void { +pub fn allocateSpecialSymbols(self: *MachO) !void { for (&[_][]const u8{ "___dso_handle", "__mh_execute_header", @@ -2002,96 +1312,7 @@ fn allocateSpecialSymbols(self: *MachO) !void { } } -fn writeAtomsOneShot(self: *MachO) !void { - assert(self.mode == .one_shot); - - const gpa = self.base.allocator; - const slice = self.sections.slice(); - - for (slice.items(.last_atom)) |last_atom, sect_id| { - const header = slice.items(.header)[sect_id]; - if (header.size == 0) continue; - var atom = last_atom.?; - - if (header.isZerofill()) continue; - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); - - log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - - while (atom.prev) |prev| { - atom = prev; - } - - while (true) { - const this_sym = atom.getSymbol(self); - const padding_size: usize = if (atom.next) |next| blk: { - const next_sym = next.getSymbol(self); - const size = next_sym.n_value - (this_sym.n_value + atom.size); - break :blk math.cast(usize, size) orelse return error.Overflow; - } else 0; - - log.debug(" (adding ATOM(%{d}, '{s}') from object({?d}) to buffer)", .{ - atom.sym_index, - atom.getName(self), - atom.file, - }); - if (padding_size > 0) { - log.debug(" (with padding {x})", .{padding_size}); - } - - try atom.resolveRelocs(self); - buffer.appendSliceAssumeCapacity(atom.code.items); - - var i: usize = 0; - while (i < padding_size) : (i += 1) { - // TODO with NOPs - buffer.appendAssumeCapacity(0); - } - - if (atom.next) |next| { - atom = next; - } else { - assert(buffer.items.len == header.size); - log.debug(" (writing at file offset 0x{x})", .{header.offset}); - 
try self.base.file.?.pwriteAll(buffer.items, header.offset); - break; - } - } - } -} - -fn writePadding(self: *MachO, sect_id: u8, size: usize, writer: anytype) !void { - const header = self.sections.items(.header)[sect_id]; - const min_alignment: u3 = if (!header.isCode()) - 1 - else switch (self.base.options.target.cpu.arch) { - .aarch64 => @sizeOf(u32), - .x86_64 => @as(u3, 1), - else => unreachable, - }; - - const len = @divExact(size, min_alignment); - var i: usize = 0; - while (i < len) : (i += 1) { - if (!header.isCode()) { - try writer.writeByte(0); - } else switch (self.base.options.target.cpu.arch) { - .aarch64 => { - const inst = aarch64.Instruction.nop(); - try writer.writeIntLittle(u32, inst.toU32()); - }, - .x86_64 => { - try writer.writeByte(0x90); - }, - else => unreachable, - } - } -} - -fn writeAtomsIncremental(self: *MachO) !void { +fn writeAtoms(self: *MachO) !void { assert(self.mode == .incremental); const slice = self.sections.slice(); @@ -2186,7 +1407,7 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { return atom; } -fn createDyldPrivateAtom(self: *MachO) !void { +pub fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.dyld_private_atom != null) return; @@ -2203,7 +1424,7 @@ fn createDyldPrivateAtom(self: *MachO) !void { try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); } -fn createStubHelperPreambleAtom(self: *MachO) !void { +pub fn createStubHelperPreambleAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.stub_helper_preamble_atom != null) return; @@ -2509,7 +1730,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { return atom; } -fn createTentativeDefAtoms(self: *MachO) !void { +pub fn createTentativeDefAtoms(self: *MachO) !void { const gpa = self.base.allocator; for (self.globals.items) |global| { @@ -2554,7 +1775,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { } } -fn createMhExecuteHeaderSymbol(self: *MachO) !void { +pub fn createMhExecuteHeaderSymbol(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; if (self.getGlobal("__mh_execute_header")) |global| { const sym = self.getSymbol(global); @@ -2577,7 +1798,7 @@ fn createMhExecuteHeaderSymbol(self: *MachO) !void { gop.value_ptr.* = sym_loc; } -fn createDsoHandleSymbol(self: *MachO) !void { +pub fn createDsoHandleSymbol(self: *MachO) !void { const global = self.getGlobalPtr("___dso_handle") orelse return; if (!self.getSymbol(global.*).undf()) return; @@ -2652,7 +1873,7 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { gop.value_ptr.* = current; } -fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { +pub fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); @@ -2705,7 +1926,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { } } -fn resolveSymbolsInArchives(self: *MachO) !void { +pub fn resolveSymbolsInArchives(self: *MachO) !void { if (self.archives.items.len == 0) return; const gpa = self.base.allocator; @@ -2736,7 +1957,7 @@ fn resolveSymbolsInArchives(self: *MachO) !void { } } -fn resolveSymbolsInDylibs(self: *MachO) !void { +pub fn resolveSymbolsInDylibs(self: *MachO) !void { if (self.dylibs.items.len == 0) return; const gpa = self.base.allocator; @@ -2782,7 +2003,7 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { } } -fn resolveSymbolsAtLoading(self: *MachO) !void { +pub fn 
resolveSymbolsAtLoading(self: *MachO) !void { const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const allow_undef = is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false); @@ -2825,7 +2046,7 @@ fn resolveSymbolsAtLoading(self: *MachO) !void { } } -fn resolveDyldStubBinder(self: *MachO) !void { +pub fn resolveDyldStubBinder(self: *MachO) !void { if (self.dyld_stub_binder_index != null) return; if (self.unresolved.count() == 0) return; // no need for a stub binder if we don't have any imports @@ -2872,7 +2093,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { self.got_entries.items[got_index].sym_index = got_atom.sym_index; } -fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { +pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { const name_len = mem.sliceTo(default_dyld_path, 0).len; const cmdsize = @intCast(u32, mem.alignForwardGeneric( u64, @@ -2892,7 +2113,7 @@ fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { ncmds.* += 1; } -fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Exe) return; const seg = self.segments.items[self.text_segment_cmd_index.?]; const global = try self.getEntryPoint(); @@ -2914,7 +2135,7 @@ const WriteDylibLCCtx = struct { compatibility_version: u32 = 0x10000, }; -fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { const name_len = ctx.name.len + 1; const cmdsize = @intCast(u32, mem.alignForwardGeneric( u64, @@ -2940,7 +2161,7 @@ fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { ncmds.* += 1; } -fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Lib) return; const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; const curr = self.base.options.version orelse std.builtin.Version{ @@ -2986,7 +2207,7 @@ const RpathIterator = struct { } }; -fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const gpa = self.base.allocator; var it = RpathIterator.init(gpa, self.base.options.rpath_list); @@ -3013,7 +2234,7 @@ fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { } } -fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); const platform_version = blk: { const ver = self.base.options.target.os.version_range.semver.min; @@ -3046,7 +2267,7 @@ fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { ncmds.* += 1; } -fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { for (self.referenced_dylibs.keys()) |id| { const dylib = self.dylibs.items[id]; const dylib_id = dylib.id orelse unreachable; @@ -4035,7 +3256,7 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil return 0; } -fn populateMissingMetadata(self: *MachO) !void { +pub fn 
populateMissingMetadata(self: *MachO) !void { const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; @@ -4367,7 +3588,7 @@ fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { return @intCast(u32, sizeofcmds); } -fn calcMinHeaderPad(self: *MachO) !u64 { +pub fn calcMinHeaderPad(self: *MachO) !u64 { var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); @@ -4384,94 +3605,6 @@ fn calcMinHeaderPad(self: *MachO) !u64 { return offset; } -fn allocateSegments(self: *MachO) !void { - try self.allocateSegment(self.text_segment_cmd_index, &.{ - self.pagezero_segment_cmd_index, - }, try self.calcMinHeaderPad()); - - if (self.text_segment_cmd_index) |index| blk: { - const indexes = self.getSectionIndexes(index); - if (indexes.start == indexes.end) break :blk; - const seg = self.segments.items[index]; - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. - var min_alignment: u32 = 0; - for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - const alignment = try math.powi(u32, 2, header.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_header = self.sections.items(.header)[indexes.end - 1]; - const shift: u32 = shift: { - const diff = seg.filesize - last_header.offset - last_header.size; - const factor = @divTrunc(diff, min_alignment); - break :shift @intCast(u32, factor * min_alignment); - }; - - if (shift > 0) { - for (self.sections.items(.header)[indexes.start..indexes.end]) |*header| { - header.offset += shift; - header.addr += shift; - } - } - } - - try self.allocateSegment(self.data_const_segment_cmd_index, &.{ - self.text_segment_cmd_index, - self.pagezero_segment_cmd_index, - }, 0); - - try self.allocateSegment(self.data_segment_cmd_index, &.{ - self.data_const_segment_cmd_index, - self.text_segment_cmd_index, - self.pagezero_segment_cmd_index, - }, 0); - - try self.allocateSegment(self.linkedit_segment_cmd_index, &.{ - self.data_segment_cmd_index, - self.data_const_segment_cmd_index, - self.text_segment_cmd_index, - self.pagezero_segment_cmd_index, - }, 0); -} - -fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { - const index = maybe_index orelse return; - const seg = &self.segments.items[index]; - - const base = self.getSegmentAllocBase(indices); - seg.vmaddr = base.vmaddr; - seg.fileoff = base.fileoff; - seg.filesize = init_size; - seg.vmsize = init_size; - - // Allocate the sections according to their alignment at the beginning of the segment. 
- const indexes = self.getSectionIndexes(index); - var start = init_size; - const slice = self.sections.slice(); - for (slice.items(.header)[indexes.start..indexes.end]) |*header| { - const alignment = try math.powi(u32, 2, header.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - - header.offset = if (header.isZerofill()) - 0 - else - @intCast(u32, seg.fileoff + start_aligned); - header.addr = seg.vmaddr + start_aligned; - - start = start_aligned + header.size; - - if (!header.isZerofill()) { - seg.filesize = start; - } - seg.vmsize = start; - } - - seg.filesize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); - seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, self.page_size); -} - const InitSectionOpts = struct { flags: u32 = macho.S_REGULAR, reserved1: u32 = 0, @@ -4956,7 +4089,7 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return global_index; } -fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { +pub fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { for (indices) |maybe_prev_id| { const prev_id = maybe_prev_id orelse continue; const prev = self.segments.items[prev_id]; @@ -4968,7 +4101,7 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, return .{ .vmaddr = 0, .fileoff = 0 }; } -fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { +pub fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { for (self.segments.items) |seg, i| { const indexes = self.getSectionIndexes(@intCast(u8, i)); var out_seg = seg; @@ -4997,7 +4130,7 @@ fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { } } -fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; seg.filesize = 0; seg.vmsize = 0; @@ -5010,7 +4143,7 @@ fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); } -fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -5696,7 +4829,7 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi lc.nindirectsyms = nindirectsyms; } -fn writeCodeSignaturePadding( +pub fn writeCodeSignaturePadding( self: *MachO, code_sig: *CodeSignature, ncmds: *u32, @@ -5725,7 +4858,7 @@ fn writeCodeSignaturePadding( return @intCast(u32, offset); } -fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { +pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { const seg = self.segments.items[self.text_segment_cmd_index.?]; var buffer = std.ArrayList(u8).init(self.base.allocator); @@ -5749,7 +4882,7 @@ fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void } /// Writes Mach-O file header. 
-fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { +pub fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; @@ -6371,7 +5504,7 @@ fn generateSymbolStabsForSymbol( // try writer.writeByte(']'); // } -fn logSections(self: *MachO) void { +pub fn logSections(self: *MachO) void { log.debug("sections:", .{}); for (self.sections.items(.header)) |header, i| { log.debug(" sect({d}): {s},{s} @{x}, sizeof({x})", .{ @@ -6409,7 +5542,7 @@ fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { return buf[0..]; } -fn logSymtab(self: *MachO) void { +pub fn logSymtab(self: *MachO) void { var buf: [9]u8 = undefined; log.debug("symtab:", .{}); @@ -6502,7 +5635,7 @@ fn logSymtab(self: *MachO) void { } } -fn logAtoms(self: *MachO) void { +pub fn logAtoms(self: *MachO) void { log.debug("atoms:", .{}); const slice = self.sections.slice(); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index b65c9ccfd1..7c7c83ab3e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -236,7 +236,7 @@ pub fn scanInputSections(self: Object, macho_file: *MachO) !void { } /// Splits object into atoms assuming one-shot linking mode. -pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { +pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { assert(macho_file.mode == .one_shot); const tracy = trace(@src()); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig new file mode 100644 index 0000000000..0ddcf2d08e --- /dev/null +++ b/src/link/MachO/zld.zig @@ -0,0 +1,844 @@ +const std = @import("std"); +const build_options = @import("build_options"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const link = @import("../../link.zig"); +const trace = @import("../../tracy.zig").trace; + +const Cache = @import("../../Cache.zig"); +const CodeSignature = @import("CodeSignature.zig"); +const Compilation = @import("../../Compilation.zig"); +const Dylib = @import("Dylib.zig"); +const MachO = @import("../MachO.zig"); + +const dead_strip = @import("dead_strip.zig"); + +pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const directory = macho_file.base.options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{macho_file.base.options.emit.?.sub_path}); + + // If there is no Zig code to compile, then we should skip flushing the output file because it + // will not be part of the linker line anyway. 
+ const module_obj_path: ?[]const u8 = if (macho_file.base.options.module) |module| blk: { + if (macho_file.base.options.use_stage1) { + const obj_basename = try std.zig.binNameAlloc(arena, .{ + .root_name = macho_file.base.options.root_name, + .target = macho_file.base.options.target, + .output_mode = .Obj, + }); + switch (macho_file.base.options.cache_mode) { + .incremental => break :blk try module.zig_cache_artifact_directory.join( + arena, + &[_][]const u8{obj_basename}, + ), + .whole => break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, obj_basename, + }), + } + } + + try macho_file.flushModule(comp, prog_node); + + if (fs.path.dirname(full_out_path)) |dirname| { + break :blk try fs.path.join(arena, &.{ dirname, macho_file.base.intermediary_basename.? }); + } else { + break :blk macho_file.base.intermediary_basename.?; + } + } else null; + + var sub_prog_node = prog_node.start("MachO Flush", 0); + sub_prog_node.activate(); + sub_prog_node.context.refresh(); + defer sub_prog_node.end(); + + const cpu_arch = macho_file.base.options.target.cpu.arch; + const os_tag = macho_file.base.options.target.os.tag; + const abi = macho_file.base.options.target.abi; + const is_lib = macho_file.base.options.output_mode == .Lib; + const is_dyn_lib = macho_file.base.options.link_mode == .Dynamic and is_lib; + const is_exe_or_dyn_lib = is_dyn_lib or macho_file.base.options.output_mode == .Exe; + const stack_size = macho_file.base.options.stack_size_override orelse 0; + const is_debug_build = macho_file.base.options.optimize_mode == .Debug; + const gc_sections = macho_file.base.options.gc_sections orelse !is_debug_build; + + const id_symlink_basename = "zld.id"; + + var man: Cache.Manifest = undefined; + defer if (!macho_file.base.options.disable_lld_caching) man.deinit(); + + var digest: [Cache.hex_digest_len]u8 = undefined; + + if (!macho_file.base.options.disable_lld_caching) { + man = comp.cache_parent.obtain(); + + // We are about to obtain this lock, so here we give other processes a chance first. + macho_file.base.releaseLock(); + + comptime assert(Compilation.link_hash_implementation_version == 7); + + for (macho_file.base.options.objects) |obj| { + _ = try man.addFile(obj.path, null); + man.hash.add(obj.must_link); + } + for (comp.c_object_table.keys()) |key| { + _ = try man.addFile(key.status.success.object_path, null); + } + try man.addOptionalFile(module_obj_path); + // We can skip hashing libc and libc++ components that we are in charge of building from Zig + // installation sources because they are always a product of the compiler version + target information. 
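Backing up a step, the `module_obj_path` selection above leans on Zig's labeled blocks: `blk: { ... break :blk value; }` is an expression whose value is whatever the matching `break` supplies, which is how each cache-mode branch feeds the single `const`. The idiom in isolation (the paths are illustrative):

    const std = @import("std");

    test "labeled block is an expression" {
        const use_cache_dir = false;
        const path: []const u8 = blk: {
            if (use_cache_dir) break :blk "zig-cache/o/deadbeef/main.o";
            break :blk "main.o";
        };
        try std.testing.expectEqualStrings("main.o", path);
    }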
+ man.hash.add(stack_size); + man.hash.addOptional(macho_file.base.options.pagezero_size); + man.hash.addOptional(macho_file.base.options.search_strategy); + man.hash.addOptional(macho_file.base.options.headerpad_size); + man.hash.add(macho_file.base.options.headerpad_max_install_names); + man.hash.add(gc_sections); + man.hash.add(macho_file.base.options.dead_strip_dylibs); + man.hash.add(macho_file.base.options.strip); + man.hash.addListOfBytes(macho_file.base.options.lib_dirs); + man.hash.addListOfBytes(macho_file.base.options.framework_dirs); + link.hashAddSystemLibs(&man.hash, macho_file.base.options.frameworks); + man.hash.addListOfBytes(macho_file.base.options.rpath_list); + if (is_dyn_lib) { + man.hash.addOptionalBytes(macho_file.base.options.install_name); + man.hash.addOptional(macho_file.base.options.version); + } + link.hashAddSystemLibs(&man.hash, macho_file.base.options.system_libs); + man.hash.addOptionalBytes(macho_file.base.options.sysroot); + try man.addOptionalFile(macho_file.base.options.entitlements); + + // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. + _ = try man.hit(); + digest = man.final(); + + var prev_digest_buf: [digest.len]u8 = undefined; + const prev_digest: []u8 = Cache.readSmallFile( + directory.handle, + id_symlink_basename, + &prev_digest_buf, + ) catch |err| blk: { + log.debug("MachO Zld new_digest={s} error: {s}", .{ + std.fmt.fmtSliceHexLower(&digest), + @errorName(err), + }); + // Handle this as a cache miss. + break :blk prev_digest_buf[0..0]; + }; + if (mem.eql(u8, prev_digest, &digest)) { + // Hot diggity dog! The output binary is already there. + log.debug("MachO Zld digest={s} match - skipping invocation", .{ + std.fmt.fmtSliceHexLower(&digest), + }); + macho_file.base.lock = man.toOwnedLock(); + return; + } + log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ + std.fmt.fmtSliceHexLower(prev_digest), + std.fmt.fmtSliceHexLower(&digest), + }); + + // We are about to change the output file to be different, so we invalidate the build hash now. + directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + error.FileNotFound => {}, + else => |e| return e, + }; + } + + if (macho_file.base.options.output_mode == .Obj) { + // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy + // here. TODO: think carefully about how we can avoid this redundant operation when doing + // build-obj. See also the corresponding TODO in linkAsArchive. + const the_object_path = blk: { + if (macho_file.base.options.objects.len != 0) { + break :blk macho_file.base.options.objects[0].path; + } + + if (comp.c_object_table.count() != 0) + break :blk comp.c_object_table.keys()[0].status.success.object_path; + + if (module_obj_path) |p| + break :blk p; + + // TODO I think this is unreachable. Audit this situation when solving the above TODO + // regarding eliding redundant object -> object transformations. + return error.NoObjectsToLink; + }; + // This can happen when using --enable-cache and using the stage1 backend. In this case + // we can skip the file copy. 
+ if (!mem.eql(u8, the_object_path, full_out_path)) { + try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); + } + } else { + const sub_path = macho_file.base.options.emit.?.sub_path; + if (macho_file.base.file == null) { + macho_file.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(macho_file.base.options), + }); + } + // Index 0 is always a null symbol. + try macho_file.locals.append(gpa, .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try macho_file.strtab.buffer.append(gpa, 0); + try macho_file.populateMissingMetadata(); + + var lib_not_found = false; + var framework_not_found = false; + + // Positional arguments to the linker such as object files and static archives. + var positionals = std.ArrayList([]const u8).init(arena); + try positionals.ensureUnusedCapacity(macho_file.base.options.objects.len); + + var must_link_archives = std.StringArrayHashMap(void).init(arena); + try must_link_archives.ensureUnusedCapacity(macho_file.base.options.objects.len); + + for (macho_file.base.options.objects) |obj| { + if (must_link_archives.contains(obj.path)) continue; + if (obj.must_link) { + _ = must_link_archives.getOrPutAssumeCapacity(obj.path); + } else { + _ = positionals.appendAssumeCapacity(obj.path); + } + } + + for (comp.c_object_table.keys()) |key| { + try positionals.append(key.status.success.object_path); + } + + if (module_obj_path) |p| { + try positionals.append(p); + } + + if (comp.compiler_rt_lib) |lib| { + try positionals.append(lib.full_object_path); + } + + // libc++ dep + if (macho_file.base.options.link_libcpp) { + try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); + try positionals.append(comp.libcxx_static_lib.?.full_object_path); + } + + // Shared and static libraries passed via `-l` flag. + var candidate_libs = std.StringArrayHashMap(link.SystemLib).init(arena); + + const system_lib_names = macho_file.base.options.system_libs.keys(); + for (system_lib_names) |system_lib_name| { + // By this time, we depend on these libs being dynamically linked libraries and not static libraries + // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which + // case we want to avoid prepending "-l". + if (Compilation.classifyFileExt(system_lib_name) == .shared_library) { + try positionals.append(system_lib_name); + continue; + } + + const system_lib_info = macho_file.base.options.system_libs.get(system_lib_name).?; + try candidate_libs.put(system_lib_name, .{ + .needed = system_lib_info.needed, + .weak = system_lib_info.weak, + }); + } + + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (macho_file.base.options.lib_dirs) |dir| { + if (try MachO.resolveSearchDir(arena, dir, macho_file.base.options.sysroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } + } + + var libs = std.StringArrayHashMap(link.SystemLib).init(arena); + + // Assume ld64 default -search_paths_first if no strategy specified. 
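Both strategies below lean on Zig's `for ... else`: the `else` arm runs only if the loop completes without a `break`, which is what lets "nothing matched, warn and flag lib_not_found" hang directly off the search loop. The construct in isolation:

    const std = @import("std");

    test "for-else runs only when no break fires" {
        const exts = [_][]const u8{ ".tbd", ".dylib", ".a" };
        var matched: bool = false;
        for (exts) |ext| {
            if (std.mem.eql(u8, ext, ".dylib")) {
                matched = true;
                break;
            }
        } else {
            // Reached only if no extension matched; the linker would log
            // a warning and set `lib_not_found = true` here instead.
            matched = false;
        }
        try std.testing.expect(matched);
    }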
+ const search_strategy = macho_file.base.options.search_strategy orelse .paths_first; + outer: for (candidate_libs.keys()) |lib_name| { + switch (search_strategy) { + .paths_first => { + // Look in each directory for a dylib (stub first), and then for archive + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try MachO.resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + continue :outer; + } + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + }, + .dylibs_first => { + // First, look for a dylib in each search dir + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { + if (try MachO.resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + continue :outer; + } + } + } else for (lib_dirs.items) |dir| { + if (try MachO.resolveLib(arena, dir, lib_name, ".a")) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + } + }, + } + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + try macho_file.resolveLibSystem(arena, comp, lib_dirs.items, &libs); + + // frameworks + var framework_dirs = std.ArrayList([]const u8).init(arena); + for (macho_file.base.options.framework_dirs) |dir| { + if (try MachO.resolveSearchDir(arena, dir, macho_file.base.options.sysroot)) |search_dir| { + try framework_dirs.append(search_dir); + } else { + log.warn("directory not found for '-F{s}'", .{dir}); + } + } + + outer: for (macho_file.base.options.frameworks.keys()) |f_name| { + for (framework_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { + if (try MachO.resolveFramework(arena, dir, f_name, ext)) |full_path| { + const info = macho_file.base.options.frameworks.get(f_name).?; + try libs.put(full_path, .{ + .needed = info.needed, + .weak = info.weak, + }); + continue :outer; + } + } + } else { + log.warn("framework not found for '-framework {s}'", .{f_name}); + framework_not_found = true; + } + } + + if (framework_not_found) { + log.warn("Framework search paths:", .{}); + for (framework_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + if (macho_file.base.options.verbose_link) { + var argv = std.ArrayList([]const u8).init(arena); + + try argv.append("zig"); + try argv.append("ld"); + + if (is_exe_or_dyn_lib) { + try argv.append("-dynamic"); + } + + if (is_dyn_lib) { + try argv.append("-dylib"); + + if (macho_file.base.options.install_name) |install_name| { + try argv.append("-install_name"); + try argv.append(install_name); + } + } + + if (macho_file.base.options.sysroot) |syslibroot| { + try argv.append("-syslibroot"); + try argv.append(syslibroot); + } + + for (macho_file.base.options.rpath_list) |rpath| { + try argv.append("-rpath"); + try argv.append(rpath); + } + + if (macho_file.base.options.pagezero_size) |pagezero_size| { + try argv.append("-pagezero_size"); + try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); + } + + if (macho_file.base.options.search_strategy) |strat| switch (strat) { + .paths_first => try argv.append("-search_paths_first"), + .dylibs_first => try argv.append("-search_dylibs_first"), + }; + + if (macho_file.base.options.headerpad_size) |headerpad_size| { + try argv.append("-headerpad_size"); + try 
argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); + } + + if (macho_file.base.options.headerpad_max_install_names) { + try argv.append("-headerpad_max_install_names"); + } + + if (gc_sections) { + try argv.append("-dead_strip"); + } + + if (macho_file.base.options.dead_strip_dylibs) { + try argv.append("-dead_strip_dylibs"); + } + + if (macho_file.base.options.entry) |entry| { + try argv.append("-e"); + try argv.append(entry); + } + + for (macho_file.base.options.objects) |obj| { + try argv.append(obj.path); + } + + for (comp.c_object_table.keys()) |key| { + try argv.append(key.status.success.object_path); + } + + if (module_obj_path) |p| { + try argv.append(p); + } + + if (comp.compiler_rt_lib) |lib| { + try argv.append(lib.full_object_path); + } + + if (macho_file.base.options.link_libcpp) { + try argv.append(comp.libcxxabi_static_lib.?.full_object_path); + try argv.append(comp.libcxx_static_lib.?.full_object_path); + } + + try argv.append("-o"); + try argv.append(full_out_path); + + try argv.append("-lSystem"); + try argv.append("-lc"); + + for (macho_file.base.options.system_libs.keys()) |l_name| { + const info = macho_file.base.options.system_libs.get(l_name).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) + else + try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); + try argv.append(arg); + } + + for (macho_file.base.options.lib_dirs) |lib_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); + } + + for (macho_file.base.options.frameworks.keys()) |framework| { + const info = macho_file.base.options.frameworks.get(framework).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak_framework {s}", .{framework}) + else + try std.fmt.allocPrint(arena, "-framework {s}", .{framework}); + try argv.append(arg); + } + + for (macho_file.base.options.framework_dirs) |framework_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); + } + + if (is_dyn_lib and (macho_file.base.options.allow_shlib_undefined orelse false)) { + try argv.append("-undefined"); + try argv.append("dynamic_lookup"); + } + + for (must_link_archives.keys()) |lib| { + try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); + } + + Compilation.dump_argv(argv.items); + } + + var dependent_libs = std.fifo.LinearFifo(struct { + id: Dylib.Id, + parent: u16, + }, .Dynamic).init(arena); + + try macho_file.parseInputFiles(positionals.items, macho_file.base.options.sysroot, &dependent_libs); + try macho_file.parseAndForceLoadStaticArchives(must_link_archives.keys()); + try macho_file.parseLibs(libs.keys(), libs.values(), macho_file.base.options.sysroot, &dependent_libs); + try macho_file.parseDependentLibs(macho_file.base.options.sysroot, &dependent_libs); + + for (macho_file.objects.items) |_, object_id| { + try macho_file.resolveSymbolsInObject(@intCast(u16, object_id)); + } + + try macho_file.resolveSymbolsInArchives(); + try macho_file.resolveDyldStubBinder(); + try macho_file.resolveSymbolsInDylibs(); + try macho_file.createMhExecuteHeaderSymbol(); + try macho_file.createDsoHandleSymbol(); + try macho_file.resolveSymbolsAtLoading(); + + if (macho_file.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + if (lib_not_found) { + return error.LibraryNotFound; + } + if (framework_not_found) { 
+ return error.FrameworkNotFound; + } + + for (macho_file.objects.items) |*object| { + try object.scanInputSections(macho_file); + } + + try macho_file.createDyldPrivateAtom(); + try macho_file.createTentativeDefAtoms(); + try macho_file.createStubHelperPreambleAtom(); + + for (macho_file.objects.items) |*object, object_id| { + try object.splitIntoAtoms(macho_file, @intCast(u32, object_id)); + } + + if (gc_sections) { + try dead_strip.gcAtoms(macho_file); + } + + try allocateSegments(macho_file); + try allocateSymbols(macho_file); + + try macho_file.allocateSpecialSymbols(); + + if (build_options.enable_logging or true) { + macho_file.logSymtab(); + macho_file.logSections(); + macho_file.logAtoms(); + } + + try writeAtoms(macho_file); + + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try macho_file.writeLinkeditSegmentData(&ncmds, lc_writer); + + // If the last section of __DATA segment is zerofill section, we need to ensure + // that the free space between the end of the last non-zerofill section of __DATA + // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will + // copy-paste this space into memory for quicker zerofill operation. + if (macho_file.data_segment_cmd_index) |data_seg_id| blk: { + var physical_zerofill_start: u64 = 0; + const section_indexes = macho_file.getSectionIndexes(data_seg_id); + for (macho_file.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { + if (header.isZerofill() and header.size > 0) break; + physical_zerofill_start = header.offset + header.size; + } else break :blk; + const linkedit = macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse + return error.Overflow; + if (physical_zerofill_size > 0) { + var padding = try macho_file.base.allocator.alloc(u8, physical_zerofill_size); + defer macho_file.base.allocator.free(padding); + mem.set(u8, padding, 0); + try macho_file.base.file.?.pwriteAll(padding, physical_zerofill_start); + } + } + + try MachO.writeDylinkerLC(&ncmds, lc_writer); + try macho_file.writeMainLC(&ncmds, lc_writer); + try macho_file.writeDylibIdLC(&ncmds, lc_writer); + try macho_file.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; + } + + try macho_file.writeBuildVersionLC(&ncmds, lc_writer); + + { + var uuid_lc = macho.uuid_command{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_lc.uuid); + try lc_writer.writeStruct(uuid_lc); + ncmds += 1; + } + + try macho_file.writeLoadDylibLCs(&ncmds, lc_writer); + + const requires_codesig = blk: { + if (macho_file.base.options.entitlements) |_| break :blk true; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. 
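The reservation has to be a conservative over-estimate, because the signature covers every byte written before it: an ad-hoc code signature records one SHA-256 hash per 4 KiB page of the file. Roughly, as a sketch (the fixed overhead constant is made up here; this is not the CodeSignature API this patch uses):

    const std = @import("std");

    // Illustrative arithmetic only: one 32-byte SHA-256 hash per 4 KiB page
    // of file contents, plus CodeDirectory header and identifier overhead.
    fn estimatePaddingSize(file_size: u64, ident_len: u64) u64 {
        const page_size: u64 = 0x1000;
        const hash_size: u64 = 32;
        const n_pages = std.mem.alignForwardGeneric(u64, file_size, page_size) / page_size;
        const fixed_overhead: u64 = 0x1000; // invented, deliberately generous
        return fixed_overhead + ident_len + n_pages * hash_size;
    }

    test "estimate grows by one hash per page" {
        // 2 MiB of contents -> 512 page hashes of 32 bytes each.
        const estimate = estimatePaddingSize(2 * 1024 * 1024, 8);
        try std.testing.expectEqual(@as(u64, 0x1000 + 8 + 512 * 32), estimate);
    }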
+ var codesig = CodeSignature.init(macho_file.page_size); + codesig.code_directory.ident = macho_file.base.options.emit.?.sub_path; + if (macho_file.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try macho_file.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; + + var headers_buf = std.ArrayList(u8).init(arena); + try macho_file.writeSegmentHeaders(&ncmds, headers_buf.writer()); + + try macho_file.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try macho_file.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + + if (codesig) |*csig| { + try macho_file.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last + } + } + + if (!macho_file.base.options.disable_lld_caching) { + // Update the file with the digest. If it fails we can continue; it only + // means that the next invocation will have an unnecessary cache miss. + Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { + log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); + }; + // Again failure here only means an unnecessary cache miss. + man.writeManifest() catch |err| { + log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); + }; + // We hang on to this lock so that the output file path can be used without + // other processes clobbering it. + macho_file.base.lock = man.toOwnedLock(); + } +} + +fn writeAtoms(macho_file: *MachO) !void { + assert(macho_file.mode == .one_shot); + + const gpa = macho_file.base.allocator; + const slice = macho_file.sections.slice(); + + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + if (header.size == 0) continue; + var atom = last_atom.?; + + if (header.isZerofill()) continue; + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); + + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + const this_sym = atom.getSymbol(macho_file); + const padding_size: usize = if (atom.next) |next| blk: { + const next_sym = next.getSymbol(macho_file); + const size = next_sym.n_value - (this_sym.n_value + atom.size); + break :blk math.cast(usize, size) orelse return error.Overflow; + } else 0; + + log.debug(" (adding ATOM(%{d}, '{s}') from object({?d}) to buffer)", .{ + atom.sym_index, + atom.getName(macho_file), + atom.file, + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); + } + + try atom.resolveRelocs(macho_file); + buffer.appendSliceAssumeCapacity(atom.code.items); + + var i: usize = 0; + while (i < padding_size) : (i += 1) { + // TODO with NOPs + buffer.appendAssumeCapacity(0); + } + + if (atom.next) |next| { + atom = next; + } else { + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try macho_file.base.file.?.pwriteAll(buffer.items, header.offset); + break; + } + } + } +} + +fn allocateSegments(macho_file: *MachO) !void { + try allocateSegment(macho_file, macho_file.text_segment_cmd_index, &.{ + macho_file.pagezero_segment_cmd_index, + }, try macho_file.calcMinHeaderPad()); + + if 
(macho_file.text_segment_cmd_index) |index| blk: { + const indexes = macho_file.getSectionIndexes(index); + if (indexes.start == indexes.end) break :blk; + const seg = macho_file.segments.items[index]; + + // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. + var min_alignment: u32 = 0; + for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); + min_alignment = math.max(min_alignment, alignment); + } + + assert(min_alignment > 0); + const last_header = macho_file.sections.items(.header)[indexes.end - 1]; + const shift: u32 = shift: { + const diff = seg.filesize - last_header.offset - last_header.size; + const factor = @divTrunc(diff, min_alignment); + break :shift @intCast(u32, factor * min_alignment); + }; + + if (shift > 0) { + for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |*header| { + header.offset += shift; + header.addr += shift; + } + } + } + + try allocateSegment(macho_file, macho_file.data_const_segment_cmd_index, &.{ + macho_file.text_segment_cmd_index, + macho_file.pagezero_segment_cmd_index, + }, 0); + + try allocateSegment(macho_file, macho_file.data_segment_cmd_index, &.{ + macho_file.data_const_segment_cmd_index, + macho_file.text_segment_cmd_index, + macho_file.pagezero_segment_cmd_index, + }, 0); + + try allocateSegment(macho_file, macho_file.linkedit_segment_cmd_index, &.{ + macho_file.data_segment_cmd_index, + macho_file.data_const_segment_cmd_index, + macho_file.text_segment_cmd_index, + macho_file.pagezero_segment_cmd_index, + }, 0); +} + +fn allocateSegment(macho_file: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { + const index = maybe_index orelse return; + const seg = &macho_file.segments.items[index]; + + const base = macho_file.getSegmentAllocBase(indices); + seg.vmaddr = base.vmaddr; + seg.fileoff = base.fileoff; + seg.filesize = init_size; + seg.vmsize = init_size; + + // Allocate the sections according to their alignment at the beginning of the segment. 
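As with the copy deleted from MachO.zig above, the loop below walks the sections in order, bumping a cursor to the next multiple of each section's alignment (`@"align"` is stored as a log2 value). The cursor arithmetic in isolation (sizes and alignments are illustrative):

    const std = @import("std");

    fn layoutOffsets(sizes: []const u64, log2_aligns: []const u32, offsets: []u64) !u64 {
        var start: u64 = 0;
        for (sizes) |size, i| {
            const alignment = try std.math.powi(u64, 2, log2_aligns[i]);
            start = std.mem.alignForwardGeneric(u64, start, alignment);
            offsets[i] = start;
            start += size;
        }
        return start; // becomes the segment's filesize/vmsize before page rounding
    }

    test "sections are packed at their alignment" {
        var offsets: [3]u64 = undefined;
        const end = try layoutOffsets(&.{ 0x10, 0x3, 0x20 }, &.{ 4, 2, 4 }, &offsets);
        try std.testing.expectEqual(@as(u64, 0x00), offsets[0]);
        try std.testing.expectEqual(@as(u64, 0x10), offsets[1]);
        try std.testing.expectEqual(@as(u64, 0x20), offsets[2]);
        try std.testing.expectEqual(@as(u64, 0x40), end);
    }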
+ const indexes = macho_file.getSectionIndexes(index); + var start = init_size; + const slice = macho_file.sections.slice(); + for (slice.items(.header)[indexes.start..indexes.end]) |*header| { + const alignment = try math.powi(u32, 2, header.@"align"); + const start_aligned = mem.alignForwardGeneric(u64, start, alignment); + + header.offset = if (header.isZerofill()) + 0 + else + @intCast(u32, seg.fileoff + start_aligned); + header.addr = seg.vmaddr + start_aligned; + + start = start_aligned + header.size; + + if (!header.isZerofill()) { + seg.filesize = start; + } + seg.vmsize = start; + } + + seg.filesize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); + seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, macho_file.page_size); +} + +fn allocateSymbols(macho_file: *MachO) !void { + const slice = macho_file.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom orelse continue; + + while (atom.prev) |prev| { + atom = prev; + } + + const n_sect = @intCast(u8, sect_id + 1); + var base_vaddr = header.addr; + + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); + + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); + + const sym = atom.getSymbolPtr(macho_file); + sym.n_value = base_vaddr; + sym.n_sect = n_sect; + + log.debug(" ATOM(%{d}, '{s}') @{x}", .{ atom.sym_index, atom.getName(macho_file), base_vaddr }); + + // Update each symbol contained within the atom + for (atom.contained.items) |sym_at_off| { + const contained_sym = macho_file.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_value = base_vaddr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + base_vaddr += atom.size; + + if (atom.next) |next| { + atom = next; + } else break; + } + } +} From e5da251635bb24d418dbec4385aa2319c7f75247 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Sep 2022 16:24:41 +0200 Subject: [PATCH 03/17] macho: clean up use of section ids --- src/link/MachO.zig | 136 ++++++++++++++++++-------------------- src/link/MachO/Atom.zig | 4 +- src/link/MachO/Object.zig | 44 ++++++------ 3 files changed, 88 insertions(+), 96 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 863ba4c2f5..4a8dd9930d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1215,10 +1215,11 @@ pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32 return atom; } -pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void { - const section = self.sections.get(sect_id); +pub fn writeAtom(self: *MachO, atom: *Atom) !void { const sym = atom.getSymbol(self); + const section = self.sections.get(sym.n_sect - 1); const file_offset = section.header.offset + sym.n_value - section.header.addr; + try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } @@ -1327,7 +1328,7 @@ fn writeAtoms(self: *MachO) !void { while (true) { if (atom.dirty) { - try self.writeAtom(atom, sect_i); + try self.writeAtom(atom); atom.dirty = false; } @@ -1344,6 +1345,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); const sym = atom.getSymbolPtr(self); 
sym.n_type = macho.N_SECT; + sym.n_sect = self.got_section_index.? + 1; try atom.relocs.append(gpa, .{ .offset = 0, @@ -1373,7 +1375,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, self.got_section_index.?); + try self.allocateAtomCommon(atom); return atom; } @@ -1382,8 +1384,6 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); - const sym = atom.getSymbolPtr(self); - sym.n_type = macho.N_SECT; const target_sym = self.getSymbol(target); assert(target_sym.undf()); @@ -1397,12 +1397,16 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - const match = (try self.getOutputSection(.{ + const sym = atom.getSymbolPtr(self); + sym.n_type = macho.N_SECT; + const sect_id = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, })).?; - try self.allocateAtomCommon(atom, match); + sym.n_sect = sect_id + 1; + + try self.allocateAtomCommon(atom); return atom; } @@ -1416,9 +1420,10 @@ pub fn createDyldPrivateAtom(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; + sym.n_sect = self.data_section_index.? + 1; self.dyld_private_atom = atom; - try self.allocateAtomCommon(atom, self.data_section_index.?); + try self.allocateAtomCommon(atom); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -1444,6 +1449,7 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; + sym.n_sect = self.stub_helper_section_index.? + 1; const dyld_private_sym_index = self.dyld_private_atom.?.sym_index; switch (arch) { @@ -1542,7 +1548,7 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { } self.stub_helper_preamble_atom = atom; - try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); + try self.allocateAtomCommon(atom); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -1565,6 +1571,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; + sym.n_sect = self.stub_helper_section_index.? + 1; try atom.relocs.ensureTotalCapacity(gpa, 1); @@ -1614,7 +1621,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); + try self.allocateAtomCommon(atom); return atom; } @@ -1625,6 +1632,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; + sym.n_sect = self.la_symbol_ptr_section_index.? 
+ 1; try atom.relocs.append(gpa, .{ .offset = 0, @@ -1650,7 +1658,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, self.la_symbol_ptr_section_index.?); + try self.allocateAtomCommon(atom); return atom; } @@ -1672,6 +1680,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; + sym.n_sect = self.stubs_section_index.? + 1; switch (arch) { .x86_64 => { @@ -1725,7 +1734,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, self.stubs_section_index.?); + try self.allocateAtomCommon(atom); return atom; } @@ -1762,7 +1771,7 @@ pub fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, n_sect); + try self.allocateAtomCommon(atom); if (global.file) |file| { const object = &self.objects.items[file]; @@ -2376,12 +2385,13 @@ pub fn deinit(self: *MachO) void { } } -fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { +fn freeAtom(self: *MachO, atom: *Atom, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { atom.deinit(self.base.allocator); } + const sect_id = atom.getSymbol(self).n_sect - 1; const free_list = &self.sections.items(.free_list)[sect_id]; var already_have_free_list_node = false; { @@ -2434,21 +2444,20 @@ fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { } } -fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, sect_id: u8) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64) void { _ = self; _ = atom; _ = new_block_size; - _ = sect_id; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. 
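One possible shape for that TODO, strictly as a sketch (the threshold constant is invented and none of this is part of the change): once the tail capacity freed by the shrink is big enough to be useful, hand it to the section's free list, mirroring what freeAtom does above.

    // Hypothetical sketch only; `min_useful_surplus` is a made-up tuning knob.
    fn shrinkAtomSketch(self: *MachO, atom: *Atom, new_block_size: u64) void {
        const min_useful_surplus: u64 = 0x100;
        const surplus = atom.capacity(self) - new_block_size;
        if (surplus < min_useful_surplus) return;
        const sect_id = atom.getSymbol(self).n_sect - 1;
        const free_list = &self.sections.items(.free_list)[sect_id];
        // Free lists are heuristics; appending is allowed to fail.
        free_list.append(self.base.allocator, atom) catch {};
    }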
} -fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, sect_id: u8) !u64 { +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) !u64 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; - return self.allocateAtom(atom, new_atom_size, alignment, sect_id); + return self.allocateAtom(atom, new_atom_size, alignment); } fn allocateSymbol(self: *MachO) !u32 { @@ -2704,21 +2713,16 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu typed_value.val, required_alignment, ); - const addr = try self.allocateAtom(atom, code.len, required_alignment, sect_id); + const symbol = atom.getSymbolPtr(self); + symbol.n_strx = name_str_index; + symbol.n_type = macho.N_SECT; + symbol.n_sect = sect_id + 1; + symbol.n_value = try self.allocateAtom(atom, code.len, required_alignment); - log.debug("allocated atom for {?s} at 0x{x}", .{ name, addr }); + log.debug("allocated atom for {?s} at 0x{x}", .{ name, symbol.n_value }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(atom, sect_id, true); - - const symbol = atom.getSymbolPtr(self); - symbol.* = .{ - .n_strx = name_str_index, - .n_type = macho.N_SECT, - .n_sect = sect_id + 1, - .n_desc = 0, - .n_value = addr, - }; + errdefer self.freeAtom(atom, true); try unnamed_consts.append(gpa, atom); @@ -2968,15 +2972,20 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 required_alignment, ); } - const match = decl_ptr.*.?; + const sect_id = decl_ptr.*.?; if (decl.link.macho.size != 0) { const symbol = decl.link.macho.getSymbolPtr(self); + symbol.n_strx = try self.strtab.insert(self.base.allocator, sym_name); + symbol.n_type = macho.N_SECT; + symbol.n_sect = sect_id + 1; + symbol.n_desc = 0; + const capacity = decl.link.macho.capacity(self); const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); if (need_realloc) { - const vaddr = try self.growAtom(&decl.link.macho, code_len, required_alignment, match); + const vaddr = try self.growAtom(&decl.link.macho, code_len, required_alignment); log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ sym_name, symbol.n_value, vaddr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); symbol.n_value = vaddr; @@ -2987,32 +2996,24 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 }).?; got_atom.dirty = true; } else if (code_len < decl.link.macho.size) { - self.shrinkAtom(&decl.link.macho, code_len, match); + self.shrinkAtom(&decl.link.macho, code_len); } + decl.link.macho.size = code_len; decl.link.macho.dirty = true; - - symbol.n_strx = try self.strtab.insert(self.base.allocator, sym_name); - symbol.n_type = macho.N_SECT; - symbol.n_sect = @intCast(u8, self.text_section_index.?) 
+ 1; - symbol.n_desc = 0; } else { const name_str_index = try self.strtab.insert(self.base.allocator, sym_name); - const addr = try self.allocateAtom(&decl.link.macho, code_len, required_alignment, match); + const symbol = decl.link.macho.getSymbolPtr(self); + symbol.n_strx = name_str_index; + symbol.n_type = macho.N_SECT; + symbol.n_sect = sect_id + 1; + symbol.n_desc = 0; + symbol.n_value = try self.allocateAtom(&decl.link.macho, code_len, required_alignment); - log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, addr }); + log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, symbol.n_value }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(&decl.link.macho, match, false); - - const symbol = decl.link.macho.getSymbolPtr(self); - symbol.* = .{ - .n_strx = name_str_index, - .n_type = macho.N_SECT, - .n_sect = match + 1, - .n_desc = 0, - .n_value = addr, - }; + errdefer self.freeAtom(&decl.link.macho, false); const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; const got_index = try self.allocateGotEntry(got_target); @@ -3171,10 +3172,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - // TODO - // const sect_id = atom.getSymbol(self).n_sect; - const sect_id = self.getSectionByName("__TEXT", "__const").?; - self.freeAtom(atom, sect_id, true); + self.freeAtom(atom, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -3192,8 +3190,8 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { const decl = mod.declPtr(decl_index); log.debug("freeDecl {*}", .{decl}); const kv = self.decls.fetchSwapRemove(decl_index); - if (kv.?.value) |match| { - self.freeAtom(&decl.link.macho, match, false); + if (kv.?.value) |_| { + self.freeAtom(&decl.link.macho, false); self.freeUnnamedConsts(decl_index); } // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
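The invariant this whole commit leans on: in Mach-O, `nlist_64.n_sect` is a 1-based section ordinal (0 is reserved for `NO_SECT`), so once every atom's symbol has `n_sect` set, the linker's 0-based section id is always recoverable as `n_sect - 1` and no longer needs to be threaded through freeAtom, growAtom, and friends. The round-trip in isolation:

    const std = @import("std");
    const macho = std.macho;

    test "sect_id round-trips through n_sect" {
        const sect_id: u8 = 3; // 0-based index into `sections`
        const sym = macho.nlist_64{
            .n_strx = 0,
            .n_type = macho.N_SECT,
            .n_sect = sect_id + 1, // stored 1-based; 0 would mean NO_SECT
            .n_desc = 0,
            .n_value = 0,
        };
        try std.testing.expectEqual(sect_id, sym.n_sect - 1);
    }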
@@ -3928,29 +3926,22 @@ fn getSectionMaxAlignment(self: *MachO, start: u8, end: u8) !u32 { return max_alignment; } -fn allocateAtomCommon(self: *MachO, atom: *Atom, sect_id: u8) !void { - const sym = atom.getSymbolPtr(self); +fn allocateAtomCommon(self: *MachO, atom: *Atom) !void { if (self.mode == .incremental) { + const sym_name = atom.getName(self); const size = atom.size; const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, size, alignment, sect_id); - const sym_name = atom.getName(self); + const vaddr = try self.allocateAtom(atom, size, alignment); log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); - sym.n_value = vaddr; - } else try self.addAtomToSection(atom, sect_id); - sym.n_sect = sect_id + 1; + atom.getSymbolPtr(self).n_value = vaddr; + } else try self.addAtomToSection(atom); } -fn allocateAtom( - self: *MachO, - atom: *Atom, - new_atom_size: u64, - alignment: u64, - sect_id: u8, -) !u64 { +fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) !u64 { const tracy = trace(@src()); defer tracy.end(); + const sect_id = atom.getSymbol(self).n_sect - 1; const header = &self.sections.items(.header)[sect_id]; const free_list = &self.sections.items(.free_list)[sect_id]; const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; @@ -4050,7 +4041,8 @@ fn allocateAtom( return vaddr; } -pub fn addAtomToSection(self: *MachO, atom: *Atom, sect_id: u8) !void { +pub fn addAtomToSection(self: *MachO, atom: *Atom) !void { + const sect_id = atom.getSymbol(self).n_sect - 1; var section = self.sections.get(sect_id); if (section.header.size > 0) { section.last_atom.?.next = atom; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d5758cfb74..0a5c3ec182 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -314,13 +314,13 @@ pub fn parseRelocs(self: *Atom, relocs: []align(1) const macho.relocation_info, const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const match = (try context.macho_file.getOutputSection(sect)) orelse + const out_sect_id = (try context.macho_file.getOutputSection(sect)) orelse unreachable; const sym_index = @intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = match + 1, + .n_sect = out_sect_id + 1, .n_desc = 0, .n_value = sect.addr, }); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7c7c83ab3e..28244c674f 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -220,15 +220,15 @@ fn filterRelocs( pub fn scanInputSections(self: Object, macho_file: *MachO) !void { for (self.sections.items) |sect| { - const match = (try macho_file.getOutputSection(sect)) orelse { + const sect_id = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; - const output = macho_file.sections.items(.header)[match]; + const output = macho_file.sections.items(.header)[sect_id]; log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ sect.segName(), sect.sectName(), - match + 1, + sect_id + 1, output.segName(), output.sectName(), }); @@ -335,15 +335,15 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. 
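"Matching" here means coalescing: getOutputSection maps many input sections onto one output section keyed by segment name, section name, and flags, so for example every S_CSTRING_LITERALS section from every object lands in a single __TEXT,__cstring. A distilled sketch of the keying idea (a hypothetical helper, not the real signature):

    const std = @import("std");
    const macho = std.macho;

    const OutputName = struct { seg: []const u8, sect: []const u8 };

    // Hypothetical distillation: where does an input section coalesce to?
    fn outputNameFor(sect: macho.section_64) OutputName {
        return switch (sect.@"type"()) {
            // C string literals from any object merge into one section.
            macho.S_CSTRING_LITERALS => .{ .seg = "__TEXT", .sect = "__cstring" },
            // By default, keep the input's own segment/section pairing.
            else => .{ .seg = sect.segName(), .sect = sect.sectName() },
        };
    }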
- const match = (try macho_file.getOutputSection(sect)) orelse { + const out_sect_id = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; log.debug(" output sect({d}, '{s},{s}')", .{ - match + 1, - macho_file.sections.items(.header)[match].segName(), - macho_file.sections.items(.header)[match].sectName(), + out_sect_id + 1, + macho_file.sections.items(.header)[out_sect_id].segName(), + macho_file.sections.items(.header)[out_sect_id].sectName(), }); const cpu_arch = macho_file.base.options.target.cpu.arch; @@ -376,7 +376,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = match + 1, + .n_sect = out_sect_id + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -397,10 +397,10 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { atom_code, relocs, &.{}, - match, + out_sect_id, sect, ); - try macho_file.addAtomToSection(atom, match); + try macho_file.addAtomToSection(atom); } var next_sym_count: usize = 0; @@ -452,7 +452,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { atom_code, relocs, sorted_atom_syms.items[1..], - match, + out_sect_id, sect, ); @@ -465,7 +465,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = match + 1, + .n_sect = out_sect_id + 1, .n_desc = 0, .n_value = addr, }); @@ -479,7 +479,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { try self.atom_by_index_table.put(gpa, alias, atom); } - try macho_file.addAtomToSection(atom, match); + try macho_file.addAtomToSection(atom); } } else { // If there is no symbol to refer to this atom, we create @@ -490,7 +490,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = match + 1, + .n_sect = out_sect_id + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -506,10 +506,10 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { code, relocs, filtered_syms, - match, + out_sect_id, sect, ); - try macho_file.addAtomToSection(atom, match); + try macho_file.addAtomToSection(atom); } } } @@ -524,21 +524,21 @@ fn createAtomFromSubsection( code: ?[]const u8, relocs: []align(1) const macho.relocation_info, indexes: []const SymbolAtIndex, - match: u8, + out_sect_id: u8, sect: macho.section_64, ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); atom.file = object_id; - self.symtab.items[sym_index].n_sect = match + 1; + self.symtab.items[sym_index].n_sect = out_sect_id + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ sym_index, self.getString(sym.n_strx), - match + 1, - macho_file.sections.items(.header)[match].segName(), - macho_file.sections.items(.header)[match].sectName(), + out_sect_id + 1, + macho_file.sections.items(.header)[out_sect_id].segName(), + macho_file.sections.items(.header)[out_sect_id].sectName(), object_id, }); @@ -566,7 +566,7 @@ fn createAtomFromSubsection( try atom.contained.ensureTotalCapacity(gpa, indexes.len); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; - inner_sym.n_sect = match + 1; + inner_sym.n_sect = out_sect_id + 1; 
atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, From 34f9360ea20228b895df10b3950c72f40efb6843 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Sep 2022 20:41:30 +0200 Subject: [PATCH 04/17] macho: do not call populateMissingMetadata in full link mode --- src/link/MachO.zig | 534 +++++++++++++++++--------------------- src/link/MachO/Atom.zig | 3 +- src/link/MachO/Object.zig | 9 +- src/link/MachO/zld.zig | 112 +++++++- 4 files changed, 355 insertions(+), 303 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4a8dd9930d..8ca18d8fdc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -304,13 +304,9 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { errdefer file.close(); self.base.file = file; - if (!options.strip and options.module != null) blk: { - // TODO once I add support for converting (and relocating) DWARF info from relocatable - // object files, this check becomes unnecessary. - // For now, for LLVM backend we fallback to the old-fashioned stabs approach used by - // stage1. - if (build_options.have_llvm and options.use_llvm) break :blk; + if (self.mode == .one_shot) return self; + if (!options.strip and options.module != null) { // Create dSYM bundle. const dir = options.module.?.zig_cache_artifact_directory; log.debug("creating {s}.dSYM bundle in {?s}", .{ emit.sub_path, dir.path }); @@ -1038,30 +1034,32 @@ pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: } } -pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { +const GetOutputSectionResult = struct { + found_existing: bool, + sect_id: u8, +}; + +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?GetOutputSectionResult { const segname = sect.segName(); const sectname = sect.sectName(); - const res: ?u8 = blk: { + + var found_existing: bool = true; + const sect_id: u8 = blk: { if (mem.eql(u8, "__LLVM", segname)) { log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ sect.flags, segname, sectname, }); - break :blk null; + return null; } if (sect.isCode()) { if (self.text_section_index == null) { - self.text_section_index = try self.initSection( - "__TEXT", - "__text", - sect.size, - sect.@"align", - .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); + self.text_section_index = try self.initSection("__TEXT", "__text", .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); + found_existing = false; } break :blk self.text_section_index.?; } @@ -1073,7 +1071,7 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { sect.flags, segname, sectname, }); } - break :blk null; + return null; } switch (sect.@"type"()) { @@ -1081,42 +1079,30 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, => { - break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( - "__TEXT", - "__const", - sect.size, - sect.@"align", - .{}, - ); + if (self.getSectionByName("__TEXT", "__const")) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection("__TEXT", "__const", .{}); }, macho.S_CSTRING_LITERALS => { if (mem.startsWith(u8, sectname, "__objc")) { - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - sect.size, - sect.@"align", - .{}, - ); + if 
(self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection(segname, sectname, .{}); } - break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( - "__TEXT", - "__cstring", - sect.size, - sect.@"align", - .{ .flags = macho.S_CSTRING_LITERALS }, - ); + if (self.getSectionByName("__TEXT", "__cstring")) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection("__TEXT", "__cstring", .{ + .flags = macho.S_CSTRING_LITERALS, + }); }, macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, => { - break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( - "__DATA_CONST", - sectname, - sect.size, - sect.@"align", - .{ .flags = sect.flags }, - ); + if (self.getSectionByName("__DATA_CONST", sectname)) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection("__DATA_CONST", sectname, .{ + .flags = sect.flags, + }); }, macho.S_LITERAL_POINTERS, macho.S_ZEROFILL, @@ -1125,22 +1111,14 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { macho.S_THREAD_LOCAL_REGULAR, macho.S_THREAD_LOCAL_ZEROFILL, => { - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - sect.size, - sect.@"align", - .{ .flags = sect.flags }, - ); + if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection(segname, sectname, .{ .flags = sect.flags }); }, macho.S_COALESCED => { - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - sect.size, - sect.@"align", - .{}, - ); + if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection(segname, sectname, .{}); }, macho.S_REGULAR => { if (mem.eql(u8, segname, "__TEXT")) { @@ -1150,13 +1128,9 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( - "__DATA_CONST", - "__const", - sect.size, - sect.@"align", - .{}, - ); + if (self.getSectionByName("__DATA_CONST", "__const")) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection("__DATA_CONST", "__const", .{}); } } if (mem.eql(u8, segname, "__DATA")) { @@ -1165,39 +1139,29 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { mem.eql(u8, sectname, "__objc_classlist") or mem.eql(u8, sectname, "__objc_imageinfo")) { - break :blk self.getSectionByName("__DATA_CONST", sectname) orelse - try self.initSection( - "__DATA_CONST", - sectname, - sect.size, - sect.@"align", - .{}, - ); + if (self.getSectionByName("__DATA_CONST", sectname)) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection("__DATA_CONST", sectname, .{}); } else if (mem.eql(u8, sectname, "__data")) { if (self.data_section_index == null) { - self.data_section_index = try self.initSection( - segname, - sectname, - sect.size, - sect.@"align", - .{}, - ); + self.data_section_index = try self.initSection(segname, sectname, .{}); + found_existing = false; } break :blk self.data_section_index.?; } } - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - sect.size, - sect.@"align", - .{}, - ); + if 
(self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + found_existing = false; + break :blk try self.initSection(segname, sectname, .{}); }, - else => break :blk null, + else => return null, } }; - return res; + + return GetOutputSectionResult{ + .found_existing = found_existing, + .sect_id = sect_id, + }; } pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32) !*Atom { @@ -1399,12 +1363,17 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; - const sect_id = (try self.getOutputSection(.{ + const gop = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, })).?; - sym.n_sect = sect_id + 1; + if (self.mode == .incremental and !gop.found_existing) { + // TODO allocate section + const needed_size: u64 = self.page_size; + try self.allocateSection(gop.sect_id, needed_size, @alignOf(u64)); + } + sym.n_sect = gop.sect_id + 1; try self.allocateAtomCommon(atom); @@ -1754,16 +1723,20 @@ pub fn createTentativeDefAtoms(self: *MachO) !void { // text blocks for each tentative definition. const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; - const n_sect = (try self.getOutputSection(.{ + const gop = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__bss"), .flags = macho.S_ZEROFILL, })).?; + if (self.mode == .incremental and !gop.found_existing) { + // TODO allocate section + try self.allocateSection(gop.sect_id, size, alignment); + } sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = n_sect, + .n_sect = gop.sect_id, .n_desc = 0, .n_value = 0, }; @@ -2883,16 +2856,19 @@ fn getOutputSectionAtom( const align_log_2 = math.log2(alignment); const zig_ty = ty.zigTypeTag(); const mode = self.base.options.optimize_mode; + const sect_id: u8 = blk: { // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk (try self.getOutputSection(.{ + const gop = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__bss"), - .size = code.len, - .@"align" = align_log_2, })).?; + if (!gop.found_existing) { + try self.allocateSection(gop.sect_id, code.len, align_log_2); + } + break :blk gop.sect_id; } else { break :blk self.data_section_index.?; } @@ -2903,12 +2879,14 @@ fn getOutputSectionAtom( } if (needsPointerRebase(ty, val, mod)) { - break :blk (try self.getOutputSection(.{ + const gop = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA_CONST"), .sectname = makeStaticString("__const"), - .size = code.len, - .@"align" = align_log_2, })).?; + if (!gop.found_existing) { + try self.allocateSection(gop.sect_id, code.len, align_log_2); + } + break :blk gop.sect_id; } switch (zig_ty) { @@ -2922,13 +2900,15 @@ fn getOutputSectionAtom( .const_slice_u8_sentinel_0, .manyptr_const_u8_sentinel_0, => { - break :blk (try self.getOutputSection(.{ + const gop = (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__cstring"), .flags = macho.S_CSTRING_LITERALS, - .size = code.len, - .@"align" = align_log_2, })).?; + if (!gop.found_existing) { + try self.allocateSection(gop.sect_id, code.len, align_log_2); + } + break :blk gop.sect_id; }, else => {}, } @@ -2936,13 +2916,16 @@ fn getOutputSectionAtom( }, else => {}, } - break 
:blk (try self.getOutputSection(.{ + const gop = (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__const"), - .size = code.len, - .@"align" = align_log_2, })).?; + if (!gop.found_existing) { + try self.allocateSection(gop.sect_id, code.len, align_log_2); + } + break :blk gop.sect_id; }; + const header = self.sections.items(.header)[sect_id]; log.debug(" allocating atom '{s}' in '{s},{s}', ord({d})", .{ name, @@ -3255,40 +3238,36 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil } pub fn populateMissingMetadata(self: *MachO) !void { + assert(self.mode == .incremental); + const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; - const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; - const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); + const pagezero_vmsize = self.calcPagezeroSize(); - if (self.pagezero_segment_cmd_index == null) blk: { - if (self.base.options.output_mode == .Lib) break :blk; - if (aligned_pagezero_vmsize == 0) break :blk; - if (aligned_pagezero_vmsize != pagezero_vmsize) { - log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); - log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); + if (self.pagezero_segment_cmd_index == null) { + if (pagezero_vmsize > 0) { + self.pagezero_segment_cmd_index = @intCast(u8, self.segments.items.len); + try self.segments.append(gpa, .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = pagezero_vmsize, + .cmdsize = @sizeOf(macho.segment_command_64), + }); } - self.pagezero_segment_cmd_index = @intCast(u8, self.segments.items.len); - try self.segments.append(gpa, .{ - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }); } if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u8, self.segments.items.len); - const needed_size = if (self.mode == .incremental) blk: { - const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); - const program_code_size_hint = self.base.options.program_code_size_hint; - const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = headerpad_size + program_code_size_hint + got_size_hint; - const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); - break :blk needed_size; - } else 0; + const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); + const program_code_size_hint = self.base.options.program_code_size_hint; + const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; + const ideal_size = headerpad_size + program_code_size_hint + got_size_hint; + const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); + + log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); + try self.segments.append(gpa, .{ .segname = makeStaticString("__TEXT"), - .vmaddr = aligned_pagezero_vmsize, + .vmaddr = pagezero_vmsize, .vmsize = needed_size, .filesize = needed_size, .maxprot = macho.PROT.READ | macho.PROT.EXEC, @@ -3303,16 +3282,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const needed_size = if 
(self.mode == .incremental) self.base.options.program_code_size_hint else 0; - self.text_section_index = try self.initSection( - "__TEXT", - "__text", - needed_size, - alignment, - .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); + const needed_size = self.base.options.program_code_size_hint; + self.text_section_index = try self.initSection("__TEXT", "__text", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + try self.allocateSection(self.text_section_index.?, needed_size, alignment); } if (self.stubs_section_index == null) { @@ -3326,17 +3300,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; - const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint else 0; - self.stubs_section_index = try self.initSection( - "__TEXT", - "__stubs", - needed_size, - alignment, - .{ - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }, - ); + const needed_size = stub_size * self.base.options.symbol_count_hint; + self.stubs_section_index = try self.initSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }); + try self.allocateSection(self.stubs_section_index.?, needed_size, alignment); } if (self.stub_helper_section_index == null) { @@ -3355,37 +3324,26 @@ pub fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; - const needed_size = if (self.mode == .incremental) - stub_size * self.base.options.symbol_count_hint + preamble_size - else - 0; - self.stub_helper_section_index = try self.initSection( - "__TEXT", - "__stub_helper", - needed_size, - alignment, - .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); + const needed_size = stub_size * self.base.options.symbol_count_hint + preamble_size; + self.stub_helper_section_index = try self.initSection("__TEXT", "__stub_helper", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + try self.allocateSection(self.stub_helper_section_index.?, needed_size, alignment); } if (self.data_const_segment_cmd_index == null) { self.data_const_segment_cmd_index = @intCast(u8, self.segments.items.len); - var vmaddr: u64 = 0; - var fileoff: u64 = 0; - var needed_size: u64 = 0; - if (self.mode == .incremental) { - const base = self.getSegmentAllocBase(&.{self.text_segment_cmd_index.?}); - vmaddr = base.vmaddr; - fileoff = base.fileoff; - const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ - fileoff, - fileoff + needed_size, - }); - } + const base = self.getSegmentAllocBase(&.{self.text_segment_cmd_index.?}); + const vmaddr = base.vmaddr; + const fileoff = base.fileoff; + const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); + + log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ + fileoff, + fileoff + needed_size, + }); + try self.segments.append(gpa, .{ .segname = makeStaticString("__DATA_CONST"), .vmaddr 
= vmaddr, @@ -3399,38 +3357,27 @@ pub fn populateMissingMetadata(self: *MachO) !void { } if (self.got_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.got_section_index = try self.initSection( - "__DATA_CONST", - "__got", - needed_size, - alignment, - .{ - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }, - ); + self.got_section_index = try self.initSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + try self.allocateSection(self.got_section_index.?, needed_size, alignment); } if (self.data_segment_cmd_index == null) { self.data_segment_cmd_index = @intCast(u8, self.segments.items.len); - var vmaddr: u64 = 0; - var fileoff: u64 = 0; - var needed_size: u64 = 0; - if (self.mode == .incremental) { - const base = self.getSegmentAllocBase(&.{self.data_const_segment_cmd_index.?}); - vmaddr = base.vmaddr; - fileoff = base.fileoff; - const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; - needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ - fileoff, - fileoff + needed_size, - }); - } + const base = self.getSegmentAllocBase(&.{self.data_const_segment_cmd_index.?}); + const vmaddr = base.vmaddr; + const fileoff = base.fileoff; + const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); + + log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ + fileoff, + fileoff + needed_size, + }); + try self.segments.append(gpa, .{ .segname = makeStaticString("__DATA"), .vmaddr = vmaddr, @@ -3444,47 +3391,29 @@ pub fn populateMissingMetadata(self: *MachO) !void { } if (self.la_symbol_ptr_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.la_symbol_ptr_section_index = try self.initSection( - "__DATA", - "__la_symbol_ptr", - needed_size, - alignment, - .{ - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }, - ); + self.la_symbol_ptr_section_index = try self.initSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + try self.allocateSection(self.la_symbol_ptr_section_index.?, needed_size, alignment); } if (self.data_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.data_section_index = try self.initSection( - "__DATA", - "__data", - needed_size, - alignment, - .{}, - ); + self.data_section_index = try self.initSection("__DATA", "__data", .{}); + try self.allocateSection(self.data_section_index.?, needed_size, alignment); } if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); - var vmaddr: u64 = 0; - var fileoff: u64 = 0; - if (self.mode == .incremental) { - const base = self.getSegmentAllocBase(&.{self.data_segment_cmd_index.?}); - vmaddr = base.vmaddr; - fileoff = base.fileoff; - log.debug("found 
__LINKEDIT segment free space at 0x{x}", .{fileoff}); - } + const base = self.getSegmentAllocBase(&.{self.data_segment_cmd_index.?}); + const vmaddr = base.vmaddr; + const fileoff = base.fileoff; + + log.debug("found __LINKEDIT segment free space at 0x{x}", .{fileoff}); + try self.segments.append(gpa, .{ .segname = makeStaticString("__LINKEDIT"), .vmaddr = vmaddr, @@ -3586,6 +3515,18 @@ fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { return @intCast(u32, sizeofcmds); } +pub fn calcPagezeroSize(self: *MachO) u64 { + const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; + const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); + if (self.base.options.output_mode == .Lib) return 0; + if (aligned_pagezero_vmsize == 0) return 0; + if (aligned_pagezero_vmsize != pagezero_vmsize) { + log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); + log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); + } + return aligned_pagezero_vmsize; +} + pub fn calcMinHeaderPad(self: *MachO) !u64 { var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); @@ -3603,69 +3544,42 @@ pub fn calcMinHeaderPad(self: *MachO) !u64 { return offset; } -const InitSectionOpts = struct { - flags: u32 = macho.S_REGULAR, - reserved1: u32 = 0, - reserved2: u32 = 0, -}; - -fn initSection( - self: *MachO, - segname: []const u8, - sectname: []const u8, - size: u64, - alignment: u32, - opts: InitSectionOpts, -) !u8 { - const segment_id = self.getSegmentByName(segname).?; +fn allocateSection(self: *MachO, sect_id: u8, size: u64, alignment: u32) !void { + const segment_id = self.sections.items(.segment_index)[sect_id]; const seg = &self.segments.items[segment_id]; - const index = try self.insertSection(segment_id, .{ - .sectname = makeStaticString(sectname), - .segname = seg.segname, - .flags = opts.flags, - .reserved1 = opts.reserved1, - .reserved2 = opts.reserved2, - }); - seg.cmdsize += @sizeOf(macho.section_64); - seg.nsects += 1; + const header = &self.sections.items(.header)[sect_id]; + header.size = size; + header.@"align" = alignment; - if (self.mode == .incremental) { - const header = &self.sections.items(.header)[index]; - header.size = size; - header.@"align" = alignment; + const prev_end_off = if (sect_id > 0) blk: { + const prev_section = self.sections.get(sect_id - 1); + if (prev_section.segment_index == segment_id) { + const prev_header = prev_section.header; + break :blk prev_header.offset + padToIdeal(prev_header.size); + } else break :blk seg.fileoff; + } else 0; + const alignment_pow_2 = try math.powi(u32, 2, alignment); + // TODO better prealloc for __text section + // const padding: u64 = if (sect_id == 0) try self.calcMinHeaderPad() else 0; + const padding: u64 = if (sect_id == 0) 0x1000 else 0; + const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); - const prev_end_off = if (index > 0) blk: { - const prev_section = self.sections.get(index - 1); - if (prev_section.segment_index == segment_id) { - const prev_header = prev_section.header; - break :blk prev_header.offset + padToIdeal(prev_header.size); - } else break :blk seg.fileoff; - } else 0; - const alignment_pow_2 = try math.powi(u32, 2, alignment); - // TODO better prealloc for __text section - // const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; - 
const padding: u64 = if (index == 0) 0x1000 else 0; - const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); + if (!header.isZerofill()) { + header.offset = @intCast(u32, off); + } + header.addr = seg.vmaddr + off - seg.fileoff; - if (!header.isZerofill()) { - header.offset = @intCast(u32, off); - } - header.addr = seg.vmaddr + off - seg.fileoff; + // TODO Will this break if we are inserting section that is not the last section + // in a segment? + const max_size = self.allocatedSize(segment_id, off); - // TODO Will this break if we are inserting section that is not the last section - // in a segment? - const max_size = self.allocatedSize(segment_id, off); - - if (size > max_size) { - try self.growSection(index, @intCast(u32, size)); - } - - log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); - - self.updateSectionOrdinals(index + 1); + if (size > max_size) { + try self.growSection(sect_id, @intCast(u32, size)); } - return index; + log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); + + self.updateSectionOrdinals(sect_id + 1); } fn getSectionPrecedence(header: macho.section_64) u4 { @@ -3690,6 +3604,32 @@ fn getSectionPrecedence(header: macho.section_64) u4 { } } +const InitSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +pub fn initSection( + self: *MachO, + segname: []const u8, + sectname: []const u8, + opts: InitSectionOpts, +) !u8 { + const segment_id = self.getSegmentByName(segname).?; + const seg = &self.segments.items[segment_id]; + const index = try self.insertSection(segment_id, .{ + .sectname = makeStaticString(sectname), + .segname = seg.segname, + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }); + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; + return index; +} + fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 { const precedence = getSectionPrecedence(header); const indexes = self.getSectionIndexes(segment_index); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 0a5c3ec182..45846c5e09 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -314,8 +314,9 @@ pub fn parseRelocs(self: *Atom, relocs: []align(1) const macho.relocation_info, const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const out_sect_id = (try context.macho_file.getOutputSection(sect)) orelse + const gop = (try context.macho_file.getOutputSection(sect)) orelse unreachable; + const out_sect_id = gop.sect_id; const sym_index = @intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 28244c674f..9a6aae9b8b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -220,15 +220,15 @@ fn filterRelocs( pub fn scanInputSections(self: Object, macho_file: *MachO) !void { for (self.sections.items) |sect| { - const sect_id = (try macho_file.getOutputSection(sect)) orelse { + const gop = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; - const output = macho_file.sections.items(.header)[sect_id]; + const output = macho_file.sections.items(.header)[gop.sect_id]; log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ sect.segName(), 
sect.sectName(), - sect_id + 1, + gop.sect_id + 1, output.segName(), output.sectName(), }); @@ -335,10 +335,11 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. - const out_sect_id = (try macho_file.getOutputSection(sect)) orelse { + const gop = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; + const out_sect_id = gop.sect_id; log.debug(" output sect({d}, '{s},{s}')", .{ out_sect_id + 1, diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 0ddcf2d08e..3cf4d54014 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -198,7 +198,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr .n_value = 0, }); try macho_file.strtab.buffer.append(gpa, 0); - try macho_file.populateMissingMetadata(); + try initSections(macho_file); var lib_not_found = false; var framework_not_found = false; @@ -646,6 +646,116 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } } +fn initSections(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.base.options.target.cpu.arch; + const pagezero_vmsize = macho_file.calcPagezeroSize(); + + if (macho_file.pagezero_segment_cmd_index == null) { + if (pagezero_vmsize > 0) { + macho_file.pagezero_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + .segname = MachO.makeStaticString("__PAGEZERO"), + .vmsize = pagezero_vmsize, + .cmdsize = @sizeOf(macho.segment_command_64), + }); + } + } + + if (macho_file.text_segment_cmd_index == null) { + macho_file.text_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + .segname = MachO.makeStaticString("__TEXT"), + .vmaddr = pagezero_vmsize, + .vmsize = 0, + .filesize = 0, + .maxprot = macho.PROT.READ | macho.PROT.EXEC, + .initprot = macho.PROT.READ | macho.PROT.EXEC, + .cmdsize = @sizeOf(macho.segment_command_64), + }); + } + + if (macho_file.text_section_index == null) { + macho_file.text_section_index = try macho_file.initSection("__TEXT", "__text", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (macho_file.stubs_section_index == null) { + const stub_size: u4 = switch (cpu_arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + macho_file.stubs_section_index = try macho_file.initSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }); + } + + if (macho_file.stub_helper_section_index == null) { + macho_file.stub_helper_section_index = try macho_file.initSection("__TEXT", "__stub_helper", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (macho_file.data_const_segment_cmd_index == null) { + macho_file.data_const_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + .segname = MachO.makeStaticString("__DATA_CONST"), + .vmaddr = 0, + .vmsize = 0, + .fileoff = 0, + .filesize = 0, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), + 
}); + } + + if (macho_file.got_section_index == null) { + macho_file.got_section_index = try macho_file.initSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + } + + if (macho_file.data_segment_cmd_index == null) { + macho_file.data_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + .segname = MachO.makeStaticString("__DATA"), + .vmaddr = 0, + .vmsize = 0, + .fileoff = 0, + .filesize = 0, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), + }); + } + + if (macho_file.la_symbol_ptr_section_index == null) { + macho_file.la_symbol_ptr_section_index = try macho_file.initSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + } + + if (macho_file.data_section_index == null) { + macho_file.data_section_index = try macho_file.initSection("__DATA", "__data", .{}); + } + + if (macho_file.linkedit_segment_cmd_index == null) { + macho_file.linkedit_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + .segname = MachO.makeStaticString("__LINKEDIT"), + .vmaddr = 0, + .fileoff = 0, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), + }); + } +} + fn writeAtoms(macho_file: *MachO) !void { assert(macho_file.mode == .one_shot); From 53bd7bd044fccc70699b04d9ae37151423f70165 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Sep 2022 01:18:17 +0200 Subject: [PATCH 05/17] macho: move to incremental writes and global relocs for incremental --- src/arch/aarch64/Emit.zig | 25 +- src/arch/x86_64/Emit.zig | 24 +- src/link/Coff.zig | 2 +- src/link/MachO.zig | 1265 ++++++++++++++++++++------------- src/link/MachO/Atom.zig | 83 ++- src/link/MachO/Relocation.zig | 242 ++++--- src/link/MachO/dead_strip.zig | 6 +- src/link/MachO/zld.zig | 330 ++++++++- 8 files changed, 1328 insertions(+), 649 deletions(-) diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 9e243a3f86..a868b74edc 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -680,16 +680,15 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { break :blk offset; }; // Add relocation to the decl. - const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; + const atom = macho_file.getAtomForSymbol(.{ .sym_index = relocation.atom_index, .file = null }).?; const target = macho_file.getGlobalByIndex(relocation.sym_index); - try atom.relocs.append(emit.bin_file.allocator, .{ - .offset = offset, + try atom.addRelocation(macho_file, .{ + .@"type" = @enumToInt(std.macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), .target = target, + .offset = offset, .addend = 0, - .subtractor = null, .pcrel = true, .length = 2, - .@"type" = @enumToInt(std.macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), }); } else { return emit.fail("Implement call_extern for linking backends != MachO", .{}); @@ -882,13 +881,13 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { } if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = macho_file.atom_by_index_table.get(data.atom_index).?; - // Page reloc for adrp instruction. 
- try atom.relocs.append(emit.bin_file.allocator, .{ - .offset = offset, + const atom = macho_file.getAtomForSymbol(.{ .sym_index = data.atom_index, .file = null }).?; + // TODO this causes segfault in stage1 + // try atom.addRelocations(macho_file, 2, .{ + try atom.addRelocation(macho_file, .{ .target = .{ .sym_index = data.sym_index, .file = null }, + .offset = offset, .addend = 0, - .subtractor = null, .pcrel = true, .length = 2, .@"type" = switch (tag) { @@ -901,12 +900,10 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { else => unreachable, }, }); - // Pageoff reloc for adrp instruction. - try atom.relocs.append(emit.bin_file.allocator, .{ - .offset = offset + 4, + try atom.addRelocation(macho_file, .{ .target = .{ .sym_index = data.sym_index, .file = null }, + .offset = offset + 4, .addend = 0, - .subtractor = null, .pcrel = false, .length = 2, .@"type" = switch (tag) { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index e99f6ff4f5..0cdc7a4c5f 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -996,7 +996,6 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { ); const end_offset = emit.code.items.len; - const gpa = emit.bin_file.allocator; if (emit.bin_file.cast(link.File.MachO)) |macho_file| { const reloc_type = switch (ops.flags) { @@ -1004,19 +1003,17 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { 0b01 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), else => unreachable, }; - const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; - log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, relocation.sym_index }); - try atom.relocs.append(gpa, .{ - .offset = @intCast(u32, end_offset - 4), + const atom = macho_file.getAtomForSymbol(.{ .sym_index = relocation.atom_index, .file = null }).?; + try atom.addRelocation(macho_file, .{ + .@"type" = reloc_type, .target = .{ .sym_index = relocation.sym_index, .file = null }, + .offset = @intCast(u32, end_offset - 4), .addend = 0, - .subtractor = null, .pcrel = true, .length = 2, - .@"type" = reloc_type, }); } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - const atom = coff_file.atom_by_index_table.get(relocation.atom_index).?; + const atom = coff_file.getAtomForSymbol(.{ .sym_index = relocation.atom_index, .file = null }).?; try atom.addRelocation(coff_file, .{ .@"type" = switch (ops.flags) { 0b00 => .got, @@ -1145,20 +1142,19 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. - const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; + const atom = macho_file.getAtomForSymbol(.{ .sym_index = relocation.atom_index, .file = null }).?; const target = macho_file.getGlobalByIndex(relocation.sym_index); - try atom.relocs.append(emit.bin_file.allocator, .{ - .offset = offset, + try atom.addRelocation(macho_file, .{ + .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), .target = target, + .offset = offset, .addend = 0, - .subtractor = null, .pcrel = true, .length = 2, - .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), }); } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { // Add relocation to the decl. 
- const atom = coff_file.atom_by_index_table.get(relocation.atom_index).?; + const atom = coff_file.getAtomForSymbol(.{ .sym_index = relocation.atom_index, .file = null }).?; const target = coff_file.getGlobalByIndex(relocation.sym_index); try atom.addRelocation(coff_file, .{ .@"type" = .direct, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 013a0c0475..d9b887f831 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1527,7 +1527,7 @@ pub fn getDeclVAddr( assert(self.llvm_object == null); assert(decl.link.coff.sym_index != 0); - const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; + const atom = self.getAtomForSymbol(.{ .sym_index = reloc_info.parent_atom_index, .file = null }).?; const target = SymbolWithLoc{ .sym_index = decl.link.coff.sym_index, .file = null }; try atom.addRelocation(self, .{ .@"type" = .direct, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8ca18d8fdc..2d88930768 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -40,7 +40,6 @@ const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; const Module = @import("../Module.zig"); const Relocation = @import("MachO/Relocation.zig"); -const RelocationTable = Relocation.Table; const StringTable = @import("strtab.zig").StringTable; const Trie = @import("MachO/Trie.zig"); const Type = @import("../type.zig").Type; @@ -196,6 +195,21 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// this will be a table indexed by index into the list of Atoms. relocs: RelocationTable = .{}, +/// A table of rebases indexed by their owning `Atom`. +/// Note that once we refactor `Atom`'s lifetime and ownership rules, +/// this will be a table indexed by index into the list of Atoms. +rebases: RebaseTable = .{}, + +/// A table of bindings indexed by their owning `Atom`. +/// Note that once we refactor `Atom`'s lifetime and ownership rules, +/// this will be a table indexed by index into the list of Atoms. +bindings: BindingTable = .{}, + +/// A table of lazy bindings indexed by their owning `Atom`. +/// Note that once we refactor `Atom`'s lifetime and ownership rules, +/// this will be a table indexed by index into the list of Atoms. +lazy_bindings: BindingTable = .{}, + /// Table of Decls that are currently alive. /// We store them here so that we can properly dispose of any allocated /// memory within the atom in the incremental linker. @@ -215,8 +229,8 @@ const Entry = struct { return macho_file.getSymbolPtr(.{ .sym_index = entry.sym_index, .file = null }); } - pub fn getAtom(entry: Entry, macho_file: *MachO) *Atom { - return macho_file.getAtomForSymbol(.{ .sym_index = entry.sym_index, .file = null }).?; + pub fn getAtom(entry: Entry, macho_file: *MachO) ?*Atom { - return macho_file.getAtomForSymbol(.{ .sym_index = entry.sym_index, .file = null }).?; + return macho_file.getAtomForSymbol(.{ .sym_index = entry.sym_index, .file = null }); } pub fn getName(entry: Entry, macho_file: *MachO) []const u8 { @@ -224,7 +238,10 @@ const Entry = struct { } }; +const BindingTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Atom.Binding)); const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); +const RebaseTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(u32)); +const RelocationTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Relocation)); const PendingUpdate = union(enum) { resolve_undef: u32, @@ -238,6 +255,16 @@ pub const SymbolWithLoc = struct { // null means it's a synthetic global.
file: ?u32 = null, + + pub fn eql(this: SymbolWithLoc, other: SymbolWithLoc) bool { + if (this.file == null and other.file == null) { + return this.sym_index == other.sym_index; + } + if (this.file != null and other.file != null) { + return this.sym_index == other.sym_index and this.file.? == other.file.?; + } + return false; + } }; /// When allocating, the ideal_capacity is calculated by @@ -436,7 +463,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var libs = std.StringArrayHashMap(link.SystemLib).init(arena); try self.resolveLibSystem(arena, comp, &.{}, &libs); - const id_symlink_basename = "zld.id"; + const id_symlink_basename = "link.id"; const cache_dir_handle = module.zig_cache_artifact_directory.handle; var man: Cache.Manifest = undefined; @@ -517,14 +544,19 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.allocateSpecialSymbols(); + { + var it = self.relocs.keyIterator(); + while (it.next()) |atom| { + try atom.*.resolveRelocations(self); + } + } + if (build_options.enable_logging) { self.logSymtab(); self.logSections(); self.logAtoms(); } - try self.writeAtoms(); - var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); var ncmds: u32 = 0; @@ -1179,84 +1211,49 @@ pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32 return atom; } -pub fn writeAtom(self: *MachO, atom: *Atom) !void { +pub fn writeAtom(self: *MachO, atom: *Atom, code: []const u8) !void { + // TODO: temporary sanity check + assert(atom.code.items.len == 0); + assert(atom.relocs.items.len == 0); + assert(atom.rebases.items.len == 0); + assert(atom.bindings.items.len == 0); + assert(atom.lazy_bindings.items.len == 0); + const sym = atom.getSymbol(self); const section = self.sections.get(sym.n_sect - 1); const file_offset = section.header.offset + sym.n_value - section.header.addr; - try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); - try self.base.file.?.pwriteAll(atom.code.items, file_offset); + try self.base.file.?.pwriteAll(code, file_offset); + try atom.resolveRelocations(self); } -// fn markRelocsDirtyByTarget(self: *MachO, target: SymbolWithLoc) void { -// // TODO: reverse-lookup might come in handy here -// var it = self.relocs.valueIterator(); -// while (it.next()) |relocs| { -// for (relocs.items) |*reloc| { -// if (!reloc.target.eql(target)) continue; -// reloc.dirty = true; -// } -// } -// } +fn writePtrWidthAtom(self: *MachO, atom: *Atom) !void { + var buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); + try self.writeAtom(atom, &buffer); +} -// fn markRelocsDirtyByAddress(self: *MachO, addr: u32) void { -// var it = self.relocs.valueIterator(); -// while (it.next()) |relocs| { -// for (relocs.items) |*reloc| { -// const target_atom = reloc.getTargetAtom(self) orelse continue; -// const target_sym = target_atom.getSymbol(self); -// if (target_sym.value < addr) continue; -// reloc.dirty = true; -// } -// } -// } +fn markRelocsDirtyByTarget(self: *MachO, target: SymbolWithLoc) void { + // TODO: reverse-lookup might come in handy here + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + if (!reloc.target.eql(target)) continue; + reloc.dirty = true; + } + } +} -// fn resolveRelocs(self: *MachO, atom: *Atom) !void { -// const relocs = self.relocs.get(atom) orelse return; -// const source_sym = atom.getSymbol(self); -// const source_section = 
self.sections.get(@enumToInt(source_sym.section_number) - 1).header; -// const file_offset = section.offset + source_sym.n_value - section.addr; - -// log.debug("relocating '{s}'", .{atom.getName(self)}); - -// for (relocs.items) |*reloc| { -// if (!reloc.dirty) continue; - -// const target_atom = reloc.getTargetAtom(self) orelse continue; -// const target_vaddr = target_atom.getSymbol(self).value; -// const target_vaddr_with_addend = target_vaddr + reloc.addend; - -// log.debug(" ({x}: [() => 0x{x} ({s})) ({s}) (in file at 0x{x})", .{ -// source_sym.value + reloc.offset, -// target_vaddr_with_addend, -// self.getSymbolName(reloc.target), -// @tagName(reloc.@"type"), -// file_offset + reloc.offset, -// }); - -// reloc.dirty = false; - -// if (reloc.pcrel) { -// const source_vaddr = source_sym.value + reloc.offset; -// const disp = -// @intCast(i32, target_vaddr_with_addend) - @intCast(i32, source_vaddr) - 4; -// try self.base.file.?.pwriteAll(mem.asBytes(&disp), file_offset + reloc.offset); -// continue; -// } - -// switch (reloc.length) { -// 2 => try self.base.file.?.pwriteAll( -// mem.asBytes(&@truncate(u32, target_vaddr_with_addend)), -// file_offset + reloc.offset, -// ), -// 3 => try self.base.file.?.pwriteAll( -// mem.asBytes(&(target_vaddr_with_addend)), -// file_offset + reloc.offset, -// ), -// else => unreachable, -// } -// } -// } +fn markRelocsDirtyByAddress(self: *MachO, addr: u64) void { + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_sym = target_atom.getSymbol(self); + if (target_sym.n_value < addr) continue; + reloc.dirty = true; + } + } +} pub fn allocateSpecialSymbols(self: *MachO) !void { for (&[_][]const u8{ @@ -1277,74 +1274,92 @@ pub fn allocateSpecialSymbols(self: *MachO) !void { } } -fn writeAtoms(self: *MachO) !void { - assert(self.mode == .incremental); - - const slice = self.sections.slice(); - for (slice.items(.last_atom)) |last, i| { - var atom: *Atom = last orelse continue; - const sect_i = @intCast(u8, i); - const header = slice.items(.header)[sect_i]; - - if (header.isZerofill()) continue; - - log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { - if (atom.dirty) { - try self.writeAtom(atom); - atom.dirty = false; - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } - } -} - pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; + const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + const atom = switch (self.mode) { + .incremental => blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = @sizeOf(u64); + atom.alignment = 3; + break :blk atom; + }, + .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), + }; + errdefer gpa.destroy(atom); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.got_section_index.? 
+ 1; - try atom.relocs.append(gpa, .{ - .offset = 0, - .target = target, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 3, - .@"type" = switch (self.base.options.target.cpu.arch) { - .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - }); + if (self.mode == .incremental) { + sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); - const target_sym = self.getSymbol(target); - if (target_sym.undf()) { - const global = self.getGlobal(self.getSymbolName(target)).?; - try atom.bindings.append(gpa, .{ - .target = global, + log.debug("allocated GOT atom at 0x{x}", .{sym.n_value}); + + try atom.addRelocation(self, .{ + .@"type" = switch (self.base.options.target.cpu.arch) { + .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + .target = target, .offset = 0, + .addend = 0, + .pcrel = false, + .length = 3, }); + + const target_sym = self.getSymbol(target); + if (target_sym.undf()) { + try atom.addBinding(self, .{ + .target = self.getGlobal(self.getSymbolName(target)).?, + .offset = 0, + }); + } else { + try atom.addRebase(self, 0); + } } else { - try atom.rebases.append(gpa, 0); + try atom.relocs.append(gpa, .{ + .offset = 0, + .target = target, + .addend = 0, + .subtractor = null, + .pcrel = false, + .length = 3, + .@"type" = switch (self.base.options.target.cpu.arch) { + .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + }); + + const target_sym = self.getSymbol(target); + if (target_sym.undf()) { + const global = self.getGlobal(self.getSymbolName(target)).?; + try atom.bindings.append(gpa, .{ + .target = global, + .offset = 0, + }); + } else { + try atom.rebases.append(gpa, 0); + } + + try self.addAtomToSection(atom); } - try self.managed_atoms.append(gpa, atom); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - - try self.allocateAtomCommon(atom); - return atom; } pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { + assert(self.mode == .one_shot); + const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); @@ -1368,14 +1383,9 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, })).?; - if (self.mode == .incremental and !gop.found_existing) { - // TODO allocate section - const needed_size: u64 = self.page_size; - try self.allocateSection(gop.sect_id, needed_size, @alignOf(u64)); - } sym.n_sect = gop.sect_id + 1; - try self.allocateAtomCommon(atom); + try self.addAtomToSection(atom); return atom; } @@ -1385,17 +1395,36 @@ pub fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_private_atom != null) return; const gpa = self.base.allocator; + const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + const atom = switch (self.mode) { + .incremental => blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = @sizeOf(u64); + atom.alignment = 3; + break :blk atom; + }, + .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), + }; + 
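    // The __dyld_private atom is a single zero-initialized, pointer-width slot in
    // __DATA,__data: the stub helper preamble created below loads its address
    // (lea %r11 / push %r11 on x86_64, adrp+add x17 on aarch64) and hands it to
    // dyld_stub_binder as the opaque context argument. In `.incremental` mode the
    // atom keeps no in-memory code buffer; it is allocated in the output file and
    // written out immediately via `writePtrWidthAtom`.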
errdefer gpa.destroy(atom); + const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.data_section_index.? + 1; self.dyld_private_atom = atom; - try self.allocateAtomCommon(atom); - try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + if (self.mode == .incremental) { + sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); + log.debug("allocated dyld_private atom at 0x{x}", .{sym.n_value}); + try self.writePtrWidthAtom(atom); + } else { + try self.addAtomToSection(atom); + } } pub fn createStubHelperPreambleAtom(self: *MachO) !void { @@ -1415,118 +1444,196 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { else => unreachable, }; const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); + const atom = switch (self.mode) { + .incremental => blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = alignment; + break :blk atom; + }, + .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), + }; + errdefer gpa.destroy(atom); + const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.stub_helper_section_index.? + 1; const dyld_private_sym_index = self.dyld_private_atom.?.sym_index; + + const code = try gpa.alloc(u8, size); + defer gpa.free(code); + mem.set(u8, code, 0); + switch (arch) { .x86_64 => { - try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); // lea %r11, [rip + disp] - atom.code.items[0] = 0x4c; - atom.code.items[1] = 0x8d; - atom.code.items[2] = 0x1d; - atom.relocs.appendAssumeCapacity(.{ - .offset = 3, - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), - }); + code[0] = 0x4c; + code[1] = 0x8d; + code[2] = 0x1d; // push %r11 - atom.code.items[7] = 0x41; - atom.code.items[8] = 0x53; + code[7] = 0x41; + code[8] = 0x53; // jmp [rip + disp] - atom.code.items[9] = 0xff; - atom.code.items[10] = 0x25; - atom.relocs.appendAssumeCapacity(.{ - .offset = 11, - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_GOT), - }); + code[9] = 0xff; + code[10] = 0x25; + + if (self.mode == .incremental) { + try atom.addRelocations(self, 2, .{ .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .offset = 3, + .addend = 0, + .pcrel = true, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_GOT), + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .offset = 11, + .addend = 0, + .pcrel = true, + .length = 2, + } }); + } else { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); + atom.relocs.appendAssumeCapacity(.{ + .offset = 3, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), + }); + atom.relocs.appendAssumeCapacity(.{ + .offset = 11, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = 
@enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_GOT), + }); + } }, + .aarch64 => { - try atom.relocs.ensureUnusedCapacity(self.base.allocator, 4); // adrp x17, 0 - mem.writeIntLittle(u32, atom.code.items[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 0, - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), - }); + mem.writeIntLittle(u32, code[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); // add x17, x17, 0 - mem.writeIntLittle(u32, atom.code.items[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 4, - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), - }); + mem.writeIntLittle(u32, code[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); // stp x16, x17, [sp, #-16]! - mem.writeIntLittle(u32, atom.code.items[8..][0..4], aarch64.Instruction.stp( + mem.writeIntLittle(u32, code[8..][0..4], aarch64.Instruction.stp( .x16, .x17, aarch64.Register.sp, aarch64.Instruction.LoadStorePairOffset.pre_index(-16), ).toU32()); // adrp x16, 0 - mem.writeIntLittle(u32, atom.code.items[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 12, - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21), - }); + mem.writeIntLittle(u32, code[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); // ldr x16, [x16, 0] - mem.writeIntLittle(u32, atom.code.items[16..][0..4], aarch64.Instruction.ldr( + mem.writeIntLittle(u32, code[16..][0..4], aarch64.Instruction.ldr( .x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(0), ).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 16, - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGEOFF12), - }); // br x16 - mem.writeIntLittle(u32, atom.code.items[20..][0..4], aarch64.Instruction.br(.x16).toU32()); + mem.writeIntLittle(u32, code[20..][0..4], aarch64.Instruction.br(.x16).toU32()); + + if (self.mode == .incremental) { + try atom.addRelocations(self, 4, .{ .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .offset = 0, + .addend = 0, + .pcrel = true, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .offset = 4, + .addend = 0, + .pcrel = false, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21), + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .offset = 12, + .addend = 0, + .pcrel = true, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGEOFF12), + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .offset = 16, + .addend = 0, + .pcrel = false, + .length = 2, + } }); + } else { + try 
atom.relocs.ensureUnusedCapacity(gpa, 4); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), + }); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = false, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), + }); + atom.relocs.appendAssumeCapacity(.{ + .offset = 12, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21), + }); + atom.relocs.appendAssumeCapacity(.{ + .offset = 16, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = false, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGEOFF12), + }); + } }, + else => unreachable, } self.stub_helper_preamble_atom = atom; - try self.allocateAtomCommon(atom); - try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + if (self.mode == .incremental) { + sym.n_value = try self.allocateAtom(atom, size, math.powi(u32, 2, alignment) catch unreachable); + log.debug("allocated stub preamble atom at 0x{x}", .{sym.n_value}); + try self.writeAtom(atom, code); + } else { + mem.copy(u8, atom.code.items, code); + try self.addAtomToSection(atom); + } } pub fn createStubHelperAtom(self: *MachO) !*Atom { const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; - const stub_size: u4 = switch (arch) { + const size: u4 = switch (arch) { .x86_64 => 10, .aarch64 => 3 * @sizeOf(u32), else => unreachable, @@ -1537,52 +1644,92 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { else => unreachable, }; const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); + const atom = switch (self.mode) { + .incremental => blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = alignment; + break :blk atom; + }, + .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), + }; + errdefer gpa.destroy(atom); + const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.stub_helper_section_index.? + 1; - try atom.relocs.ensureTotalCapacity(gpa, 1); + const code = try gpa.alloc(u8, size); + defer gpa.free(code); + mem.set(u8, code, 0); switch (arch) { .x86_64 => { // pushq - atom.code.items[0] = 0x68; + code[0] = 0x68; // Next 4 bytes 1..4 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
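            // Resulting 10-byte stub helper on x86_64:
            //   0:  68 xx xx xx xx    pushq $imm32  (lazy bind offset placeholder,
            //                         patched in `populateLazyBindOffsetsInStubHelper`)
            //   5:  e9 xx xx xx xx    jmpq  <stub helper preamble>  (rel32 fixed up
            //                         by the X86_64_RELOC_BRANCH reloc at offset 6)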
// jmpq - atom.code.items[5] = 0xe9; - atom.relocs.appendAssumeCapacity(.{ - .offset = 6, - .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - }); + code[5] = 0xe9; + + if (self.mode == .incremental) { + try atom.addRelocation(self, .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, + .offset = 6, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else { + try atom.relocs.ensureTotalCapacity(gpa, 1); + atom.relocs.appendAssumeCapacity(.{ + .offset = 6, + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + }); + } }, .aarch64 => { const literal = blk: { - const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + const div_res = try math.divExact(u64, size - @sizeOf(u32), 4); break :blk math.cast(u18, div_res) orelse return error.Overflow; }; // ldr w16, literal - mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.ldrLiteral( + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldrLiteral( .w16, literal, ).toU32()); // b disp - mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.b(0).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 4, - .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), - }); + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(0).toU32()); // Next 4 bytes 8..12 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
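            // In `.incremental` mode the branch fix-up is recorded in the global
            // `macho_file.relocs` table via `addRelocation`, where it carries a
            // `dirty` bit so it can be re-resolved in place whenever the preamble
            // moves; in `.one_shot` mode it stays in the atom-local `relocs` list
            // and is resolved once, when the atoms are written out.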
+ + if (self.mode == .incremental) { + try atom.addRelocation(self, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, + .offset = 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else { + try atom.relocs.ensureTotalCapacity(gpa, 1); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), + }); + } }, else => unreachable, } @@ -1590,7 +1737,14 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom); + if (self.mode == .incremental) { + sym.n_value = try self.allocateAtom(atom, size, math.powi(u32, 2, alignment) catch unreachable); + log.debug("allocated stub helper atom at 0x{x}", .{sym.n_value}); + try self.writeAtom(atom, code); + } else { + mem.copy(u8, atom.code.items, code); + try self.addAtomToSection(atom); + } return atom; } @@ -1598,36 +1752,73 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + const atom = switch (self.mode) { + .incremental => blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = @sizeOf(u64); + atom.alignment = 3; + break :blk atom; + }, + .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), + }; + errdefer gpa.destroy(atom); + const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.la_symbol_ptr_section_index.? 
+ 1; - try atom.relocs.append(gpa, .{ - .offset = 0, - .target = .{ .sym_index = stub_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 3, - .@"type" = switch (self.base.options.target.cpu.arch) { - .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - }); - try atom.rebases.append(gpa, 0); - - const global = self.getGlobal(self.getSymbolName(target)).?; - try atom.lazy_bindings.append(gpa, .{ - .target = global, - .offset = 0, - }); + if (self.mode == .incremental) { + try atom.addRelocation(self, .{ + .@"type" = switch (self.base.options.target.cpu.arch) { + .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + .target = .{ .sym_index = stub_sym_index, .file = null }, + .offset = 0, + .addend = 0, + .pcrel = false, + .length = 3, + }); + try atom.addRebase(self, 0); + try atom.addLazyBinding(self, .{ + .target = self.getGlobal(self.getSymbolName(target)).?, + .offset = 0, + }); + } else { + try atom.relocs.append(gpa, .{ + .offset = 0, + .target = .{ .sym_index = stub_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = false, + .length = 3, + .@"type" = switch (self.base.options.target.cpu.arch) { + .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + }); + try atom.rebases.append(gpa, 0); + const global = self.getGlobal(self.getSymbolName(target)).?; + try atom.lazy_bindings.append(gpa, .{ + .target = global, + .offset = 0, + }); + } try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom); + if (self.mode == .incremental) { + sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); + log.debug("allocated lazy pointer atom at 0x{x}", .{sym.n_value}); + try self.writePtrWidthAtom(atom); + } else { + try self.addAtomToSection(atom); + } return atom; } @@ -1640,62 +1831,112 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const stub_size: u4 = switch (arch) { + const size: u4 = switch (arch) { .x86_64 => 6, .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); + const atom = switch (self.mode) { + .incremental => blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = alignment; + break :blk atom; + }, + .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), + }; + errdefer gpa.destroy(atom); + const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.stubs_section_index.? 
+ 1; + const code = try gpa.alloc(u8, size); + defer gpa.free(code); + mem.set(u8, code, 0); + switch (arch) { .x86_64 => { // jmp - atom.code.items[0] = 0xff; - atom.code.items[1] = 0x25; - try atom.relocs.append(gpa, .{ - .offset = 2, - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - }); + code[0] = 0xff; + code[1] = 0x25; + + if (self.mode == .incremental) { + try atom.addRelocation(self, .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .offset = 2, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else { + try atom.relocs.append(gpa, .{ + .offset = 2, + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + }); + } }, .aarch64 => { - try atom.relocs.ensureTotalCapacity(gpa, 2); // adrp x16, pages - mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 0, - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), - }); + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); // ldr x16, x16, offset - mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.ldr( + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr( .x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(0), ).toU32()); - atom.relocs.appendAssumeCapacity(.{ - .offset = 4, - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), - }); // br x16 - mem.writeIntLittle(u32, atom.code.items[8..12], aarch64.Instruction.br(.x16).toU32()); + mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); + + if (self.mode == .incremental) { + try atom.addRelocations(self, 2, .{ + .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .offset = 0, + .addend = 0, + .pcrel = true, + .length = 2, + }, + .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .offset = 4, + .addend = 0, + .pcrel = false, + .length = 2, + }, + }); + } else { + try atom.relocs.ensureTotalCapacity(gpa, 2); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), + }); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .addend = 0, + .subtractor = null, + .pcrel = false, + .length = 2, + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), + }); + } }, else => unreachable, } @@ -1703,7 +1944,14 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom); + if (self.mode == 
.incremental) { + sym.n_value = try self.allocateAtom(atom, size, math.powi(u32, 2, alignment) catch unreachable); + log.debug("allocated stub atom at 0x{x}", .{sym.n_value}); + try self.writeAtom(atom, code); + } else { + mem.copy(u8, atom.code.items, code); + try self.addAtomToSection(atom); + } return atom; } @@ -1976,6 +2224,7 @@ pub fn resolveSymbolsInDylibs(self: *MachO) !void { const laptr_atom = try self.createLazyPointerAtom(stub_helper_atom.sym_index, global); const stub_atom = try self.createStubAtom(laptr_atom.sym_index); self.stubs.items[stub_index].sym_index = stub_atom.sym_index; + self.markRelocsDirtyByTarget(global); } continue :loop; @@ -2073,6 +2322,10 @@ pub fn resolveDyldStubBinder(self: *MachO) !void { const got_index = try self.allocateGotEntry(global); const got_atom = try self.createGotAtom(global); self.got_entries.items[got_index].sym_index = got_atom.sym_index; + + if (self.mode == .incremental) { + try self.writePtrWidthAtom(got_atom); + } } pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { @@ -2356,6 +2609,30 @@ pub fn deinit(self: *MachO) void { } self.relocs.deinit(gpa); } + + { + var it = self.rebases.valueIterator(); + while (it.next()) |rebases| { + rebases.deinit(gpa); + } + self.rebases.deinit(gpa); + } + + { + var it = self.bindings.valueIterator(); + while (it.next()) |bindings| { + bindings.deinit(gpa); + } + self.bindings.deinit(gpa); + } + + { + var it = self.lazy_bindings.valueIterator(); + while (it.next()) |bindings| { + bindings.deinit(gpa); + } + self.lazy_bindings.deinit(gpa); + } } fn freeAtom(self: *MachO, atom: *Atom, owns_atom: bool) void { @@ -2363,6 +2640,8 @@ fn freeAtom(self: *MachO, atom: *Atom, owns_atom: bool) void { if (!owns_atom) { atom.deinit(self.base.allocator); } + // Remove any relocs and base relocs associated with this Atom + self.freeRelocationsForAtom(atom); const sect_id = atom.getSymbol(self).n_sect - 1; const free_list = &self.sections.items(.free_list)[sect_id]; @@ -2569,13 +2848,7 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv const decl_index = func.owner_decl; const decl = module.declPtr(decl_index); self.freeUnnamedConsts(decl_index); - - // TODO clearing the code and relocs buffer should probably be orchestrated - // in a different, smarter, more automatic way somewhere else, in a more centralised - // way than this. - // If we don't clear the buffers here, we are up for some nasty surprises when - // this atom is reused later on and was not freed by freeAtom(). 
- decl.link.macho.clearRetainingCapacity(); + self.freeRelocationsForAtom(&decl.link.macho); var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -2593,18 +2866,16 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv else try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none); - switch (res) { - .appended => { - try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); - }, + const code = switch (res) { + .appended => code_buffer.items, .fail => |em| { decl.analysis = .codegen_failure; try module.failed_decls.put(module.gpa, decl_index, em); return; }, - } + }; - const addr = try self.placeDecl(decl_index, decl.link.macho.code.items.len); + const addr = try self.updateDeclCode(decl_index, code); if (decl_state) |*ds| { try self.d_sym.?.dwarf.commitDeclState( @@ -2650,20 +2921,17 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu log.debug("allocating symbol indexes for {?s}", .{name}); - const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); - const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom( - gpa, - sym_index, - @sizeOf(u64), - math.log2(required_alignment), - ); + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + + atom.sym_index = try self.allocateSymbol(); try self.managed_atoms.append(gpa, atom); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), typed_value, &code_buffer, .none, .{ - .parent_atom_index = sym_index, + .parent_atom_index = atom.sym_index, }); const code = switch (res) { .externally_managed => |x| x, @@ -2676,9 +2944,9 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu }, }; - atom.code.clearRetainingCapacity(); - try atom.code.appendSlice(gpa, code); - + const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); + atom.size = code.len; + atom.alignment = math.log2(required_alignment); const sect_id = try self.getOutputSectionAtom( atom, decl_name, @@ -2691,13 +2959,14 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu symbol.n_type = macho.N_SECT; symbol.n_sect = sect_id + 1; symbol.n_value = try self.allocateAtom(atom, code.len, required_alignment); + errdefer self.freeAtom(atom, true); + + try unnamed_consts.append(gpa, atom); log.debug("allocated atom for {?s} at 0x{x}", .{ name, symbol.n_value }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(atom, true); - - try unnamed_consts.append(gpa, atom); + try self.writeAtom(atom, code); return atom.sym_index; } @@ -2724,6 +2993,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) } } + self.freeRelocationsForAtom(&decl.link.macho); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -2751,27 +3022,16 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) .parent_atom_index = decl.link.macho.sym_index, }); - const code = blk: { - switch (res) { - .externally_managed => |x| break :blk x, - .appended => { - // TODO clearing the code and relocs buffer should probably be orchestrated - // in a different, smarter, more automatic way somewhere else, in a more centralised - // 
way than this. - // If we don't clear the buffers here, we are up for some nasty surprises when - // this atom is reused later on and was not freed by freeAtom(). - decl.link.macho.code.clearAndFree(self.base.allocator); - try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); - break :blk decl.link.macho.code.items; - }, - .fail => |em| { - decl.analysis = .codegen_failure; - try module.failed_decls.put(module.gpa, decl_index, em); - return; - }, - } + const code = switch (res) { + .externally_managed => |x| x, + .appended => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + try module.failed_decls.put(module.gpa, decl_index, em); + return; + }, }; - const addr = try self.placeDecl(decl_index, code.len); + const addr = try self.updateDeclCode(decl_index, code); if (decl_state) |*ds| { try self.d_sym.?.dwarf.commitDeclState( @@ -2936,19 +3196,22 @@ fn getOutputSectionAtom( return sect_id; } -fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { - const module = self.base.options.module.?; - const decl = module.declPtr(decl_index); +fn updateDeclCode(self: *MachO, decl_index: Module.Decl.Index, code: []const u8) !u64 { + const gpa = self.base.allocator; + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + const required_alignment = decl.getAlignment(self.base.options.target); assert(decl.link.macho.sym_index != 0); // Caller forgot to call allocateDeclIndexes() - const sym_name = try decl.getFullyQualifiedName(module); + const sym_name = try decl.getFullyQualifiedName(mod); defer self.base.allocator.free(sym_name); + const atom = &decl.link.macho; const decl_ptr = self.decls.getPtr(decl_index).?; if (decl_ptr.* == null) { decl_ptr.* = try self.getOutputSectionAtom( - &decl.link.macho, + atom, sym_name, decl.ty, decl.val, @@ -2956,55 +3219,63 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 ); } const sect_id = decl_ptr.*.?; + const code_len = code.len; - if (decl.link.macho.size != 0) { - const symbol = decl.link.macho.getSymbolPtr(self); - symbol.n_strx = try self.strtab.insert(self.base.allocator, sym_name); - symbol.n_type = macho.N_SECT; - symbol.n_sect = sect_id + 1; - symbol.n_desc = 0; + if (atom.size != 0) { + const sym = atom.getSymbolPtr(self); + sym.n_strx = try self.strtab.insert(gpa, sym_name); + sym.n_type = macho.N_SECT; + sym.n_sect = sect_id + 1; + sym.n_desc = 0; const capacity = decl.link.macho.capacity(self); - const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); + const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, sym.n_value, required_alignment); if (need_realloc) { - const vaddr = try self.growAtom(&decl.link.macho, code_len, required_alignment); - log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ sym_name, symbol.n_value, vaddr }); + const vaddr = try self.growAtom(atom, code_len, required_alignment); + log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ sym_name, sym.n_value, vaddr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - symbol.n_value = vaddr; - const got_atom = self.getGotAtomForSymbol(.{ - .sym_index = decl.link.macho.sym_index, - .file = null, - }).?; - got_atom.dirty = true; - } else if (code_len < decl.link.macho.size) { - self.shrinkAtom(&decl.link.macho, code_len); + if (vaddr != sym.n_value) { + sym.n_value = vaddr; + log.debug(" (updating GOT entry)", .{}); + const got_target = SymbolWithLoc{ 
.sym_index = atom.sym_index, .file = null }; + const got_atom = self.getGotAtomForSymbol(got_target).?; + self.markRelocsDirtyByTarget(got_target); + try self.writePtrWidthAtom(got_atom); + } + } else if (code_len < atom.size) { + self.shrinkAtom(atom, code_len); } - - decl.link.macho.size = code_len; - decl.link.macho.dirty = true; + atom.size = code_len; } else { - const name_str_index = try self.strtab.insert(self.base.allocator, sym_name); - const symbol = decl.link.macho.getSymbolPtr(self); - symbol.n_strx = name_str_index; - symbol.n_type = macho.N_SECT; - symbol.n_sect = sect_id + 1; - symbol.n_desc = 0; - symbol.n_value = try self.allocateAtom(&decl.link.macho, code_len, required_alignment); + const name_str_index = try self.strtab.insert(gpa, sym_name); + const sym = atom.getSymbolPtr(self); + sym.n_strx = name_str_index; + sym.n_type = macho.N_SECT; + sym.n_sect = sect_id + 1; + sym.n_desc = 0; - log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, symbol.n_value }); + const vaddr = try self.allocateAtom(atom, code_len, required_alignment); + errdefer self.freeAtom(atom, false); + + log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, vaddr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(&decl.link.macho, false); + atom.size = code_len; + sym.n_value = vaddr; - const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; + const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; const got_index = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); self.got_entries.items[got_index].sym_index = got_atom.sym_index; + try self.writePtrWidthAtom(got_atom); } - return decl.link.macho.getSymbol(self).n_value; + self.markRelocsDirtyByTarget(atom.getSymbolWithLoc()); + try self.writeAtom(atom, code); + + return atom.getSymbol(self).n_value; } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { @@ -3152,17 +3423,25 @@ pub fn deleteExport(self: *MachO, exp: Export) void { } } +fn freeRelocationsForAtom(self: *MachO, atom: *Atom) void { + _ = self.relocs.remove(atom); + _ = self.rebases.remove(atom); + _ = self.bindings.remove(atom); + _ = self.lazy_bindings.remove(atom); +} + fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { + const gpa = self.base.allocator; const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { self.freeAtom(atom, true); - self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; + self.locals_free_list.append(gpa, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); log.debug(" adding local symbol index {d} to free list", .{atom.sym_index}); atom.sym_index = 0; } - unnamed_consts.clearAndFree(self.base.allocator); + unnamed_consts.clearAndFree(gpa); } pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { @@ -3171,20 +3450,25 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { } const mod = self.base.options.module.?; const decl = mod.declPtr(decl_index); + log.debug("freeDecl {*}", .{decl}); + const kv = self.decls.fetchSwapRemove(decl_index); if (kv.?.value) |_| { self.freeAtom(&decl.link.macho, false); self.freeUnnamedConsts(decl_index); } + // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
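+    // (A failed append here only means the freed slot is never reused; no linker state is corrupted.)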
- if (decl.link.macho.sym_index != 0) { - self.locals_free_list.append(self.base.allocator, decl.link.macho.sym_index) catch {}; + const gpa = self.base.allocator; + const sym_index = decl.link.macho.sym_index; + if (sym_index != 0) { + self.locals_free_list.append(gpa, sym_index) catch {}; // Try freeing GOT atom if this decl had one - const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; + const got_target = SymbolWithLoc{ .sym_index = sym_index, .file = null }; if (self.got_entries_table.get(got_target)) |got_index| { - self.got_entries_free_list.append(self.base.allocator, @intCast(u32, got_index)) catch {}; + self.got_entries_free_list.append(gpa, @intCast(u32, got_index)) catch {}; self.got_entries.items[got_index] = .{ .target = .{ .sym_index = 0, .file = null }, .sym_index = 0, @@ -3192,20 +3476,18 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { _ = self.got_entries_table.remove(got_target); if (self.d_sym) |*d_sym| { - d_sym.swapRemoveRelocs(decl.link.macho.sym_index); + d_sym.swapRemoveRelocs(sym_index); } - log.debug(" adding GOT index {d} to free list (target local@{d})", .{ - got_index, - decl.link.macho.sym_index, - }); + log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, sym_index }); } - self.locals.items[decl.link.macho.sym_index].n_type = 0; - _ = self.atom_by_index_table.remove(decl.link.macho.sym_index); - log.debug(" adding local symbol index {d} to free list", .{decl.link.macho.sym_index}); + self.locals.items[sym_index].n_type = 0; + _ = self.atom_by_index_table.remove(sym_index); + log.debug(" adding local symbol index {d} to free list", .{sym_index}); decl.link.macho.sym_index = 0; } + if (self.d_sym) |*d_sym| { d_sym.dwarf.freeDecl(decl); } @@ -3218,21 +3500,20 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil assert(self.llvm_object == null); assert(decl.link.macho.sym_index != 0); - const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; - try atom.relocs.append(self.base.allocator, .{ - .offset = @intCast(u32, reloc_info.offset), - .target = .{ .sym_index = decl.link.macho.sym_index, .file = null }, - .addend = reloc_info.addend, - .subtractor = null, - .pcrel = false, - .length = 3, + const atom = self.getAtomForSymbol(.{ .sym_index = reloc_info.parent_atom_index, .file = null }).?; + try atom.addRelocation(self, .{ .@"type" = switch (self.base.options.target.cpu.arch) { .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), else => unreachable, }, + .target = .{ .sym_index = decl.link.macho.sym_index, .file = null }, + .offset = @intCast(u32, reloc_info.offset), + .addend = reloc_info.addend, + .pcrel = false, + .length = 3, }); - try atom.rebases.append(self.base.allocator, reloc_info.offset); + try atom.addRebase(self, @intCast(u32, reloc_info.offset)); return 0; } @@ -3575,6 +3856,7 @@ fn allocateSection(self: *MachO, sect_id: u8, size: u64, alignment: u32) !void { if (size > max_size) { try self.growSection(sect_id, @intCast(u32, size)); + self.markRelocsDirtyByAddress(header.addr + size); } log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); @@ -3885,7 +4167,13 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! 
const header = &self.sections.items(.header)[sect_id]; const free_list = &self.sections.items(.free_list)[sect_id]; const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; - const new_atom_ideal_capacity = if (header.isCode()) padToIdeal(new_atom_size) else new_atom_size; + const requires_padding = blk: { + if (!header.isCode()) break :blk false; + if (mem.eql(u8, "__stubs", header.sectName())) break :blk false; + if (mem.eql(u8, "__stub_helper", header.sectName())) break :blk false; + break :blk true; + }; + const new_atom_ideal_capacity = if (requires_padding) padToIdeal(new_atom_size) else new_atom_size; // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. @@ -3905,7 +4193,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! // Is it enough that we could fit this new atom? const sym = big_atom.getSymbol(self); const capacity = big_atom.capacity(self); - const ideal_capacity = if (header.isCode()) padToIdeal(capacity) else capacity; + const ideal_capacity = if (requires_padding) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; @@ -3935,7 +4223,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! break :blk new_start_vaddr; } else if (maybe_last_atom.*) |last| { const last_symbol = last.getSymbol(self); - const ideal_capacity = if (header.isCode()) padToIdeal(last.size) else last.size; + const ideal_capacity = if (requires_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); atom_placement = last; @@ -3949,6 +4237,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! 
if (expand_section) { const needed_size = @intCast(u32, (vaddr + new_atom_size) - header.addr); try self.growSection(sect_id, needed_size); + self.markRelocsDirtyByAddress(header.addr + needed_size); maybe_last_atom.* = atom; header.size = needed_size; } @@ -4105,64 +4394,70 @@ pub fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const sym = atom.getSymbol(self); const base_offset = sym.n_value - seg.vmaddr; - for (atom.rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - try rebase_pointers.append(.{ - .offset = base_offset + offset, - .segment_id = segment_index, - }); + if (self.rebases.get(atom)) |rebases| { + for (rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = segment_index, + }); + } } - for (atom.bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + if (self.bindings.get(atom)) |bindings| { + for (bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); } - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + if (self.lazy_bindings.get(atom)) |lazy_bindings| { + for (lazy_bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try 
lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); } if (atom.prev) |prev| { @@ -4387,25 +4682,25 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const asc_u64 = std.sort.asc(u64); -fn writeFunctionStarts(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeFunctionStarts(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - const text_seg_index = self.text_segment_cmd_index orelse return; - const text_sect_index = self.text_section_index orelse return; - const text_seg = self.segments.items[text_seg_index]; + const text_seg_index = macho_file.text_segment_cmd_index orelse return; + const text_sect_index = macho_file.text_section_index orelse return; + const text_seg = macho_file.segments.items[text_seg_index]; - const gpa = self.base.allocator; + const gpa = macho_file.base.allocator; // We need to sort by address first var addresses = std.ArrayList(u64).init(gpa); defer addresses.deinit(); - try addresses.ensureTotalCapacityPrecise(self.globals.items.len); + try addresses.ensureTotalCapacityPrecise(macho_file.globals.items.len); - for (self.globals.items) |global| { - const sym = self.getSymbol(global); + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); if (sym.undf()) continue; - if (sym.n_desc == N_DESC_GCED) continue; + if (sym.n_desc == MachO.N_DESC_GCED) continue; const sect_id = sym.n_sect - 1; if (sect_id != text_sect_index) continue; @@ -4439,14 +4734,14 @@ fn writeFunctionStarts(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { try std.leb.writeULEB128(buffer.writer(), offset); } - const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); const needed_size = buffer.items.len; link_seg.filesize = offset + needed_size - link_seg.fileoff; log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - try self.base.file.?.pwriteAll(buffer.items, offset); + try macho_file.base.file.?.pwriteAll(buffer.items, offset); try lc_writer.writeStruct(macho.linkedit_data_command{ .cmd = .FUNCTION_STARTS, @@ -4465,8 +4760,8 @@ fn filterDataInCode( const Predicate = struct { addr: u64, - pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= self.addr; + pub fn predicate(macho_file: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= macho_file.addr; } }; @@ -4476,26 +4771,26 @@ fn filterDataInCode( return dices[start..end]; } -fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeDataInCode(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); + var out_dice = std.ArrayList(macho.data_in_code_entry).init(macho_file.base.allocator); defer out_dice.deinit(); - const text_sect_id = self.text_section_index orelse return; - const text_sect_header = 
self.sections.items(.header)[text_sect_id]; + const text_sect_id = macho_file.text_section_index orelse return; + const text_sect_header = macho_file.sections.items(.header)[text_sect_id]; - for (self.objects.items) |object| { + for (macho_file.objects.items) |object| { const dice = object.parseDataInCode() orelse continue; try out_dice.ensureUnusedCapacity(dice.len); for (object.managed_atoms.items) |atom| { - const sym = atom.getSymbol(self); - if (sym.n_desc == N_DESC_GCED) continue; + const sym = atom.getSymbol(macho_file); + if (sym.n_desc == MachO.N_DESC_GCED) continue; const sect_id = sym.n_sect - 1; - if (sect_id != self.text_section_index.?) { + if (sect_id != macho_file.text_section_index.?) { continue; } @@ -4516,14 +4811,14 @@ fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { } } - const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); seg.filesize = offset + needed_size - seg.fileoff; log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); try lc_writer.writeStruct(macho.linkedit_data_command{ .cmd = .DATA_IN_CODE, .cmdsize = @sizeOf(macho.linkedit_data_command), @@ -4533,7 +4828,7 @@ fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { ncmds.* += 1; } -fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { var symtab_cmd = macho.symtab_command{ .cmdsize = @sizeOf(macho.symtab_command), .symoff = 0, @@ -4571,7 +4866,7 @@ fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { ncmds.* += 2; } -fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { +pub fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { const gpa = self.base.allocator; var locals = std.ArrayList(macho.nlist_64).init(gpa); @@ -5485,7 +5780,7 @@ pub fn logSymtab(self: *MachO) void { const def_index = if (sym.undf() and !sym.tentative()) @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) else - sym.n_sect; + sym.n_sect + 1; log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ sym_id, object.getString(sym.n_strx), @@ -5502,7 +5797,7 @@ pub fn logSymtab(self: *MachO) void { const def_index = if (sym.undf() and !sym.tentative()) @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) else - sym.n_sect; + sym.n_sect + 1; log.debug(" %{d}: {?s} @{x} in {s}({d}), {s}", .{ sym_id, self.strtab.get(sym.n_strx), @@ -5633,6 +5928,6 @@ pub fn copyRangeAllOverlappingAlloc( ) !void { const buf = try allocator.alloc(u8, len); defer allocator.free(buf); - _ = try file.preadAll(buf, in_offset); - try file.pwriteAll(buf, out_offset); + const amt = try file.preadAll(buf, in_offset); + try file.pwriteAll(buf[0..amt], out_offset); } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 45846c5e09..bcd85ad82c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -66,8 +66,6 @@ prev: ?*Atom, dbg_info_atom: Dwarf.Atom, -dirty: bool = true, - pub const Binding = struct { target: SymbolWithLoc, offset: u64, @@ -898,46 +896,81 @@ inline fn isArithmeticOp(inst: *const [4]u8) bool { } pub fn 
addRelocation(self: *Atom, macho_file: *MachO, reloc: RelocationIncr) !void { + return self.addRelocations(macho_file, 1, .{reloc}); +} + +pub fn addRelocations( + self: *Atom, + macho_file: *MachO, + comptime count: comptime_int, + relocs: [count]RelocationIncr, +) !void { const gpa = macho_file.base.allocator; - log.debug(" (adding reloc of type {s} to target %{d})", .{ @tagName(reloc.@"type"), reloc.target.sym_index }); + const target = macho_file.base.options.target; const gop = try macho_file.relocs.getOrPut(gpa, self); if (!gop.found_existing) { gop.value_ptr.* = .{}; } - try gop.value_ptr.append(gpa, reloc); -} - -pub fn resolveRelocationsInCodeBuffer(self: *Atom, macho_file: *MachO, code: []u8) !void { - const relocs = macho_file.relocs.get(self) orelse return; - - log.debug("relocating '{s}'", .{self.getName(macho_file)}); - - for (relocs.items) |*reloc| { - // We don't check for dirty relocation as we resolve in memory so it's effectively free. - try reloc.resolve(self, macho_file, code); - reloc.dirty = false; + try gop.value_ptr.ensureUnusedCapacity(gpa, count); + for (relocs) |reloc| { + log.debug(" (adding reloc of type {s} to target %{d})", .{ + reloc.fmtType(target), + reloc.target.sym_index, + }); + gop.value_ptr.appendAssumeCapacity(reloc); } } -pub fn resolveRelocationsInFile(self: *Atom, macho_file: *MachO) !void { - const relocs = macho_file.relocs.get(self) orelse return; +pub fn addRebase(self: *Atom, macho_file: *MachO, offset: u32) !void { const gpa = macho_file.base.allocator; + log.debug(" (adding rebase at offset 0x{x} in %{d})", .{ offset, self.sym_index }); + const gop = try macho_file.rebases.getOrPut(gpa, self); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, offset); +} - // No code available in a buffer; we need to read it in from the binary. 
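+/// Records a non-lazy dyld binding for this atom in the owning MachO file's
+/// `bindings` table, creating the atom's list on first use.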
+pub fn addBinding(self: *Atom, macho_file: *MachO, binding: Binding) !void { + const gpa = macho_file.base.allocator; + log.debug(" (adding binding to symbol {s} at offset 0x{x} in %{d})", .{ + macho_file.getSymbolName(binding.target), + binding.offset, + self.sym_index, + }); + const gop = try macho_file.bindings.getOrPut(gpa, self); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, binding); +} + +pub fn addLazyBinding(self: *Atom, macho_file: *MachO, binding: Binding) !void { + const gpa = macho_file.base.allocator; + log.debug(" (adding lazy binding to symbol {s} at offset 0x{x} in %{d})", .{ + macho_file.getSymbolName(binding.target), + binding.offset, + self.sym_index, + }); + const gop = try macho_file.lazy_bindings.getOrPut(gpa, self); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, binding); +} + +pub fn resolveRelocations(self: *Atom, macho_file: *MachO) !void { + const relocs = macho_file.relocs.get(self) orelse return; const source_sym = self.getSymbol(macho_file); const source_section = macho_file.sections.get(source_sym.n_sect - 1).header; - const file_offset = source_section.offset + source_sym.value - source_section.addr; - const code = try gpa.alloc(u8, self.size); - try self.base.file.?.preadAll(code, file_offset); - defer gpa.free(code); + const file_offset = source_section.offset + source_sym.n_value - source_section.addr; log.debug("relocating '{s}'", .{self.getName(macho_file)}); for (relocs.items) |*reloc| { if (!reloc.dirty) continue; - try reloc.resolve(self, macho_file, code); + + try reloc.resolve(self, macho_file, file_offset); reloc.dirty = false; } - - try self.base.file.?.pwriteAll(code, file_offset); } diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 945def7302..333d8bd6d2 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -13,31 +13,35 @@ const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; -pub const Table = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Relocation)); - -/// Offset within the atom's code buffer. -/// Note relocation size can be inferred by relocation's kind. 
-offset: u32, +@"type": u4, target: SymbolWithLoc, +offset: u32, addend: i64, pcrel: bool, length: u2, -@"type": u4, dirty: bool = true, +pub fn fmtType(self: Relocation, target: std.Target) []const u8 { + switch (target.cpu.arch) { + .aarch64 => return @tagName(@intToEnum(macho.reloc_type_arm64, self.@"type")), + .x86_64 => return @tagName(@intToEnum(macho.reloc_type_x86_64, self.@"type")), + else => unreachable, + } +} + pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom { switch (macho_file.base.options.target.cpu.arch) { .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) { .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12, .ARM64_RELOC_POINTER_TO_GOT, - => return macho_file.getGotAtomForSymbol(self.target).?, + => return macho_file.getGotAtomForSymbol(self.target), else => {}, }, .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) { .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD, - => return macho_file.getGotAtomForSymbol(self.target).?, + => return macho_file.getGotAtomForSymbol(self.target), else => {}, }, else => unreachable, @@ -47,54 +51,72 @@ pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom { return macho_file.getAtomForSymbol(self.target); } -pub fn resolve(self: Relocation, atom: *Atom, macho_file: *MachO, code: []u8) !void { +pub fn resolve(self: Relocation, atom: *Atom, macho_file: *MachO, base_offset: u64) !void { const arch = macho_file.base.options.target.cpu.arch; const source_sym = atom.getSymbol(macho_file); const source_addr = source_sym.n_value + self.offset; const target_atom = self.getTargetAtom(macho_file) orelse return; - const target_addr = target_atom.getSymbol(macho_file).n_value + self.addend; + const target_addr = @intCast(i64, target_atom.getSymbol(macho_file).n_value) + self.addend; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ source_addr, target_addr, macho_file.getSymbolName(self.target), - switch (arch) { - .aarch64 => @tagName(@intToEnum(macho.reloc_type_arm64, self.@"type")), - .x86_64 => @tagName(@intToEnum(macho.reloc_type_x86_64, self.@"type")), - else => unreachable, - }, + self.fmtType(macho_file.base.options.target), }); switch (arch) { - .aarch64 => return self.resolveAarch64(source_addr, target_addr, macho_file, code), - .x86_64 => return self.resolveX8664(source_addr, target_addr, code), + .aarch64 => return self.resolveAarch64(macho_file, source_addr, target_addr, base_offset), + .x86_64 => return self.resolveX8664(macho_file, source_addr, target_addr, base_offset), else => unreachable, } } -fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_file: *MachO, code: []u8) !void { +fn resolveAarch64( + self: Relocation, + macho_file: *MachO, + source_addr: u64, + target_addr: i64, + base_offset: u64, +) !void { const rel_type = @intToEnum(macho.reloc_type_arm64, self.@"type"); + if (rel_type == .ARM64_RELOC_UNSIGNED) { + var buffer: [@sizeOf(u64)]u8 = undefined; + const code = blk: { + switch (self.length) { + 2 => { + mem.writeIntLittle(u32, buffer[0..4], @truncate(u32, @bitCast(u64, target_addr))); + break :blk buffer[0..4]; + }, + 3 => { + mem.writeIntLittle(u64, &buffer, @bitCast(u64, target_addr)); + break :blk &buffer; + }, + else => unreachable, + } + }; + return macho_file.base.file.?.pwriteAll(code, base_offset + self.offset); + } + + var buffer: [@sizeOf(u32)]u8 = undefined; + const amt = try macho_file.base.file.?.preadAll(&buffer, base_offset + self.offset); + if (amt != buffer.len) return error.InputOutput; + switch (rel_type) { 
.ARM64_RELOC_BRANCH26 => { - const displacement = math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse { - log.err("jump too big to encode as i28 displacement value", .{}); - log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ - target_addr, - source_addr, - @intCast(i64, target_addr) - @intCast(i64, source_addr), - }); - log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); - return error.TODOImplementBranchIslands; - }; + const displacement = math.cast( + i28, + @intCast(i64, target_addr) - @intCast(i64, source_addr), + ) orelse unreachable; // TODO codegen should never allow for jump larger than i28 displacement var inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.unconditional_branch_immediate, - ), code), + ), &buffer), }; inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, code, inst.toU32()); + mem.writeIntLittle(u32, &buffer, inst.toU32()); }, .ARM64_RELOC_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGE21, @@ -107,45 +129,45 @@ fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_fi .pc_relative_address = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.pc_relative_address, - ), code), + ), &buffer), }; inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); inst.pc_relative_address.immlo = @truncate(u2, pages); - mem.writeIntLittle(u32, code, inst.toU32()); + mem.writeIntLittle(u32, &buffer, inst.toU32()); }, .ARM64_RELOC_PAGEOFF12 => { const narrowed = @truncate(u12, @intCast(u64, target_addr)); - if (isArithmeticOp(code)) { + if (isArithmeticOp(&buffer)) { var inst = aarch64.Instruction{ .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.add_subtract_immediate, - ), code), + ), &buffer), }; inst.add_subtract_immediate.imm12 = narrowed; - mem.writeIntLittle(u32, code, inst.toU32()); + mem.writeIntLittle(u32, &buffer, inst.toU32()); } else { var inst = aarch64.Instruction{ .load_store_register = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.load_store_register, - ), code), + ), &buffer), }; const offset: u12 = blk: { if (inst.load_store_register.size == 0) { if (inst.load_store_register.v == 1) { // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); + break :blk @divExact(narrowed, 16); } // Otherwise, 8-bit SIMD or ldrb. 
break :blk narrowed; } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); + const denom: u4 = math.powi(u4, 2, inst.load_store_register.size) catch unreachable; + break :blk @divExact(narrowed, denom); } }; inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); + mem.writeIntLittle(u32, &buffer, inst.toU32()); } }, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { @@ -154,11 +176,12 @@ fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_fi .load_store_register = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.load_store_register, - ), code), + ), &buffer), }; - const offset = try math.divExact(u12, narrowed, 8); + const offset = @divExact(narrowed, 8); inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); + mem.writeIntLittle(u32, &buffer, inst.toU32()); + log.debug("HMM = {x}", .{std.fmt.fmtSliceHexLower(&buffer)}); }, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { const RegInfo = struct { @@ -167,11 +190,11 @@ fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_fi size: u2, }; const reg_info: RegInfo = blk: { - if (isArithmeticOp(code)) { + if (isArithmeticOp(&buffer)) { const inst = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.add_subtract_immediate, - ), code); + ), &buffer); break :blk .{ .rd = inst.rd, .rn = inst.rn, @@ -181,7 +204,7 @@ fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_fi const inst = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.load_store_register, - ), code); + ), &buffer); break :blk .{ .rd = inst.rt, .rn = inst.rn, @@ -190,20 +213,7 @@ fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_fi } }; const narrowed = @truncate(u12, @intCast(u64, target_addr)); - var inst = if (macho_file.tlv_ptr_entries_table.contains(self.target)) blk: { - const offset = try math.divExact(u12, narrowed, 8); - break :blk aarch64.Instruction{ - .load_store_register = .{ - .rt = reg_info.rd, - .rn = reg_info.rn, - .offset = offset, - .opc = 0b01, - .op1 = 0b01, - .v = 0, - .size = reg_info.size, - }, - }; - } else aarch64.Instruction{ + var inst = aarch64.Instruction{ .add_subtract_immediate = .{ .rd = reg_info.rd, .rn = reg_info.rn, @@ -214,61 +224,81 @@ fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: u64, macho_fi .sf = @truncate(u1, reg_info.size), }, }; - mem.writeIntLittle(u32, code, inst.toU32()); + mem.writeIntLittle(u32, &buffer, inst.toU32()); }, .ARM64_RELOC_POINTER_TO_GOT => { - const result = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse - return error.Overflow; - mem.writeIntLittle(u32, code, @bitCast(u32, result)); - }, - .ARM64_RELOC_UNSIGNED => { - switch (self.length) { - 2 => mem.writeIntLittle(u32, code, @truncate(u32, @bitCast(u64, target_addr))), - 3 => mem.writeIntLittle(u64, code, target_addr), - else => unreachable, - } + const result = math.cast( + i32, + @intCast(i64, target_addr) - @intCast(i64, source_addr), + ) orelse return error.Overflow; + mem.writeIntLittle(u32, &buffer, @bitCast(u32, result)); }, .ARM64_RELOC_SUBTRACTOR => unreachable, .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_UNSIGNED => unreachable, } + try macho_file.base.file.?.pwriteAll(&buffer, base_offset + self.offset); } -fn resolveX8664(self: Relocation, source_addr: u64, target_addr: u64, code: []u8) !void { +fn 
resolveX8664( + self: Relocation, + macho_file: *MachO, + source_addr: u64, + target_addr: i64, + base_offset: u64, +) !void { const rel_type = @intToEnum(macho.reloc_type_x86_64, self.@"type"); - switch (rel_type) { - .X86_64_RELOC_BRANCH, - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - .X86_64_RELOC_TLV, - => { - const displacement = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4) orelse - return error.Overflow; - mem.writeIntLittle(u32, code, @bitCast(u32, displacement)); - }, - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - const correction: u3 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - const displacement = math.cast(i32, target_addr - @intCast(i64, source_addr + correction + 4)) orelse - return error.Overflow; - mem.writeIntLittle(u32, code, @bitCast(u32, displacement)); - }, - .X86_64_RELOC_UNSIGNED => { - switch (self.length) { - 2 => mem.writeIntLittle(u32, code, @truncate(u32, @bitCast(u64, target_addr))), - 3 => mem.writeIntLittle(u64, code, target_addr), - } - }, - .X86_64_RELOC_SUBTRACTOR => unreachable, - } + var buffer: [@sizeOf(u64)]u8 = undefined; + const code = blk: { + switch (rel_type) { + .X86_64_RELOC_BRANCH, + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_TLV, + => { + const displacement = math.cast( + i32, + @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4, + ) orelse return error.Overflow; + mem.writeIntLittle(u32, buffer[0..4], @bitCast(u32, displacement)); + break :blk buffer[0..4]; + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + const correction: u3 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + const displacement = math.cast( + i32, + target_addr - @intCast(i64, source_addr + correction + 4), + ) orelse return error.Overflow; + mem.writeIntLittle(u32, buffer[0..4], @bitCast(u32, displacement)); + break :blk buffer[0..4]; + }, + .X86_64_RELOC_UNSIGNED => { + switch (self.length) { + 2 => { + mem.writeIntLittle(u32, buffer[0..4], @truncate(u32, @bitCast(u64, target_addr))); + break :blk buffer[0..4]; + }, + 3 => { + mem.writeIntLittle(u64, buffer[0..8], @bitCast(u64, target_addr)); + break :blk &buffer; + }, + else => unreachable, + } + }, + .X86_64_RELOC_SUBTRACTOR => unreachable, + } + }; + try macho_file.base.file.?.pwriteAll(code, base_offset + self.offset); } inline fn isArithmeticOp(inst: *const [4]u8) bool { diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index e4ca890bbb..c712598fdc 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -233,7 +233,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac if (sym.n_desc != MachO.N_DESC_GCED) continue; // TODO tombstone - const atom = entry.getAtom(macho_file); + const atom = entry.getAtom(macho_file).?; const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); @@ -245,7 +245,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac if (sym.n_desc != MachO.N_DESC_GCED) continue; // TODO tombstone - const atom = entry.getAtom(macho_file); + const atom = entry.getAtom(macho_file).?; const match = sym.n_sect - 1; 
removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); @@ -257,7 +257,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac if (sym.n_desc != MachO.N_DESC_GCED) continue; // TODO tombstone - const atom = entry.getAtom(macho_file); + const atom = entry.getAtom(macho_file).?; const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 3cf4d54014..092a80a8ea 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -7,14 +7,19 @@ const macho = std.macho; const math = std.math; const mem = std.mem; +const aarch64 = @import("../../arch/aarch64/bits.zig"); +const bind = @import("bind.zig"); const link = @import("../../link.zig"); const trace = @import("../../tracy.zig").trace; +const Atom = MachO.Atom; const Cache = @import("../../Cache.zig"); const CodeSignature = @import("CodeSignature.zig"); const Compilation = @import("../../Compilation.zig"); const Dylib = @import("Dylib.zig"); const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const Trie = @import("Trie.zig"); const dead_strip = @import("dead_strip.zig"); @@ -545,7 +550,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const lc_writer = lc_buffer.writer(); var ncmds: u32 = 0; - try macho_file.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeLinkeditSegmentData(macho_file, &ncmds, lc_writer); // If the last section of __DATA segment is zerofill section, we need to ensure // that the free space between the end of the last non-zerofill section of __DATA @@ -952,3 +957,326 @@ fn allocateSymbols(macho_file: *MachO) !void { } } } + +fn writeLinkeditSegmentData(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + seg.filesize = 0; + seg.vmsize = 0; + + try writeDyldInfoData(macho_file, ncmds, lc_writer); + try macho_file.writeFunctionStarts(ncmds, lc_writer); + try macho_file.writeDataInCode(ncmds, lc_writer); + try macho_file.writeSymtabs(ncmds, lc_writer); + + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); +} + +fn writeDyldInfoData(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); + defer rebase_pointers.deinit(); + var bind_pointers = std.ArrayList(bind.Pointer).init(gpa); + defer bind_pointers.deinit(); + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); + defer lazy_bind_pointers.deinit(); + + const slice = macho_file.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + var atom = last_atom orelse continue; + const segment_index = slice.items(.segment_index)[sect_id]; + const header = slice.items(.header)[sect_id]; + + if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable + + log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() }); + + const seg = macho_file.segments.items[segment_index]; + + while (true) { + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(macho_file) }); + const sym = atom.getSymbol(macho_file); + const base_offset = sym.n_value - seg.vmaddr; + + for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); + try rebase_pointers.append(.{ + 
.offset = base_offset + offset, + .segment_id = segment_index, + }); + } + + for (atom.bindings.items) |binding| { + const bind_sym = macho_file.getSymbol(binding.target); + const bind_sym_name = macho_file.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } + + for (atom.lazy_bindings.items) |binding| { + const bind_sym = macho_file.getSymbol(binding.target); + const bind_sym_name = macho_file.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + + var trie: Trie = .{}; + defer trie.deinit(gpa); + + { + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
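+        // Until then, every export below is emitted with EXPORT_SYMBOL_FLAGS_KIND_REGULAR.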
+ log.debug("generating export trie", .{}); + + const text_segment = macho_file.segments.items[macho_file.text_segment_cmd_index.?]; + const base_address = text_segment.vmaddr; + + if (macho_file.base.options.output_mode == .Exe) { + for (&[_]SymbolWithLoc{ + try macho_file.getEntryPoint(), + macho_file.getGlobal("__mh_execute_header").?, + }) |global| { + const sym = macho_file.getSymbol(global); + const sym_name = macho_file.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } else { + assert(macho_file.base.options.output_mode == .Lib); + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); + + if (sym.undf()) continue; + if (!sym.ext()) continue; + if (sym.n_desc == MachO.N_DESC_GCED) continue; + + const sym_name = macho_file.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } + + try trie.finalize(gpa); + } + + const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); + assert(rebase_off == link_seg.fileoff); + const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); + + const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); + const bind_size = try bind.bindInfoSize(bind_pointers.items); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); + + const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); + const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); + + const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); + const export_size = trie.size; + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); + + const needed_size = export_off + export_size - rebase_off; + link_seg.filesize = needed_size; + + var buffer = try gpa.alloc(u8, math.cast(usize, needed_size) orelse return error.Overflow); + defer gpa.free(buffer); + mem.set(u8, buffer, 0); + + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try bind.writeRebaseInfo(rebase_pointers.items, writer); + try stream.seekTo(bind_off - rebase_off); + + try bind.writeBindInfo(bind_pointers.items, writer); + try stream.seekTo(lazy_bind_off - rebase_off); + + try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try stream.seekTo(export_off - rebase_off); + + _ = try trie.write(writer); + + log.debug("writing dyld info from 0x{x} to 0x{x}", .{ + rebase_off, + rebase_off + needed_size, + }); + + try macho_file.base.file.?.pwriteAll(buffer, rebase_off); + const start = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; + const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); + try populateLazyBindOffsetsInStubHelper(macho_file, buffer[start..end]); + + try 
lc_writer.writeStruct(macho.dyld_info_command{ + .cmd = .DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = @intCast(u32, rebase_off), + .rebase_size = @intCast(u32, rebase_size), + .bind_off = @intCast(u32, bind_off), + .bind_size = @intCast(u32, bind_size), + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = @intCast(u32, lazy_bind_off), + .lazy_bind_size = @intCast(u32, lazy_bind_size), + .export_off = @intCast(u32, export_off), + .export_size = @intCast(u32, export_size), + }); + ncmds.* += 1; +} + +fn populateLazyBindOffsetsInStubHelper(macho_file: *MachO, buffer: []const u8) !void { + const gpa = macho_file.base.allocator; + + const stub_helper_section_index = macho_file.stub_helper_section_index orelse return; + if (macho_file.stub_helper_preamble_atom == null) return; + + const section = macho_file.sections.get(stub_helper_section_index); + const last_atom = section.last_atom orelse return; + if (last_atom == macho_file.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? + + var table = std.AutoHashMap(i64, *Atom).init(gpa); + defer table.deinit(); + + { + var stub_atom = last_atom; + var laptr_atom = macho_file.sections.items(.last_atom)[macho_file.la_symbol_ptr_section_index.?].?; + const base_addr = blk: { + const seg = macho_file.segments.items[macho_file.data_segment_cmd_index.?]; + break :blk seg.vmaddr; + }; + + while (true) { + const laptr_off = blk: { + const sym = laptr_atom.getSymbol(macho_file); + break :blk @intCast(i64, sym.n_value - base_addr); + }; + try table.putNoClobber(laptr_off, stub_atom); + if (laptr_atom.prev) |prev| { + laptr_atom = prev; + stub_atom = stub_atom.prev.?; + } else break; + } + } + + var stream = std.io.fixedBufferStream(buffer); + var reader = stream.reader(); + var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); + try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); + defer offsets.deinit(); + var valid_block = false; + + while (true) { + const inst = reader.readByte() catch |err| switch (err) { + error.EndOfStream => break, + }; + const opcode: u8 = inst & macho.BIND_OPCODE_MASK; + + switch (opcode) { + macho.BIND_OPCODE_DO_BIND => { + valid_block = true; + }, + macho.BIND_OPCODE_DONE => { + if (valid_block) { + const offset = try stream.getPos(); + try offsets.append(.{ .sym_offset = undefined, .offset = @intCast(u32, offset) }); + } + valid_block = false; + }, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + var next = try reader.readByte(); + while (next != @as(u8, 0)) { + next = try reader.readByte(); + } + }, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + var inserted = offsets.pop(); + inserted.sym_offset = try std.leb.readILEB128(i64, reader); + try offsets.append(inserted); + }, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + _ = try std.leb.readULEB128(u64, reader); + }, + macho.BIND_OPCODE_SET_ADDEND_SLEB => { + _ = try std.leb.readILEB128(i64, reader); + }, + else => {}, + } + } + + const header = macho_file.sections.items(.header)[stub_helper_section_index]; + const stub_offset: u4 = switch (macho_file.base.options.target.cpu.arch) { + .x86_64 => 1, + .aarch64 => 2 * @sizeOf(u32), + else => unreachable, + }; + var buf: [@sizeOf(u32)]u8 = undefined; + _ = offsets.pop(); + + while (offsets.popOrNull()) |bind_offset| { + const atom = table.get(bind_offset.sym_offset).?; + const sym = atom.getSymbol(macho_file); + const file_offset = header.offset + sym.n_value - header.addr + stub_offset; + mem.writeIntLittle(u32, 
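// Recap of the opcode scan above: BIND_OPCODE_DO_BIND marks the current
// block as a real binding, BIND_OPCODE_DONE records the stream position
// just past it (the lazy-bind offset the stub helper must push), and
// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB attaches the __la_symbol_ptr
// offset used to look the owning stub atom back up in `table`.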
&buf, bind_offset.offset); + log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ + bind_offset.offset, + atom.getName(macho_file), + file_offset, + }); + try macho_file.base.file.?.pwriteAll(&buf, file_offset); + } +} From 79ab46ec918edc5d31c87a2535a30b8d2207228c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 13 Sep 2022 10:05:21 +0200 Subject: [PATCH 06/17] macho: start separating linking contexts --- src/link/MachO.zig | 366 +++------------------- src/link/MachO/zld.zig | 673 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 714 insertions(+), 325 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 2d88930768..75b983be03 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -269,7 +269,7 @@ pub const SymbolWithLoc = struct { /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) -const ideal_factor = 4; +const ideal_factor = 3; /// Default path to dyld const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; @@ -4322,7 +4322,7 @@ pub fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u return .{ .vmaddr = 0, .fileoff = 0 }; } -pub fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { +fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { for (self.segments.items) |seg, i| { const indexes = self.getSectionIndexes(@intCast(u8, i)); var out_seg = seg; @@ -4351,20 +4351,18 @@ pub fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { } } -pub fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; seg.filesize = 0; seg.vmsize = 0; try self.writeDyldInfoData(ncmds, lc_writer); - try self.writeFunctionStarts(ncmds, lc_writer); - try self.writeDataInCode(ncmds, lc_writer); try self.writeSymtabs(ncmds, lc_writer); seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); } -pub fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -4680,155 +4678,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } -const asc_u64 = std.sort.asc(u64); - -pub fn writeFunctionStarts(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const text_seg_index = macho_file.text_segment_cmd_index orelse return; - const text_sect_index = macho_file.text_section_index orelse return; - const text_seg = macho_file.segments.items[text_seg_index]; - - const gpa = macho_file.base.allocator; - - // We need to sort by address first - var addresses = std.ArrayList(u64).init(gpa); - defer addresses.deinit(); - try addresses.ensureTotalCapacityPrecise(macho_file.globals.items.len); - - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == MachO.N_DESC_GCED) continue; - const sect_id = sym.n_sect - 1; - if (sect_id != text_sect_index) continue; - - addresses.appendAssumeCapacity(sym.n_value); - } - - std.sort.sort(u64, addresses.items, {}, asc_u64); - - var offsets = std.ArrayList(u32).init(gpa); - defer offsets.deinit(); - try offsets.ensureTotalCapacityPrecise(addresses.items.len); - - var 
last_off: u32 = 0; - for (addresses.items) |addr| { - const offset = @intCast(u32, addr - text_seg.vmaddr); - const diff = offset - last_off; - - if (diff == 0) continue; - - offsets.appendAssumeCapacity(diff); - last_off = offset; - } - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - - const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64)); - try buffer.ensureTotalCapacity(max_size); - - for (offsets.items) |offset| { - try std.leb.writeULEB128(buffer.writer(), offset); - } - - const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); - const needed_size = buffer.items.len; - link_seg.filesize = offset + needed_size - link_seg.fileoff; - - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - - try macho_file.base.file.?.pwriteAll(buffer.items, offset); - - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; -} - -fn filterDataInCode( - dices: []align(1) const macho.data_in_code_entry, - start_addr: u64, - end_addr: u64, -) []align(1) const macho.data_in_code_entry { - const Predicate = struct { - addr: u64, - - pub fn predicate(macho_file: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= macho_file.addr; - } - }; - - const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); - - return dices[start..end]; -} - -pub fn writeDataInCode(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - var out_dice = std.ArrayList(macho.data_in_code_entry).init(macho_file.base.allocator); - defer out_dice.deinit(); - - const text_sect_id = macho_file.text_section_index orelse return; - const text_sect_header = macho_file.sections.items(.header)[text_sect_id]; - - for (macho_file.objects.items) |object| { - const dice = object.parseDataInCode() orelse continue; - try out_dice.ensureUnusedCapacity(dice.len); - - for (object.managed_atoms.items) |atom| { - const sym = atom.getSymbol(macho_file); - if (sym.n_desc == MachO.N_DESC_GCED) continue; - - const sect_id = sym.n_sect - 1; - if (sect_id != macho_file.text_section_index.?) 
{ - continue; - } - - const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; - const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; - const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse - return error.Overflow; - - for (filtered_dice) |single| { - const offset = single.offset - source_addr + base; - out_dice.appendAssumeCapacity(.{ - .offset = offset, - .length = single.length, - .kind = single.kind, - }); - } - } - } - - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); - const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - seg.filesize = offset + needed_size - seg.fileoff; - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - - try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; -} - -pub fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { var symtab_cmd = macho.symtab_command{ .cmdsize = @sizeOf(macho.symtab_command), .symoff = 0, @@ -4866,7 +4716,7 @@ pub fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { ncmds.* += 2; } -pub fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { +fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { const gpa = self.base.allocator; var locals = std.ArrayList(macho.nlist_64).init(gpa); @@ -4892,10 +4742,6 @@ pub fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); try locals.append(out_sym); } - - if (!self.base.options.strip) { - try self.generateSymbolStabs(object, &locals); - } } var exports = std.ArrayList(macho.nlist_64).init(gpa); @@ -5056,7 +4902,7 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi lc.nindirectsyms = nindirectsyms; } -pub fn writeCodeSignaturePadding( +fn writeCodeSignaturePadding( self: *MachO, code_sig: *CodeSignature, ncmds: *u32, @@ -5085,7 +4931,7 @@ pub fn writeCodeSignaturePadding( return @intCast(u32, offset); } -pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { +fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { const seg = self.segments.items[self.text_segment_cmd_index.?]; var buffer = std.ArrayList(u8).init(self.base.allocator); @@ -5109,7 +4955,7 @@ pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) ! } /// Writes Mach-O file header. 
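// The LC_FUNCTION_STARTS payload removed above (and re-added to zld.zig
// below in this same patch) stores only the deltas between successive
// sorted function addresses, each ULEB128-encoded. A self-contained sketch
// of that encoding, assuming a pre-sorted list of __TEXT-relative offsets
// (names are illustrative):
const std = @import("std");

fn writeFunctionStartsDeltas(writer: anytype, sorted_offsets: []const u32) !void {
    var last_off: u32 = 0;
    for (sorted_offsets) |offset| {
        const diff = offset - last_off;
        if (diff == 0) continue; // duplicate address; nothing to emit
        try std.leb.writeULEB128(writer, diff);
        last_off = offset;
    }
}

test "function starts are ULEB128-encoded deltas" {
    var buffer = std.ArrayList(u8).init(std.testing.allocator);
    defer buffer.deinit();
    try writeFunctionStartsDeltas(buffer.writer(), &[_]u32{ 0x0, 0x10, 0x90 });
    try std.testing.expectEqualSlices(u8, &[_]u8{ 0x10, 0x80, 0x01 }, buffer.items);
}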
-pub fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { +fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; @@ -5157,6 +5003,45 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { std.math.maxInt(@TypeOf(actual_size)); } +fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 { + // TODO: header and load commands have to be part of the __TEXT segment + const header_size = default_headerpad_size; + if (start < header_size) + return header_size; + + const end = start + padToIdeal(size); + + for (self.sections.items(.header)) |header| { + const tight_size = header.size; + const increased_size = padToIdeal(tight_size); + const test_end = header.offset + increased_size; + if (end > header.offset and start < test_end) { + return test_end; + } + } + + return null; +} + +// fn allocatedSize(self: *MachO, start: u64) u64 { +// if (start == 0) +// return 0; +// var min_pos: u64 = std.math.maxInt(u64); +// for (self.sections.items(.header)) |header| { +// if (header.offset <= start) continue; +// if (header.offset < min_pos) min_pos = header.offset; +// } +// return min_pos - start; +// } + +fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { + var start: u64 = 0; + while (self.detectAllocCollision(start, object_size)) |item_end| { + start = mem.alignForwardGeneric(u64, item_end, min_alignment); + } + return start; +} + pub fn makeStaticString(bytes: []const u8) [16]u8 { var buf = [_]u8{0} ** 16; assert(bytes.len <= buf.len); @@ -5321,161 +5206,6 @@ pub fn findFirst(comptime T: type, haystack: []align(1) const T, start: usize, p return i; } -pub fn generateSymbolStabs( - self: *MachO, - object: Object, - locals: *std.ArrayList(macho.nlist_64), -) !void { - assert(!self.base.options.strip); - - log.debug("parsing debug info in '{s}'", .{object.name}); - - const gpa = self.base.allocator; - var debug_info = try object.parseDwarfInfo(); - defer debug_info.deinit(gpa); - try dwarf.openDwarfDebugInfo(&debug_info, gpa); - - // We assume there is only one CU. - const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { - error.MissingDebugInfo => { - // TODO audit cases with missing debug info and audit our dwarf.zig module. 
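// detectAllocCollision/findFreeSpace above implement a simple first-fit
// probe: start at offset 0 and, while the candidate range overlaps some
// section's file range, jump past the offender and re-align. A reduced
// sketch over plain (offset, size) pairs; the real version additionally
// reserves the header area and pads every size with padToIdeal (names
// below are illustrative):
const std = @import("std");

const Range = struct { offset: u64, size: u64 };

fn detectCollision(ranges: []const Range, start: u64, size: u64) ?u64 {
    const end = start + size;
    for (ranges) |range| {
        const range_end = range.offset + range.size;
        if (end > range.offset and start < range_end) return range_end;
    }
    return null;
}

fn findFreeSpace(ranges: []const Range, size: u64, alignment: u64) u64 {
    var start: u64 = 0;
    while (detectCollision(ranges, start, size)) |item_end| {
        start = std.mem.alignForwardGeneric(u64, item_end, alignment);
    }
    return start;
}

test "first-fit probe skips occupied file ranges" {
    const ranges = [_]Range{
        .{ .offset = 0x0, .size = 0x20 },
        .{ .offset = 0x40, .size = 0x10 },
    };
    // 0x30 bytes do not fit in the [0x20, 0x40) hole, so we land past 0x50.
    try std.testing.expectEqual(@as(u64, 0x50), findFreeSpace(&ranges, 0x30, 8));
}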
- log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); - return; - }, - else => |e| return e, - }; - - const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name, debug_info.debug_str, compile_unit.*); - const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir, debug_info.debug_str, compile_unit.*); - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, tu_comp_dir), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, tu_name), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime, - }); - - var stabs_buf: [4]macho.nlist_64 = undefined; - - for (object.managed_atoms.items) |atom| { - const stabs = try self.generateSymbolStabsForSymbol( - atom.getSymbolWithLoc(), - debug_info, - &stabs_buf, - ); - try locals.appendSlice(stabs); - - for (atom.contained.items) |sym_at_off| { - const sym_loc = SymbolWithLoc{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }; - const contained_stabs = try self.generateSymbolStabsForSymbol( - sym_loc, - debug_info, - &stabs_buf, - ); - try locals.appendSlice(contained_stabs); - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); -} - -fn generateSymbolStabsForSymbol( - self: *MachO, - sym_loc: SymbolWithLoc, - debug_info: dwarf.DwarfInfo, - buf: *[4]macho.nlist_64, -) ![]const macho.nlist_64 { - const gpa = self.base.allocator; - const object = self.objects.items[sym_loc.file.?]; - const sym = self.getSymbol(sym_loc); - const sym_name = self.getSymbolName(sym_loc); - - if (sym.n_strx == 0) return buf[0..0]; - if (sym.n_desc == N_DESC_GCED) return buf[0..0]; - if (self.symbolIsTemp(sym_loc)) return buf[0..0]; - - const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; - const size: ?u64 = size: { - if (source_sym.tentative()) break :size null; - for (debug_info.func_list.items) |func| { - if (func.pc_range) |range| { - if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { - break :size range.end - range.start; - } - } - } - break :size null; - }; - - if (size) |ss| { - buf[0] = .{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[1] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[2] = .{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = ss, - }; - buf[3] = .{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = ss, - }; - return buf; - } else { - buf[0] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - return buf[0..1]; - } -} - // fn snapshotState(self: *MachO) !void { // const emit = self.base.options.emit orelse { // log.debug("no emit directory found; skipping snapshot...", .{}); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 092a80a8ea..b3f229ebc4 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ 
-1,6 +1,7 @@ const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; @@ -18,6 +19,7 @@ const CodeSignature = @import("CodeSignature.zig"); const Compilation = @import("../../Compilation.zig"); const Dylib = @import("Dylib.zig"); const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; const Trie = @import("Trie.zig"); @@ -618,20 +620,20 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr if (macho_file.base.options.entitlements) |path| { try codesig.addEntitlements(arena, path); } - codesig_offset = try macho_file.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + codesig_offset = try writeCodeSignaturePadding(macho_file, &codesig, &ncmds, lc_writer); break :blk codesig; } else null; var headers_buf = std.ArrayList(u8).init(arena); - try macho_file.writeSegmentHeaders(&ncmds, headers_buf.writer()); + try writeSegmentHeaders(macho_file, &ncmds, headers_buf.writer()); try macho_file.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - try macho_file.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + try writeHeader(macho_file, ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); if (codesig) |*csig| { - try macho_file.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last + try writeCodeSignature(macho_file, csig, codesig_offset.?); // code signing always comes last } } @@ -964,9 +966,9 @@ fn writeLinkeditSegmentData(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) seg.vmsize = 0; try writeDyldInfoData(macho_file, ncmds, lc_writer); - try macho_file.writeFunctionStarts(ncmds, lc_writer); - try macho_file.writeDataInCode(ncmds, lc_writer); - try macho_file.writeSymtabs(ncmds, lc_writer); + try writeFunctionStarts(macho_file, ncmds, lc_writer); + try writeDataInCode(macho_file, ncmds, lc_writer); + try writeSymtabs(macho_file, ncmds, lc_writer); seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); } @@ -1280,3 +1282,660 @@ fn populateLazyBindOffsetsInStubHelper(macho_file: *MachO, buffer: []const u8) ! 
try macho_file.base.file.?.pwriteAll(&buf, file_offset); } } + +const asc_u64 = std.sort.asc(u64); + +fn writeFunctionStarts(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const text_seg_index = macho_file.text_segment_cmd_index orelse return; + const text_sect_index = macho_file.text_section_index orelse return; + const text_seg = macho_file.segments.items[text_seg_index]; + + const gpa = macho_file.base.allocator; + + // We need to sort by address first + var addresses = std.ArrayList(u64).init(gpa); + defer addresses.deinit(); + try addresses.ensureTotalCapacityPrecise(macho_file.globals.items.len); + + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == MachO.N_DESC_GCED) continue; + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; + + addresses.appendAssumeCapacity(sym.n_value); + } + + std.sort.sort(u64, addresses.items, {}, asc_u64); + + var offsets = std.ArrayList(u32).init(gpa); + defer offsets.deinit(); + try offsets.ensureTotalCapacityPrecise(addresses.items.len); + + var last_off: u32 = 0; + for (addresses.items) |addr| { + const offset = @intCast(u32, addr - text_seg.vmaddr); + const diff = offset - last_off; + + if (diff == 0) continue; + + offsets.appendAssumeCapacity(diff); + last_off = offset; + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + + const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64)); + try buffer.ensureTotalCapacity(max_size); + + for (offsets.items) |offset| { + try std.leb.writeULEB128(buffer.writer(), offset); + } + + const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); + const needed_size = buffer.items.len; + link_seg.filesize = offset + needed_size - link_seg.fileoff; + + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try macho_file.base.file.?.pwriteAll(buffer.items, offset); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .FUNCTION_STARTS, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; +} + +fn filterDataInCode( + dices: []align(1) const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +) []align(1) const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + pub fn predicate(macho_file: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= macho_file.addr; + } + }; + + const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + + return dices[start..end]; +} + +fn writeDataInCode(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var out_dice = std.ArrayList(macho.data_in_code_entry).init(macho_file.base.allocator); + defer out_dice.deinit(); + + const text_sect_id = macho_file.text_section_index orelse return; + const text_sect_header = macho_file.sections.items(.header)[text_sect_id]; + + for (macho_file.objects.items) |object| { + const dice = object.parseDataInCode() orelse continue; + try out_dice.ensureUnusedCapacity(dice.len); + + for (object.managed_atoms.items) 
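// The loop below narrows the object's data-in-code entries to each atom's
// [source_addr, source_addr + atom.size) window via filterDataInCode's two
// findFirst scans, then rebases every surviving entry from the object's
// address space to its file offset in the output __text section.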
|atom| { + const sym = atom.getSymbol(macho_file); + if (sym.n_desc == MachO.N_DESC_GCED) continue; + + const sect_id = sym.n_sect - 1; + if (sect_id != macho_file.text_section_index.?) { + continue; + } + + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; + const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; + const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse + return error.Overflow; + + for (filtered_dice) |single| { + const offset = single.offset - source_addr + base; + out_dice.appendAssumeCapacity(.{ + .offset = offset, + .length = single.length, + .kind = single.kind, + }); + } + } + } + + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; +} + +fn writeSymtabs(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + var dysymtab_cmd = macho.dysymtab_command{ + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }; + var ctx = try writeSymtab(macho_file, &symtab_cmd); + defer ctx.imports_table.deinit(); + try writeDysymtab(macho_file, ctx, &dysymtab_cmd); + try writeStrtab(macho_file, &symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + try lc_writer.writeStruct(dysymtab_cmd); + ncmds.* += 2; +} + +fn writeSymtab(macho_file: *MachO, lc: *macho.symtab_command) !SymtabCtx { + const gpa = macho_file.base.allocator; + + var locals = std.ArrayList(macho.nlist_64).init(gpa); + defer locals.deinit(); + + for (macho_file.locals.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; + if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip + try locals.append(sym); + } + + for (macho_file.objects.items) |object, object_id| { + for (object.symtab.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = @intCast(u32, object_id) }; + if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if 
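// Ordering note for writeSymtab: locals are emitted first, then exports,
// then imports. writeDysymtab later derives iextdefsym/iundefsym from the
// returned counts, and imports_table remembers each import's position for
// the indirect symbol table.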
(macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip + var out_sym = sym; + out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(sym_loc)); + try locals.append(out_sym); + } + + if (!macho_file.base.options.strip) { + try generateSymbolStabs(macho_file, object, &locals); + } + } + + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); + + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + var out_sym = sym; + out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(global)); + try exports.append(out_sym); + } + + var imports = std.ArrayList(macho.nlist_64).init(gpa); + defer imports.deinit(); + + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); + + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); + if (sym.n_strx == 0) continue; // no name, skip + if (!sym.undf()) continue; // not an import, skip + const new_index = @intCast(u32, imports.items.len); + var out_sym = sym; + out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(global)); + try imports.append(out_sym); + try imports_table.putNoClobber(global, new_index); + } + + const nlocals = @intCast(u32, locals.items.len); + const nexports = @intCast(u32, exports.items.len); + const nimports = @intCast(u32, imports.items.len); + const nsyms = nlocals + nexports + nimports; + + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(needed_size); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); + + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try macho_file.base.file.?.pwriteAll(buffer.items, offset); + + lc.symoff = @intCast(u32, offset); + lc.nsyms = nsyms; + + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; +} + +fn writeStrtab(macho_file: *MachO, lc: *macho.symtab_command) !void { + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = macho_file.strtab.buffer.items.len; + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try macho_file.base.file.?.pwriteAll(macho_file.strtab.buffer.items, offset); + + lc.stroff = @intCast(u32, offset); + lc.strsize = @intCast(u32, needed_size); +} + +pub fn generateSymbolStabs( + macho_file: *MachO, + object: Object, + locals: *std.ArrayList(macho.nlist_64), +) !void { + assert(!macho_file.base.options.strip); + + log.debug("parsing debug info in '{s}'", .{object.name}); + + const gpa = macho_file.base.allocator; + var debug_info = try object.parseDwarfInfo(); + defer 
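// The stab layout emitted below: an N_SO/N_SO/N_OSO triple opens the
// compile-unit scope, every symbol with a recovered pc-range gets the
// four-entry N_BNSYM/N_FUN/N_FUN/N_ENSYM sequence, sizeless symbols get a
// single N_STSYM, and a zero-named N_SO closes the scope.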
debug_info.deinit(gpa); + try dwarf.openDwarfDebugInfo(&debug_info, gpa); + + // We assume there is only one CU. + const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { + error.MissingDebugInfo => { + // TODO audit cases with missing debug info and audit our dwarf.zig module. + log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); + return; + }, + else => |e| return e, + }; + + const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name, debug_info.debug_str, compile_unit.*); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir, debug_info.debug_str, compile_unit.*); + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try macho_file.strtab.insert(gpa, tu_comp_dir), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try macho_file.strtab.insert(gpa, tu_name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try macho_file.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime, + }); + + var stabs_buf: [4]macho.nlist_64 = undefined; + + for (object.managed_atoms.items) |atom| { + const stabs = try generateSymbolStabsForSymbol( + macho_file, + atom.getSymbolWithLoc(), + debug_info, + &stabs_buf, + ); + try locals.appendSlice(stabs); + + for (atom.contained.items) |sym_at_off| { + const sym_loc = SymbolWithLoc{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }; + const contained_stabs = try generateSymbolStabsForSymbol( + macho_file, + sym_loc, + debug_info, + &stabs_buf, + ); + try locals.appendSlice(contained_stabs); + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); +} + +fn generateSymbolStabsForSymbol( + macho_file: *MachO, + sym_loc: SymbolWithLoc, + debug_info: dwarf.DwarfInfo, + buf: *[4]macho.nlist_64, +) ![]const macho.nlist_64 { + const gpa = macho_file.base.allocator; + const object = macho_file.objects.items[sym_loc.file.?]; + const sym = macho_file.getSymbol(sym_loc); + const sym_name = macho_file.getSymbolName(sym_loc); + + if (sym.n_strx == 0) return buf[0..0]; + if (sym.n_desc == MachO.N_DESC_GCED) return buf[0..0]; + if (macho_file.symbolIsTemp(sym_loc)) return buf[0..0]; + + const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; + const size: ?u64 = size: { + if (source_sym.tentative()) break :size null; + for (debug_info.func_list.items) |func| { + if (func.pc_range) |range| { + if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { + break :size range.end - range.start; + } + } + } + break :size null; + }; + + if (size) |ss| { + buf[0] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[1] = .{ + .n_strx = try macho_file.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = ss, + }; + buf[3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = ss, + }; + return buf; + } else { + buf[0] = .{ + .n_strx = try macho_file.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, 
+ .n_desc = 0, + .n_value = sym.n_value, + }; + return buf[0..1]; + } +} + +const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), +}; + +fn writeDysymtab(macho_file: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + const gpa = macho_file.base.allocator; + const nstubs = @intCast(u32, macho_file.stubs_table.count()); + const ngot_entries = @intCast(u32, macho_file.got_entries_table.count()); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; + + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = nindirectsyms * @sizeOf(u32); + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + var buf = std.ArrayList(u8).init(gpa); + defer buf.deinit(); + try buf.ensureTotalCapacity(needed_size); + const writer = buf.writer(); + + if (macho_file.stubs_section_index) |sect_id| { + const stubs = &macho_file.sections.items(.header)[sect_id]; + stubs.reserved1 = 0; + for (macho_file.stubs.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(macho_file); + if (atom_sym.n_desc == MachO.N_DESC_GCED) continue; + const target_sym = macho_file.getSymbol(entry.target); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } + } + + if (macho_file.got_section_index) |sect_id| { + const got = &macho_file.sections.items(.header)[sect_id]; + got.reserved1 = nstubs; + for (macho_file.got_entries.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(macho_file); + if (atom_sym.n_desc == MachO.N_DESC_GCED) continue; + const target_sym = macho_file.getSymbol(entry.target); + if (target_sym.undf()) { + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } else { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + } + } + } + + if (macho_file.la_symbol_ptr_section_index) |sect_id| { + const la_symbol_ptr = &macho_file.sections.items(.header)[sect_id]; + la_symbol_ptr.reserved1 = nstubs + ngot_entries; + for (macho_file.stubs.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(macho_file); + if (atom_sym.n_desc == MachO.N_DESC_GCED) continue; + const target_sym = macho_file.getSymbol(entry.target); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } + } + + assert(buf.items.len == needed_size); + try macho_file.base.file.?.pwriteAll(buf.items, offset); + + lc.nlocalsym = ctx.nlocalsym; + lc.iextdefsym = iextdefsym; + lc.nextdefsym = ctx.nextdefsym; + lc.iundefsym = iundefsym; + lc.nundefsym = ctx.nundefsym; + lc.indirectsymoff = @intCast(u32, offset); + lc.nindirectsyms = nindirectsyms; +} + +fn writeCodeSignaturePadding( + macho_file: *MachO, + code_sig: *CodeSignature, + ncmds: *u32, + lc_writer: anytype, +) !u32 { + const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file + // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 + const offset = 
mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. + try macho_file.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + + return @intCast(u32, offset); +} + +fn writeCodeSignature(macho_file: *MachO, code_sig: *CodeSignature, offset: u32) !void { + const seg = macho_file.segments.items[macho_file.text_segment_cmd_index.?]; + + var buffer = std.ArrayList(u8).init(macho_file.base.allocator); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(code_sig.size()); + try code_sig.writeAdhocSignature(macho_file.base.allocator, .{ + .file = macho_file.base.file.?, + .exec_seg_base = seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, + .output_mode = macho_file.base.options.output_mode, + }, buffer.writer()); + assert(buffer.items.len == code_sig.size()); + + log.debug("writing code signature from 0x{x} to 0x{x}", .{ + offset, + offset + buffer.items.len, + }); + + try macho_file.base.file.?.pwriteAll(buffer.items, offset); +} + +fn writeSegmentHeaders(macho_file: *MachO, ncmds: *u32, writer: anytype) !void { + for (macho_file.segments.items) |seg, i| { + const indexes = macho_file.getSectionIndexes(@intCast(u8, i)); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. + for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + try writer.writeStruct(header); + } + + ncmds.* += 1; + } +} + +/// Writes Mach-O file header. +fn writeHeader(macho_file: *MachO, ncmds: u32, sizeofcmds: u32) !void { + var header: macho.mach_header_64 = .{}; + header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; + + switch (macho_file.base.options.target.cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => return error.UnsupportedCpuArchitecture, + } + + switch (macho_file.base.options.output_mode) { + .Exe => { + header.filetype = macho.MH_EXECUTE; + }, + .Lib => { + // By this point, it can only be a dylib. 
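// One detail of writeCodeSignaturePadding above: rather than zero-filling
// the whole signature area, a single zero byte written at
// offset + needed_size - 1 extends the file to its final length in one
// pwrite (the hole reads back as zeroes), which is all the hashing pass
// needs. A self-contained sketch of the trick (the helper name is
// illustrative):
const std = @import("std");

fn padFileTo(file: std.fs.File, len: u64) !void {
    if (len == 0) return;
    // Touch only the last byte; the OS materializes the gap as zeroes.
    try file.pwriteAll(&[_]u8{0}, len - 1);
}

test "padding extends the file to the requested length" {
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const file = try tmp.dir.createFile("padded", .{ .read = true });
    defer file.close();
    try padFileTo(file, 0x40);
    try std.testing.expectEqual(@as(u64, 0x40), (try file.stat()).size);
}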
+ header.filetype = macho.MH_DYLIB; + header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; + }, + else => unreachable, + } + + if (macho_file.getSectionByName("__DATA", "__thread_vars")) |sect_id| { + if (macho_file.sections.items(.header)[sect_id].size > 0) { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + } + } + + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; + + log.debug("writing Mach-O header {}", .{header}); + + try macho_file.base.file.?.pwriteAll(mem.asBytes(&header), 0); +} From e601969244d9e3da7f6c88792932297d87c821eb Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 13 Sep 2022 15:27:25 +0200 Subject: [PATCH 07/17] macho: rewrite how we allocate space in incremental context --- lib/std/macho.zig | 5 + src/link/Dwarf.zig | 17 +- src/link/MachO.zig | 1283 +++++++++++-------------------- src/link/MachO/Atom.zig | 3 +- src/link/MachO/DebugSymbols.zig | 17 +- src/link/MachO/Object.zig | 9 +- src/link/MachO/Relocation.zig | 1 - src/link/MachO/zld.zig | 14 +- 8 files changed, 516 insertions(+), 833 deletions(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 1955a00334..7511b482bd 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -798,6 +798,11 @@ pub const section_64 = extern struct { return tt == S_ZEROFILL or tt == S_GB_ZEROFILL or tt == S_THREAD_LOCAL_ZEROFILL; } + pub fn isSymbolStubs(sect: section_64) bool { + const tt = sect.@"type"(); + return tt == S_SYMBOL_STUBS; + } + pub fn isDebug(sect: section_64) bool { return sect.attrs() & S_ATTR_DEBUG != 0; } diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 474c822ae6..d7a64bc0d7 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -948,7 +948,7 @@ pub fn commitDeclState( new_offset, }); - try File.MachO.copyRangeAllOverlappingAlloc( + try copyRangeAllOverlappingAlloc( gpa, d_sym.file, debug_line_sect.offset, @@ -1247,7 +1247,7 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co new_offset, }); - try File.MachO.copyRangeAllOverlappingAlloc( + try copyRangeAllOverlappingAlloc( gpa, d_sym.file, debug_info_sect.offset, @@ -2338,3 +2338,16 @@ fn addDbgInfoErrorSet( // DW.AT.enumeration_type delimit children try dbg_info_buffer.append(0); } + +fn copyRangeAllOverlappingAlloc( + allocator: Allocator, + file: std.fs.File, + in_offset: u64, + out_offset: u64, + len: usize, +) !void { + const buf = try allocator.alloc(u8, len); + defer allocator.free(buf); + const amt = try file.preadAll(buf, in_offset); + try file.pwriteAll(buf[0..amt], out_offset); +} diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 75b983be03..6537d926db 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -115,6 +115,7 @@ segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.MultiArrayList(Section) = .{}, pagezero_segment_cmd_index: ?u8 = null, +header_segment_cmd_index: ?u8 = null, text_segment_cmd_index: ?u8 = null, data_const_segment_cmd_index: ?u8 = null, data_segment_cmd_index: ?u8 = null, @@ -124,6 +125,7 @@ text_section_index: ?u8 = null, stubs_section_index: ?u8 = null, stub_helper_section_index: ?u8 = null, got_section_index: ?u8 = null, +data_const_section_index: ?u8 = null, la_symbol_ptr_section_index: ?u8 = null, data_section_index: ?u8 = null, @@ -157,6 +159,8 @@ stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, +segment_table_dirty: bool = false, + /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the 
update-and-run chain. /// TODO once we add opening a prelinked output binary from file, this will become @@ -1066,136 +1070,6 @@ pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: } } -const GetOutputSectionResult = struct { - found_existing: bool, - sect_id: u8, -}; - -pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?GetOutputSectionResult { - const segname = sect.segName(); - const sectname = sect.sectName(); - - var found_existing: bool = true; - const sect_id: u8 = blk: { - if (mem.eql(u8, "__LLVM", segname)) { - log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - return null; - } - - if (sect.isCode()) { - if (self.text_section_index == null) { - self.text_section_index = try self.initSection("__TEXT", "__text", .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }); - found_existing = false; - } - break :blk self.text_section_index.?; - } - - if (sect.isDebug()) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - return null; - } - - switch (sect.@"type"()) { - macho.S_4BYTE_LITERALS, - macho.S_8BYTE_LITERALS, - macho.S_16BYTE_LITERALS, - => { - if (self.getSectionByName("__TEXT", "__const")) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection("__TEXT", "__const", .{}); - }, - macho.S_CSTRING_LITERALS => { - if (mem.startsWith(u8, sectname, "__objc")) { - if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection(segname, sectname, .{}); - } - if (self.getSectionByName("__TEXT", "__cstring")) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection("__TEXT", "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - }, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => { - if (self.getSectionByName("__DATA_CONST", sectname)) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection("__DATA_CONST", sectname, .{ - .flags = sect.flags, - }); - }, - macho.S_LITERAL_POINTERS, - macho.S_ZEROFILL, - macho.S_THREAD_LOCAL_VARIABLES, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - macho.S_THREAD_LOCAL_REGULAR, - macho.S_THREAD_LOCAL_ZEROFILL, - => { - if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection(segname, sectname, .{ .flags = sect.flags }); - }, - macho.S_COALESCED => { - if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection(segname, sectname, .{}); - }, - macho.S_REGULAR => { - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - if (self.getSectionByName("__DATA_CONST", "__const")) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection("__DATA_CONST", "__const", .{}); - } - } - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const") or - mem.eql(u8, sectname, "__cfstring") or - mem.eql(u8, sectname, "__objc_classlist") or - mem.eql(u8, sectname, 
"__objc_imageinfo")) - { - if (self.getSectionByName("__DATA_CONST", sectname)) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection("__DATA_CONST", sectname, .{}); - } else if (mem.eql(u8, sectname, "__data")) { - if (self.data_section_index == null) { - self.data_section_index = try self.initSection(segname, sectname, .{}); - found_existing = false; - } - break :blk self.data_section_index.?; - } - } - if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; - found_existing = false; - break :blk try self.initSection(segname, sectname, .{}); - }, - else => return null, - } - }; - - return GetOutputSectionResult{ - .found_existing = found_existing, - .sect_id = sect_id, - }; -} - pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32) !*Atom { const size_usize = math.cast(usize, size) orelse return error.Overflow; const atom = try gpa.create(Atom); @@ -1263,7 +1137,11 @@ pub fn allocateSpecialSymbols(self: *MachO) !void { const global = self.getGlobal(name) orelse continue; if (global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg = self.segments.items[self.text_segment_cmd_index.?]; + const seg_id = switch (self.mode) { + .incremental => self.sections.items(.segment_index)[self.text_section_index.?], + .one_shot => self.text_segment_cmd_index.?, + }; + const seg = self.segments.items[seg_id]; sym.n_sect = 1; sym.n_value = seg.vmaddr; @@ -1284,7 +1162,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { atom.* = Atom.empty; atom.sym_index = sym_index; atom.size = @sizeOf(u64); - atom.alignment = 3; + atom.alignment = @alignOf(u64); break :blk atom; }, .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), @@ -1357,39 +1235,6 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { return atom; } -pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { - assert(self.mode == .one_shot); - - const gpa = self.base.allocator; - const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); - - const target_sym = self.getSymbol(target); - assert(target_sym.undf()); - - const global = self.getGlobal(self.getSymbolName(target)).?; - try atom.bindings.append(gpa, .{ - .target = global, - .offset = 0, - }); - - try self.managed_atoms.append(gpa, atom); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - - const sym = atom.getSymbolPtr(self); - sym.n_type = macho.N_SECT; - const gop = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA"), - .sectname = makeStaticString("__thread_ptrs"), - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - })).?; - sym.n_sect = gop.sect_id + 1; - - try self.addAtomToSection(atom); - - return atom; -} - pub fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.dyld_private_atom != null) return; @@ -1403,7 +1248,7 @@ pub fn createDyldPrivateAtom(self: *MachO) !void { atom.* = Atom.empty; atom.sym_index = sym_index; atom.size = @sizeOf(u64); - atom.alignment = 3; + atom.alignment = @alignOf(u64); break :blk atom; }, .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), @@ -1450,7 +1295,11 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { atom.* = Atom.empty; atom.sym_index = sym_index; atom.size = size; - atom.alignment = alignment; + atom.alignment = switch (arch) { + .x86_64 => 1, + .aarch64 => @alignOf(u32), + else => 
unreachable, + }; break :blk atom; }, .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), @@ -1621,7 +1470,7 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, size, math.powi(u32, 2, alignment) catch unreachable); + sym.n_value = try self.allocateAtom(atom, size, atom.alignment); log.debug("allocated stub preamble atom at 0x{x}", .{sym.n_value}); try self.writeAtom(atom, code); } else { @@ -1650,7 +1499,11 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { atom.* = Atom.empty; atom.sym_index = sym_index; atom.size = size; - atom.alignment = alignment; + atom.alignment = switch (arch) { + .x86_64 => 1, + .aarch64 => @alignOf(u32), + else => unreachable, + }; break :blk atom; }, .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), @@ -1738,7 +1591,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, size, math.powi(u32, 2, alignment) catch unreachable); + sym.n_value = try self.allocateAtom(atom, size, atom.alignment); log.debug("allocated stub helper atom at 0x{x}", .{sym.n_value}); try self.writeAtom(atom, code); } else { @@ -1758,7 +1611,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi atom.* = Atom.empty; atom.sym_index = sym_index; atom.size = @sizeOf(u64); - atom.alignment = 3; + atom.alignment = @alignOf(u64); break :blk atom; }, .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), @@ -1843,7 +1696,12 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { atom.* = Atom.empty; atom.sym_index = sym_index; atom.size = size; - atom.alignment = alignment; + atom.alignment = switch (arch) { + .x86_64 => 1, + .aarch64 => @alignOf(u32), + else => unreachable, // unhandled architecture type + + }; break :blk atom; }, .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), @@ -1945,7 +1803,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, size, math.powi(u32, 2, alignment) catch unreachable); + sym.n_value = try self.allocateAtom(atom, size, atom.alignment); log.debug("allocated stub atom at 0x{x}", .{sym.n_value}); try self.writeAtom(atom, code); } else { @@ -1956,7 +1814,41 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { return atom; } +pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { + assert(self.mode == .one_shot); + + const gpa = self.base.allocator; + const sym_index = try self.allocateSymbol(); + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + + const target_sym = self.getSymbol(target); + assert(target_sym.undf()); + + const global = self.getGlobal(self.getSymbolName(target)).?; + try atom.bindings.append(gpa, .{ + .target = global, + .offset = 0, + }); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + const sym = atom.getSymbolPtr(self); + sym.n_type = macho.N_SECT; + const sect_id = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__thread_ptrs"), + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + })).?; + 
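// Representation change running through these hunks: in incremental mode
// atom.alignment now holds a byte alignment (e.g. @alignOf(u64) == 8)
// rather than a log2 exponent (3), so allocateAtom consumes it directly
// instead of going through math.powi.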
sym.n_sect = sect_id + 1; + + try self.addAtomToSection(atom); + + return atom; +} + pub fn createTentativeDefAtoms(self: *MachO) !void { + assert(self.mode == .one_shot); const gpa = self.base.allocator; for (self.globals.items) |global| { @@ -1971,20 +1863,15 @@ pub fn createTentativeDefAtoms(self: *MachO) !void { // text blocks for each tentative definition. const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; - const gop = (try self.getOutputSection(.{ + const sect_id = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__bss"), .flags = macho.S_ZEROFILL, })).?; - if (self.mode == .incremental and !gop.found_existing) { - // TODO allocate section - try self.allocateSection(gop.sect_id, size, alignment); - } - sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = gop.sect_id, + .n_sect = sect_id + 1, .n_desc = 0, .n_value = 0, }; @@ -1992,7 +1879,7 @@ pub fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom); + try self.addAtomToSection(atom); if (global.file) |file| { const object = &self.objects.items[file]; @@ -2350,7 +2237,12 @@ pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { pub fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Exe) return; - const seg = self.segments.items[self.text_segment_cmd_index.?]; + const seg_id = switch (self.mode) { + .incremental => self.header_segment_cmd_index.?, + // .incremental => self.sections.items(.segment_index)[self.text_section_index.?], + .one_shot => self.text_segment_cmd_index.?, + }; + const seg = self.segments.items[seg_id]; const global = try self.getEntryPoint(); const sym = self.getSymbol(global); try lc_writer.writeStruct(macho.entry_point_command{ @@ -2946,14 +2838,8 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); atom.size = code.len; - atom.alignment = math.log2(required_alignment); - const sect_id = try self.getOutputSectionAtom( - atom, - decl_name, - typed_value.ty, - typed_value.val, - required_alignment, - ); + atom.alignment = required_alignment; + const sect_id = self.getDeclOutputSection(decl); const symbol = atom.getSymbolPtr(self); symbol.n_strx = name_str_index; symbol.n_type = macho.N_SECT; @@ -3050,85 +2936,16 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) try self.updateDeclExports(module, decl_index, decl_exports); } -/// Checks if the value, or any of its embedded values stores a pointer, and thus requires -/// a rebase opcode for the dynamic linker. 
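// The recursive pointer scan below goes away with this rewrite: instead of
// analyzing each value for embedded pointers, getDeclOutputSection now
// routes functions to __text, mutable values to __data, and all remaining
// decls to the new __DATA_CONST,__const section, so no per-value rebase
// analysis is needed when picking a section.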
-fn needsPointerRebase(ty: Type, val: Value, mod: *Module) bool { - if (ty.zigTypeTag() == .Fn) { - return false; - } - if (val.pointerDecl()) |_| { - return true; - } - - switch (ty.zigTypeTag()) { - .Fn => unreachable, - .Pointer => return true, - .Array, .Vector => { - if (ty.arrayLen() == 0) return false; - const elem_ty = ty.childType(); - var elem_value_buf: Value.ElemValueBuffer = undefined; - const elem_val = val.elemValueBuffer(mod, 0, &elem_value_buf); - return needsPointerRebase(elem_ty, elem_val, mod); - }, - .Struct => { - const fields = ty.structFields().values(); - if (fields.len == 0) return false; - if (val.castTag(.aggregate)) |payload| { - const field_values = payload.data; - for (field_values) |field_val, i| { - if (needsPointerRebase(fields[i].ty, field_val, mod)) return true; - } else return false; - } else return false; - }, - .Optional => { - if (val.castTag(.opt_payload)) |payload| { - const sub_val = payload.data; - var buffer: Type.Payload.ElemType = undefined; - const sub_ty = ty.optionalChild(&buffer); - return needsPointerRebase(sub_ty, sub_val, mod); - } else return false; - }, - .Union => { - const union_obj = val.cast(Value.Payload.Union).?.data; - const active_field_ty = ty.unionFieldType(union_obj.tag, mod); - return needsPointerRebase(active_field_ty, union_obj.val, mod); - }, - .ErrorUnion => { - if (val.castTag(.eu_payload)) |payload| { - const payload_ty = ty.errorUnionPayload(); - return needsPointerRebase(payload_ty, payload.data, mod); - } else return false; - }, - else => return false, - } -} - -fn getOutputSectionAtom( - self: *MachO, - atom: *Atom, - name: []const u8, - ty: Type, - val: Value, - alignment: u32, -) !u8 { - const code = atom.code.items; - const mod = self.base.options.module.?; - const align_log_2 = math.log2(alignment); +fn getDeclOutputSection(self: *MachO, decl: *Module.Decl) u8 { + const ty = decl.ty; + const val = decl.val; const zig_ty = ty.zigTypeTag(); const mode = self.base.options.optimize_mode; - const sect_id: u8 = blk: { // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - const gop = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA"), - .sectname = makeStaticString("__bss"), - })).?; - if (!gop.found_existing) { - try self.allocateSection(gop.sect_id, code.len, align_log_2); - } - break :blk gop.sect_id; + @panic("TODO __DATA,__bss"); } else { break :blk self.data_section_index.?; } @@ -3138,61 +2955,124 @@ fn getOutputSectionAtom( break :blk self.data_section_index.?; } - if (needsPointerRebase(ty, val, mod)) { - const gop = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA_CONST"), - .sectname = makeStaticString("__const"), - })).?; - if (!gop.found_existing) { - try self.allocateSection(gop.sect_id, code.len, align_log_2); - } - break :blk gop.sect_id; + switch (zig_ty) { + .Fn => break :blk self.text_section_index.?, + else => { + if (val.castTag(.variable)) |_| { + break :blk self.data_section_index.?; + } + break :blk self.data_const_section_index.?; + }, + } + }; + return sect_id; +} + +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { + const segname = sect.segName(); + const sectname = sect.sectName(); + const sect_id: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; } - switch (zig_ty) { - .Fn => { - break :blk self.text_section_index.?; + if (sect.isCode()) { + 
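            // [editor's note] All code sections from input files collapse into the
            // single __TEXT,__text output section, which is created lazily on first
            // use and cached in text_section_index.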
if (self.text_section_index == null) { + self.text_section_index = try self.initSection("__TEXT", "__text", .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + break :blk self.text_section_index.?; + } + + if (sect.isDebug()) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + + switch (sect.@"type"()) { + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + if (self.getSectionByName("__TEXT", "__const")) |sect_id| break :blk sect_id; + break :blk try self.initSection("__TEXT", "__const", .{}); }, - .Array => { - if (val.tag() == .bytes) { - switch (ty.tag()) { - .array_u8_sentinel_0, - .const_slice_u8_sentinel_0, - .manyptr_const_u8_sentinel_0, - => { - const gop = (try self.getOutputSection(.{ - .segname = makeStaticString("__TEXT"), - .sectname = makeStaticString("__cstring"), - .flags = macho.S_CSTRING_LITERALS, - })).?; - if (!gop.found_existing) { - try self.allocateSection(gop.sect_id, code.len, align_log_2); - } - break :blk gop.sect_id; - }, - else => {}, + macho.S_CSTRING_LITERALS => { + if (mem.startsWith(u8, sectname, "__objc")) { + if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + break :blk try self.initSection(segname, sectname, .{}); + } + if (self.getSectionByName("__TEXT", "__cstring")) |sect_id| break :blk sect_id; + break :blk try self.initSection("__TEXT", "__cstring", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + }, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + if (self.getSectionByName("__DATA_CONST", sectname)) |sect_id| break :blk sect_id; + break :blk try self.initSection("__DATA_CONST", sectname, .{ + .flags = sect.flags, + }); + }, + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + break :blk try self.initSection(segname, sectname, .{ .flags = sect.flags }); + }, + macho.S_COALESCED => { + if (self.getSectionByName(segname, sectname)) |sect_id| break :blk sect_id; + break :blk try self.initSection(segname, sectname, .{}); + }, + macho.S_REGULAR => { + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + if (self.getSectionByName("__DATA_CONST", "__const")) |sect_id| break :blk sect_id; + break :blk try self.initSection("__DATA_CONST", "__const", .{}); } } + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + if (self.getSectionByName("__DATA_CONST", sectname)) |sect_id| break :blk sect_id; + break :blk try self.initSection("__DATA_CONST", sectname, .{}); + } else if (mem.eql(u8, sectname, "__data")) { + if (self.data_section_index == null) { + self.data_section_index = try self.initSection(segname, sectname, .{}); + } + break :blk self.data_section_index.?; + } + } + if (self.getSectionByName(segname, sectname)) |sect_id| break :blk 
sect_id; + break :blk try self.initSection(segname, sectname, .{}); }, - else => {}, + else => break :blk null, } - const gop = (try self.getOutputSection(.{ - .segname = makeStaticString("__TEXT"), - .sectname = makeStaticString("__const"), - })).?; - if (!gop.found_existing) { - try self.allocateSection(gop.sect_id, code.len, align_log_2); - } - break :blk gop.sect_id; }; - - const header = self.sections.items(.header)[sect_id]; - log.debug(" allocating atom '{s}' in '{s},{s}', ord({d})", .{ - name, - header.segName(), - header.sectName(), - sect_id, - }); return sect_id; } @@ -3210,13 +3090,7 @@ fn updateDeclCode(self: *MachO, decl_index: Module.Decl.Index, code: []const u8) const atom = &decl.link.macho; const decl_ptr = self.decls.getPtr(decl_index).?; if (decl_ptr.* == null) { - decl_ptr.* = try self.getOutputSectionAtom( - atom, - sym_name, - decl.ty, - decl.val, - required_alignment, - ); + decl_ptr.* = self.getDeclOutputSection(decl); } const sect_id = decl_ptr.*.?; const code_len = code.len; @@ -3536,15 +3410,13 @@ pub fn populateMissingMetadata(self: *MachO) !void { } } - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u8, self.segments.items.len); - const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); - const program_code_size_hint = self.base.options.program_code_size_hint; - const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = headerpad_size + program_code_size_hint + got_size_hint; + if (self.header_segment_cmd_index == null) { + // The first __TEXT segment is immovable and covers MachO header and load commands. + self.header_segment_cmd_index = @intCast(u8, self.segments.items.len); + const ideal_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); + log.debug("found __TEXT segment (header-only) free space 0x{x} to 0x{x}", .{ 0, needed_size }); try self.segments.append(gpa, .{ .segname = makeStaticString("__TEXT"), @@ -3555,150 +3427,101 @@ pub fn populateMissingMetadata(self: *MachO) !void { .initprot = macho.PROT.READ | macho.PROT.EXEC, .cmdsize = @sizeOf(macho.segment_command_64), }); + self.segment_table_dirty = true; } if (self.text_section_index == null) { - const alignment: u2 = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const needed_size = self.base.options.program_code_size_hint; - self.text_section_index = try self.initSection("__TEXT", "__text", .{ + self.text_section_index = try self.allocateSection("__TEXT1", "__text", .{ + .size = self.base.options.program_code_size_hint, + .alignment = switch (cpu_arch) { + .x86_64 => 1, + .aarch64 => @sizeOf(u32), + else => unreachable, // unhandled architecture type + }, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .prot = macho.PROT.READ | macho.PROT.EXEC, }); - try self.allocateSection(self.text_section_index.?, needed_size, alignment); + self.segment_table_dirty = true; } if (self.stubs_section_index == null) { - const alignment: u2 = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (cpu_arch) { + const stub_size: u32 = switch (cpu_arch) { .x86_64 => 6, .aarch64 => 3 * @sizeOf(u32), else 
=> unreachable, // unhandled architecture type }; - const needed_size = stub_size * self.base.options.symbol_count_hint; - self.stubs_section_index = try self.initSection("__TEXT", "__stubs", .{ + self.stubs_section_index = try self.allocateSection("__TEXT2", "__stubs", .{ + .size = stub_size, + .alignment = switch (cpu_arch) { + .x86_64 => 1, + .aarch64 => @sizeOf(u32), + else => unreachable, // unhandled architecture type + }, .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, .reserved2 = stub_size, + .prot = macho.PROT.READ | macho.PROT.EXEC, }); - try self.allocateSection(self.stubs_section_index.?, needed_size, alignment); + self.segment_table_dirty = true; } if (self.stub_helper_section_index == null) { - const alignment: u2 = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const preamble_size: u6 = switch (cpu_arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - const stub_size: u4 = switch (cpu_arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const needed_size = stub_size * self.base.options.symbol_count_hint + preamble_size; - self.stub_helper_section_index = try self.initSection("__TEXT", "__stub_helper", .{ + self.stub_helper_section_index = try self.allocateSection("__TEXT3", "__stub_helper", .{ + .size = @sizeOf(u32), + .alignment = switch (cpu_arch) { + .x86_64 => 1, + .aarch64 => @sizeOf(u32), + else => unreachable, // unhandled architecture type + }, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .prot = macho.PROT.READ | macho.PROT.EXEC, }); - try self.allocateSection(self.stub_helper_section_index.?, needed_size, alignment); - } - - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u8, self.segments.items.len); - const base = self.getSegmentAllocBase(&.{self.text_segment_cmd_index.?}); - const vmaddr = base.vmaddr; - const fileoff = base.fileoff; - const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - - log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ - fileoff, - fileoff + needed_size, - }); - - try self.segments.append(gpa, .{ - .segname = makeStaticString("__DATA_CONST"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }); + self.segment_table_dirty = true; } if (self.got_section_index == null) { - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.got_section_index = try self.initSection("__DATA_CONST", "__got", .{ + self.got_section_index = try self.allocateSection("__DATA_CONST", "__got", .{ + .size = @sizeOf(u64) * self.base.options.symbol_count_hint, + .alignment = @alignOf(u64), .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + .prot = macho.PROT.READ | macho.PROT.WRITE, }); - try self.allocateSection(self.got_section_index.?, needed_size, alignment); + self.segment_table_dirty = true; } - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u8, self.segments.items.len); - const base = self.getSegmentAllocBase(&.{self.data_const_segment_cmd_index.?}); - const vmaddr = 
base.vmaddr; - const fileoff = base.fileoff; - const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; - const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - - log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ - fileoff, - fileoff + needed_size, - }); - - try self.segments.append(gpa, .{ - .segname = makeStaticString("__DATA"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), + if (self.data_const_section_index == null) { + self.data_const_section_index = try self.allocateSection("__DATA_CONST1", "__const", .{ + .size = @sizeOf(u64), + .alignment = @alignOf(u64), + .flags = macho.S_REGULAR, + .prot = macho.PROT.READ | macho.PROT.WRITE, }); + self.segment_table_dirty = true; } if (self.la_symbol_ptr_section_index == null) { - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.la_symbol_ptr_section_index = try self.initSection("__DATA", "__la_symbol_ptr", .{ + self.la_symbol_ptr_section_index = try self.allocateSection("__DATA", "__la_symbol_ptr", .{ + .size = @sizeOf(u64), + .alignment = @alignOf(u64), .flags = macho.S_LAZY_SYMBOL_POINTERS, + .prot = macho.PROT.READ | macho.PROT.WRITE, }); - try self.allocateSection(self.la_symbol_ptr_section_index.?, needed_size, alignment); + self.segment_table_dirty = true; } if (self.data_section_index == null) { - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.data_section_index = try self.initSection("__DATA", "__data", .{}); - try self.allocateSection(self.data_section_index.?, needed_size, alignment); + self.data_section_index = try self.allocateSection("__DATA1", "__data", .{ + .size = @sizeOf(u64), + .alignment = @alignOf(u64), + .flags = macho.S_REGULAR, + .prot = macho.PROT.READ | macho.PROT.WRITE, + }); + self.segment_table_dirty = true; } if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); - const base = self.getSegmentAllocBase(&.{self.data_segment_cmd_index.?}); - const vmaddr = base.vmaddr; - const fileoff = base.fileoff; - - log.debug("found __LINKEDIT segment free space at 0x{x}", .{fileoff}); - try self.segments.append(gpa, .{ .segname = makeStaticString("__LINKEDIT"), - .vmaddr = vmaddr, - .fileoff = fileoff, .maxprot = macho.PROT.READ, .initprot = macho.PROT.READ, .cmdsize = @sizeOf(macho.segment_command_64), @@ -3825,338 +3648,65 @@ pub fn calcMinHeaderPad(self: *MachO) !u64 { return offset; } -fn allocateSection(self: *MachO, sect_id: u8, size: u64, alignment: u32) !void { - const segment_id = self.sections.items(.segment_index)[sect_id]; - const seg = &self.segments.items[segment_id]; - const header = &self.sections.items(.header)[sect_id]; - header.size = size; - header.@"align" = alignment; - - const prev_end_off = if (sect_id > 0) blk: { - const prev_section = self.sections.get(sect_id - 1); - if (prev_section.segment_index == segment_id) { - const prev_header = prev_section.header; - break :blk prev_header.offset + padToIdeal(prev_header.size); - } else break :blk seg.fileoff; - } else 0; - const alignment_pow_2 = try math.powi(u32, 2, alignment); - // TODO better prealloc for __text section - // const padding: u64 = if (sect_id == 0) try 
self.calcMinHeaderPad() else 0; - const padding: u64 = if (sect_id == 0) 0x1000 else 0; - const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); - - if (!header.isZerofill()) { - header.offset = @intCast(u32, off); - } - header.addr = seg.vmaddr + off - seg.fileoff; - - // TODO Will this break if we are inserting section that is not the last section - // in a segment? - const max_size = self.allocatedSize(segment_id, off); - - if (size > max_size) { - try self.growSection(sect_id, @intCast(u32, size)); - self.markRelocsDirtyByAddress(header.addr + size); - } - - log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); - - self.updateSectionOrdinals(sect_id + 1); -} - -fn getSectionPrecedence(header: macho.section_64) u4 { - if (header.isCode()) { - if (mem.eql(u8, "__text", header.sectName())) return 0x0; - if (header.@"type"() == macho.S_SYMBOL_STUBS) return 0x1; - return 0x2; - } - switch (header.@"type"()) { - macho.S_NON_LAZY_SYMBOL_POINTERS, - macho.S_LAZY_SYMBOL_POINTERS, - => return 0x0, - macho.S_MOD_INIT_FUNC_POINTERS => return 0x1, - macho.S_MOD_TERM_FUNC_POINTERS => return 0x2, - macho.S_ZEROFILL => return 0xf, - macho.S_THREAD_LOCAL_REGULAR => return 0xd, - macho.S_THREAD_LOCAL_ZEROFILL => return 0xe, - else => if (mem.eql(u8, "__eh_frame", header.sectName())) - return 0xf - else - return 0x3, - } -} - -const InitSectionOpts = struct { +fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: struct { + size: u64 = 0, + alignment: u32 = 0, + prot: macho.vm_prot_t = macho.PROT.NONE, flags: u32 = macho.S_REGULAR, - reserved1: u32 = 0, reserved2: u32 = 0, -}; +}) !u8 { + const gpa = self.base.allocator; + // In incremental context, we create one section per segment pairing. This way, + // we can move the segment in raw file as we please. + const segment_id = @intCast(u8, self.segments.items.len); + const section_id = @intCast(u8, self.sections.slice().len); + const vmaddr = blk: { + const prev_segment = self.segments.items[segment_id - 1]; + break :blk mem.alignForwardGeneric(u64, prev_segment.vmaddr + prev_segment.vmsize, self.page_size); + }; + // We commit more memory than needed upfront so that we don't have to reallocate too soon. 
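    // [editor's note] Worked example, assuming a 16 KiB page size (0x4000, typical
    // for arm64 macOS): opts.size = 0x1234 rounds up to vmsize = 0x4000, leaving
    // 0x2dcc bytes of slack before the segment has to be moved or regrown.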
+ const vmsize = mem.alignForwardGeneric(u64, opts.size, self.page_size); + const off = self.findFreeSpace(opts.size, self.page_size); -pub fn initSection( - self: *MachO, - segname: []const u8, - sectname: []const u8, - opts: InitSectionOpts, -) !u8 { - const segment_id = self.getSegmentByName(segname).?; - const seg = &self.segments.items[segment_id]; - const index = try self.insertSection(segment_id, .{ + log.debug("found {s},{s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + segname, + sectname, + off, + off + opts.size, + vmaddr, + vmaddr + vmsize, + }); + + const seg = try self.segments.addOne(gpa); + seg.* = .{ + .segname = makeStaticString(segname), + .vmaddr = vmaddr, + .vmsize = vmsize, + .fileoff = off, + .filesize = opts.size, + .maxprot = opts.prot, + .initprot = opts.prot, + .nsects = 1, + .cmdsize = @sizeOf(macho.segment_command_64) + @sizeOf(macho.section_64), + }; + + var section = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.segname, + .segname = makeStaticString(segname), + .addr = mem.alignForwardGeneric(u64, vmaddr, opts.alignment), + .offset = mem.alignForwardGeneric(u32, @intCast(u32, off), opts.alignment), + .size = opts.size, + .@"align" = math.log2(opts.alignment), .flags = opts.flags, - .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, + }; + assert(!section.isZerofill()); // TODO zerofill sections + + try self.sections.append(gpa, .{ + .segment_index = segment_id, + .header = section, }); - seg.cmdsize += @sizeOf(macho.section_64); - seg.nsects += 1; - return index; -} - -fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 { - const precedence = getSectionPrecedence(header); - const indexes = self.getSectionIndexes(segment_index); - const insertion_index = for (self.sections.items(.header)[indexes.start..indexes.end]) |hdr, i| { - if (getSectionPrecedence(hdr) > precedence) break @intCast(u8, i + indexes.start); - } else indexes.end; - log.debug("inserting section '{s},{s}' at index {d}", .{ - header.segName(), - header.sectName(), - insertion_index, - }); - for (&[_]*?u8{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.got_section_index, - &self.la_symbol_ptr_section_index, - &self.data_section_index, - }) |maybe_index| { - const index = maybe_index.* orelse continue; - if (insertion_index <= index) maybe_index.* = index + 1; - } - try self.sections.insert(self.base.allocator, insertion_index, .{ - .segment_index = segment_index, - .header = header, - }); - return insertion_index; -} - -fn updateSectionOrdinals(self: *MachO, start: u8) void { - const tracy = trace(@src()); - defer tracy.end(); - - const slice = self.sections.slice(); - for (slice.items(.last_atom)[start..]) |last_atom| { - var atom = last_atom orelse continue; - - while (true) { - const sym = atom.getSymbolPtr(self); - sym.n_sect = start + 1; - - for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }); - contained_sym.n_sect = start + 1; - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } - } -} - -fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { - var atom = self.sections.items(.last_atom)[sect_id] orelse return; - - while (true) { - const atom_sym = atom.getSymbolPtr(self); - atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - - for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ - 
.sym_index = sym_at_off.sym_index, - .file = atom.file, - }); - contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } -} - -fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { - const segment = &self.segments.items[segment_index]; - const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); - assert(new_segment_size > segment.filesize); - const offset_amt = new_segment_size - segment.filesize; - log.debug("growing segment {s} from 0x{x} to 0x{x}", .{ - segment.segname, - segment.filesize, - new_segment_size, - }); - segment.filesize = new_segment_size; - segment.vmsize = new_segment_size; - - log.debug(" (new segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - segment.fileoff, - segment.fileoff + segment.filesize, - segment.vmaddr, - segment.vmaddr + segment.vmsize, - }); - - var next: u8 = segment_index + 1; - while (next < self.linkedit_segment_cmd_index.? + 1) : (next += 1) { - const next_segment = &self.segments.items[next]; - - try MachO.copyRangeAllOverlappingAlloc( - self.base.allocator, - self.base.file.?, - next_segment.fileoff, - next_segment.fileoff + offset_amt, - math.cast(usize, next_segment.filesize) orelse return error.Overflow, - ); - - next_segment.fileoff += offset_amt; - next_segment.vmaddr += offset_amt; - - log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_segment.segname, - next_segment.fileoff, - next_segment.fileoff + next_segment.filesize, - next_segment.vmaddr, - next_segment.vmaddr + next_segment.vmsize, - }); - - const indexes = self.getSectionIndexes(next); - for (self.sections.items(.header)[indexes.start..indexes.end]) |*header, i| { - header.offset += @intCast(u32, offset_amt); - header.addr += offset_amt; - - log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - header.segName(), - header.sectName(), - header.offset, - header.offset + header.size, - header.addr, - header.addr + header.size, - }); - - try self.shiftLocalsByOffset(@intCast(u8, i + indexes.start), @intCast(i64, offset_amt)); - } - } -} - -fn growSection(self: *MachO, sect_id: u8, new_size: u32) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const section = self.sections.get(sect_id); - const segment_index = section.segment_index; - const header = section.header; - const segment = self.segments.items[segment_index]; - - const alignment = try math.powi(u32, 2, header.@"align"); - const max_size = self.allocatedSize(segment_index, header.offset); - const ideal_size = padToIdeal(new_size); - const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); - - if (needed_size > max_size) blk: { - log.debug(" (need to grow! needed 0x{x}, max 0x{x})", .{ needed_size, max_size }); - - const indexes = self.getSectionIndexes(segment_index); - if (sect_id == indexes.end - 1) { - // Last section, just grow segments - try self.growSegment(segment_index, segment.filesize + needed_size - max_size); - break :blk; - } - - // Need to move all sections below in file and address spaces. - const offset_amt = offset: { - const max_alignment = try self.getSectionMaxAlignment(sect_id + 1, indexes.end); - break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); - }; - - // Before we commit to this, check if the segment needs to grow too. 
- // We assume that each section header is growing linearly with the increasing - // file offset / virtual memory address space. - const last_sect_header = self.sections.items(.header)[indexes.end - 1]; - const last_sect_off = last_sect_header.offset + last_sect_header.size; - const seg_off = segment.fileoff + segment.filesize; - - if (last_sect_off + offset_amt > seg_off) { - // Need to grow segment first. - const spill_size = (last_sect_off + offset_amt) - seg_off; - try self.growSegment(segment_index, segment.filesize + spill_size); - } - - // We have enough space to expand within the segment, so move all sections by - // the required amount and update their header offsets. - const next_sect = self.sections.items(.header)[sect_id + 1]; - const total_size = last_sect_off - next_sect.offset; - - try MachO.copyRangeAllOverlappingAlloc( - self.base.allocator, - self.base.file.?, - next_sect.offset, - next_sect.offset + offset_amt, - math.cast(usize, total_size) orelse return error.Overflow, - ); - - for (self.sections.items(.header)[sect_id + 1 .. indexes.end]) |*moved_sect, i| { - moved_sect.offset += @intCast(u32, offset_amt); - moved_sect.addr += offset_amt; - - log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - moved_sect.segName(), - moved_sect.sectName(), - moved_sect.offset, - moved_sect.offset + moved_sect.size, - moved_sect.addr, - moved_sect.addr + moved_sect.size, - }); - - try self.shiftLocalsByOffset(@intCast(u8, sect_id + 1 + i), @intCast(i64, offset_amt)); - } - } -} - -fn allocatedSize(self: MachO, segment_id: u8, start: u64) u64 { - const segment = self.segments.items[segment_id]; - const indexes = self.getSectionIndexes(segment_id); - assert(start >= segment.fileoff); - var min_pos: u64 = segment.fileoff + segment.filesize; - if (start > min_pos) return 0; - for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.offset <= start) continue; - if (header.offset < min_pos) min_pos = header.offset; - } - return min_pos - start; -} - -fn getSectionMaxAlignment(self: *MachO, start: u8, end: u8) !u32 { - var max_alignment: u32 = 1; - const slice = self.sections.slice(); - for (slice.items(.header)[start..end]) |header| { - const alignment = try math.powi(u32, 2, header.@"align"); - max_alignment = math.max(max_alignment, alignment); - } - return max_alignment; -} - -fn allocateAtomCommon(self: *MachO, atom: *Atom) !void { - if (self.mode == .incremental) { - const sym_name = atom.getName(self); - const size = atom.size; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, size, alignment); - log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); - atom.getSymbolPtr(self).n_value = vaddr; - } else try self.addAtomToSection(atom); + return section_id; } fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) !u64 { @@ -4164,12 +3714,13 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! 
defer tracy.end(); const sect_id = atom.getSymbol(self).n_sect - 1; + const segment = &self.segments.items[self.sections.items(.segment_index)[sect_id]]; const header = &self.sections.items(.header)[sect_id]; const free_list = &self.sections.items(.free_list)[sect_id]; const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; const requires_padding = blk: { if (!header.isCode()) break :blk false; - if (mem.eql(u8, "__stubs", header.sectName())) break :blk false; + if (header.isSymbolStubs()) break :blk false; if (mem.eql(u8, "__stub_helper", header.sectName())) break :blk false; break :blk true; }; @@ -4229,24 +3780,58 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! atom_placement = last; break :blk new_start_vaddr; } else { - break :blk mem.alignForwardGeneric(u64, header.addr, alignment); + break :blk mem.alignForwardGeneric(u64, segment.vmaddr, alignment); } }; const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = @intCast(u32, (vaddr + new_atom_size) - header.addr); - try self.growSection(sect_id, needed_size); - self.markRelocsDirtyByAddress(header.addr + needed_size); - maybe_last_atom.* = atom; + const sect_capacity = self.allocatedSize(header.offset); + const needed_size = (vaddr + new_atom_size) - segment.vmaddr; + if (needed_size > sect_capacity) { + const new_offset = self.findFreeSpace(needed_size, self.page_size); + const current_size = if (maybe_last_atom.*) |last_atom| blk: { + const sym = last_atom.getSymbol(self); + break :blk (sym.n_value + last_atom.size) - segment.vmaddr; + } else 0; + + log.debug("moving {s},{s} from 0x{x} to 0x{x}", .{ + header.segName(), + header.sectName(), + header.offset, + new_offset, + }); + + const amt = try self.base.file.?.copyRangeAll( + header.offset, + self.base.file.?, + new_offset, + current_size, + ); + if (amt != current_size) return error.InputOutput; + header.offset = @intCast(u32, new_offset); + segment.fileoff = new_offset; + } + + const sect_vm_capacity = self.allocatedVirtualSize(segment.vmaddr); + if (needed_size > sect_vm_capacity) { + self.markRelocsDirtyByAddress(segment.vmaddr + needed_size); + @panic("TODO grow section in VM"); + } + header.size = needed_size; + segment.filesize = needed_size; + segment.vmsize = mem.alignForwardGeneric(u64, needed_size, self.page_size); + log.warn("updating {s},{s}: {x}, {x}", .{ header.segName(), header.sectName(), segment.vmsize, segment.filesize }); + maybe_last_atom.* = atom; + + self.segment_table_dirty = true; } + const align_pow = @intCast(u32, math.log2(alignment)); if (header.@"align" < align_pow) { header.@"align" = align_pow; } - atom.size = new_atom_size; - atom.alignment = align_pow; if (atom.prev) |prev| { prev.next = atom.next; @@ -4270,6 +3855,78 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! 
return vaddr; } +fn getSectionPrecedence(header: macho.section_64) u4 { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) return 0x0; + if (header.@"type"() == macho.S_SYMBOL_STUBS) return 0x1; + return 0x2; + } + switch (header.@"type"()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => return 0x0, + macho.S_MOD_INIT_FUNC_POINTERS => return 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => return 0x2, + macho.S_ZEROFILL => return 0xf, + macho.S_THREAD_LOCAL_REGULAR => return 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => return 0xe, + else => if (mem.eql(u8, "__eh_frame", header.sectName())) + return 0xf + else + return 0x3, + } +} + +const InitSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +pub fn initSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: InitSectionOpts) !u8 { + const segment_id = self.getSegmentByName(segname).?; + const seg = &self.segments.items[segment_id]; + const index = try self.insertSection(segment_id, .{ + .sectname = makeStaticString(sectname), + .segname = seg.segname, + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }); + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; + return index; +} + +fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 { + const precedence = getSectionPrecedence(header); + const indexes = self.getSectionIndexes(segment_index); + const insertion_index = for (self.sections.items(.header)[indexes.start..indexes.end]) |hdr, i| { + if (getSectionPrecedence(hdr) > precedence) break @intCast(u8, i + indexes.start); + } else indexes.end; + log.debug("inserting section '{s},{s}' at index {d}", .{ + header.segName(), + header.sectName(), + insertion_index, + }); + for (&[_]*?u8{ + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.got_section_index, + &self.la_symbol_ptr_section_index, + &self.data_section_index, + }) |maybe_index| { + const index = maybe_index.* orelse continue; + if (insertion_index <= index) maybe_index.* = index + 1; + } + try self.sections.insert(self.base.allocator, insertion_index, .{ + .segment_index = segment_index, + .header = header, + }); + return insertion_index; +} + pub fn addAtomToSection(self: *MachO, atom: *Atom) !void { const sect_id = atom.getSymbol(self).n_sect - 1; var section = self.sections.get(sect_id); @@ -4310,43 +3967,13 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return global_index; } -pub fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { - for (indices) |maybe_prev_id| { - const prev_id = maybe_prev_id orelse continue; - const prev = self.segments.items[prev_id]; - return .{ - .vmaddr = prev.vmaddr + prev.vmsize, - .fileoff = prev.fileoff + prev.filesize, - }; - } - return .{ .vmaddr = 0, .fileoff = 0 }; -} - fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { for (self.segments.items) |seg, i| { const indexes = self.getSectionIndexes(@intCast(u8, i)); - var out_seg = seg; - out_seg.cmdsize = @sizeOf(macho.segment_command_64); - out_seg.nsects = 0; - - // Update section headers count; any section with size of 0 is excluded - // since it doesn't have any data in the final binary file. 
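[editor's note] Because incremental mode now gives every section its own segment, writeSegmentHeaders below stops filtering out zero-sized sections and no longer rebuilds a pruned copy of each segment header (cmdsize/nsects); segments and their section headers are written out verbatim.
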
+ try writer.writeStruct(seg); for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - out_seg.cmdsize += @sizeOf(macho.section_64); - out_seg.nsects += 1; - } - - if (out_seg.nsects == 0 and - (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or - mem.eql(u8, out_seg.segName(), "__DATA"))) continue; - - try writer.writeStruct(out_seg); - for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; try writer.writeStruct(header); } - ncmds.* += 1; } } @@ -4356,6 +3983,24 @@ fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void seg.filesize = 0; seg.vmsize = 0; + for (self.segments.items) |segment, id| { + if (self.linkedit_segment_cmd_index.? == @intCast(u8, id)) continue; + if (seg.vmaddr < segment.vmaddr + segment.vmsize) { + seg.vmaddr = mem.alignForwardGeneric(u64, segment.vmaddr + segment.vmsize, self.page_size); + } + if (seg.fileoff < segment.fileoff + segment.filesize) { + seg.fileoff = mem.alignForwardGeneric(u64, segment.fileoff + segment.filesize, self.page_size); + } + } + // seg.vmaddr = blk: { + // const prev_segment = self.segments.items[self.linkedit_segment_cmd_index.? - 1]; + // break :blk mem.alignForwardGeneric(u64, prev_segment.vmaddr + prev_segment.vmsize, self.page_size); + // }; + // seg.fileoff = blk: { + // const prev_segment = self.segments.items[self.linkedit_segment_cmd_index.? - 1]; + // break :blk mem.alignForwardGeneric(u64, prev_segment.fileoff + prev_segment.filesize, self.page_size); + // }; + try self.writeDyldInfoData(ncmds, lc_writer); try self.writeSymtabs(ncmds, lc_writer); @@ -4471,7 +4116,7 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
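    // [editor's note] Export trie entries encode symbol addresses as offsets from
    // the image base, i.e. the vmaddr of the first __TEXT segment; the change just
    // below therefore reads the base from the new header-only __TEXT segment
    // (header_segment_cmd_index) rather than the old all-in-one text segment.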
log.debug("generating export trie", .{}); - const text_segment = self.segments.items[self.text_segment_cmd_index.?]; + const text_segment = self.segments.items[self.header_segment_cmd_index.?]; const base_address = text_segment.vmaddr; if (self.base.options.output_mode == .Exe) { @@ -4593,7 +4238,8 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { var stub_atom = last_atom; var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?; const base_addr = blk: { - const seg = self.segments.items[self.data_segment_cmd_index.?]; + const seg_id = self.sections.items(.segment_index)[self.la_symbol_ptr_section_index.?]; + const seg = self.segments.items[seg_id]; break :blk seg.vmaddr; }; @@ -4932,7 +4578,8 @@ fn writeCodeSignaturePadding( } fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { - const seg = self.segments.items[self.text_segment_cmd_index.?]; + const seg_id = self.sections.items(.segment_index)[self.text_section_index.?]; + const seg = self.segments.items[seg_id]; var buffer = std.ArrayList(u8).init(self.base.allocator); defer buffer.deinit(); @@ -5005,7 +4652,7 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 { // TODO: header and load commands have to be part of the __TEXT segment - const header_size = default_headerpad_size; + const header_size = self.segments.items[self.header_segment_cmd_index.?].filesize; if (start < header_size) return header_size; @@ -5023,16 +4670,16 @@ fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 { return null; } -// fn allocatedSize(self: *MachO, start: u64) u64 { -// if (start == 0) -// return 0; -// var min_pos: u64 = std.math.maxInt(u64); -// for (self.sections.items(.header)) |header| { -// if (header.offset <= start) continue; -// if (header.offset < min_pos) min_pos = header.offset; -// } -// return min_pos - start; -// } +fn allocatedSize(self: *MachO, start: u64) u64 { + if (start == 0) + return 0; + var min_pos: u64 = std.math.maxInt(u64); + for (self.sections.items(.header)) |header| { + if (header.offset <= start) continue; + if (header.offset < min_pos) min_pos = header.offset; + } + return min_pos - start; +} fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { var start: u64 = 0; @@ -5042,6 +4689,18 @@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { return start; } +fn allocatedVirtualSize(self: *MachO, start: u64) u64 { + if (start == 0) + return 0; + var min_pos: u64 = std.math.maxInt(u64); + for (self.sections.items(.segment_index)) |seg_id| { + const segment = self.segments.items[seg_id]; + if (segment.vmaddr <= start) continue; + if (segment.vmaddr < min_pos) min_pos = segment.vmaddr; + } + return min_pos - start; +} + pub fn makeStaticString(bytes: []const u8) [16]u8 { var buf = [_]u8{0} ** 16; assert(bytes.len <= buf.len); @@ -5645,19 +5304,3 @@ pub fn logAtom(self: *MachO, atom: *const Atom) void { }); } } - -/// Since `os.copy_file_range` cannot be used when copying overlapping ranges within the same file, -/// and since `File.copyRangeAll` uses `os.copy_file_range` under-the-hood, we use heap allocated -/// buffers on all hosts except Linux (if `copy_file_range` syscall is available). 
-pub fn copyRangeAllOverlappingAlloc( - allocator: Allocator, - file: std.fs.File, - in_offset: u64, - out_offset: u64, - len: usize, -) !void { - const buf = try allocator.alloc(u8, len); - defer allocator.free(buf); - const amt = try file.preadAll(buf, in_offset); - try file.pwriteAll(buf[0..amt], out_offset); -} diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index bcd85ad82c..8f22f74e4c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -312,9 +312,8 @@ pub fn parseRelocs(self: *Atom, relocs: []align(1) const macho.relocation_info, const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const gop = (try context.macho_file.getOutputSection(sect)) orelse + const out_sect_id = (try context.macho_file.getOutputSection(sect)) orelse unreachable; - const out_sect_id = gop.sect_id; const sym_index = @intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index a991ba8882..c81602543e 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -512,7 +512,7 @@ fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; seg.filesize = aligned_size; - try MachO.copyRangeAllOverlappingAlloc( + try copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, dwarf_seg.fileoff, @@ -571,7 +571,7 @@ fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; seg.filesize = aligned_size; - try MachO.copyRangeAllOverlappingAlloc( + try copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, dwarf_seg.fileoff, @@ -601,3 +601,16 @@ fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { try self.file.pwriteAll(self.strtab.buffer.items, lc.stroff); } + +fn copyRangeAllOverlappingAlloc( + allocator: Allocator, + file: std.fs.File, + in_offset: u64, + out_offset: u64, + len: usize, +) !void { + const buf = try allocator.alloc(u8, len); + defer allocator.free(buf); + const amt = try file.preadAll(buf, in_offset); + try file.pwriteAll(buf[0..amt], out_offset); +} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 9a6aae9b8b..28244c674f 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -220,15 +220,15 @@ fn filterRelocs( pub fn scanInputSections(self: Object, macho_file: *MachO) !void { for (self.sections.items) |sect| { - const gop = (try macho_file.getOutputSection(sect)) orelse { + const sect_id = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; - const output = macho_file.sections.items(.header)[gop.sect_id]; + const output = macho_file.sections.items(.header)[sect_id]; log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ sect.segName(), sect.sectName(), - gop.sect_id + 1, + sect_id + 1, output.segName(), output.sectName(), }); @@ -335,11 +335,10 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. 
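        // [editor's note] getOutputSection now returns the output section index
        // (?u8) directly instead of a get-or-put result, so this call site and the
        // ones patched above drop the intermediate gop.sect_id unpacking.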
- const gop = (try macho_file.getOutputSection(sect)) orelse { + const out_sect_id = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; - const out_sect_id = gop.sect_id; log.debug(" output sect({d}, '{s},{s}')", .{ out_sect_id + 1, diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 333d8bd6d2..2b68051c5b 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -181,7 +181,6 @@ fn resolveAarch64( const offset = @divExact(narrowed, 8); inst.load_store_register.offset = offset; mem.writeIntLittle(u32, &buffer, inst.toU32()); - log.debug("HMM = {x}", .{std.fmt.fmtSliceHexLower(&buffer)}); }, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { const RegInfo = struct { diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index b3f229ebc4..04bf6176f6 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -876,11 +876,23 @@ fn allocateSegments(macho_file: *MachO) !void { }, 0); } +fn getSegmentAllocBase(macho_file: *MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { + for (indices) |maybe_prev_id| { + const prev_id = maybe_prev_id orelse continue; + const prev = macho_file.segments.items[prev_id]; + return .{ + .vmaddr = prev.vmaddr + prev.vmsize, + .fileoff = prev.fileoff + prev.filesize, + }; + } + return .{ .vmaddr = 0, .fileoff = 0 }; +} + fn allocateSegment(macho_file: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { const index = maybe_index orelse return; const seg = &macho_file.segments.items[index]; - const base = macho_file.getSegmentAllocBase(indices); + const base = getSegmentAllocBase(macho_file, indices); seg.vmaddr = base.vmaddr; seg.fileoff = base.fileoff; seg.filesize = init_size; From 618c7a3546bb5e73a4f7ff4bd38e53700f63c90c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Sep 2022 13:48:00 +0200 Subject: [PATCH 08/17] macho: add logic for expanding segments in memory --- src/link/Coff.zig | 1 + src/link/Elf.zig | 1 + src/link/MachO.zig | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/link/Coff.zig b/src/link/Coff.zig index d9b887f831..e177c62658 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1135,6 +1135,7 @@ fn getDeclOutputSection(self: *Coff, decl: *Module.Decl) u16 { } switch (zig_ty) { + // TODO: what if this is a function pointer? .Fn => break :blk self.text_section_index.?, else => { if (val.castTag(.variable)) |_| { diff --git a/src/link/Elf.zig b/src/link/Elf.zig index a70473fe07..4e67c095c0 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -2320,6 +2320,7 @@ fn getDeclPhdrIndex(self: *Elf, decl: *Module.Decl) !u16 { } switch (zig_ty) { + // TODO: what if this is a function pointer? 
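            // [editor's note] Presumably the concern: a decl whose type tag is .Fn
            // but whose value is a pointer to a function, not a function body, needs
            // a pointer-sized slot in a data section rather than a spot in the
            // executable section. The same TODO was added to the Coff and MachO
            // backends in this patch.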
.Fn => break :blk self.phdr_load_re_index.?, else => { if (val.castTag(.variable)) |_| { diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6537d926db..2b7a3d43ae 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2839,7 +2839,9 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); atom.size = code.len; atom.alignment = required_alignment; - const sect_id = self.getDeclOutputSection(decl); + // TODO: work out logic for disambiguating functions from function pointers + // const sect_id = self.getDeclOutputSection(decl); + const sect_id = self.data_const_section_index.?; const symbol = atom.getSymbolPtr(self); symbol.n_strx = name_str_index; symbol.n_type = macho.N_SECT; @@ -2956,6 +2958,7 @@ fn getDeclOutputSection(self: *MachO, decl: *Module.Decl) u8 { } switch (zig_ty) { + // TODO: what if this is a function pointer? .Fn => break :blk self.text_section_index.?, else => { if (val.castTag(.variable)) |_| { @@ -3709,6 +3712,41 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts return section_id; } +fn moveSectionInVirtualMemory(self: *MachO, sect_id: u32, needed_size: u64) !void { + const header = &self.sections.items(.header)[sect_id]; + const segment = &self.segments.items[self.sections.items(.segment_index)[sect_id]]; + const increased_size = padToIdeal(needed_size); + const old_aligned_end = segment.vmaddr + segment.vmsize; + const new_aligned_end = segment.vmaddr + mem.alignForwardGeneric(u64, increased_size, self.page_size); + const diff = new_aligned_end - old_aligned_end; + log.debug("shifting every segment after {s},{s} in virtual memory by {x}", .{ + header.segName(), + header.sectName(), + diff, + }); + + // TODO: enforce order by increasing VM addresses in self.sections container. + for (self.sections.items(.header)[sect_id + 1 ..]) |*next_header, next_sect_id| { + const index = sect_id + 1 + next_sect_id; + const maybe_last_atom = &self.sections.items(.last_atom)[index]; + const next_segment = &self.segments.items[self.sections.items(.segment_index)[index]]; + next_header.addr += diff; + next_segment.vmaddr += diff; + + if (maybe_last_atom.*) |last_atom| { + var atom = last_atom; + while (true) { + const sym = atom.getSymbolPtr(self); + sym.n_value += diff; + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + } +} + fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) !u64 { const tracy = trace(@src()); defer tracy.end(); @@ -3816,13 +3854,12 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! 
        const sect_vm_capacity = self.allocatedVirtualSize(segment.vmaddr);
         if (needed_size > sect_vm_capacity) {
             self.markRelocsDirtyByAddress(segment.vmaddr + needed_size);
-            @panic("TODO grow section in VM");
+            try self.moveSectionInVirtualMemory(sect_id, needed_size);
         }

         header.size = needed_size;
         segment.filesize = needed_size;
         segment.vmsize = mem.alignForwardGeneric(u64, needed_size, self.page_size);
-        log.warn("updating {s},{s}: {x}, {x}", .{ header.segName(), header.sectName(), segment.vmsize, segment.filesize });
         maybe_last_atom.* = atom;

         self.segment_table_dirty = true;

From dc6480dba5ec4533f6a20292f93fe5c25a1a689f Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Thu, 15 Sep 2022 22:02:40 +0200
Subject: [PATCH 09/17] macho: allow for add and ldr when resolving GOT_LOAD_* relocs

---
 src/arch/aarch64/Emit.zig     |  5 +++--
 src/link/MachO.zig            |  1 +
 src/link/MachO/Relocation.zig | 33 +++++++--------------------------
 3 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig
index a868b74edc..5e1e1c7135 100644
--- a/src/arch/aarch64/Emit.zig
+++ b/src/arch/aarch64/Emit.zig
@@ -845,7 +845,8 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void {
     try emit.writeInstruction(Instruction.adrp(reg.to64(), 0));

     switch (tag) {
-        .load_memory_got => {
+        .load_memory_got,
+        => {
             // ldr reg, reg, offset
             try emit.writeInstruction(Instruction.ldr(
                 reg,
@@ -871,8 +872,8 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void {
                 Instruction.LoadStoreOffset.imm(0),
             ));
         },
-        .load_memory_ptr_got,
         .load_memory_ptr_direct,
+        .load_memory_ptr_got,
         => {
             // add reg, reg, offset
             try emit.writeInstruction(Instruction.add(reg, reg, 0, false));
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 2b7a3d43ae..2e21aa1af9 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -3434,6 +3434,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
     }

     if (self.text_section_index == null) {
+        // Sadly, segments need unique string identifiers for some reason.
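        // [editor's note] Every incremental section owns a dedicated segment here,
        // and segments are elsewhere looked up by name (e.g. getSegmentByName), so
        // reusing "__TEXT" verbatim would presumably collide with the header-only
        // __TEXT segment; hence the uniquified names __TEXT1, __TEXT2, __TEXT3,
        // __DATA_CONST1 and __DATA1 passed to these allocateSection calls.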
self.text_section_index = try self.allocateSection("__TEXT1", "__text", .{ .size = self.base.options.program_code_size_hint, .alignment = switch (cpu_arch) { diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 2b68051c5b..1acbb30a24 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -135,7 +135,9 @@ fn resolveAarch64( inst.pc_relative_address.immlo = @truncate(u2, pages); mem.writeIntLittle(u32, &buffer, inst.toU32()); }, - .ARM64_RELOC_PAGEOFF12 => { + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + => { const narrowed = @truncate(u12, @intCast(u64, target_addr)); if (isArithmeticOp(&buffer)) { var inst = aarch64.Instruction{ @@ -170,18 +172,6 @@ fn resolveAarch64( mem.writeIntLittle(u32, &buffer, inst.toU32()); } }, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - const narrowed = @truncate(u12, @intCast(u64, target_addr)); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), &buffer), - }; - const offset = @divExact(narrowed, 8); - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, &buffer, inst.toU32()); - }, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { const RegInfo = struct { rd: u5, @@ -226,11 +216,8 @@ fn resolveAarch64( mem.writeIntLittle(u32, &buffer, inst.toU32()); }, .ARM64_RELOC_POINTER_TO_GOT => { - const result = math.cast( - i32, - @intCast(i64, target_addr) - @intCast(i64, source_addr), - ) orelse return error.Overflow; - mem.writeIntLittle(u32, &buffer, @bitCast(u32, result)); + const result = @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)); + mem.writeIntLittle(i32, &buffer, result); }, .ARM64_RELOC_SUBTRACTOR => unreachable, .ARM64_RELOC_ADDEND => unreachable, @@ -255,10 +242,7 @@ fn resolveX8664( .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_TLV, => { - const displacement = math.cast( - i32, - @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4, - ) orelse return error.Overflow; + const displacement = @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4); mem.writeIntLittle(u32, buffer[0..4], @bitCast(u32, displacement)); break :blk buffer[0..4]; }, @@ -274,10 +258,7 @@ fn resolveX8664( .X86_64_RELOC_SIGNED_4 => 4, else => unreachable, }; - const displacement = math.cast( - i32, - target_addr - @intCast(i64, source_addr + correction + 4), - ) orelse return error.Overflow; + const displacement = @intCast(i32, target_addr - @intCast(i64, source_addr + correction + 4)); mem.writeIntLittle(u32, buffer[0..4], @bitCast(u32, displacement)); break :blk buffer[0..4]; }, From 7f7669a09e398b5b5856edf0795ed25dfd28405b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Sep 2022 22:13:50 +0200 Subject: [PATCH 10/17] macho: fix building on 32bit targets --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 2e21aa1af9..ada695d6a7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1278,7 +1278,7 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; - const size: u64 = switch (arch) { + const size: u5 = switch (arch) { .x86_64 => 15, .aarch64 => 6 * @sizeOf(u32), else => unreachable, From e35a16c7e0a7382be90b93c734c23bed585cd199 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Sep 2022 22:14:10 +0200 Subject: [PATCH 11/17] macho: make sure both vmsize and filesize for 
segments match --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ada695d6a7..61ba365e33 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3859,7 +3859,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) ! } header.size = needed_size; - segment.filesize = needed_size; + segment.filesize = mem.alignForwardGeneric(u64, needed_size, self.page_size); segment.vmsize = mem.alignForwardGeneric(u64, needed_size, self.page_size); maybe_last_atom.* = atom; From 2c971f00854e339562412dd9af98b51b70731915 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Sep 2022 22:42:31 +0200 Subject: [PATCH 12/17] fix code formatting --- src/arch/aarch64/Emit.zig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 5e1e1c7135..abcbf15a05 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -845,8 +845,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.writeInstruction(Instruction.adrp(reg.to64(), 0)); switch (tag) { - .load_memory_got, - => { + .load_memory_got => { // ldr reg, reg, offset try emit.writeInstruction(Instruction.ldr( reg, From 66942cbc1a42b8d959da657bf0c2b192a3726719 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Sep 2022 22:47:51 +0200 Subject: [PATCH 13/17] macho: make sure both vmsize and filesize for segments match always --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 61ba365e33..1d9f031adc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3687,7 +3687,7 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts .vmaddr = vmaddr, .vmsize = vmsize, .fileoff = off, - .filesize = opts.size, + .filesize = vmsize, .maxprot = opts.prot, .initprot = opts.prot, .nsects = 1, From 275abf7c5712e572c96db825cc4a0e46a4890250 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Sep 2022 12:31:02 +0200 Subject: [PATCH 14/17] macho: fix overlapping segments in file offset --- src/link/MachO.zig | 75 ++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1d9f031adc..708608b414 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1137,11 +1137,10 @@ pub fn allocateSpecialSymbols(self: *MachO) !void { const global = self.getGlobal(name) orelse continue; if (global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg_id = switch (self.mode) { - .incremental => self.sections.items(.segment_index)[self.text_section_index.?], - .one_shot => self.text_segment_cmd_index.?, + const seg = switch (self.mode) { + .incremental => self.getSegment(self.text_section_index.?), + .one_shot => self.segments.items[self.text_segment_cmd_index.?], }; - const seg = self.segments.items[seg_id]; sym.n_sect = 1; sym.n_value = seg.vmaddr; @@ -2239,7 +2238,6 @@ pub fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Exe) return; const seg_id = switch (self.mode) { .incremental => self.header_segment_cmd_index.?, - // .incremental => self.sections.items(.segment_index)[self.text_section_index.?], .one_shot => self.text_segment_cmd_index.?, }; const seg = self.segments.items[seg_id]; @@ -3713,9 +3711,9 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const 
@@ -3713,9 +3711,9 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts
     return section_id;
 }
 
-fn moveSectionInVirtualMemory(self: *MachO, sect_id: u32, needed_size: u64) !void {
+fn moveSectionInVirtualMemory(self: *MachO, sect_id: u8, needed_size: u64) !void {
     const header = &self.sections.items(.header)[sect_id];
-    const segment = &self.segments.items[self.sections.items(.segment_index)[sect_id]];
+    const segment = self.getSegmentPtr(sect_id);
     const increased_size = padToIdeal(needed_size);
     const old_aligned_end = segment.vmaddr + segment.vmsize;
     const new_aligned_end = segment.vmaddr + mem.alignForwardGeneric(u64, increased_size, self.page_size);
@@ -3728,9 +3726,9 @@ fn moveSectionInVirtualMemory(self: *MachO, sect_id: u32, needed_size: u64) !voi
 
     // TODO: enforce order by increasing VM addresses in self.sections container.
     for (self.sections.items(.header)[sect_id + 1 ..]) |*next_header, next_sect_id| {
-        const index = sect_id + 1 + next_sect_id;
+        const index = @intCast(u8, sect_id + 1 + next_sect_id);
         const maybe_last_atom = &self.sections.items(.last_atom)[index];
-        const next_segment = &self.segments.items[self.sections.items(.segment_index)[index]];
+        const next_segment = self.getSegmentPtr(index);
 
         next_header.addr += diff;
         next_segment.vmaddr += diff;
@@ -3753,7 +3751,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64) !
     defer tracy.end();
 
     const sect_id = atom.getSymbol(self).n_sect - 1;
-    const segment = &self.segments.items[self.sections.items(.segment_index)[sect_id]];
+    const segment = self.getSegmentPtr(sect_id);
     const header = &self.sections.items(.header)[sect_id];
     const free_list = &self.sections.items(.free_list)[sect_id];
     const maybe_last_atom = &self.sections.items(.last_atom)[sect_id];
@@ -4017,7 +4015,7 @@ fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void {
 }
 
 fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
-    const seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
+    const seg = self.getLinkeditSegmentPtr();
     seg.filesize = 0;
     seg.vmsize = 0;
 
@@ -4061,15 +4059,14 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
     const slice = self.sections.slice();
     for (slice.items(.last_atom)) |last_atom, sect_id| {
         var atom = last_atom orelse continue;
-        const segment_index = slice.items(.segment_index)[sect_id];
         const header = slice.items(.header)[sect_id];
+        const segment_index = slice.items(.segment_index)[sect_id];
+        const seg = self.getSegment(@intCast(u8, sect_id));
 
         if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable
 
         log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() });
 
-        const seg = self.segments.items[segment_index];
-
         while (true) {
             log.debug("  ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) });
             const sym = atom.getSymbol(self);
@@ -4193,7 +4190,7 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
         try trie.finalize(gpa);
     }
 
-    const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
+    const link_seg = self.getLinkeditSegmentPtr();
     const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64));
     assert(rebase_off == link_seg.fileoff);
     const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items);
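// A simplified stand-in (types and values invented for illustration) for the
// section-to-segment lookup that the getSegment/getSegmentPtr helpers added
// later in this patch centralize: every section row records the index of its
// owning segment, so a single indexed load replaces the repeated
// `self.segments.items[self.sections.items(.segment_index)[sect_id]]` chains.
const std = @import("std");

const Segment = struct { vmaddr: u64 };

const Sections = struct {
    // For each section id, the index of the segment that owns it.
    segment_index: []const u8,
    segments: []Segment,

    fn getSegmentPtr(self: Sections, sect_id: u8) *Segment {
        return &self.segments[self.segment_index[sect_id]];
    }
};

test "section id resolves to its owning segment" {
    var segs = [_]Segment{ .{ .vmaddr = 0x100000000 }, .{ .vmaddr = 0x100004000 } };
    const sections = Sections{
        .segment_index = &[_]u8{ 0, 1, 1 }, // sections 1 and 2 share segment 1
        .segments = &segs,
    };
    try std.testing.expectEqual(@as(u64, 0x100004000), sections.getSegmentPtr(2).vmaddr);
}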
@@ -4275,11 +4272,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
     {
         var stub_atom = last_atom;
         var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?;
-        const base_addr = blk: {
-            const seg_id = self.sections.items(.segment_index)[self.la_symbol_ptr_section_index.?];
-            const seg = self.segments.items[seg_id];
-            break :blk seg.vmaddr;
-        };
+        const base_addr = self.getSegment(self.la_symbol_ptr_section_index.?).vmaddr;
 
         while (true) {
             const laptr_off = blk: {
@@ -4461,7 +4454,7 @@ fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx {
     const nimports = @intCast(u32, imports.items.len);
 
     const nsyms = nlocals + nexports + nimports;
-    const seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
+    const seg = self.getLinkeditSegmentPtr();
     const offset = mem.alignForwardGeneric(
         u64,
         seg.fileoff + seg.filesize,
@@ -4492,7 +4485,7 @@ fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx {
 }
 
 fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void {
-    const seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
+    const seg = self.getLinkeditSegmentPtr();
     const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64));
     const needed_size = self.strtab.buffer.items.len;
     seg.filesize = offset + needed_size - seg.fileoff;
@@ -4520,7 +4513,7 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi
     const iextdefsym = ctx.nlocalsym;
     const iundefsym = iextdefsym + ctx.nextdefsym;
 
-    const seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
+    const seg = self.getLinkeditSegmentPtr();
     const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64));
     const needed_size = nindirectsyms * @sizeOf(u32);
     seg.filesize = offset + needed_size - seg.fileoff;
@@ -4592,7 +4585,7 @@ fn writeCodeSignaturePadding(
     ncmds: *u32,
     lc_writer: anytype,
 ) !u32 {
-    const seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
+    const seg = self.getLinkeditSegmentPtr();
     // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file
     // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271
     const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16);
@@ -4616,8 +4609,7 @@ fn writeCodeSignaturePadding(
 }
 
 fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void {
-    const seg_id = self.sections.items(.segment_index)[self.text_section_index.?];
-    const seg = self.segments.items[seg_id];
+    const seg = self.getSegment(self.text_section_index.?);
 
     var buffer = std.ArrayList(u8).init(self.base.allocator);
     defer buffer.deinit();
@@ -4696,11 +4688,12 @@ fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
     const end = start + padToIdeal(size);
 
-    for (self.sections.items(.header)) |header| {
-        const tight_size = header.size;
+    for (self.sections.items(.segment_index)) |segment_index| {
+        const segment = self.segments.items[segment_index];
+        const tight_size = segment.filesize;
         const increased_size = padToIdeal(tight_size);
-        const test_end = header.offset + increased_size;
-        if (end > header.offset and start < test_end) {
+        const test_end = segment.fileoff + increased_size;
+        if (end > segment.fileoff and start < test_end) {
             return test_end;
         }
     }
 
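// A simplified sketch of the first-fit scan that detectAllocCollision and
// findFreeSpace implement: keep bumping the candidate file offset past every
// allocation it overlaps until a gap fits. Plain (offset, size) ranges stand
// in for section headers or segments, and the 0x1000 starting offset is an
// assumption standing in for the end of the Mach-O header and load commands.
const std = @import("std");

const Range = struct { off: u64, size: u64 };

// Returns the end of the first allocation overlapping [start, start + size),
// or null if the candidate range is free.
fn detectCollision(ranges: []const Range, start: u64, size: u64) ?u64 {
    const end = start + size;
    for (ranges) |range| {
        const range_end = range.off + range.size;
        if (end > range.off and start < range_end) return range_end;
    }
    return null;
}

fn findFreeSpace(ranges: []const Range, size: u64, alignment: u64) u64 {
    var start: u64 = 0x1000; // assumed end of header + load commands
    while (detectCollision(ranges, start, size)) |collision_end| {
        start = std.mem.alignForwardGeneric(u64, collision_end, alignment);
    }
    return start;
}

test "free space search skips allocated ranges" {
    const ranges = [_]Range{.{ .off = 0x1000, .size = 0x100 }};
    try std.testing.expectEqual(@as(u64, 0x1100), findFreeSpace(&ranges, 0x80, 8));
}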
@@ -4712,9 +4705,10 @@ fn allocatedSize(self: *MachO, start: u64) u64 {
     if (start == 0) return 0;
     var min_pos: u64 = std.math.maxInt(u64);
-    for (self.sections.items(.header)) |header| {
-        if (header.offset <= start) continue;
-        if (header.offset < min_pos) min_pos = header.offset;
+    for (self.sections.items(.segment_index)) |segment_index| {
+        const segment = self.segments.items[segment_index];
+        if (segment.fileoff <= start) continue;
+        if (segment.fileoff < min_pos) min_pos = segment.fileoff;
     }
     return min_pos - start;
 }
@@ -4752,6 +4746,21 @@ fn getSegmentByName(self: MachO, segname: []const u8) ?u8 {
     } else return null;
 }
 
+pub fn getSegment(self: MachO, sect_id: u8) macho.segment_command_64 {
+    const index = self.sections.items(.segment_index)[sect_id];
+    return self.segments.items[index];
+}
+
+pub fn getSegmentPtr(self: *MachO, sect_id: u8) *macho.segment_command_64 {
+    const index = self.sections.items(.segment_index)[sect_id];
+    return &self.segments.items[index];
+}
+
+pub fn getLinkeditSegmentPtr(self: *MachO) *macho.segment_command_64 {
+    const index = self.linkedit_segment_cmd_index.?;
+    return &self.segments.items[index];
+}
+
 pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 {
     // TODO investigate caching with a hashmap
     for (self.sections.items(.header)) |header, i| {

From 372acb83500e9f910f48b78eaca4bf35a6e4f9d8 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Fri, 16 Sep 2022 13:49:13 +0200
Subject: [PATCH 15/17] macho: ensure we extend section size when updating last
 atom

---
 src/link/MachO.zig      | 23 +++++++++++++----------
 src/link/MachO/Atom.zig |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 708608b414..f85c710c5d 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -3121,6 +3121,11 @@ fn updateDeclCode(self: *MachO, decl_index: Module.Decl.Index, code: []const u8)
             }
         } else if (code_len < atom.size) {
             self.shrinkAtom(atom, code_len);
+        } else if (atom.next == null) {
+            const header = &self.sections.items(.header)[sect_id];
+            const segment = self.getSegment(sect_id);
+            const needed_size = (sym.n_value + code_len) - segment.vmaddr;
+            header.size = needed_size;
         }
         atom.size = code_len;
     } else {
@@ -4688,12 +4693,11 @@ fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
     const end = start + padToIdeal(size);
 
-    for (self.sections.items(.segment_index)) |segment_index| {
-        const segment = self.segments.items[segment_index];
-        const tight_size = segment.filesize;
+    for (self.sections.items(.header)) |header| {
+        const tight_size = header.size;
         const increased_size = padToIdeal(tight_size);
-        const test_end = segment.fileoff + increased_size;
-        if (end > segment.fileoff and start < test_end) {
+        const test_end = header.offset + increased_size;
+        if (end > header.offset and start < test_end) {
             return test_end;
         }
     }
@@ -4705,10 +4709,9 @@ fn allocatedSize(self: *MachO, start: u64) u64 {
     if (start == 0) return 0;
     var min_pos: u64 = std.math.maxInt(u64);
-    for (self.sections.items(.segment_index)) |segment_index| {
-        const segment = self.segments.items[segment_index];
-        if (segment.fileoff <= start) continue;
-        if (segment.fileoff < min_pos) min_pos = segment.fileoff;
+    for (self.sections.items(.header)) |header| {
+        if (header.offset <= start) continue;
+        if (header.offset < min_pos) min_pos = header.offset;
     }
     return min_pos - start;
 }
@@ -4721,7 +4724,7 @@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 {
     return start;
 }
 
-fn allocatedVirtualSize(self: *MachO, start: u64) u64 {
+pub fn allocatedVirtualSize(self: *MachO, start: u64) u64 {
     if (start == 0) return 0;
     var min_pos: u64 = std.math.maxInt(u64);
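// Why making allocatedVirtualSize public matters for the Atom.zig change just
// below: for the last atom in a section, "grow until the end of the address
// space" over-reports capacity once several segments share the VM layout.
// A sketch of the bound, assuming (its body is only partially shown here)
// that allocatedVirtualSize mirrors allocatedSize but over segment vmaddrs;
// the addresses are invented for illustration.
const std = @import("std");

fn allocatedVirtualSize(vmaddrs: []const u64, start: u64) u64 {
    var min_pos: u64 = std.math.maxInt(u64);
    for (vmaddrs) |vmaddr| {
        if (vmaddr <= start) continue;
        if (vmaddr < min_pos) min_pos = vmaddr;
    }
    return min_pos - start;
}

test "last atom capacity stops at the next allocation" {
    const vmaddrs = [_]u64{ 0x100000000, 0x100004000 };
    // An atom at 0x100000000 may grow up to, but not into, the segment
    // mapped at 0x100004000.
    try std.testing.expectEqual(
        @as(u64, 0x4000),
        allocatedVirtualSize(&vmaddrs, 0x100000000),
    );
}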
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index 8f22f74e4c..b5bc82e769 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -195,7 +195,7 @@ pub fn capacity(self: Atom, macho_file: *MachO) u64 {
     } else {
         // We are the last atom.
         // The capacity is limited only by virtual address space.
-        return std.math.maxInt(u64) - self_sym.n_value;
+        return macho_file.allocatedVirtualSize(self_sym.n_value);
     }
 }

From f4706c23e9aadb53dd6620107ae5c7676e2dab86 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sun, 18 Sep 2022 11:01:29 +0200
Subject: [PATCH 16/17] macho: fix after sync with master

---
 src/link/MachO/Object.zig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 28244c674f..6d24bccad8 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -249,7 +249,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void {
     const in_symtab = self.in_symtab orelse {
         for (self.sections.items) |sect, id| {
             if (sect.isDebug()) continue;
-            const match = (try macho_file.getOutputSection(sect)) orelse {
+            const out_sect_id = (try macho_file.getOutputSection(sect)) orelse {
                 log.debug("  unhandled section", .{});
                 continue;
             };
@@ -261,7 +261,7 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void {
             try self.symtab.append(gpa, .{
                 .n_strx = 0,
                 .n_type = macho.N_SECT,
-                .n_sect = match + 1,
+                .n_sect = out_sect_id + 1,
                 .n_desc = 0,
                 .n_value = sect.addr,
             });
@@ -282,10 +282,10 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void {
                 code,
                 relocs,
                 &.{},
-                match,
+                out_sect_id,
                 sect,
             );
-            try macho_file.addAtomToSection(atom, match);
+            try macho_file.addAtomToSection(atom);
         }
         return;
     };
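// The next patch replaces the reverse linked-list walk over each section's
// last_atom with iteration over hash-map keys; since hash-map order is
// nondeterministic, the atoms are first sorted by symbol address. A minimal
// sketch of that sort-with-context pattern, with a stripped-down Atom that
// stores its address directly (the real context carries *MachO to resolve
// symbols):
const std = @import("std");

const Atom = struct { n_value: u64 };

const ByAddress = struct {
    fn lessThan(_: ByAddress, lhs: *const Atom, rhs: *const Atom) bool {
        return lhs.n_value < rhs.n_value;
    }
};

test "atoms sort by ascending address" {
    var a = Atom{ .n_value = 0x2000 };
    var b = Atom{ .n_value = 0x1000 };
    var atoms = [_]*const Atom{ &a, &b };
    std.sort.sort(*const Atom, &atoms, ByAddress{}, ByAddress.lessThan);
    try std.testing.expectEqual(@as(u64, 0x1000), atoms[0].n_value);
}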
From 4474f8dd6ed58875930f440aad0b893c1c9a414d Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sun, 18 Sep 2022 15:13:45 +0200
Subject: [PATCH 17/17] macho: clean up how we collect dyld data in the
 incremental linker

---
 src/link/MachO.zig | 293 ++++++++++++++++++++++++---------------------
 1 file changed, 154 insertions(+), 139 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index f85c710c5d..347ec14164 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -4033,14 +4033,6 @@ fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void
             seg.fileoff = mem.alignForwardGeneric(u64, segment.fileoff + segment.filesize, self.page_size);
         }
     }
-    // seg.vmaddr = blk: {
-    //     const prev_segment = self.segments.items[self.linkedit_segment_cmd_index.? - 1];
-    //     break :blk mem.alignForwardGeneric(u64, prev_segment.vmaddr + prev_segment.vmsize, self.page_size);
-    // };
-    // seg.fileoff = blk: {
-    //     const prev_segment = self.segments.items[self.linkedit_segment_cmd_index.? - 1];
-    //     break :blk mem.alignForwardGeneric(u64, prev_segment.fileoff + prev_segment.filesize, self.page_size);
-    // };
 
     try self.writeDyldInfoData(ncmds, lc_writer);
     try self.writeSymtabs(ncmds, lc_writer);
@@ -4048,6 +4040,154 @@ fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void
     seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size);
 }
 
+const AtomLessThanByAddressContext = struct {
+    macho_file: *MachO,
+};
+
+fn atomLessThanByAddress(ctx: AtomLessThanByAddressContext, lhs: *Atom, rhs: *Atom) bool {
+    return lhs.getSymbol(ctx.macho_file).n_value < rhs.getSymbol(ctx.macho_file).n_value;
+}
+
+fn collectRebaseData(self: *MachO, pointers: *std.ArrayList(bind.Pointer)) !void {
+    const gpa = self.base.allocator;
+
+    var sorted_atoms_by_address = std.ArrayList(*Atom).init(gpa);
+    defer sorted_atoms_by_address.deinit();
+    try sorted_atoms_by_address.ensureTotalCapacityPrecise(self.rebases.count());
+
+    var it = self.rebases.keyIterator();
+    while (it.next()) |key_ptr| {
+        sorted_atoms_by_address.appendAssumeCapacity(key_ptr.*);
+    }
+
+    std.sort.sort(*Atom, sorted_atoms_by_address.items, AtomLessThanByAddressContext{
+        .macho_file = self,
+    }, atomLessThanByAddress);
+
+    const slice = self.sections.slice();
+    for (sorted_atoms_by_address.items) |atom| {
+        log.debug("  ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) });
+
+        const sym = atom.getSymbol(self);
+        const segment_index = slice.items(.segment_index)[sym.n_sect - 1];
+        const seg = self.getSegment(sym.n_sect - 1);
+
+        const base_offset = sym.n_value - seg.vmaddr;
+
+        const rebases = self.rebases.get(atom).?;
+        try pointers.ensureUnusedCapacity(rebases.items.len);
+        for (rebases.items) |offset| {
+            log.debug("    | rebase at {x}", .{base_offset + offset});
+
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + offset,
+                .segment_id = segment_index,
+            });
+        }
+    }
+}
+
+fn collectBindData(self: *MachO, pointers: *std.ArrayList(bind.Pointer), raw_bindings: anytype) !void {
+    const gpa = self.base.allocator;
+
+    var sorted_atoms_by_address = std.ArrayList(*Atom).init(gpa);
+    defer sorted_atoms_by_address.deinit();
+    try sorted_atoms_by_address.ensureTotalCapacityPrecise(raw_bindings.count());
+
+    var it = raw_bindings.keyIterator();
+    while (it.next()) |key_ptr| {
+        sorted_atoms_by_address.appendAssumeCapacity(key_ptr.*);
+    }
+
+    std.sort.sort(*Atom, sorted_atoms_by_address.items, AtomLessThanByAddressContext{
+        .macho_file = self,
+    }, atomLessThanByAddress);
+
+    const slice = self.sections.slice();
+    for (sorted_atoms_by_address.items) |atom| {
+        log.debug("  ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) });
+
+        const sym = atom.getSymbol(self);
+        const segment_index = slice.items(.segment_index)[sym.n_sect - 1];
+        const seg = self.getSegment(sym.n_sect - 1);
+
+        const base_offset = sym.n_value - seg.vmaddr;
+
+        const bindings = raw_bindings.get(atom).?;
+        try pointers.ensureUnusedCapacity(bindings.items.len);
+        for (bindings.items) |binding| {
+            const bind_sym = self.getSymbol(binding.target);
+            const bind_sym_name = self.getSymbolName(binding.target);
+            const dylib_ordinal = @divTrunc(
+                @bitCast(i16, bind_sym.n_desc),
+                macho.N_SYMBOL_RESOLVER,
+            );
+            var flags: u4 = 0;
+            log.debug("    | bind at {x}, import('{s}') in dylib({d})", .{
+                binding.offset + base_offset,
+                bind_sym_name,
+                dylib_ordinal,
+            });
+            if (bind_sym.weakRef()) {
+                log.debug("    | marking as weak ref ", .{});
+                flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT);
+            }
+            pointers.appendAssumeCapacity(.{
+                .offset = binding.offset + base_offset,
+                .segment_id = segment_index,
+                .dylib_ordinal = dylib_ordinal,
+                .name = bind_sym_name,
+                .bind_flags = flags,
+            });
+        }
+    }
+}
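// The dylib ordinal of an import lives in the high byte of the symbol's
// n_desc field, so dividing by macho.N_SYMBOL_RESOLVER (0x100) recovers it,
// as collectBindData does above. A small check of that arithmetic:
const std = @import("std");
const macho = std.macho;

test "dylib ordinal is the high byte of n_desc" {
    const n_desc: u16 = 2 << 8; // symbol imported from the second dylib
    const ordinal = @divTrunc(@bitCast(i16, n_desc), macho.N_SYMBOL_RESOLVER);
    try std.testing.expectEqual(@as(i16, 2), ordinal);
}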
+
+fn collectExportData(self: *MachO, trie: *Trie) !void {
+    const gpa = self.base.allocator;
+
+    // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER.
+    log.debug("generating export trie", .{});
+
+    const exec_segment = self.segments.items[self.header_segment_cmd_index.?];
+    const base_address = exec_segment.vmaddr;
+
+    if (self.base.options.output_mode == .Exe) {
+        for (&[_]SymbolWithLoc{
+            try self.getEntryPoint(),
+            self.getGlobal("__mh_execute_header").?,
+        }) |global| {
+            const sym = self.getSymbol(global);
+            const sym_name = self.getSymbolName(global);
+            log.debug("  (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value });
+            try trie.put(gpa, .{
+                .name = sym_name,
+                .vmaddr_offset = sym.n_value - base_address,
+                .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
+            });
+        }
+    } else {
+        assert(self.base.options.output_mode == .Lib);
+        for (self.globals.items) |global| {
+            const sym = self.getSymbol(global);
+
+            if (sym.undf()) continue;
+            if (!sym.ext()) continue;
+            if (sym.n_desc == N_DESC_GCED) continue;
+
+            const sym_name = self.getSymbolName(global);
+            log.debug("  (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value });
+            try trie.put(gpa, .{
+                .name = sym_name,
+                .vmaddr_offset = sym.n_value - base_address,
+                .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
+            });
+        }
+    }
+
+    try trie.finalize(gpa);
+}
+
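// collectExportData records each exported symbol's address relative to the
// image base (the vmaddr of the header segment), since dyld slides the whole
// image as one unit. A sketch of that offset computation; both addresses are
// invented examples:
const std = @import("std");

test "export trie entries are image-base relative" {
    const base_address: u64 = 0x100000000; // assumed header segment vmaddr
    const n_value: u64 = 0x100003f80; // assumed address of an exported symbol
    const vmaddr_offset = n_value - base_address;
    try std.testing.expectEqual(@as(u64, 0x3f80), vmaddr_offset);
}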
 fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -4056,144 +4196,19 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
     var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa);
     defer rebase_pointers.deinit();
+    try self.collectRebaseData(&rebase_pointers);
+
     var bind_pointers = std.ArrayList(bind.Pointer).init(gpa);
     defer bind_pointers.deinit();
+    try self.collectBindData(&bind_pointers, self.bindings);
+
     var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa);
     defer lazy_bind_pointers.deinit();
-
-    const slice = self.sections.slice();
-    for (slice.items(.last_atom)) |last_atom, sect_id| {
-        var atom = last_atom orelse continue;
-        const header = slice.items(.header)[sect_id];
-        const segment_index = slice.items(.segment_index)[sect_id];
-        const seg = self.getSegment(@intCast(u8, sect_id));
-
-        if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable
-
-        log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() });
-
-        while (true) {
-            log.debug("  ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) });
-            const sym = atom.getSymbol(self);
-            const base_offset = sym.n_value - seg.vmaddr;
-
-            if (self.rebases.get(atom)) |rebases| {
-                for (rebases.items) |offset| {
-                    log.debug("    | rebase at {x}", .{base_offset + offset});
-                    try rebase_pointers.append(.{
-                        .offset = base_offset + offset,
-                        .segment_id = segment_index,
-                    });
-                }
-            }
-
-            if (self.bindings.get(atom)) |bindings| {
-                for (bindings.items) |binding| {
-                    const bind_sym = self.getSymbol(binding.target);
-                    const bind_sym_name = self.getSymbolName(binding.target);
-                    const dylib_ordinal = @divTrunc(
-                        @bitCast(i16, bind_sym.n_desc),
-                        macho.N_SYMBOL_RESOLVER,
-                    );
-                    var flags: u4 = 0;
-                    log.debug("    | bind at {x}, import('{s}') in dylib({d})", .{
-                        binding.offset + base_offset,
-                        bind_sym_name,
-                        dylib_ordinal,
-                    });
-                    if (bind_sym.weakRef()) {
-                        log.debug("    | marking as weak ref ", .{});
-                        flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT);
-                    }
-                    try bind_pointers.append(.{
-                        .offset = binding.offset + base_offset,
-                        .segment_id = segment_index,
-                        .dylib_ordinal = dylib_ordinal,
-                        .name = bind_sym_name,
-                        .bind_flags = flags,
-                    });
-                }
-            }
-
-            if (self.lazy_bindings.get(atom)) |lazy_bindings| {
-                for (lazy_bindings.items) |binding| {
-                    const bind_sym = self.getSymbol(binding.target);
-                    const bind_sym_name = self.getSymbolName(binding.target);
-                    const dylib_ordinal = @divTrunc(
-                        @bitCast(i16, bind_sym.n_desc),
-                        macho.N_SYMBOL_RESOLVER,
-                    );
-                    var flags: u4 = 0;
-                    log.debug("    | lazy bind at {x} import('{s}') ord({d})", .{
-                        binding.offset + base_offset,
-                        bind_sym_name,
-                        dylib_ordinal,
-                    });
-                    if (bind_sym.weakRef()) {
-                        log.debug("    | marking as weak ref ", .{});
-                        flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT);
-                    }
-                    try lazy_bind_pointers.append(.{
-                        .offset = binding.offset + base_offset,
-                        .segment_id = segment_index,
-                        .dylib_ordinal = dylib_ordinal,
-                        .name = bind_sym_name,
-                        .bind_flags = flags,
-                    });
-                }
-            }
-
-            if (atom.prev) |prev| {
-                atom = prev;
-            } else break;
-        }
-    }
+    try self.collectBindData(&lazy_bind_pointers, self.lazy_bindings);
 
     var trie: Trie = .{};
     defer trie.deinit(gpa);
-
-    {
-        // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER.
-        log.debug("generating export trie", .{});
-
-        const text_segment = self.segments.items[self.header_segment_cmd_index.?];
-        const base_address = text_segment.vmaddr;
-
-        if (self.base.options.output_mode == .Exe) {
-            for (&[_]SymbolWithLoc{
-                try self.getEntryPoint(),
-                self.getGlobal("__mh_execute_header").?,
-            }) |global| {
-                const sym = self.getSymbol(global);
-                const sym_name = self.getSymbolName(global);
-                log.debug("  (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value });
-                try trie.put(gpa, .{
-                    .name = sym_name,
-                    .vmaddr_offset = sym.n_value - base_address,
-                    .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
-                });
-            }
-        } else {
-            assert(self.base.options.output_mode == .Lib);
-            for (self.globals.items) |global| {
-                const sym = self.getSymbol(global);
-
-                if (sym.undf()) continue;
-                if (!sym.ext()) continue;
-                if (sym.n_desc == N_DESC_GCED) continue;
-
-                const sym_name = self.getSymbolName(global);
-                log.debug("  (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value });
-                try trie.put(gpa, .{
-                    .name = sym_name,
-                    .vmaddr_offset = sym.n_value - base_address,
-                    .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
-                });
-            }
-        }
-
-        try trie.finalize(gpa);
-    }
+    try self.collectExportData(&trie);
 
     const link_seg = self.getLinkeditSegmentPtr();
     const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64));
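// A sketch of the __LINKEDIT packing this function performs from here on:
// each dyld blob (rebase, bind, lazy bind, exports) is placed at the previous
// end aligned to @alignOf(u64), and the segment's filesize grows to cover the
// blobs. The fileoff and blob sizes are invented for illustration.
const std = @import("std");
const mem = std.mem;

test "linkedit blobs are packed with u64 alignment" {
    const fileoff: u64 = 0x8000; // assumed __LINKEDIT fileoff
    var offset: u64 = fileoff;
    const blob_sizes = [_]u64{ 0x31, 0x17, 0x43 };
    for (blob_sizes) |size| {
        // Each blob starts at the previous end, rounded up to 8 bytes.
        offset = mem.alignForwardGeneric(u64, offset, @alignOf(u64));
        offset += size;
    }
    const filesize = offset - fileoff;
    try std.testing.expectEqual(@as(u64, 0x93), filesize);
}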