zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 85f065a5115b201b7b6b0b7325a4626211bbb642 (tree)
parent d1908c9f661abebb2879b02c8ea3ac823fec27e7
Author: Jakub Konka <kubkon@jakubkonka.com>
Date:   Tue, 14 Sep 2021 14:20:11 +0200

Merge pull request #9676 from ziglang/zld-incr

MachO: merges stage1 with self-hosted codepath
Diffstat:
M CMakeLists.txt | 2 +-
M lib/std/macho.zig | 40 ++++++++++++++++++++--------------------
M src/codegen.zig | 62 +++++++++++++++++---------------------------------------------
M src/link/MachO.zig | 7383 +++++++++++++++++++++++++++++++++++--------------------------------------------
A src/link/MachO/Atom.zig | 1324 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/link/MachO/DebugSymbols.zig | 172 +++++++++++++++++++++++++++++++++++--------------------------------------------
M src/link/MachO/Object.zig | 270 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
D src/link/MachO/TextBlock.zig | 1221 ------------------------------------------------------------------------------
M src/link/MachO/bind.zig | 9 ---------
M src/link/MachO/commands.zig | 101 +++++++------------------------------------------------------------------------
M test/stage2/darwin.zig | 22 +++++++++++-----------
11 files changed, 4819 insertions(+), 5787 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -574,11 +574,11 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Atom.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" diff --git a/lib/std/macho.zig b/lib/std/macho.zig @@ -601,35 +601,35 @@ pub const segment_command = extern struct { /// command and their size is reflected in cmdsize. pub const segment_command_64 = extern struct { /// LC_SEGMENT_64 - cmd: u32, + cmd: u32 = LC_SEGMENT_64, /// includes sizeof section_64 structs - cmdsize: u32, + cmdsize: u32 = @sizeOf(segment_command_64), /// segment name segname: [16]u8, /// memory address of this segment - vmaddr: u64, + vmaddr: u64 = 0, /// memory size of this segment - vmsize: u64, + vmsize: u64 = 0, /// file offset of this segment - fileoff: u64, + fileoff: u64 = 0, /// amount to map from the file - filesize: u64, + filesize: u64 = 0, /// maximum VM protection - maxprot: vm_prot_t, + maxprot: vm_prot_t = VM_PROT_NONE, /// initial VM protection - initprot: vm_prot_t, + initprot: vm_prot_t = VM_PROT_NONE, /// number of sections in segment - nsects: u32, - flags: u32, + nsects: u32 = 0, + flags: u32 = 0, }; /// A segment is made up of zero or more sections. 
Non-MH_OBJECT files have @@ -700,34 +700,34 @@ pub const section_64 = extern struct { segname: [16]u8, /// memory address of this section - addr: u64, + addr: u64 = 0, /// size in bytes of this section - size: u64, + size: u64 = 0, /// file offset of this section - offset: u32, + offset: u32 = 0, /// section alignment (power of 2) - @"align": u32, + @"align": u32 = 0, /// file offset of relocation entries - reloff: u32, + reloff: u32 = 0, /// number of relocation entries - nreloc: u32, + nreloc: u32 = 0, /// flags (section type and attributes - flags: u32, + flags: u32 = S_REGULAR, /// reserved (for offset or index) - reserved1: u32, + reserved1: u32 = 0, /// reserved (for count or sizeof) - reserved2: u32, + reserved2: u32 = 0, /// reserved - reserved3: u32, + reserved3: u32 = 0, }; pub const nlist = extern struct { diff --git a/src/codegen.zig b/src/codegen.zig @@ -2816,24 +2816,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (self.air.value(callee)) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ - .where = .local, - .where_index = func.owner_decl.link.macho.local_sym_index, - }) orelse unreachable; - break :blk got.addr + got_index * @sizeOf(u64); - }; + // TODO I'm hacking my way through here by repurposing .memory for storing + // index to the GOT target symbol index. 
switch (arch) { .x86_64 => { - try self.genSetReg(Type.initTag(.u64), .rax, .{ .memory = got_addr }); + try self.genSetReg(Type.initTag(.u64), .rax, .{ + .memory = func.owner_decl.link.macho.local_sym_index, + }); // callq *%rax try self.code.ensureCapacity(self.code.items.len + 2); self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 }); }, .aarch64 => { - try self.genSetReg(Type.initTag(.u64), .x30, .{ .memory = got_addr }); + try self.genSetReg(Type.initTag(.u64), .x30, .{ + .memory = func.owner_decl.link.macho.local_sym_index, + }); // blr x30 writeInt(u32, try self.code.addManyAsArray(4), Instruction.blr(.x30).toU32()); }, @@ -4345,29 +4342,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }).toU32()); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO this is super awkward. We are reversing the address of the GOT entry here. - // We should probably have it cached or move the reloc adding somewhere else. - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr; - }; - const where_index = blk: for (macho_file.got_entries.items) |key, id| { - if (got_addr + id * @sizeOf(u64) == addr) break :blk key.where_index; - } else unreachable; + // TODO I think the reloc might be in the wrong place. const decl = macho_file.active_decl.?; // Page reloc for adrp instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, addr), .payload = .{ .page = .{ .kind = .got } }, }); // Pageoff reloc for adrp instruction. 
try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset + 4, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, addr), .payload = .{ .page_off = .{ .kind = .got } }, }); } else { @@ -4628,22 +4616,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const offset = @intCast(u32, self.code.items.len); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO this is super awkward. We are reversing the address of the GOT entry here. - // We should probably have it cached or move the reloc adding somewhere else. - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr; - }; - const where_index = blk: for (macho_file.got_entries.items) |key, id| { - if (got_addr + id * @sizeOf(u64) == x) break :blk key.where_index; - } else unreachable; + // TODO I think the reloc might be in the wrong place. const decl = macho_file.active_decl.?; // Load reloc for LEA instruction. 
try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset - 4, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, x), .payload = .{ .load = .{ .kind = .got } }, }); } else { @@ -4869,17 +4848,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; const got_addr = got.p_vaddr + decl.link.elf.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }) orelse unreachable; - break :blk got.addr + got_index * ptr_bytes; - }; - return MCValue{ .memory = got_addr }; + } else if (self.bin_file.cast(link.File.MachO)) |_| { + // TODO I'm hacking my way through here by repurposing .memory for storing + // index to the GOT target symbol index. 
+ return MCValue{ .memory = decl.link.macho.local_sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/link/MachO.zig b/src/link/MachO.zig @@ -24,6 +24,7 @@ const trace = @import("../tracy.zig").trace; const Air = @import("../Air.zig"); const Allocator = mem.Allocator; const Archive = @import("MachO/Archive.zig"); +const Atom = @import("MachO/Atom.zig"); const Cache = @import("../Cache.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); @@ -39,9 +40,10 @@ const Module = @import("../Module.zig"); const SegmentCommand = commands.SegmentCommand; const StringIndexAdapter = std.hash_map.StringIndexAdapter; const StringIndexContext = std.hash_map.StringIndexContext; -pub const TextBlock = @import("MachO/TextBlock.zig"); const Trie = @import("MachO/Trie.zig"); +pub const TextBlock = Atom; + pub const base_tag: File.Tag = File.Tag.macho; base: File, @@ -95,9 +97,6 @@ source_version_cmd_index: ?u16 = null, build_version_cmd_index: ?u16 = null, uuid_cmd_index: ?u16 = null, code_signature_cmd_index: ?u16 = null, -/// Path to libSystem -/// TODO this is obsolete, remove it. 
-libsystem_cmd_index: ?u16 = null, // __TEXT segment sections text_section_index: ?u16 = null, @@ -132,65 +131,59 @@ tlv_bss_section_index: ?u16 = null, la_symbol_ptr_section_index: ?u16 = null, data_section_index: ?u16 = null, bss_section_index: ?u16 = null, -common_section_index: ?u16 = null, objc_const_section_index: ?u16 = null, objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, +bss_file_offset: u32 = 0, +tlv_bss_file_offset: u32 = 0, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, enum { + none, + stub, + got, +}) = .{}, +tentatives: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, -stub_helper_stubs_start_off: ?u64 = null, +dyld_stub_binder_index: ?u32 = null, +dyld_private_atom: ?*Atom = null, +stub_helper_preamble_atom: ?*Atom = null, strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, -got_entries: std.ArrayListUnmanaged(GotIndirectionKey) = .{}, -got_entries_map: std.AutoHashMapUnmanaged(GotIndirectionKey, u32) = .{}, - -got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, - -stubs: std.ArrayListUnmanaged(u32) = .{}, -stubs_map: std.AutoHashMapUnmanaged(u32, u32) = .{}, +got_entries_map: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, *Atom) = .{}, +stubs_map: std.AutoArrayHashMapUnmanaged(u32, *Atom) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -got_entries_count_dirty: bool = false, load_commands_dirty: bool = false, -rebase_info_dirty: bool = false, -binding_info_dirty: bool = false, -lazy_binding_info_dirty: bool = false, 
-export_info_dirty: bool = false, - -strtab_dirty: bool = false, -strtab_needs_relocation: bool = false, - +sections_order_dirty: bool = false, has_dices: bool = false, has_stabs: bool = false, +/// A helper var to indicate if we are at the start of the incremental updates, or +/// already somewhere further along the update-and-run chain. +/// TODO once we add opening a prelinked output binary from file, this will become +/// obsolete as we will carry on where we left off. +cold_start: bool = false, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, -pending_updates: std.ArrayListUnmanaged(struct { - kind: enum { - got, - stub, - }, - index: u32, -}) = .{}, - -/// A list of text blocks that have surplus capacity. This list can have false +/// A list of atoms that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. /// -/// A text block has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_text_block_size). That is, when it has so +/// An atom has surplus capacity when its overcapacity value is greater than +/// padToIdeal(minimum_atom_size). That is, when it has so /// much extra capacity, that we could fit a small new symbol in it, itself with /// ideal_capacity or more. /// @@ -198,25 +191,23 @@ pending_updates: std.ArrayListUnmanaged(struct { /// /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that /// overcapacity can be negative. A simple way to have negative overcapacity is to -/// allocate a fresh text block, which will have ideal capacity, and then grow it +/// allocate a fresh atom, which will have ideal capacity, and then grow it /// by 1 byte. It will then have -1 overcapacity. 
-text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, +atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{}, -/// Pointer to the last allocated text block -last_text_block: ?*TextBlock = null, +/// Pointer to the last allocated atom +atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, -/// List of TextBlocks that are owned directly by the linker. -/// Currently these are only TextBlocks that are the result of linking -/// object files. TextBlock which take part in incremental linking are +/// List of atoms that are owned directly by the linker. +/// Currently these are only atoms that are the result of linking +/// object files. Atoms which take part in incremental linking are /// at present owned by Module.Decl. /// TODO consolidate this. -managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, - -blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, +managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, /// Table of Decls that are currently alive. /// We store them here so that we can properly dispose of any allocated -/// memory within the TextBlock in the incremental linker. +/// memory within the atom in the incremental linker. /// TODO consolidate this. decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{}, @@ -226,6 +217,12 @@ decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{}, /// somewhere else in the codegen. active_decl: ?*Module.Decl = null, +const PendingUpdate = union(enum) { + resolve_undef: u32, + add_stub_entry: u32, + add_got_entry: u32, +}; + const SymbolWithLoc = struct { // Table where the symbol can be found. 
where: enum { @@ -247,21 +244,10 @@ pub const GotIndirectionKey = struct { /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) -const ideal_factor = 2; +const ideal_factor = 4; /// Default path to dyld -/// TODO instead of hardcoding it, we should probably look through some env vars and search paths -/// instead but this will do for now. -const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; - -/// Default lib search path -/// TODO instead of hardcoding it, we should probably look through some env vars and search paths -/// instead but this will do for now. -const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib"; - -const LIB_SYSTEM_NAME: [*:0]const u8 = "System"; -/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it -const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib"; +const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; /// In order for a slice of bytes to be considered eligible to keep metadata pointing at /// it as a possible place to put new symbols, it must have enough room for this many bytes @@ -269,6 +255,10 @@ const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B. const minimum_text_block_size = 64; pub const min_text_capacity = padToIdeal(minimum_text_block_size); +/// Virtual memory offset corresponds to the size of __PAGEZERO segment and start of +/// __TEXT segment. +const pagezero_vmsize: u64 = 0x100000000; + pub const Export = struct { sym_index: ?u32 = null, }; @@ -323,31 +313,32 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio return self; } - if (!options.strip and options.module != null) { - // Create dSYM bundle. 
- const dir = options.module.?.zig_cache_artifact_directory; - log.debug("creating {s}.dSYM bundle in {s}", .{ sub_path, dir.path }); + // TODO Migrate DebugSymbols to the merged linker codepaths + // if (!options.strip and options.module != null) { + // // Create dSYM bundle. + // const dir = options.module.?.zig_cache_artifact_directory; + // log.debug("creating {s}.dSYM bundle in {s}", .{ sub_path, dir.path }); - const d_sym_path = try fmt.allocPrint( - allocator, - "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", - .{sub_path}, - ); - defer allocator.free(d_sym_path); + // const d_sym_path = try fmt.allocPrint( + // allocator, + // "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", + // .{sub_path}, + // ); + // defer allocator.free(d_sym_path); - var d_sym_bundle = try dir.handle.makeOpenPath(d_sym_path, .{}); - defer d_sym_bundle.close(); + // var d_sym_bundle = try dir.handle.makeOpenPath(d_sym_path, .{}); + // defer d_sym_bundle.close(); - const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ - .truncate = false, - .read = true, - }); + // const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ + // .truncate = false, + // .read = true, + // }); - self.d_sym = .{ - .base = self, - .file = d_sym_file, - }; - } + // self.d_sym = .{ + // .base = self, + // .file = d_sym_file, + // }; + // } // Index 0 is always a null symbol. 
try self.locals.append(allocator, .{ @@ -357,13 +348,12 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio .n_desc = 0, .n_value = 0, }); + try self.strtab.append(allocator, 0); try self.populateMissingMetadata(); - try self.writeLocalSymbol(0); if (self.d_sym) |*ds| { try ds.populateMissingMetadata(allocator); - try ds.writeLocalSymbol(0); } return self; @@ -403,179 +393,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } } - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - if (use_stage1) { - return self.linkWithZld(comp); - } else { - switch (self.base.options.effectiveOutputMode()) { - .Exe, .Obj => {}, - .Lib => return error.TODOImplementWritingLibFiles, - } - return self.flushModule(comp); - } -} - -pub fn flushModule(self: *MachO, comp: *Compilation) !void { - _ = comp; - const tracy = trace(@src()); - defer tracy.end(); - - const output_mode = self.base.options.output_mode; - - switch (output_mode) { - .Exe => { - if (self.entry_addr) |addr| { - // Update LC_MAIN with entry offset. - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const main_cmd = &self.load_commands.items[self.main_cmd_index.?].Main; - main_cmd.entryoff = addr - text_segment.inner.vmaddr; - main_cmd.stacksize = self.base.options.stack_size_override orelse 0; - self.load_commands_dirty = true; - } - try self.writeRebaseInfoTable(); - try self.writeBindInfoTable(); - try self.writeLazyBindInfoTable(); - try self.writeExportInfo(); - try self.writeAllGlobalAndUndefSymbols(); - try self.writeIndirectSymbolTable(); - try self.writeStringTable(); - try self.updateLinkeditSegmentSizes(); - - if (self.d_sym) |*ds| { - // Flush debug symbols bundle. - try ds.flushModule(self.base.allocator, self.base.options); - } - - if (self.requires_adhoc_codesig) { - // Preallocate space for the code signature. 
- // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. - try self.writeCodeSignaturePadding(); - } - }, - .Obj => {}, - .Lib => return error.TODOImplementWritingLibFiles, - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } - - assert(!self.got_entries_count_dirty); - assert(!self.load_commands_dirty); - assert(!self.rebase_info_dirty); - assert(!self.binding_info_dirty); - assert(!self.lazy_binding_info_dirty); - assert(!self.export_info_dirty); - assert(!self.strtab_dirty); - assert(!self.strtab_needs_relocation); - - if (self.requires_adhoc_codesig) { - try self.writeCodeSignature(); // code signing always comes last - } -} - -fn resolveSearchDir( - arena: *Allocator, - dir: []const u8, - syslibroot: ?[]const u8, -) !?[]const u8 { - var candidates = std.ArrayList([]const u8).init(arena); - - if (fs.path.isAbsolute(dir)) { - if (syslibroot) |root| { - const common_dir = if (std.Target.current.os.tag == .windows) blk: { - // We need to check for disk designator and strip it out from dir path so - // that we can concat dir with syslibroot. 
- // TODO we should backport this mechanism to 'MachO.Dylib.parseDependentLibs()' - const disk_designator = fs.path.diskDesignatorWindows(dir); - - if (mem.indexOf(u8, dir, disk_designator)) |where| { - break :blk dir[where + disk_designator.len ..]; - } - - break :blk dir; - } else dir; - const full_path = try fs.path.join(arena, &[_][]const u8{ root, common_dir }); - try candidates.append(full_path); - } - } - - try candidates.append(dir); - - for (candidates.items) |candidate| { - // Verify that search path actually exists - var tmp = fs.cwd().openDir(candidate, .{}) catch |err| switch (err) { - error.FileNotFound => continue, - else => |e| return e, - }; - defer tmp.close(); - - return candidate; - } - - return null; -} - -fn resolveLib( - arena: *Allocator, - search_dirs: []const []const u8, - name: []const u8, - ext: []const u8, -) !?[]const u8 { - const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext }); - - for (search_dirs) |dir| { - const full_path = try fs.path.join(arena, &[_][]const u8{ dir, search_name }); - - // Check if the file exists. - const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { - error.FileNotFound => continue, - else => |e| return e, - }; - defer tmp.close(); - - return full_path; - } - - return null; -} - -fn resolveFramework( - arena: *Allocator, - search_dirs: []const []const u8, - name: []const u8, - ext: []const u8, -) !?[]const u8 { - const search_name = try std.fmt.allocPrint(arena, "{s}{s}", .{ name, ext }); - const prefix_path = try std.fmt.allocPrint(arena, "{s}.framework", .{name}); - - for (search_dirs) |dir| { - const full_path = try fs.path.join(arena, &[_][]const u8{ dir, prefix_path, search_name }); - - // Check if the file exists. 
- const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { - error.FileNotFound => continue, - else => |e| return e, - }; - defer tmp.close(); - - return full_path; - } - - return null; -} - -fn linkWithZld(self: *MachO, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); @@ -584,11 +401,11 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { const arena = &arena_allocator.allocator; const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; if (use_stage1) { const obj_basename = try std.zig.binNameAlloc(arena, .{ .root_name = self.base.options.root_name, @@ -600,8 +417,8 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { break :blk full_obj_path; } + const obj_basename = self.base.intermediary_basename orelse break :blk null; try self.flushModule(comp); - const obj_basename = self.base.intermediary_basename.?; const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); break :blk full_obj_path; } else null; @@ -617,8 +434,11 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { defer if (!self.base.options.disable_lld_caching) man.deinit(); var digest: [Cache.hex_digest_len]u8 = undefined; + var needs_full_relink = true; + + cache: { + if (use_stage1 and self.base.options.disable_lld_caching) break :cache; - if (!self.base.options.disable_lld_caching) { man = comp.cache_parent.obtain(); // We are about to obtain this lock, so here we give other processes a chance first. 
@@ -652,17 +472,36 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { id_symlink_basename, &prev_digest_buf, ) catch |err| blk: { - log.debug("MachO Zld new_digest={s} error: {s}", .{ std.fmt.fmtSliceHexLower(&digest), @errorName(err) }); + log.debug("MachO Zld new_digest={s} error: {s}", .{ + std.fmt.fmtSliceHexLower(&digest), + @errorName(err), + }); // Handle this as a cache miss. break :blk prev_digest_buf[0..0]; }; if (mem.eql(u8, prev_digest, &digest)) { - log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); // Hot diggity dog! The output binary is already there. - self.base.lock = man.toOwnedLock(); - return; + + if (use_stage1) { + log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); + self.base.lock = man.toOwnedLock(); + return; + } else { + log.debug("MachO Zld digest={s} match", .{std.fmt.fmtSliceHexLower(&digest)}); + if (!self.cold_start) { + log.debug(" no need to relink objects", .{}); + needs_full_relink = false; + } else { + log.debug(" TODO parse prelinked binary and continue linking where we left off", .{}); + // TODO until such time however, perform a full relink of objects. + needs_full_relink = true; + } + } } - log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), std.fmt.fmtSliceHexLower(&digest) }); + log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ + std.fmt.fmtSliceHexLower(prev_digest), + std.fmt.fmtSliceHexLower(&digest), + }); // We are about to change the output file to be different, so we invalidate the build hash now. 
directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { @@ -670,7 +509,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { else => |e| return e, }; } - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); if (self.base.options.output_mode == .Obj) { @@ -697,270 +535,456 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - // Positional arguments to the linker such as object files and static archives. - var positionals = std.ArrayList([]const u8).init(arena); - - try positionals.appendSlice(self.base.options.objects); + if (use_stage1) { + const sub_path = self.base.options.emit.?.sub_path; + self.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(self.base.options), + }); + try self.populateMissingMetadata(); - for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); + // TODO mimicking insertion of null symbol from incremental linker. + // This will need to moved. + try self.locals.append(self.base.allocator, .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.strtab.append(self.base.allocator, 0); } - if (module_obj_path) |p| { - try positionals.append(p); - } + if (needs_full_relink) { + self.objects.clearRetainingCapacity(); + self.archives.clearRetainingCapacity(); + self.dylibs.clearRetainingCapacity(); + self.dylibs_map.clearRetainingCapacity(); + self.referenced_dylibs.clearRetainingCapacity(); - try positionals.append(comp.compiler_rt_static_lib.?.full_object_path); + // TODO figure out how to clear atoms from objects, etc. 
- // libc++ dep - if (self.base.options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); - } + // Positional arguments to the linker such as object files and static archives. + var positionals = std.ArrayList([]const u8).init(arena); - // Shared and static libraries passed via `-l` flag. - var search_lib_names = std.ArrayList([]const u8).init(arena); + try positionals.appendSlice(self.base.options.objects); - const system_libs = self.base.options.system_libs.keys(); - for (system_libs) |link_lib| { - // By this time, we depend on these libs being dynamically linked libraries and not static libraries - // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which - // case we want to avoid prepending "-l". - if (Compilation.classifyFileExt(link_lib) == .shared_library) { - try positionals.append(link_lib); - continue; + for (comp.c_object_table.keys()) |key| { + try positionals.append(key.status.success.object_path); } - try search_lib_names.append(link_lib); - } + if (module_obj_path) |p| { + try positionals.append(p); + } - var lib_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.lib_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try lib_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); + if (comp.compiler_rt_static_lib) |lib| { + try positionals.append(lib.full_object_path); } - } - var libs = std.ArrayList([]const u8).init(arena); - var lib_not_found = false; - for (search_lib_names.items) |lib_name| { - // Assume ld64 default: -search_paths_first - // Look in each directory for a dylib (stub first), and then for archive - // TODO implement alternative: -search_dylibs_first - for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { - if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { - try 
libs.append(full_path); - break; + // libc++ dep + if (self.base.options.link_libcpp) { + try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); + try positionals.append(comp.libcxx_static_lib.?.full_object_path); + } + + // Shared and static libraries passed via `-l` flag. + var search_lib_names = std.ArrayList([]const u8).init(arena); + + const system_libs = self.base.options.system_libs.keys(); + for (system_libs) |link_lib| { + // By this time, we depend on these libs being dynamically linked libraries and not static libraries + // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which + // case we want to avoid prepending "-l". + if (Compilation.classifyFileExt(link_lib) == .shared_library) { + try positionals.append(link_lib); + continue; } - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; + + try search_lib_names.append(link_lib); } - } - if (lib_not_found) { - log.warn("Library search paths:", .{}); - for (lib_dirs.items) |dir| { - log.warn(" {s}", .{dir}); + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.lib_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } } - } - // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. - var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { - // Try stub file first. If we hit it, then we're done as the stub file - // re-exports every single symbol definition. 
- if (try resolveLib(arena, lib_dirs.items, "System", ".tbd")) |full_path| { - try libs.append(full_path); - libsystem_available = true; - break :blk; + var libs = std.ArrayList([]const u8).init(arena); + var lib_not_found = false; + for (search_lib_names.items) |lib_name| { + // Assume ld64 default: -search_paths_first + // Look in each directory for a dylib (stub first), and then for archive + // TODO implement alternative: -search_dylibs_first + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { + try libs.append(full_path); + break; + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } } - // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib - // doesn't export libc.dylib which we'll need to resolve subsequently also. - if (try resolveLib(arena, lib_dirs.items, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, lib_dirs.items, "c", ".dylib")) |libc_path| { - try libs.append(libsystem_path); - try libs.append(libc_path); + + // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. + var libsystem_available = false; + if (self.base.options.sysroot != null) blk: { + // Try stub file first. If we hit it, then we're done as the stub file + // re-exports every single symbol definition. + if (try resolveLib(arena, lib_dirs.items, "System", ".tbd")) |full_path| { + try libs.append(full_path); libsystem_available = true; break :blk; } + // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib + // doesn't export libc.dylib which we'll need to resolve subsequently also. 
+ if (try resolveLib(arena, lib_dirs.items, "System", ".dylib")) |libsystem_path| { + if (try resolveLib(arena, lib_dirs.items, "c", ".dylib")) |libc_path| { + try libs.append(libsystem_path); + try libs.append(libc_path); + libsystem_available = true; + break :blk; + } + } + } + if (!libsystem_available) { + const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ + "libc", "darwin", "libSystem.B.tbd", + }); + try libs.append(full_path); } - } - if (!libsystem_available) { - const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ - "libc", "darwin", "libSystem.B.tbd", - }); - try libs.append(full_path); - } - // frameworks - var framework_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.framework_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try framework_dirs.append(search_dir); - } else { - log.warn("directory not found for '-F{s}'", .{dir}); + // frameworks + var framework_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.framework_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try framework_dirs.append(search_dir); + } else { + log.warn("directory not found for '-F{s}'", .{dir}); + } } - } - var framework_not_found = false; - for (self.base.options.frameworks) |framework| { - for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { - if (try resolveFramework(arena, framework_dirs.items, framework, ext)) |full_path| { - try libs.append(full_path); - break; + var framework_not_found = false; + for (self.base.options.frameworks) |framework| { + for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { + if (try resolveFramework(arena, framework_dirs.items, framework, ext)) |full_path| { + try libs.append(full_path); + break; + } + } else { + log.warn("framework not found for '-framework {s}'", .{framework}); + framework_not_found = true; } - } else { - log.warn("framework not found for 
'-framework {s}'", .{framework}); - framework_not_found = true; } - } - if (framework_not_found) { - log.warn("Framework search paths:", .{}); - for (framework_dirs.items) |dir| { - log.warn(" {s}", .{dir}); + if (framework_not_found) { + log.warn("Framework search paths:", .{}); + for (framework_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } } - } - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - try rpath_table.putNoClobber(rpath, {}); - } + // rpaths + var rpath_table = std.StringArrayHashMap(void).init(arena); + for (self.base.options.rpath_list) |rpath| { + if (rpath_table.contains(rpath)) continue; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ + .cmd = macho.LC_RPATH, + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); + try rpath_table.putNoClobber(rpath, {}); + self.load_commands_dirty = true; + } - var rpaths = std.ArrayList([]const u8).init(arena); - try rpaths.ensureCapacity(rpath_table.count()); - for (rpath_table.keys()) |*key| { - rpaths.appendAssumeCapacity(key.*); - } + if (self.base.options.verbose_link) { + var argv = std.ArrayList([]const u8).init(arena); - if (self.base.options.verbose_link) { - var argv = std.ArrayList([]const u8).init(arena); + try argv.append("zig"); + try argv.append("ld"); - try argv.append("zig"); - try argv.append("ld"); + if (is_exe_or_dyn_lib) { + try argv.append("-dynamic"); + } - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } + if (is_dyn_lib) { + try argv.append("-dylib"); 
- if (is_dyn_lib) { - try argv.append("-dylib"); + const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); + try argv.append("-install_name"); + try argv.append(install_name); + } - const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - try argv.append("-install_name"); - try argv.append(install_name); - } + if (self.base.options.sysroot) |syslibroot| { + try argv.append("-syslibroot"); + try argv.append(syslibroot); + } - if (self.base.options.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } + for (rpath_table.keys()) |rpath| { + try argv.append("-rpath"); + try argv.append(rpath); + } - for (rpaths.items) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); - } + try argv.appendSlice(positionals.items); - try argv.appendSlice(positionals.items); + try argv.append("-o"); + try argv.append(full_out_path); - try argv.append("-o"); - try argv.append(full_out_path); + try argv.append("-lSystem"); + try argv.append("-lc"); - try argv.append("-lSystem"); - try argv.append("-lc"); + for (search_lib_names.items) |l_name| { + try argv.append(try std.fmt.allocPrint(arena, "-l{s}", .{l_name})); + } - for (search_lib_names.items) |l_name| { - try argv.append(try std.fmt.allocPrint(arena, "-l{s}", .{l_name})); - } + for (self.base.options.lib_dirs) |lib_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); + } - for (self.base.options.lib_dirs) |lib_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); - } + for (self.base.options.frameworks) |framework| { + try argv.append(try std.fmt.allocPrint(arena, "-framework {s}", .{framework})); + } - for (self.base.options.frameworks) |framework| { - try argv.append(try std.fmt.allocPrint(arena, "-framework {s}", .{framework})); - } + for (self.base.options.framework_dirs) |framework_dir| { + try argv.append(try 
std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); + } - for (self.base.options.framework_dirs) |framework_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); + Compilation.dump_argv(argv.items); } - Compilation.dump_argv(argv.items); + try self.parseInputFiles(positionals.items, self.base.options.sysroot); + try self.parseLibs(libs.items, self.base.options.sysroot); } - const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); - - // TODO mimicking insertion of null symbol from incremental linker. - // This will need to moved. - try self.locals.append(self.base.allocator, .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.append(self.base.allocator, 0); - - try self.populateMetadata(); - try self.addRpathLCs(rpaths.items); - try self.parseInputFiles(positionals.items, self.base.options.sysroot); - try self.parseLibs(libs.items, self.base.options.sysroot); - try self.resolveSymbols(); - try self.parseTextBlocks(); - try self.addLoadDylibLCs(); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); + if (self.bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + sect.offset = self.bss_file_offset; + } + if (self.tlv_bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + sect.offset = self.tlv_bss_file_offset; + } - { - // Add dyld_stub_binder as the final GOT entry. 
- const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse unreachable; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - const got_index = @intCast(u32, self.got_entries.items.len); - const got_entry = GotIndirectionKey{ - .where = .undef, - .where_index = resolv.where_index, - }; - try self.got_entries.append(self.base.allocator, got_entry); - try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + for (self.objects.items) |*object, object_id| { + if (object.analyzed) continue; + try self.resolveSymbolsInObject(@intCast(u16, object_id)); } - try self.sortSections(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateTextBlocks(); - try self.flushZld(); + try self.resolveSymbolsInArchives(); + try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.resolveSymbolsInDylibs(); + try self.createDsoHandleAtom(); + try self.addCodeSignatureLC(); + + for (self.unresolved.keys()) |index| { + const sym = self.undefs.items[index]; + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); + } + if (self.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + + try self.createTentativeDefAtoms(); + try self.parseObjectsIntoAtoms(); + try self.allocateGlobalSymbols(); + try self.writeAtoms(); + + if (self.bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + self.bss_file_offset = sect.offset; + sect.offset = 0; + } + if (self.tlv_bss_section_index) |idx| { 
+ const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + self.tlv_bss_file_offset = sect.offset; + sect.offset = 0; + } + + try self.flushModule(comp); } - if (!self.base.options.disable_lld_caching) { + cache: { + if (use_stage1 and self.base.options.disable_lld_caching) break :cache; // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.warn("failed to save linking hash digest file: {s}", .{@errorName(err)}); + log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); }; // Again failure here only means an unnecessary cache miss. man.writeManifest() catch |err| { - log.warn("failed to write cache manifest when linking: {s}", .{@errorName(err)}); + log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); }; // We hang on to this lock so that the output file path can be used without // other processes clobbering it. self.base.lock = man.toOwnedLock(); } + + self.cold_start = false; +} + +pub fn flushModule(self: *MachO, comp: *Compilation) !void { + _ = comp; + + const tracy = trace(@src()); + defer tracy.end(); + + try self.setEntryPoint(); + try self.updateSectionOrdinals(); + try self.writeLinkeditSegment(); + + if (self.d_sym) |*ds| { + // Flush debug symbols bundle. + try ds.flushModule(self.base.allocator, self.base.options); + } + + if (self.requires_adhoc_codesig) { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. 
+ try self.writeCodeSignaturePadding(); + } + + try self.writeLoadCommands(); + try self.writeHeader(); + + if (self.entry_addr == null and self.base.options.output_mode == .Exe) { + log.debug("flushing. no_entry_point_found = true", .{}); + self.error_flags.no_entry_point_found = true; + } else { + log.debug("flushing. no_entry_point_found = false", .{}); + self.error_flags.no_entry_point_found = false; + } + + assert(!self.load_commands_dirty); + + if (self.requires_adhoc_codesig) { + try self.writeCodeSignature(); // code signing always comes last + } +} + +fn resolveSearchDir( + arena: *Allocator, + dir: []const u8, + syslibroot: ?[]const u8, +) !?[]const u8 { + var candidates = std.ArrayList([]const u8).init(arena); + + if (fs.path.isAbsolute(dir)) { + if (syslibroot) |root| { + const common_dir = if (std.Target.current.os.tag == .windows) blk: { + // We need to check for disk designator and strip it out from dir path so + // that we can concat dir with syslibroot. + // TODO we should backport this mechanism to 'MachO.Dylib.parseDependentLibs()' + const disk_designator = fs.path.diskDesignatorWindows(dir); + + if (mem.indexOf(u8, dir, disk_designator)) |where| { + break :blk dir[where + disk_designator.len ..]; + } + + break :blk dir; + } else dir; + const full_path = try fs.path.join(arena, &[_][]const u8{ root, common_dir }); + try candidates.append(full_path); + } + } + + try candidates.append(dir); + + for (candidates.items) |candidate| { + // Verify that search path actually exists + var tmp = fs.cwd().openDir(candidate, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer tmp.close(); + + return candidate; + } + + return null; +} + +fn resolveLib( + arena: *Allocator, + search_dirs: []const []const u8, + name: []const u8, + ext: []const u8, +) !?[]const u8 { + const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext }); + + for (search_dirs) |dir| { + const full_path = try 
fs.path.join(arena, &[_][]const u8{ dir, search_name }); + + // Check if the file exists. + const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer tmp.close(); + + return full_path; + } + + return null; +} + +fn resolveFramework( + arena: *Allocator, + search_dirs: []const []const u8, + name: []const u8, + ext: []const u8, +) !?[]const u8 { + const search_name = try std.fmt.allocPrint(arena, "{s}{s}", .{ name, ext }); + const prefix_path = try std.fmt.allocPrint(arena, "{s}.framework", .{name}); + + for (search_dirs) |dir| { + const full_path = try fs.path.join(arena, &[_][]const u8{ dir, prefix_path, search_name }); + + // Check if the file exists. + const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer tmp.close(); + + return full_path; + } + + return null; } fn parseObject(self: *MachO, path: []const u8) !bool { @@ -1080,6 +1104,7 @@ pub fn parseDylib(self: *MachO, path: []const u8, opts: DylibCreateOpts) ParseDy try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); if (!(opts.is_dependent or self.referenced_dylibs.contains(dylib_id))) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -1098,6 +1123,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const break :full_path try self.base.allocator.dupe(u8, path); }; defer self.base.allocator.free(full_path); + log.debug("parsing input file path '{s}'", .{full_path}); if (try self.parseObject(full_path)) continue; if (try self.parseArchive(full_path)) continue; @@ -1111,6 +1137,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void { for (libs) |lib| { + log.debug("parsing lib path '{s}'", .{lib}); if 
(try self.parseDylib(lib, .{ .syslibroot = syslibroot, })) continue; @@ -1126,18 +1153,19 @@ pub const MatchingSection = struct { }; pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const segname = commands.segmentName(sect); const sectname = commands.sectionName(sect); - const res: ?MatchingSection = blk: { switch (commands.sectionType(sect)) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__const", .{}); + self.text_const_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1150,10 +1178,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO it seems the common values within the sections in objects are deduplicated/merged // on merging the sections' contents. 
if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methname", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + self.objc_methname_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methname", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1162,10 +1193,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_methtype")) { if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + self.objc_methtype_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methtype", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1174,8 +1208,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classname")) { if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); + self.objc_classname_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_classname", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1185,10 +1224,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio } if (self.cstring_section_index == null) { - self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + self.cstring_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__cstring", + sect.size, + 
sect.@"align", + .{ + .flags = macho.S_CSTRING_LITERALS, + }, + ); } break :blk .{ @@ -1199,27 +1243,37 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio macho.S_LITERAL_POINTERS => { if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ - .flags = macho.S_LITERAL_POINTERS, - }); + self.objc_selrefs_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_selrefs", + sect.size, + sect.@"align", + .{ + .flags = macho.S_LITERAL_POINTERS, + }, + ); } break :blk .{ .seg = self.data_segment_cmd_index.?, .sect = self.objc_selrefs_section_index.?, }; + } else { + // TODO investigate + break :blk null; } - - // TODO investigate - break :blk null; }, macho.S_MOD_INIT_FUNC_POINTERS => { if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }); + self.mod_init_func_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__mod_init_func", + sect.size, + sect.@"align", + .{ + .flags = macho.S_MOD_INIT_FUNC_POINTERS, + }, + ); } break :blk .{ @@ -1229,10 +1283,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_MOD_TERM_FUNC_POINTERS => { if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }); + self.mod_term_func_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__mod_term_func", + sect.size, + 
sect.@"align", + .{ + .flags = macho.S_MOD_TERM_FUNC_POINTERS, + }, + ); } break :blk .{ @@ -1241,38 +1300,34 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; }, macho.S_ZEROFILL => { - if (mem.eql(u8, sectname, "__common")) { - if (self.common_section_index == null) { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - } else { - if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__bss", .{ + if (self.bss_section_index == null) { + self.bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__bss", + sect.size, + sect.@"align", + .{ .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + }, + ); } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { - self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_vars", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); + self.tlv_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_vars", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }, + ); } break :blk .{ @@ -1282,10 +1337,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_REGULAR => { if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_data", .{ - .flags = 
macho.S_THREAD_LOCAL_REGULAR, - }); + self.tlv_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_data", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }, + ); } break :blk .{ @@ -1295,10 +1355,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_ZEROFILL => { if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_bss", .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); + self.tlv_bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_bss", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }, + ); } break :blk .{ @@ -1311,8 +1376,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO I believe __eh_frame is currently part of __unwind_info section // in the latest ld64 output. if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); + self.eh_frame_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__eh_frame", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1323,8 +1393,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO audit this: is this the right mapping? 
if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1335,10 +1410,17 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio macho.S_REGULAR => { if (commands.sectionIsCode(sect)) { if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); + self.text_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__text", + sect.size, + sect.@"align", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } break :blk .{ @@ -1359,8 +1441,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__TEXT")) { if (mem.eql(u8, sectname, "__ustring")) { if (self.ustring_section_index == null) { - self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__ustring", .{}); + self.ustring_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__ustring", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1369,8 +1456,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); + self.gcc_except_tab_section_index = try 
self.allocateSection( + self.text_segment_cmd_index.?, + "__gcc_except_tab", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1379,8 +1471,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_methlist")) { if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); + self.objc_methlist_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methlist", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1394,8 +1491,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio mem.eql(u8, sectname, "__gopclntab")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1404,8 +1506,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else { if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__const", .{}); + self.text_const_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1417,8 +1524,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__DATA_CONST")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); 
+ self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1430,8 +1542,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__DATA")) { if (mem.eql(u8, sectname, "__const")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1440,8 +1557,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__cfstring")) { if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); + self.objc_cfstring_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__cfstring", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1450,8 +1572,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classlist")) { if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); + self.objc_classlist_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__objc_classlist", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1460,8 +1587,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { if 
(self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); + self.objc_imageinfo_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__objc_imageinfo", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1470,8 +1602,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_const")) { if (self.objc_const_section_index == null) { - self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_const", .{}); + self.objc_const_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1480,8 +1617,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classrefs")) { if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); + self.objc_classrefs_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_classrefs", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1490,8 +1632,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_data")) { if (self.objc_data_section_index == null) { - self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_data", .{}); + self.objc_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_data", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1500,8 +1647,13 @@ 
pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else { if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__data", .{}); + self.data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__data", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1522,563 +1674,605 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio else => break :blk null, } }; - - if (res) |match| { - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - } - return res; } -fn sortSections(self: *MachO) !void { - var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer text_index_mapping.deinit(); - var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer data_const_index_mapping.deinit(); - var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer data_index_mapping.deinit(); - - { - // __TEXT segment - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - 
seg.sections.appendAssumeCapacity(sections[index]); - try text_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } +pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*Atom { + const code = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(code); + mem.set(u8, code, 0); + + const atom = try self.base.allocator.create(Atom); + errdefer self.base.allocator.destroy(atom); + atom.* = Atom.empty; + atom.local_sym_index = local_sym_index; + atom.size = size; + atom.alignment = alignment; + try atom.code.appendSlice(self.base.allocator, code); + try self.managed_atoms.append(self.base.allocator, atom); + + return atom; +} - { - // __DATA_CONST segment - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_const_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } +pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sym = self.locals.items[atom.local_sym_index]; + const file_offset = sect.offset + sym.n_value - sect.addr; + try atom.resolveRelocs(self); + 
log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + try self.base.file.?.pwriteAll(atom.code.items, file_offset); +} - { - // __DATA segment - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); - - // __DATA segment - const indices = &[_]*?u16{ - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - &self.common_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } +fn allocateLocalSymbols(self: *MachO, match: MatchingSection, offset: i64) !void { + var atom = self.atoms.get(match) orelse return; - { - var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; - try transient.ensureCapacity(self.base.allocator, self.blocks.count()); + while (true) { + const atom_sym = &self.locals.items[atom.local_sym_index]; + atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const old = entry.key_ptr.*; - const sect = if (old.seg == self.text_segment_cmd_index.?) - text_index_mapping.get(old.sect).? - else if (old.seg == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(old.sect).? 
- else - data_index_mapping.get(old.sect).?; - transient.putAssumeCapacityNoClobber(.{ - .seg = old.seg, - .sect = sect, - }, entry.value_ptr.*); + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = @intCast(u64, @intCast(i64, alias_sym.n_value) + offset); } - self.blocks.clearAndFree(self.base.allocator); - self.blocks.deinit(self.base.allocator); - self.blocks = transient; - } - - { - // Create new section ordinals. - self.section_ordinals.clearRetainingCapacity(); - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - for (text_seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = self.text_segment_cmd_index.?, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - for (data_const_seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = self.data_const_segment_cmd_index.?, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - for (data_seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = self.data_segment_cmd_index.?, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } + + if (atom.prev) |prev| { + atom = prev; + } else break; } } -fn allocateTextSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); +fn allocateGlobalSymbols(self: *MachO) !void { + var sym_it = 
self.symbol_resolver.valueIterator(); + while (sym_it.next()) |resolv| { + if (resolv.where != .global) continue; - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; - seg.inner.fileoff = 0; - seg.inner.vmaddr = base_vmaddr; + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + log.debug("allocating global symbol {s} at 0x{x}", .{ self.getString(sym.n_strx), local_sym.n_value }); + } +} - // Set stubs and stub_helper sizes - const stubs = &seg.sections.items[self.stubs_section_index.?]; - const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; - stubs.size += nstubs * stubs.reserved2; +fn writeAtoms(self: *MachO) !void { + var buffer = std.ArrayList(u8).init(self.base.allocator); + defer buffer.deinit(); + var file_offset: ?u64 = null; - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - stub_helper.size += nstubs * stub_size; + var it = self.atoms.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + var atom: *Atom = entry.value_ptr.*; - var sizeofcmds: u64 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } + log.debug("writing atoms in {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); + while (atom.prev) |prev| { + atom = prev; + } - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
- var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - min_alignment = math.max(min_alignment, alignment); - } + while (true) { + if (atom.dirty) { + const atom_sym = self.locals.items[atom.local_sym_index]; + const padding_size: u64 = if (atom.next) |next| blk: { + const next_sym = self.locals.items[next.local_sym_index]; + break :blk next_sym.n_value - (atom_sym.n_value + atom.size); + } else 0; + + log.debug(" (adding atom {s} to buffer: {})", .{ self.getString(atom_sym.n_strx), atom_sym }); + + try atom.resolveRelocs(self); + try buffer.appendSlice(atom.code.items); + try buffer.ensureUnusedCapacity(padding_size); + + var i: usize = 0; + while (i < padding_size) : (i += 1) { + buffer.appendAssumeCapacity(0); + } - assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; - const shift: u32 = blk: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; - const factor = @divTrunc(diff, min_alignment); - break :blk @intCast(u32, factor * min_alignment); - }; + if (file_offset == null) { + file_offset = sect.offset + atom_sym.n_value - sect.addr; + } + atom.dirty = false; + } else { + if (file_offset) |off| { + try self.base.file.?.pwriteAll(buffer.items, off); + } + file_offset = null; + buffer.clearRetainingCapacity(); + } - if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; + if (atom.next) |next| { + atom = next; + } else { + if (file_offset) |off| { + try self.base.file.?.pwriteAll(buffer.items, off); + } + file_offset = null; + buffer.clearRetainingCapacity(); + break; + } } } } -fn allocateDataConstSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.items.len); - - const text_seg = 
self.load_commands.items[self.text_segment_cmd_index.?].Segment; - seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; - seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; +pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*Atom { + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_got_entry"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + switch (key.where) { + .local => { + try atom.relocs.append(self.base.allocator, .{ + .offset = 0, + .where = .local, + .where_index = key.where_index, + .payload = .{ + .unsigned = .{ + .subtractor = null, + .addend = 0, + .is_64bit = true, + }, + }, + }); + try atom.rebases.append(self.base.allocator, 0); + }, + .undef => { + try atom.bindings.append(self.base.allocator, .{ + .local_sym_index = key.where_index, + .offset = 0, + }); + }, + } + return atom; +} - // Set got size - const got = &seg.sections.items[self.got_section_index.?]; - got.size += nentries * @sizeOf(u64); - - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); -} - -fn allocateDataSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); - - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; - seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - - // Set la_symbol_ptr and data size - const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - const data = &seg.sections.items[self.data_section_index.?]; - la_symbol_ptr.size += nstubs * @sizeOf(u64); - data.size += @sizeOf(u64); // We need at least 8bytes for address of 
dyld_stub_binder - - try self.allocateSegment(self.data_segment_cmd_index.?, 0); +fn createDyldPrivateAtom(self: *MachO) !void { + if (self.dyld_private_atom != null) return; + const local_sym_index = @intCast(u32, self.locals.items.len); + const sym = try self.locals.addOne(self.base.allocator); + sym.* = .{ + .n_strx = try self.makeString("l_zld_dyld_private"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + self.dyld_private_atom = atom; + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } -fn allocateLinkeditSegment(self: *MachO) void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; - seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; +fn createStubHelperPreambleAtom(self: *MachO) !void { + if (self.stub_helper_preamble_atom != null) return; + const arch = self.base.options.target.cpu.arch; + const size: u64 = switch (arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u32 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + const sym = try self.locals.addOne(self.base.allocator); + sym.* = .{ + .n_strx = try self.makeString("l_zld_stub_preamble"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + 
const dyld_private_sym_index = self.dyld_private_atom.?.local_sym_index; + switch (arch) { + .x86_64 => { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); + // lea %r11, [rip + disp] + atom.code.items[0] = 0x4c; + atom.code.items[1] = 0x8d; + atom.code.items[2] = 0x1d; + atom.relocs.appendAssumeCapacity(.{ + .offset = 3, + .where = .local, + .where_index = dyld_private_sym_index, + .payload = .{ + .signed = .{ + .addend = 0, + .correction = 0, + }, + }, + }); + // push %r11 + atom.code.items[7] = 0x41; + atom.code.items[8] = 0x53; + // jmp [rip + disp] + atom.code.items[9] = 0xff; + atom.code.items[10] = 0x25; + atom.relocs.appendAssumeCapacity(.{ + .offset = 11, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .load = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + }, + .aarch64 => { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 4); + // adrp x17, 0 + mem.writeIntLittle(u32, atom.code.items[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .where = .local, + .where_index = dyld_private_sym_index, + .payload = .{ + .page = .{ + .kind = .page, + .addend = 0, + }, + }, + }); + // add x17, x17, 0 + mem.writeIntLittle(u32, atom.code.items[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = dyld_private_sym_index, + .payload = .{ + .page_off = .{ + .kind = .page, + .addend = 0, + .op_kind = .arithmetic, + }, + }, + }); + // stp x16, x17, [sp, #-16]! 
+ mem.writeIntLittle(u32, atom.code.items[8..][0..4], aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32()); + // adrp x16, 0 + mem.writeIntLittle(u32, atom.code.items[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 12, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .page = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + // ldr x16, [x16, 0] + mem.writeIntLittle(u32, atom.code.items[16..][0..4], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 16, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .page_off = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + // br x16 + mem.writeIntLittle(u32, atom.code.items[20..][0..4], aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } + self.stub_helper_preamble_atom = atom; + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const alignment_pow_2 = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment_pow_2, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } -fn allocateSegment(self: *MachO, index: u16, offset: u64) !void { - const seg = &self.load_commands.items[index].Segment; +pub fn createStubHelperAtom(self: *MachO) !*Atom { + const arch = self.base.options.target.cpu.arch; + const stub_size: u4 = switch (arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u2 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_stub_in_stub_helper"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); + try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); - // Allocate the sections according to their alignment at the beginning of the segment. - var start: u64 = offset; - for (seg.sections.items) |*sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; - start = end_aligned; + switch (arch) { + .x86_64 => { + // pushq + atom.code.items[0] = 0x68; + // Next 4 bytes 1..4 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
+ // jmpq + atom.code.items[5] = 0xe9; + atom.relocs.appendAssumeCapacity(.{ + .offset = 6, + .where = .local, + .where_index = self.stub_helper_preamble_atom.?.local_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + }, + .aarch64 => { + const literal = blk: { + const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + break :blk try math.cast(u18, div_res); + }; + // ldr w16, literal + mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.ldr(.w16, .{ + .literal = literal, + }).toU32()); + // b disp + mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.b(0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = self.stub_helper_preamble_atom.?.local_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + // Next 4 bytes 8..12 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. + }, + else => unreachable, } - const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size); - seg.inner.filesize = seg_size_aligned; - seg.inner.vmsize = seg_size_aligned; + return atom; } -fn allocateTextBlocks(self: *MachO) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - // Find the first block - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - - var base_addr: u64 = sect.addr; - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - - log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - log.debug(" {}", .{sect}); - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); - - const sym = &self.locals.items[block.local_sym_index]; - sym.n_value = base_addr; - sym.n_sect = n_sect; +pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym_index: u32) !*Atom { + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_lazy_ptr"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + try atom.relocs.append(self.base.allocator, .{ + .offset = 0, + .where = .local, + .where_index = stub_sym_index, + .payload = .{ + .unsigned = .{ + .subtractor = null, + .addend = 0, + .is_64bit = true, + }, + }, + }); + try atom.rebases.append(self.base.allocator, 0); + try atom.lazy_bindings.append(self.base.allocator, .{ + .local_sym_index = lazy_binding_sym_index, + .offset = 0, + }); + return atom; +} - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - base_addr, - base_addr + block.size, - block.size, - block.alignment, +pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { + const arch = self.base.options.target.cpu.arch; + const alignment: u2 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_stub"), + .n_type = macho.N_SECT, + .n_sect = 
0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); + switch (arch) { + .x86_64 => { + // jmp + atom.code.items[0] = 0xff; + atom.code.items[1] = 0x25; + try atom.relocs.append(self.base.allocator, .{ + .offset = 2, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, }); - - // Update each alias (if any) - for (block.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_addr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - for (block.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_addr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_addr += block.size; - - if (block.next) |next| { - block = next; - } else break; - } - } - - // Update globals - { - var sym_it = self.symbol_resolver.valueIterator(); - while (sym_it.next()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - } + }, + .aarch64 => { + try atom.relocs.ensureTotalCapacity(self.base.allocator, 2); + // adrp x16, pages + mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .page = .{ + .kind = .page, + .addend = 0, + }, + }, + }); + // ldr x16, x16, offset + mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + 
.where_index = laptr_sym_index, + .payload = .{ + .page_off = .{ + .kind = .page, + .addend = 0, + .op_kind = .load, + }, + }, + }); + // br x16 + mem.writeIntLittle(u32, atom.code.items[8..12], aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, } + return atom; } -fn writeTextBlocks(self: *MachO) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = commands.sectionType(sect); - - log.debug(" for section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - log.debug(" {}", .{sect}); - - var code = try self.base.allocator.alloc(u8, sect.size); - defer self.base.allocator.free(code); - - if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { - mem.set(u8, code, 0); - } else { - var base_off: u64 = 0; +fn createTentativeDefAtoms(self: *MachO) !void { + if (self.tentatives.count() == 0) return; + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative defintion. 
+ while (self.tentatives.popOrNull()) |entry| { + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); + const global_sym = &self.globals.items[entry.key]; + const size = global_sym.n_value; + const alignment = (global_sym.n_desc >> 8) & 0x0f; - const sym = self.locals.items[block.local_sym_index]; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - aligned_base_off, - aligned_base_off + block.size, - block.size, - block.alignment, - }); + global_sym.n_value = 0; + global_sym.n_desc = 0; + global_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - try block.resolveRelocs(self); - mem.copy(u8, code[aligned_base_off..][0..block.size], block.code.items); + const local_sym_index = @intCast(u32, self.locals.items.len); + const local_sym = try self.locals.addOne(self.base.allocator); + local_sym.* = .{ + .n_strx = global_sym.n_strx, + .n_type = macho.N_SECT, + .n_sect = global_sym.n_sect, + .n_desc = 0, + .n_value = 0, + }; - // TODO NOP for machine code instead of just zeroing out - const padding_len = aligned_base_off - base_off; - mem.set(u8, code[base_off..][0..padding_len], 0); + const resolv = self.symbol_resolver.getPtr(local_sym.n_strx) orelse unreachable; + resolv.local_sym_index = local_sym_index; - base_off = aligned_base_off + block.size; + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); + local_sym.n_value = vaddr; + global_sym.n_value = vaddr; + } +} - if (block.next) |next| { - block = next; - } else break; - } +fn createDsoHandleAtom(self: 
*MachO) !void { + if (self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{ + .bytes = &self.strtab, + })) |n_strx| blk: { + const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; + if (resolv.where != .undef) break :blk; - mem.set(u8, code[base_off..], 0); - } + const undef = &self.undefs.items[resolv.where_index]; + const match: MatchingSection = .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = undef.n_strx, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + nlist.n_desc = macho.N_WEAK_DEF; + try self.globals.append(self.base.allocator, nlist); - try self.base.file.?.pwriteAll(code, sect.offset); - } -} + _ = self.unresolved.fetchSwapRemove(resolv.where_index); -fn writeStubHelperCommon(self: *MachO) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - self.stub_helper_stubs_start_off = blk: { - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr + data.size - @sizeOf(u64); - const displacement = try 
math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse unreachable; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - const got_index = self.got_entries_map.get(.{ - .where = .undef, - .where_index = resolv.where_index, - }) orelse unreachable; - const addr = got.addr + got_index * @sizeOf(u64); - const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr + data.size - @sizeOf(u64); - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - // stp x16, x17, [sp, #-16]! - code[8] = 0xf0; - code[9] = 0x47; - code[10] = 0xbf; - code[11] = 0xa9; - binder_blk_outer: { - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse unreachable; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - const got_index = self.got_entries_map.get(.{ - .where = .undef, - .where_index = resolv.where_index, - }) orelse unreachable; - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr + got_index * @sizeOf(u64); - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // Pad with nop to please division. - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Use adrp followed by ldr(immediate). 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - code[20] = 0x00; - code[21] = 0x02; - code[22] = 0x1f; - code[23] = 0xd6; - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + 6 * @sizeOf(u32); - }, - else => unreachable, - } - }; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; - for (self.stubs.items) |_, i| { - const index = @intCast(u32, i); - // TODO weak bound pointers - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); + // We create an empty atom for this symbol. + // TODO perhaps we should special-case special symbols? Create a separate + // linked list of atoms? + const atom = try self.createEmptyAtom(local_sym_index, 0, 0); + const sym = &self.locals.items[local_sym_index]; + const vaddr = try self.allocateAtom(atom, 0, 1, match); + sym.n_value = vaddr; + atom.dirty = false; // We don't really want to write it to file. 
} } -fn resolveSymbolsInObject( - self: *MachO, - object_id: u16, - tentatives: *std.AutoArrayHashMap(u32, void), - unresolved: *std.AutoArrayHashMap(u32, void), -) !void { +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); @@ -2150,7 +2344,7 @@ fn resolveSymbolsInObject( const global = &self.globals.items[resolv.where_index]; if (symbolIsTentative(global.*)) { - _ = tentatives.fetchSwapRemove(resolv.where_index); + _ = self.tentatives.fetchSwapRemove(resolv.where_index); } else if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) { @@ -2168,15 +2362,7 @@ fn resolveSymbolsInObject( continue; }, .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } @@ -2210,7 +2396,7 @@ fn resolveSymbolsInObject( .where_index = global_sym_index, .file = object_id, }); - _ = try tentatives.getOrPut(global_sym_index); + _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); continue; }; @@ -2234,7 +2420,7 @@ fn resolveSymbolsInObject( .n_desc = sym.n_desc, .n_value = sym.n_value, }); - _ = try tentatives.getOrPut(global_sym_index); + _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); resolv.* = .{ .where = .global, .where_index = global_sym_index, @@ -2247,7 +2433,7 @@ fn resolveSymbolsInObject( .n_desc = 0, .n_value = 0, }; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } } else { @@ -2267,27 +2453,17 @@ fn resolveSymbolsInObject( .where_index = undef_sym_index, .file = object_id, }); - _ = try unresolved.getOrPut(undef_sym_index); + try 
self.unresolved.putNoClobber(self.base.allocator, undef_sym_index, .none); } } } -fn resolveSymbols(self: *MachO) !void { - var tentatives = std.AutoArrayHashMap(u32, void).init(self.base.allocator); - defer tentatives.deinit(); - - var unresolved = std.AutoArrayHashMap(u32, void).init(self.base.allocator); - defer unresolved.deinit(); - - // First pass, resolve symbols in provided objects. - for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id), &tentatives, &unresolved); - } +fn resolveSymbolsInArchives(self: *MachO) !void { + if (self.archives.items.len == 0) return; - // Second pass, resolve symbols in static libraries. var next_sym: usize = 0; - loop: while (next_sym < unresolved.count()) { - const sym = self.undefs.items[unresolved.keys()[next_sym]]; + loop: while (next_sym < self.unresolved.count()) { + const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); for (self.archives.items) |archive| { @@ -2301,102 +2477,21 @@ fn resolveSymbols(self: *MachO) !void { const object_id = @intCast(u16, self.objects.items.len); const object = try self.objects.addOne(self.base.allocator); object.* = try archive.parseObject(self.base.allocator, self.base.options.target, offsets.items[0]); - try self.resolveSymbolsInObject(object_id, &tentatives, &unresolved); + try self.resolveSymbolsInObject(object_id); continue :loop; } next_sym += 1; } +} - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative defintion. 
- while (tentatives.popOrNull()) |entry| { - const sym = &self.globals.items[entry.key]; - const match: MatchingSection = blk: { - if (self.common_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - - const size = sym.n_value; - const code = try self.base.allocator.alloc(u8, size); - defer self.base.allocator.free(code); - mem.set(u8, code, 0); - const alignment = (sym.n_desc >> 8) & 0x0f; - - sym.n_value = 0; - sym.n_desc = 0; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - var local_sym = sym.*; - local_sym.n_type = macho.N_SECT; - - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, local_sym); - - const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; - resolv.local_sym_index = local_sym_index; - - const block = try self.base.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = local_sym_index; - block.size = size; - block.alignment = alignment; - try self.managed_blocks.append(self.base.allocator, block); - - try block.code.appendSlice(self.base.allocator, code); - - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &self.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.base.allocator, match, block); - } - } - - // Third pass, resolve symbols in dynamic libraries. - { - // Put dyld_stub_binder as an undefined special symbol. - const n_strx = try self.makeString("dyld_stub_binder"); - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = undef_sym_index, - }); - _ = try unresolved.getOrPut(undef_sym_index); - } +fn resolveSymbolsInDylibs(self: *MachO) !void { + if (self.dylibs.items.len == 0) return; - next_sym = 0; - loop: while (next_sym < unresolved.count()) { - const sym = self.undefs.items[unresolved.keys()[next_sym]]; + var next_sym: usize = 0; + loop: while (next_sym < self.unresolved.count()) { + const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); for (self.dylibs.items) |dylib, id| { @@ -2404,6 +2499,7 @@ fn resolveSymbols(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -2413,3115 +2509,1859 @@ fn resolveSymbols(self: *MachO) !void { undef.n_type |= macho.N_EXT; undef.n_desc = 
@intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; - _ = unresolved.fetchSwapRemove(resolv.where_index); + if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| outer_blk: { + switch (entry.value) { + .none => {}, + .got => return error.TODOGotHint, + .stub => { + if (self.stubs_map.contains(resolv.where_index)) break :outer_blk; + const stub_helper_atom = blk: { + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const atom = try self.createStubHelperAtom(); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + break :blk atom; + }; + const laptr_atom = blk: { + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }; + const atom = try self.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + resolv.where_index, + ); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + break :blk atom; + }; + const stub_atom = blk: { + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }; + const atom = try self.createStubAtom(laptr_atom.local_sym_index); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + break :blk atom; + }; + try self.stubs_map.putNoClobber(self.base.allocator, resolv.where_index, stub_atom); + }, + } + } continue :loop; } next_sym += 1; } +} - // Fourth pass, handle synthetic symbols and flag any undefined references. - if (self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{ - .bytes = &self.strtab, - })) |n_strx| blk: { - const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; - if (resolv.where != .undef) break :blk; - - const undef = &self.undefs.items[resolv.where_index]; - const match: MatchingSection = .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.base.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.base.allocator, nlist); +fn resolveDyldStubBinder(self: *MachO) !void { + if (self.dyld_stub_binder_index != null) return; - _ = unresolved.fetchSwapRemove(resolv.where_index); + const n_strx = try self.makeString("dyld_stub_binder"); + const sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ + .where = .undef, + .where_index = sym_index, + }); + const sym = &self.undefs.items[sym_index]; + const sym_name = self.getString(n_strx); - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = 
local_sym_index, - }; + for (self.dylibs.items) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; - // We create an empty atom for this symbol. - // TODO perhaps we should special-case special symbols? Create a separate - // linked list of atoms? - const block = try self.base.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = local_sym_index; - block.size = 0; - block.alignment = 0; - try self.managed_blocks.append(self.base.allocator, block); - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.base.allocator, match, block); + const dylib_id = @intCast(u16, id); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.addLoadDylibLC(dylib_id); + try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } - } - for (unresolved.keys()) |index| { - const sym = self.undefs.items[index]; - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + self.dyld_stub_binder_index = sym_index; - log.err("undefined reference to symbol '{s}'", .{sym_name}); - log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); + break; } - if (unresolved.count() > 0) + if (self.dyld_stub_binder_index == null) { + log.err("undefined reference to symbol '{s}'", .{sym_name}); return error.UndefinedSymbolReference; + } + + // Add dyld_stub_binder as the final GOT entry. 
+ const got_entry = GotIndirectionKey{ + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + }; + const atom = try self.createGotAtom(got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, atom); + const match = MatchingSection{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }; + const atom_sym = &self.locals.items[atom.local_sym_index]; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } -fn parseTextBlocks(self: *MachO) !void { +fn parseObjectsIntoAtoms(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var parsed_atoms = Object.ParsedAtoms.init(self.base.allocator); + defer parsed_atoms.deinit(); + + var first_atoms = Object.ParsedAtoms.init(self.base.allocator); + defer first_atoms.deinit(); + + var section_metadata = std.AutoHashMap(MatchingSection, struct { + size: u64, + alignment: u32, + }).init(self.base.allocator); + defer section_metadata.deinit(); + for (self.objects.items) |*object, object_id| { - try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self); - } -} + if (object.analyzed) continue; -fn populateMetadata(self: *MachO) !void { - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), - }); - } + var atoms_in_objects = try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_id), self); + defer atoms_in_objects.deinit(); - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try 
self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - }), - }); - } + var it = atoms_in_objects.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const last_atom = entry.value_ptr.*; + var atom = last_atom; + + const metadata = try section_metadata.getOrPut(match); + if (!metadata.found_existing) { + metadata.value_ptr.* = .{ + .size = 0, + .alignment = 0, + }; + } - if (self.text_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.base.allocator, "__text", .{ - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }); - } + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + metadata.value_ptr.size += mem.alignForwardGeneric(u64, atom.size, alignment); + metadata.value_ptr.alignment = math.max(metadata.value_ptr.alignment, atom.alignment); - if (self.stubs_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 6, - 
.aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.base.allocator, "__stubs", .{ - .@"align" = alignment, - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); - } + const sym = self.locals.items[atom.local_sym_index]; + log.debug(" {s}: n_value=0x{x}, size=0x{x}, alignment=0x{x}", .{ + self.getString(sym.n_strx), + sym.n_value, + atom.size, + atom.alignment, + }); - if (self.stub_helper_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_helper_size: u6 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - try text_seg.addSection(self.base.allocator, "__stub_helper", .{ - .size = stub_helper_size, - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); - } + if (atom.prev) |prev| { + atom = prev; + } else break; + } - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } 
+ if (parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } + _ = try parsed_atoms.put(match, last_atom); - if (self.got_section_index == null) { - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__got", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); - } + if (!first_atoms.contains(match)) { + try first_atoms.putNoClobber(match, atom); + } + } - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); + object.analyzed = true; } - if (self.la_symbol_ptr_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__la_symbol_ptr", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, + var it = section_metadata.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const metadata = entry.value_ptr.*; + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + log.debug("{s},{s} => size: 0x{x}, alignment: 0x{x}", .{ + 
commands.segmentName(sect.*), + commands.sectionName(sect.*), + metadata.size, + metadata.alignment, }); - } - if (self.data_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__data", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); - } + const sect_size = if (self.atoms.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size - sect.addr; + } else 0; - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .maxprot = macho.VM_PROT_READ, - .initprot = macho.VM_PROT_READ, - }), - }); - } + sect.@"align" = math.max(sect.@"align", metadata.alignment); + const needed_size = @intCast(u32, metadata.size + sect_size); + try self.growSection(match, needed_size); + sect.size = needed_size; - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .DyldInfoOnly = .{ - .cmd = macho.LC_DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - } + var base_vaddr = if (self.atoms.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size; + } else sect.addr; + const n_sect = @intCast(u8, 
self.section_ordinals.getIndex(match).? + 1); - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); + var atom = first_atoms.get(match).?; + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); + + const sym = &self.locals.items[atom.local_sym_index]; + sym.n_value = base_vaddr; + sym.n_sect = n_sect; + + log.debug(" {s}: start=0x{x}, end=0x{x}, size=0x{x}, alignment=0x{x}", .{ + self.getString(sym.n_strx), + base_vaddr, + base_vaddr + atom.size, + atom.size, + atom.alignment, + }); + + // Update each alias (if any) + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = base_vaddr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the atom + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = base_vaddr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + base_vaddr += atom.size; + + if (atom.next) |next| { + atom = next; + } else break; + } + + if (self.atoms.getPtr(match)) |last| { + const first_atom = first_atoms.get(match).?; + last.*.next = first_atom; + first_atom.prev = last.*; + last.* = first_atom; + } + _ = try self.atoms.put(self.base.allocator, match, parsed_atoms.get(match).?); } +} - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Dysymtab = .{ - .cmd = macho.LC_DYSYMTAB, - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - 
.nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - } +fn addLoadDylibLC(self: *MachO, id: u16) !void { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.load_commands_dirty = true; +} - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), - @sizeOf(u64), - )); - var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = macho.LC_LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); - try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); - } +fn addCodeSignatureLC(self: *MachO) !void { + if (self.code_signature_cmd_index != null or !self.requires_adhoc_codesig) return; + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + self.load_commands_dirty = true; +} - if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) 
{ - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Main = .{ - .cmd = macho.LC_MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - } +fn setEntryPoint(self: *MachO) !void { + if (self.base.options.output_mode != .Exe) return; - if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - defer self.base.allocator.free(install_name); - var dylib_cmd = try commands.createLoadDylibCommand( - self.base.allocator, - install_name, - 2, - 0x10000, // TODO forward user-provided versions - 0x10000, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + // TODO we should respect the -entry flag passed in by the user to set a custom + // entrypoint. For now, assume default of `_main`. 
+ const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "_main"), StringIndexAdapter{ + .bytes = &self.strtab, + }) orelse { + log.err("'_main' export not found", .{}); + return error.MissingMainEntrypoint; + }; + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; + assert(resolv.where == .global); + const sym = self.globals.items[resolv.where_index]; + const ec = &self.load_commands.items[self.main_cmd_index.?].Main; + ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); + ec.stacksize = self.base.options.stack_size_override orelse 0; + self.entry_addr = sym.n_value; + self.load_commands_dirty = true; +} + +pub fn deinit(self: *MachO) void { + if (build_options.have_llvm) { + if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); } - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); + if (self.d_sym) |*ds| { + ds.deinit(self.base.allocator); } - if (self.build_version_cmd_index == null) { - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const ver = self.base.options.target.os.version_range.semver.min; - const version = ver.major << 16 | ver.minor << 8 | ver.patch; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = commands.emptyGenericCommandWithData(macho.build_version_command{ - .cmd = macho.LC_BUILD_VERSION, - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => macho.PLATFORM_MACOS, - .ios => if 
(is_simulator_abi) macho.PLATFORM_IOSSIMULATOR else macho.PLATFORM_IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM_WATCHOSSIMULATOR else macho.PLATFORM_WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM_TVOSSIMULATOR else macho.PLATFORM_TVOS, - else => unreachable, - }, - .minos = version, - .sdk = version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = macho.TOOL_LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd }); + self.section_ordinals.deinit(self.base.allocator); + self.got_entries_map.deinit(self.base.allocator); + self.stubs_map.deinit(self.base.allocator); + self.strtab_dir.deinit(self.base.allocator); + self.strtab.deinit(self.base.allocator); + self.undefs.deinit(self.base.allocator); + self.globals.deinit(self.base.allocator); + self.globals_free_list.deinit(self.base.allocator); + self.locals.deinit(self.base.allocator); + self.locals_free_list.deinit(self.base.allocator); + self.symbol_resolver.deinit(self.base.allocator); + self.unresolved.deinit(self.base.allocator); + self.tentatives.deinit(self.base.allocator); + + for (self.objects.items) |*object| { + object.deinit(self.base.allocator); } + self.objects.deinit(self.base.allocator); - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmd = macho.LC_UUID, - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); + for (self.archives.items) |*archive| { + archive.deinit(self.base.allocator); } -} + self.archives.deinit(self.base.allocator); -fn addDataInCodeLC(self: *MachO) !void { - if (self.data_in_code_cmd_index 
== null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); + for (self.dylibs.items) |*dylib| { + dylib.deinit(self.base.allocator); } -} + self.dylibs.deinit(self.base.allocator); + self.dylibs_map.deinit(self.base.allocator); + self.referenced_dylibs.deinit(self.base.allocator); -fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index == null and self.requires_adhoc_codesig) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); + for (self.load_commands.items) |*lc| { + lc.deinit(self.base.allocator); } -} + self.load_commands.deinit(self.base.allocator); -fn addRpathLCs(self: *MachO, rpaths: []const []const u8) !void { - for (rpaths) |rpath| { - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ - .cmd = macho.LC_RPATH, - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); + for (self.managed_atoms.items) |atom| { + atom.deinit(self.base.allocator); + self.base.allocator.destroy(atom); + } + self.managed_atoms.deinit(self.base.allocator); + self.atoms.deinit(self.base.allocator); + { + var it = self.atom_free_lists.valueIterator(); + while (it.next()) |free_list| { + 
free_list.deinit(self.base.allocator); + } + self.atom_free_lists.deinit(self.base.allocator); + } + for (self.decls.keys()) |decl| { + decl.link.macho.deinit(self.base.allocator); } + self.decls.deinit(self.base.allocator); } -fn addLoadDylibLCs(self: *MachO) !void { - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try commands.createLoadDylibCommand( - self.base.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); +pub fn closeFiles(self: MachO) void { + for (self.objects.items) |object| { + object.file.close(); + } + for (self.archives.items) |archive| { + archive.file.close(); + } + for (self.dylibs.items) |dylib| { + dylib.file.close(); } } -fn flushZld(self: *MachO) !void { - self.load_commands_dirty = true; - try self.writeTextBlocks(); - try self.writeStubHelperCommon(); +fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection) void { + log.debug("freeAtom {*}", .{atom}); + atom.deinit(self.base.allocator); - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; + const free_list = self.atom_free_lists.getPtr(match).?; + var already_have_free_list_node = false; + { + var i: usize = 0; + // TODO turn free_list into a hash map + while (i < free_list.items.len) { + if (free_list.items[i] == atom) { + _ = free_list.swapRemove(i); + continue; + } + if (free_list.items[i] == atom.prev) { + already_have_free_list_node = true; + } + i += 1; + } } + // TODO process free list for dbg info just like we do above for vaddrs - if (self.bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const 
sect = &seg.sections.items[index]; - sect.offset = 0; + if (self.atoms.getPtr(match)) |last_atom| { + if (last_atom.* == atom) { + if (atom.prev) |prev| { + // TODO shrink the section size here + last_atom.* = prev; + } else { + _ = self.atoms.fetchRemove(match); + } + } } - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; + if (self.d_sym) |*ds| { + if (ds.dbg_info_decl_first == atom) { + ds.dbg_info_decl_first = atom.dbg_info_next; + } + if (ds.dbg_info_decl_last == atom) { + // TODO shrink the .debug_info section size here + ds.dbg_info_decl_last = atom.dbg_info_prev; + } } - try self.writeGotEntries(); - try self.setEntryPoint(); - try self.writeRebaseInfoTableZld(); - try self.writeBindInfoTableZld(); - try self.writeLazyBindInfoTableZld(); - try self.writeExportInfoZld(); - try self.writeDices(); + if (atom.prev) |prev| { + prev.next = atom.next; - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + if (!already_have_free_list_node and prev.freeListEligible(self.*)) { + // The free list is heuristics, it doesn't have to be perfect, so we can ignore + // the OOM here. 
+ free_list.append(self.base.allocator, prev) catch {}; + } + } else { + atom.prev = null; } - try self.writeSymbolTable(); - try self.writeStringTableZld(); + if (atom.next) |next| { + next.prev = atom.prev; + } else { + atom.next = null; + } - { - // Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); + if (atom.dbg_info_prev) |prev| { + prev.dbg_info_next = atom.dbg_info_next; + + // TODO the free list logic like we do for atoms above + } else { + atom.dbg_info_prev = null; } - if (self.requires_adhoc_codesig) { - try self.writeCodeSignaturePadding(); + if (atom.dbg_info_next) |next| { + next.dbg_info_prev = atom.dbg_info_prev; + } else { + atom.dbg_info_next = null; } +} - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.requires_adhoc_codesig) { - try self.writeCodeSignature(); - } +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { + _ = self; + _ = atom; + _ = new_block_size; + _ = match; + // TODO check the new capacity, and if it crosses the size threshold into a big enough + // capacity, insert a free list node for it. 
} -fn writeGotEntries(self: *MachO) !void { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.got_section_index.?]; +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { + const sym = self.locals.items[atom.local_sym_index]; + const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; + const need_realloc = !align_ok or new_atom_size > atom.capacity(self.*); + if (!need_realloc) return sym.n_value; + return self.allocateAtom(atom, new_atom_size, alignment, match); +} - var buffer = try self.base.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); - defer self.base.allocator.free(buffer); +pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { + if (decl.link.macho.local_sym_index != 0) return; - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); + try self.locals.ensureUnusedCapacity(self.base.allocator, 1); + try self.decls.putNoClobber(self.base.allocator, decl, {}); - for (self.got_entries.items) |key| { - const address: u64 = switch (key.where) { - .local => self.locals.items[key.where_index].n_value, - .undef => 0, - }; - try writer.writeIntLittle(u64, address); + if (self.locals_free_list.popOrNull()) |i| { + log.debug("reusing symbol index {d} for {s}", .{ i, decl.name }); + decl.link.macho.local_sym_index = i; + } else { + log.debug("allocating symbol index {d} for {s}", .{ self.locals.items.len, decl.name }); + decl.link.macho.local_sym_index = @intCast(u32, self.locals.items.len); + _ = self.locals.addOneAssumeCapacity(); } - log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); - - try self.base.file.?.pwriteAll(buffer, sect.offset); -} - -fn setEntryPoint(self: *MachO) !void { - if (self.base.options.output_mode != .Exe) return; + self.locals.items[decl.link.macho.local_sym_index] = .{ + .n_strx = 0, + 
.n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; - // TODO we should respect the -entry flag passed in by the user to set a custom - // entrypoint. For now, assume default of `_main`. - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "_main"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse { - log.err("'_main' export not found", .{}); - return error.MissingMainEntrypoint; + // TODO try popping from free list first before allocating a new GOT atom. + const key = GotIndirectionKey{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, }; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - assert(resolv.where == .global); - const sym = self.globals.items[resolv.where_index]; - const ec = &self.load_commands.items[self.main_cmd_index.?].Main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); - ec.stacksize = self.base.options.stack_size_override orelse 0; + const got_atom = try self.createGotAtom(key); + try self.got_entries_map.put(self.base.allocator, key, got_atom); } -fn writeRebaseInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; +pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { + if (build_options.skip_non_native and builtin.object_format != .macho) { + @panic("Attempted to compile for object format that was disabled by build configuration"); + } + if (build_options.have_llvm) { + if (self.llvm_object) |llvm_object| return llvm_object.updateFunc(module, func, air, liveness); + } + const tracy = trace(@src()); + defer tracy.end(); - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; + const decl = func.owner_decl; + // TODO clearing the code and relocs buffer should probably be orchestrated + // in a different, smarter, more automatic way somewhere else, in a more centralised + // way than this. + // If we don't clear the buffers here, we are up for some nasty surprises when + // this atom is reused later on and was not freed by freeAtom(). 
+ decl.link.macho.clearRetainingCapacity(); - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } + var code_buffer = std.ArrayList(u8).init(self.base.allocator); + defer code_buffer.deinit(); - if (block.prev) |prev| { - block = prev; - } else break; + var debug_buffers_buf: DebugSymbols.DeclDebugBuffers = undefined; + const debug_buffers = if (self.d_sym) |*ds| blk: { + debug_buffers_buf = try ds.initDeclDebugBuffers(self.base.allocator, module, decl); + break :blk &debug_buffers_buf; + } else null; + defer { + if (debug_buffers) |dbg| { + dbg.dbg_line_buffer.deinit(); + dbg.dbg_info_buffer.deinit(); + var it = dbg.dbg_info_type_relocs.valueIterator(); + while (it.next()) |value| { + value.relocs.deinit(self.base.allocator); } + dbg.dbg_info_type_relocs.deinit(self.base.allocator); } } - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .undef) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } + self.active_decl = decl; - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - for (self.stubs.items) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } + const res = if (debug_buffers) |dbg| + try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, 
liveness, &code_buffer, .{ + .dwarf = .{ + .dbg_line = &dbg.dbg_line_buffer, + .dbg_info = &dbg.dbg_info_buffer, + .dbg_info_type_relocs = &dbg.dbg_info_type_relocs, + }, + }) + else + try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none); + switch (res) { + .appended => { + try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); + }, + .fail => |em| { + decl.analysis = .codegen_failure; + try module.failed_decls.put(module.gpa, decl, em); + return; + }, } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); - - const size = try bind.rebaseInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeRebaseInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); - dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); - seg.inner.filesize += dyld_info.rebase_size; + _ = try self.placeDecl(decl, decl.link.macho.code.items.len); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); + if (debug_buffers) |db| { + try self.d_sym.?.commitDeclDebugInfo( + self.base.allocator, + module, + decl, + db, + self.base.options.target, + ); + } - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); + // Since we updated the vaddr and the size, each corresponding export symbol also + // needs to be updated. 
+ const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; + try self.updateDeclExports(module, decl, decl_exports); } -fn writeBindInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .local) continue; - - const sym = self.undefs.items[entry.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } +pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { + if (build_options.skip_non_native and builtin.object_format != .macho) { + @panic("Attempted to compile for object format that was disabled by build configuration"); } + if (build_options.have_llvm) { + if (self.llvm_object) |llvm_object| return llvm_object.updateDecl(module, decl); + } + const tracy = trace(@src()); + defer tracy.end(); - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; + if (decl.val.tag() == .extern_fn) { + return; // TODO Should we do more when front-end analyzed extern decl? 
+ } - for (block.bindings.items) |binding| { - const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - }); - } + var code_buffer = std.ArrayList(u8).init(self.base.allocator); + defer code_buffer.deinit(); - if (block.prev) |prev| { - block = prev; - } else break; + var debug_buffers_buf: DebugSymbols.DeclDebugBuffers = undefined; + const debug_buffers = if (self.d_sym) |*ds| blk: { + debug_buffers_buf = try ds.initDeclDebugBuffers(self.base.allocator, module, decl); + break :blk &debug_buffers_buf; + } else null; + defer { + if (debug_buffers) |dbg| { + dbg.dbg_line_buffer.deinit(); + dbg.dbg_info_buffer.deinit(); + var it = dbg.dbg_info_type_relocs.valueIterator(); + while (it.next()) |value| { + value.relocs.deinit(self.base.allocator); } + dbg.dbg_info_type_relocs.deinit(self.base.allocator); } } - const size = try bind.bindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeBindInfo(pointers.items, stream.writer()); + self.active_decl = decl; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.bind_size; + const res = if (debug_buffers) |dbg| + try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ + .ty = decl.ty, + .val = decl.val, + }, &code_buffer, .{ + .dwarf = .{ + .dbg_line = &dbg.dbg_line_buffer, + .dbg_info = &dbg.dbg_info_buffer, + .dbg_info_type_relocs = 
&dbg.dbg_info_type_relocs, + }, + }) + else + try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ + .ty = decl.ty, + .val = decl.val, + }, &code_buffer, .none); - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); + const code = blk: { + switch (res) { + .externally_managed => |x| break :blk x, + .appended => { + // TODO clearing the code and relocs buffer should probably be orchestrated + // in a different, smarter, more automatic way somewhere else, in a more centralised + // way than this. + // If we don't clear the buffers here, we are up for some nasty surprises when + // this atom is reused later on and was not freed by freeAtom(). + decl.link.macho.code.clearAndFree(self.base.allocator); + try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); + break :blk decl.link.macho.code.items; + }, + .fail => |em| { + decl.analysis = .codegen_failure; + try module.failed_decls.put(module.gpa, decl, em); + return; + }, + } + }; + _ = try self.placeDecl(decl, code.len); - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); + // Since we updated the vaddr and the size, each corresponding export symbol also + // needs to be updated. 
+ const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; + try self.updateDeclExports(module, decl, decl_exports); } -fn writeLazyBindInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); +fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 { + const required_alignment = decl.ty.abiAlignment(self.base.options.target); + assert(decl.link.macho.local_sym_index != 0); // Caller forgot to call allocateDeclIndexes() + const symbol = &self.locals.items[decl.link.macho.local_sym_index]; - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - - for (self.stubs.items) |import_id, i| { - const sym = self.undefs.items[import_id]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), + if (decl.link.macho.size != 0) { + const capacity = decl.link.macho.capacity(self.*); + const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); + if (need_realloc) { + const vaddr = try self.growAtom(&decl.link.macho, code_len, required_alignment, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, }); - } - } - - const size = try bind.lazyBindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try bind.writeLazyBindInfo(pointers.items, stream.writer()); + log.debug("growing {s} and moving from 0x{x} to 
0x{x}", .{ decl.name, symbol.n_value, vaddr }); - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.lazy_bind_size; + if (vaddr != symbol.n_value) { + log.debug(" (writing new GOT entry)", .{}); + const got_atom = self.got_entries_map.get(.{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, + }) orelse unreachable; + const got_sym = &self.locals.items[got_atom.local_sym_index]; + const got_vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); + got_sym.n_value = got_vaddr; + got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }).? 
+ 1); + } - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); + symbol.n_value = vaddr; + } else if (code_len < decl.link.macho.size) { + self.shrinkAtom(&decl.link.macho, code_len, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); + } + decl.link.macho.size = code_len; + decl.link.macho.dirty = true; - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); -} + const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); + defer self.base.allocator.free(new_name); -fn writeExportInfoZld(self: *MachO) !void { - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); + symbol.n_strx = try self.makeString(new_name); + symbol.n_type = macho.N_SECT; + symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; + symbol.n_desc = 0; + } else { + const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); + defer self.base.allocator.free(decl_name); - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; + const name_str_index = try self.makeString(decl_name); + const addr = try self.allocateAtom(&decl.link.macho, code_len, required_alignment, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("writing export trie", .{}); + log.debug("allocated atom for {s} at 0x{x}", .{ decl_name, addr }); - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); + errdefer self.freeAtom(&decl.link.macho, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + symbol.* = .{ + .n_strx = name_str_index, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, self.text_section_index.?) + 1, + .n_desc = 0, + .n_value = addr, + }; + const got_atom = self.got_entries_map.get(.{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, + }) orelse unreachable; + const got_sym = &self.locals.items[got_atom.local_sym_index]; + const vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, }); + got_sym.n_value = vaddr; + got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }).? 
+ 1); } - try trie.finalize(self.base.allocator); - - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.export_size; - - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); + return symbol; } -fn writeSymbolTable(self: *MachO) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; +pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { + if (self.d_sym) |*ds| { + try ds.updateDeclLineNumber(module, decl); + } +} - var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer locals.deinit(); - try locals.appendSlice(self.locals.items); +pub fn updateDeclExports( + self: *MachO, + module: *Module, + decl: *Module.Decl, + exports: []const *Module.Export, +) !void { + // TODO If we are exporting with global linkage, check for already defined globals and flag + // symbol duplicate/collision! 
+ if (build_options.skip_non_native and builtin.object_format != .macho) { + @panic("Attempted to compile for object format that was disabled by build configuration"); + } + if (build_options.have_llvm) { + if (self.llvm_object) |llvm_object| return llvm_object.updateDeclExports(module, decl, exports); + } + const tracy = trace(@src()); + defer tracy.end(); - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; + try self.globals.ensureCapacity(self.base.allocator, self.globals.items.len + exports.len); + if (decl.link.macho.local_sym_index == 0) return; + const decl_sym = &self.locals.items[decl.link.macho.local_sym_index]; - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); + for (exports) |exp| { + const exp_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{exp.options.name}); + defer self.base.allocator.free(exp_name); - for (object.text_blocks.items) |block| { - if (block.stab) |stab| { - const nlists = try stab.asNlists(block.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } else { - for (block.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } - } + if (exp.options.section) |section_name| { + if (!mem.eql(u8, section_name, "__text")) { + try 
module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.count() + 1); + module.failed_exports.putAssumeCapacityNoClobber( + exp, + try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(), "Unimplemented: ExportOptions.section", .{}), + ); + continue; } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); } - } - - const nlocals = locals.items.len; - const nexports = self.globals.items.len; - const nundefs = self.undefs.items.len; - - const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); - - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); - - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); - - symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize += locals_size + exports_size + undefs_size; - - // Update dynamic symbol table. 
- const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym += @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); - - dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); - seg.inner.filesize += needed_size; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - var buf = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buf); - - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); - } + var n_type: u8 = macho.N_SECT | macho.N_EXT; + var n_desc: u16 = 0; - got.reserved1 = nstubs; - for (self.got_entries.items) |entry| { - switch (entry.where) { - .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + switch (exp.options.linkage) { + .Internal => { + // Symbol should be hidden, or in 
MachO lingo, private extern. + // We should also mark the symbol as Weak: n_desc == N_WEAK_DEF. + // TODO work out when to add N_WEAK_REF. + n_type |= macho.N_PEXT; + n_desc |= macho.N_WEAK_DEF; }, - .local => { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + .Strong => {}, + .Weak => { + // Weak linkage is specified as part of n_desc field. + // Symbol's n_type is like for a symbol with strong linkage. + n_desc |= macho.N_WEAK_DEF; + }, + .LinkOnce => { + try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.count() + 1); + module.failed_exports.putAssumeCapacityNoClobber( + exp, + try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(), "Unimplemented: GlobalLinkage.LinkOnce", .{}), + ); + continue; }, } - } - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); - } + const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: { + const i = if (self.globals_free_list.popOrNull()) |i| i else inner: { + _ = self.globals.addOneAssumeCapacity(); + break :inner @intCast(u32, self.globals.items.len - 1); + }; + break :blk i; + }; - try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); -} + const n_strx = try self.makeString(exp_name); + const sym = &self.globals.items[global_sym_index]; + sym.* = .{ + .n_strx = try self.makeString(exp_name), + .n_type = n_type, + .n_sect = @intCast(u8, self.text_section_index.?) 
+ 1, + .n_desc = n_desc, + .n_value = decl_sym.n_value, + }; + exp.link.macho.sym_index = global_sym_index; -pub fn deinit(self: *MachO) void { - if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); + const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, n_strx); + resolv.value_ptr.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = decl.link.macho.local_sym_index, + }; } +} - if (self.d_sym) |*ds| { - ds.deinit(self.base.allocator); - } +pub fn deleteExport(self: *MachO, exp: Export) void { + const sym_index = exp.sym_index orelse return; + self.globals_free_list.append(self.base.allocator, sym_index) catch {}; + const global = &self.globals.items[sym_index]; + global.n_type = 0; + assert(self.symbol_resolver.remove(global.n_strx)); +} - self.section_ordinals.deinit(self.base.allocator); - self.pending_updates.deinit(self.base.allocator); - self.got_entries.deinit(self.base.allocator); - self.got_entries_map.deinit(self.base.allocator); - self.got_entries_free_list.deinit(self.base.allocator); - self.stubs.deinit(self.base.allocator); - self.stubs_map.deinit(self.base.allocator); - self.strtab_dir.deinit(self.base.allocator); - self.strtab.deinit(self.base.allocator); - self.undefs.deinit(self.base.allocator); - self.globals.deinit(self.base.allocator); - self.globals_free_list.deinit(self.base.allocator); - self.locals.deinit(self.base.allocator); - self.locals_free_list.deinit(self.base.allocator); - self.symbol_resolver.deinit(self.base.allocator); +pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { + log.debug("freeDecl {*}", .{decl}); + _ = self.decls.swapRemove(decl); + // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
+ self.freeAtom(&decl.link.macho, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); + if (decl.link.macho.local_sym_index != 0) { + self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; - for (self.objects.items) |*object| { - object.deinit(self.base.allocator); - } - self.objects.deinit(self.base.allocator); + // TODO free GOT atom here. - for (self.archives.items) |*archive| { - archive.deinit(self.base.allocator); + self.locals.items[decl.link.macho.local_sym_index].n_type = 0; + decl.link.macho.local_sym_index = 0; } - self.archives.deinit(self.base.allocator); - - for (self.dylibs.items) |*dylib| { - dylib.deinit(self.base.allocator); + if (self.d_sym) |*ds| { + // TODO make this logic match freeAtom. Maybe abstract the logic + // out since the same thing is desired for both. + _ = ds.dbg_line_fn_free_list.remove(&decl.fn_link.macho); + if (decl.fn_link.macho.prev) |prev| { + ds.dbg_line_fn_free_list.put(self.base.allocator, prev, {}) catch {}; + prev.next = decl.fn_link.macho.next; + if (decl.fn_link.macho.next) |next| { + next.prev = prev; + } else { + ds.dbg_line_fn_last = prev; + } + } else if (decl.fn_link.macho.next) |next| { + ds.dbg_line_fn_first = next; + next.prev = null; + } + if (ds.dbg_line_fn_first == &decl.fn_link.macho) { + ds.dbg_line_fn_first = decl.fn_link.macho.next; + } + if (ds.dbg_line_fn_last == &decl.fn_link.macho) { + ds.dbg_line_fn_last = decl.fn_link.macho.prev; + } } - self.dylibs.deinit(self.base.allocator); - self.dylibs_map.deinit(self.base.allocator); - self.referenced_dylibs.deinit(self.base.allocator); +} - for (self.load_commands.items) |*lc| { - lc.deinit(self.base.allocator); - } - self.load_commands.deinit(self.base.allocator); +pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { + assert(decl.link.macho.local_sym_index != 0); + return self.locals.items[decl.link.macho.local_sym_index].n_value; +} - for (self.managed_blocks.items) 
|block| { - block.deinit(self.base.allocator); - self.base.allocator.destroy(block); +pub fn populateMissingMetadata(self: *MachO) !void { + if (self.pagezero_segment_cmd_index == null) { + self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = pagezero_vmsize, + }, + }, + }); + self.load_commands_dirty = true; } - self.managed_blocks.deinit(self.base.allocator); - self.blocks.deinit(self.base.allocator); - self.text_block_free_list.deinit(self.base.allocator); - for (self.decls.keys()) |decl| { - decl.link.macho.deinit(self.base.allocator); + if (self.text_segment_cmd_index == null) { + self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const program_code_size_hint = self.base.options.program_code_size_hint; + const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; + const ideal_size = self.header_pad + program_code_size_hint + got_size_hint; + const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); + + log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); + + try self.load_commands.append(self.base.allocator, .{ + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = pagezero_vmsize, + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + }, + }, + }); + self.load_commands_dirty = true; } - self.decls.deinit(self.base.allocator); -} -pub fn closeFiles(self: MachO) void { - for (self.objects.items) |object| { - object.file.close(); + if (self.text_section_index == null) { + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const needed_size = 
self.base.options.program_code_size_hint; + self.text_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__text", + needed_size, + alignment, + .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } - for (self.archives.items) |archive| { - archive.file.close(); + + if (self.stubs_section_index == null) { + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + const needed_size = stub_size * self.base.options.symbol_count_hint; + self.stubs_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__stubs", + needed_size, + alignment, + .{ + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }, + ); } - for (self.dylibs.items) |dylib| { - dylib.file.close(); + + if (self.stub_helper_section_index == null) { + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const preamble_size: u6 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const needed_size = stub_size * self.base.options.symbol_count_hint + preamble_size; + self.stub_helper_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__stub_helper", + needed_size, + alignment, + .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } -} -fn freeTextBlock(self: 
*MachO, text_block: *TextBlock) void { - log.debug("freeTextBlock {*}", .{text_block}); - text_block.deinit(self.base.allocator); + if (self.data_const_segment_cmd_index == null) { + self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const address_and_offset = self.nextSegmentAddressAndOffset(); + const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - var already_have_free_list_node = false; - { - var i: usize = 0; - // TODO turn text_block_free_list into a hash map - while (i < self.text_block_free_list.items.len) { - if (self.text_block_free_list.items[i] == text_block) { - _ = self.text_block_free_list.swapRemove(i); - continue; - } - if (self.text_block_free_list.items[i] == text_block.prev) { - already_have_free_list_node = true; - } - i += 1; - } - } - // TODO process free list for dbg info just like we do above for vaddrs + log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ + address_and_offset.offset, + address_and_offset.offset + needed_size, + }); - if (self.last_text_block == text_block) { - // TODO shrink the __text section size here - self.last_text_block = text_block.prev; + try self.load_commands.append(self.base.allocator, .{ + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }, + }, + }); + self.load_commands_dirty = true; } - if (self.d_sym) |*ds| { - if (ds.dbg_info_decl_first == text_block) { - ds.dbg_info_decl_first = text_block.dbg_info_next; - } - if (ds.dbg_info_decl_last == text_block) { - // TODO shrink the .debug_info section size here - ds.dbg_info_decl_last = text_block.dbg_info_prev; - } + + if 
(self.got_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.got_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__got", + needed_size, + alignment, + .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }, + ); } - if (text_block.prev) |prev| { - prev.next = text_block.next; + if (self.data_segment_cmd_index == null) { + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const address_and_offset = self.nextSegmentAddressAndOffset(); + const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - if (!already_have_free_list_node and prev.freeListEligible(self.*)) { - // The free list is heuristics, it doesn't have to be perfect, so we can ignore - // the OOM here. - self.text_block_free_list.append(self.base.allocator, prev) catch {}; - } - } else { - text_block.prev = null; + log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); + + try self.load_commands.append(self.base.allocator, .{ + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DATA"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }, + }, + }); + self.load_commands_dirty = true; } - if (text_block.next) |next| { - next.prev = text_block.prev; - } else { - text_block.next = null; + if (self.la_symbol_ptr_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.la_symbol_ptr_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + 
"__la_symbol_ptr", + needed_size, + alignment, + .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }, + ); } - if (text_block.dbg_info_prev) |prev| { - prev.dbg_info_next = text_block.dbg_info_next; + if (self.data_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__data", + needed_size, + alignment, + .{}, + ); + } - // TODO the free list logic like we do for text blocks above - } else { - text_block.dbg_info_prev = null; + if (self.tlv_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_vars", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }, + ); } - if (text_block.dbg_info_next) |next| { - next.dbg_info_prev = text_block.dbg_info_prev; - } else { - text_block.dbg_info_next = null; + if (self.tlv_data_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_data", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }, + ); } -} -fn shrinkTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64) void { - _ = self; - _ = text_block; - _ = new_block_size; - // TODO check the new capacity, and if it crosses the size threshold into a big enough - // capacity, insert a free list node for it. 
-} + if (self.tlv_bss_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_bss", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }, + ); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.tlv_bss_section_index.?]; + self.tlv_bss_file_offset = sect.offset; + } -fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const sym = self.locals.items[text_block.local_sym_index]; - const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; - const need_realloc = !align_ok or new_block_size > text_block.capacity(self.*); - if (!need_realloc) return sym.n_value; - return self.allocateTextBlock(text_block, new_block_size, alignment); -} + if (self.bss_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__bss", + needed_size, + alignment, + .{ + .flags = macho.S_ZEROFILL, + }, + ); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.bss_section_index.?]; + self.bss_file_offset = sect.offset; + } -pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { - if (decl.link.macho.local_sym_index != 0) return; + if (self.linkedit_segment_cmd_index == null) { + self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const address_and_offset = self.nextSegmentAddressAndOffset(); - try self.locals.ensureUnusedCapacity(self.base.allocator, 1); - try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); + log.debug("found __LINKEDIT 
segment free space at 0x{x}", .{address_and_offset.offset}); - try self.decls.putNoClobber(self.base.allocator, decl, {}); + try self.load_commands.append(self.base.allocator, .{ + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = address_and_offset.address, + .fileoff = address_and_offset.offset, + .maxprot = macho.VM_PROT_READ, + .initprot = macho.VM_PROT_READ, + }, + }, + }); + self.load_commands_dirty = true; + } - if (self.locals_free_list.popOrNull()) |i| { - log.debug("reusing symbol index {d} for {s}", .{ i, decl.name }); - decl.link.macho.local_sym_index = i; - } else { - log.debug("allocating symbol index {d} for {s}", .{ self.locals.items.len, decl.name }); - decl.link.macho.local_sym_index = @intCast(u32, self.locals.items.len); - _ = self.locals.addOneAssumeCapacity(); + if (self.dyld_info_cmd_index == null) { + self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .DyldInfoOnly = .{ + .cmd = macho.LC_DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = 0, + .rebase_size = 0, + .bind_off = 0, + .bind_size = 0, + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = 0, + .lazy_bind_size = 0, + .export_off = 0, + .export_size = 0, + }, + }); + self.load_commands_dirty = true; } - const got_index: u32 = blk: { - if (self.got_entries_free_list.popOrNull()) |i| { - log.debug("reusing GOT entry index {d} for {s}", .{ i, decl.name }); - break :blk i; - } else { - const got_index = @intCast(u32, self.got_entries.items.len); - log.debug("allocating GOT entry index {d} for {s}", .{ got_index, decl.name }); - _ = self.got_entries.addOneAssumeCapacity(); - self.got_entries_count_dirty = true; - self.rebase_info_dirty = true; - break :blk got_index; - } - }; - - self.locals.items[decl.link.macho.local_sym_index] = .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - const got_entry = 
GotIndirectionKey{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }; - self.got_entries.items[got_index] = got_entry; - try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); -} - -pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { - if (build_options.skip_non_native and builtin.object_format != .macho) { - @panic("Attempted to compile for object format that was disabled by build configuration"); - } - if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| return llvm_object.updateFunc(module, func, air, liveness); - } - const tracy = trace(@src()); - defer tracy.end(); - - const decl = func.owner_decl; - - var code_buffer = std.ArrayList(u8).init(self.base.allocator); - defer code_buffer.deinit(); - - var debug_buffers_buf: DebugSymbols.DeclDebugBuffers = undefined; - const debug_buffers = if (self.d_sym) |*ds| blk: { - debug_buffers_buf = try ds.initDeclDebugBuffers(self.base.allocator, module, decl); - break :blk &debug_buffers_buf; - } else null; - defer { - if (debug_buffers) |dbg| { - dbg.dbg_line_buffer.deinit(); - dbg.dbg_info_buffer.deinit(); - var it = dbg.dbg_info_type_relocs.valueIterator(); - while (it.next()) |value| { - value.relocs.deinit(self.base.allocator); - } - dbg.dbg_info_type_relocs.deinit(self.base.allocator); - } - } - - self.active_decl = decl; - - const res = if (debug_buffers) |dbg| - try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .{ - .dwarf = .{ - .dbg_line = &dbg.dbg_line_buffer, - .dbg_info = &dbg.dbg_info_buffer, - .dbg_info_type_relocs = &dbg.dbg_info_type_relocs, - }, - }) - else - try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none); - switch (res) { - .appended => { - // TODO clearing the code and relocs buffer should probably be orchestrated - // in a different, smarter, more automatic way somewhere else, in a more 
centralised - // way than this. - // If we don't clear the buffers here, we are up for some nasty surprises when - // this TextBlock is reused later on and was not freed by freeTextBlock(). - decl.link.macho.code.clearAndFree(self.base.allocator); - try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); - }, - .fail => |em| { - decl.analysis = .codegen_failure; - try module.failed_decls.put(module.gpa, decl, em); - return; - }, - } - - const symbol = try self.placeDecl(decl, decl.link.macho.code.items.len); - - try self.writeCode(symbol, decl.link.macho.code.items); - - if (debug_buffers) |db| { - try self.d_sym.?.commitDeclDebugInfo( - self.base.allocator, - module, - decl, - db, - self.base.options.target, - ); - } - - // Since we updated the vaddr and the size, each corresponding export symbol also - // needs to be updated. - const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; - try self.updateDeclExports(module, decl, decl_exports); -} - -pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { - if (build_options.skip_non_native and builtin.object_format != .macho) { - @panic("Attempted to compile for object format that was disabled by build configuration"); - } - if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| return llvm_object.updateDecl(module, decl); - } - const tracy = trace(@src()); - defer tracy.end(); - - if (decl.val.tag() == .extern_fn) { - return; // TODO Should we do more when front-end analyzed extern decl? 
- } - - var code_buffer = std.ArrayList(u8).init(self.base.allocator); - defer code_buffer.deinit(); - - var debug_buffers_buf: DebugSymbols.DeclDebugBuffers = undefined; - const debug_buffers = if (self.d_sym) |*ds| blk: { - debug_buffers_buf = try ds.initDeclDebugBuffers(self.base.allocator, module, decl); - break :blk &debug_buffers_buf; - } else null; - defer { - if (debug_buffers) |dbg| { - dbg.dbg_line_buffer.deinit(); - dbg.dbg_info_buffer.deinit(); - var it = dbg.dbg_info_type_relocs.valueIterator(); - while (it.next()) |value| { - value.relocs.deinit(self.base.allocator); - } - dbg.dbg_info_type_relocs.deinit(self.base.allocator); - } - } - - self.active_decl = decl; - - const res = if (debug_buffers) |dbg| - try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ - .ty = decl.ty, - .val = decl.val, - }, &code_buffer, .{ - .dwarf = .{ - .dbg_line = &dbg.dbg_line_buffer, - .dbg_info = &dbg.dbg_info_buffer, - .dbg_info_type_relocs = &dbg.dbg_info_type_relocs, - }, - }) - else - try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ - .ty = decl.ty, - .val = decl.val, - }, &code_buffer, .none); - - const code = blk: { - switch (res) { - .externally_managed => |x| break :blk x, - .appended => { - // TODO clearing the code and relocs buffer should probably be orchestrated - // in a different, smarter, more automatic way somewhere else, in a more centralised - // way than this. - // If we don't clear the buffers here, we are up for some nasty surprises when - // this TextBlock is reused later on and was not freed by freeTextBlock(). 
- decl.link.macho.code.clearAndFree(self.base.allocator); - try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); - break :blk decl.link.macho.code.items; - }, - .fail => |em| { - decl.analysis = .codegen_failure; - try module.failed_decls.put(module.gpa, decl, em); - return; - }, - } - }; - const symbol = try self.placeDecl(decl, code.len); - - try self.writeCode(symbol, code); - - // Since we updated the vaddr and the size, each corresponding export symbol also - // needs to be updated. - const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; - try self.updateDeclExports(module, decl, decl_exports); -} - -fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 { - const required_alignment = decl.ty.abiAlignment(self.base.options.target); - assert(decl.link.macho.local_sym_index != 0); // Caller forgot to call allocateDeclIndexes() - const symbol = &self.locals.items[decl.link.macho.local_sym_index]; - - if (decl.link.macho.size != 0) { - const capacity = decl.link.macho.capacity(self.*); - const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); - if (need_realloc) { - const vaddr = try self.growTextBlock(&decl.link.macho, code_len, required_alignment); - - log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr }); - - if (vaddr != symbol.n_value) { - log.debug(" (writing new GOT entry)", .{}); - const got_index = self.got_entries_map.get(.{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }) orelse unreachable; - try self.writeGotEntry(got_index); - } - - symbol.n_value = vaddr; - } else if (code_len < decl.link.macho.size) { - self.shrinkTextBlock(&decl.link.macho, code_len); - } - decl.link.macho.size = code_len; - - const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); - defer self.base.allocator.free(new_name); - - symbol.n_strx = try 
self.makeString(new_name); - symbol.n_type = macho.N_SECT; - symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; - symbol.n_desc = 0; - - try self.writeLocalSymbol(decl.link.macho.local_sym_index); - if (self.d_sym) |*ds| - try ds.writeLocalSymbol(decl.link.macho.local_sym_index); - } else { - const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); - defer self.base.allocator.free(decl_name); - - const name_str_index = try self.makeString(decl_name); - const addr = try self.allocateTextBlock(&decl.link.macho, code_len, required_alignment); - - log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr }); - - errdefer self.freeTextBlock(&decl.link.macho); - - symbol.* = .{ - .n_strx = name_str_index, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.text_section_index.?) + 1, - .n_desc = 0, - .n_value = addr, - }; - const got_index = self.got_entries_map.get(.{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }) orelse unreachable; - try self.writeGotEntry(got_index); - - try self.writeLocalSymbol(decl.link.macho.local_sym_index); - if (self.d_sym) |*ds| - try ds.writeLocalSymbol(decl.link.macho.local_sym_index); - } - - // Resolve relocations - try decl.link.macho.resolveRelocs(self); - // TODO this requires further investigation: should we dispose of resolved relocs, or keep them - // so that we can reapply them when moving/growing sections? 
- decl.link.macho.relocs.clearAndFree(self.base.allocator); - - // Apply pending updates - while (self.pending_updates.popOrNull()) |update| { - switch (update.kind) { - .got => unreachable, - .stub => { - try self.writeStub(update.index); - try self.writeStubInStubHelper(update.index); - try self.writeLazySymbolPointer(update.index); - self.rebase_info_dirty = true; - self.lazy_binding_info_dirty = true; - }, - } - } - - return symbol; -} - -fn writeCode(self: *MachO, symbol: *macho.nlist_64, code: []const u8) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; - const section_offset = symbol.n_value - text_section.addr; - const file_offset = text_section.offset + section_offset; - log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(symbol.n_strx), file_offset }); - try self.base.file.?.pwriteAll(code, file_offset); -} - -pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { - if (self.d_sym) |*ds| { - try ds.updateDeclLineNumber(module, decl); - } -} - -pub fn updateDeclExports( - self: *MachO, - module: *Module, - decl: *Module.Decl, - exports: []const *Module.Export, -) !void { - if (build_options.skip_non_native and builtin.object_format != .macho) { - @panic("Attempted to compile for object format that was disabled by build configuration"); - } - if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| return llvm_object.updateDeclExports(module, decl, exports); - } - const tracy = trace(@src()); - defer tracy.end(); - - try self.globals.ensureCapacity(self.base.allocator, self.globals.items.len + exports.len); - if (decl.link.macho.local_sym_index == 0) return; - const decl_sym = &self.locals.items[decl.link.macho.local_sym_index]; - - for (exports) |exp| { - const exp_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{exp.options.name}); - defer 
self.base.allocator.free(exp_name); - - if (exp.options.section) |section_name| { - if (!mem.eql(u8, section_name, "__text")) { - try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.count() + 1); - module.failed_exports.putAssumeCapacityNoClobber( - exp, - try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(), "Unimplemented: ExportOptions.section", .{}), - ); - continue; - } - } - - var n_type: u8 = macho.N_SECT | macho.N_EXT; - var n_desc: u16 = 0; - - switch (exp.options.linkage) { - .Internal => { - // Symbol should be hidden, or in MachO lingo, private extern. - // We should also mark the symbol as Weak: n_desc == N_WEAK_DEF. - // TODO work out when to add N_WEAK_REF. - n_type |= macho.N_PEXT; - n_desc |= macho.N_WEAK_DEF; - }, - .Strong => { - // Check if the export is _main, and note if os. - // Otherwise, don't do anything since we already have all the flags - // set that we need for global (strong) linkage. - // n_type == N_SECT | N_EXT - if (mem.eql(u8, exp_name, "_main")) { - self.entry_addr = decl_sym.n_value; - } - }, - .Weak => { - // Weak linkage is specified as part of n_desc field. - // Symbol's n_type is like for a symbol with strong linkage. - n_desc |= macho.N_WEAK_DEF; - }, - .LinkOnce => { - try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.count() + 1); - module.failed_exports.putAssumeCapacityNoClobber( - exp, - try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(), "Unimplemented: GlobalLinkage.LinkOnce", .{}), - ); - continue; - }, - } - - if (exp.link.macho.sym_index) |i| { - const sym = &self.globals.items[i]; - sym.* = .{ - .n_strx = sym.n_strx, - .n_type = n_type, - .n_sect = @intCast(u8, self.text_section_index.?) 
+ 1, - .n_desc = n_desc, - .n_value = decl_sym.n_value, - }; - } else { - const name_str_index = try self.makeString(exp_name); - const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { - _ = self.globals.addOneAssumeCapacity(); - self.export_info_dirty = true; - break :blk @intCast(u32, self.globals.items.len - 1); - }; - self.globals.items[i] = .{ - .n_strx = name_str_index, - .n_type = n_type, - .n_sect = @intCast(u8, self.text_section_index.?) + 1, - .n_desc = n_desc, - .n_value = decl_sym.n_value, - }; - const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, name_str_index); - resolv.value_ptr.* = .{ - .where = .global, - .where_index = i, - .local_sym_index = decl.link.macho.local_sym_index, - }; - - exp.link.macho.sym_index = @intCast(u32, i); - } - } -} - -pub fn deleteExport(self: *MachO, exp: Export) void { - const sym_index = exp.sym_index orelse return; - self.globals_free_list.append(self.base.allocator, sym_index) catch {}; - self.globals.items[sym_index].n_type = 0; -} - -pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { - log.debug("freeDecl {*}", .{decl}); - _ = self.decls.swapRemove(decl); - // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. - self.freeTextBlock(&decl.link.macho); - if (decl.link.macho.local_sym_index != 0) { - self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; - - const got_key = GotIndirectionKey{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }; - const got_index = self.got_entries_map.get(got_key) orelse unreachable; - _ = self.got_entries_map.remove(got_key); - self.got_entries_free_list.append(self.base.allocator, got_index) catch {}; - - self.locals.items[decl.link.macho.local_sym_index].n_type = 0; - decl.link.macho.local_sym_index = 0; - } - if (self.d_sym) |*ds| { - // TODO make this logic match freeTextBlock. 
Maybe abstract the logic - // out since the same thing is desired for both. - _ = ds.dbg_line_fn_free_list.remove(&decl.fn_link.macho); - if (decl.fn_link.macho.prev) |prev| { - ds.dbg_line_fn_free_list.put(self.base.allocator, prev, {}) catch {}; - prev.next = decl.fn_link.macho.next; - if (decl.fn_link.macho.next) |next| { - next.prev = prev; - } else { - ds.dbg_line_fn_last = prev; - } - } else if (decl.fn_link.macho.next) |next| { - ds.dbg_line_fn_first = next; - next.prev = null; - } - if (ds.dbg_line_fn_first == &decl.fn_link.macho) { - ds.dbg_line_fn_first = decl.fn_link.macho.next; - } - if (ds.dbg_line_fn_last == &decl.fn_link.macho) { - ds.dbg_line_fn_last = decl.fn_link.macho.prev; - } - } -} - -pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { - assert(decl.link.macho.local_sym_index != 0); - return self.locals.items[decl.link.macho.local_sym_index].n_value; -} - -pub fn populateMissingMetadata(self: *MachO) !void { - switch (self.base.options.output_mode) { - .Exe => {}, - .Obj => return error.TODOImplementWritingObjFiles, - .Lib => return error.TODOImplementWritingLibFiles, - } - - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), - }); - self.load_commands_dirty = true; - } - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE; - - const program_code_size_hint = self.base.options.program_code_size_hint; - const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = self.header_pad + program_code_size_hint + 3 * got_size_hint; - const needed_size = 
mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - - log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); - - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = maxprot, - .initprot = initprot, - }), - }); - self.load_commands_dirty = true; - } - if (self.text_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_segment.sections.items.len); - - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; - const needed_size = self.base.options.program_code_size_hint; - const off = text_segment.findFreeSpace(needed_size, @as(u16, 1) << alignment, self.header_pad); - - log.debug("found __text section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__text", .{ - .addr = text_segment.inner.vmaddr + off, - .size = @intCast(u32, needed_size), - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = flags, - }); - self.load_commands_dirty = true; - } - if (self.stubs_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_segment.sections.items.len); - - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - 
const flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. - - log.debug("found __stubs section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__stubs", .{ - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = flags, - .reserved2 = stub_size, - }); - self.load_commands_dirty = true; - } - if (self.stub_helper_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_segment.sections.items.len); - - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. 
- - log.debug("found __stub_helper section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__stub_helper", .{ - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = flags, - }); - self.load_commands_dirty = true; - } - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE; - const address_and_offset = self.nextSegmentAddressAndOffset(); - - const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - - log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); - - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .vmaddr = address_and_offset.address, - .vmsize = needed_size, - .fileoff = address_and_offset.offset, - .filesize = needed_size, - .maxprot = maxprot, - .initprot = initprot, - }), - }); - self.load_commands_dirty = true; - } - if (self.got_section_index == null) { - const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, dc_segment.sections.items.len); - - const flags = macho.S_NON_LAZY_SYMBOL_POINTERS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = dc_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= dc_segment.inner.fileoff + dc_segment.inner.filesize); // TODO Must expand __DATA_CONST segment. 
- - log.debug("found __got section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try dc_segment.addSection(self.base.allocator, "__got", .{ - .addr = dc_segment.inner.vmaddr + off - dc_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = flags, - }); - self.load_commands_dirty = true; - } - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE; - const address_and_offset = self.nextSegmentAddressAndOffset(); - - const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; - const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - - log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); - - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .vmaddr = address_and_offset.address, - .vmsize = needed_size, - .fileoff = address_and_offset.offset, - .filesize = needed_size, - .maxprot = maxprot, - .initprot = initprot, - }), - }); - self.load_commands_dirty = true; - } - if (self.la_symbol_ptr_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_segment.sections.items.len); - - const flags = macho.S_LAZY_SYMBOL_POINTERS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
- - log.debug("found __la_symbol_ptr section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__la_symbol_ptr", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = flags, - }); - self.load_commands_dirty = true; - } - if (self.data_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_segment.sections.items.len); - - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. - - log.debug("found __data section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__data", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - self.load_commands_dirty = true; - } - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ; - const address_and_offset = self.nextSegmentAddressAndOffset(); - - log.debug("found __LINKEDIT segment free space at 0x{x}", .{address_and_offset.offset}); - - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .vmaddr = address_and_offset.address, - .fileoff = address_and_offset.offset, - .maxprot = maxprot, - .initprot = initprot, - }), - }); - self.load_commands_dirty = true; - } - if (self.dyld_info_cmd_index == null) 
{ - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - - try self.load_commands.append(self.base.allocator, .{ - .DyldInfoOnly = .{ - .cmd = macho.LC_DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - - const dyld = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - - // Preallocate rebase, binding, lazy binding info, and export info. - const expected_size = 48; // TODO This is totally random. - const rebase_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found rebase info free space 0x{x} to 0x{x}", .{ rebase_off, rebase_off + expected_size }); - dyld.rebase_off = @intCast(u32, rebase_off); - dyld.rebase_size = expected_size; - - const bind_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found binding info free space 0x{x} to 0x{x}", .{ bind_off, bind_off + expected_size }); - dyld.bind_off = @intCast(u32, bind_off); - dyld.bind_size = expected_size; - - const lazy_bind_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found lazy binding info free space 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + expected_size }); - dyld.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld.lazy_bind_size = expected_size; - - const export_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + expected_size }); - dyld.export_off = @intCast(u32, export_off); - dyld.export_size = expected_size; - - self.load_commands_dirty = true; - } - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - - try self.load_commands.append(self.base.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = 
@sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - const symtab_size = self.base.options.symbol_count_hint * @sizeOf(macho.nlist_64); - const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64), null); - log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); - symtab.symoff = @intCast(u32, symtab_off); - symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); - - try self.strtab.append(self.base.allocator, 0); - const strtab_size = self.strtab.items.len; - const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off); - log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); - symtab.stroff = @intCast(u32, strtab_off); - symtab.strsize = @intCast(u32, strtab_size); - - self.load_commands_dirty = true; - self.strtab_dirty = true; - } - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - - // Preallocate space for indirect symbol table. - const indsymtab_size = self.base.options.symbol_count_hint * @sizeOf(u64); // Each entry is just a u64. 
- const indsymtab_off = self.findFreeSpaceLinkedit(indsymtab_size, @sizeOf(u64), null); - - log.debug("found indirect symbol table free space 0x{x} to 0x{x}", .{ indsymtab_off, indsymtab_off + indsymtab_size }); - - try self.load_commands.append(self.base.allocator, .{ - .Dysymtab = .{ - .cmd = macho.LC_DYSYMTAB, - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = @intCast(u32, indsymtab_off), - .nindirectsyms = @intCast(u32, self.base.options.symbol_count_hint), - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - self.load_commands_dirty = true; - } - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), - @sizeOf(u64), - )); - var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = macho.LC_LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); - try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); - self.load_commands_dirty = true; - } - if (self.libsystem_cmd_index == null) { - self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); - - var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0); - errdefer dylib_cmd.deinit(self.base.allocator); - - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - - self.load_commands_dirty = true; - } - if 
(self.main_cmd_index == null) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Main = .{ - .cmd = macho.LC_MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - self.load_commands_dirty = true; - } - if (self.build_version_cmd_index == null) { - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const ver = self.base.options.target.os.version_range.semver.min; - const version = ver.major << 16 | ver.minor << 8 | ver.patch; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = commands.emptyGenericCommandWithData(macho.build_version_command{ - .cmd = macho.LC_BUILD_VERSION, - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => macho.PLATFORM_MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM_IOSSIMULATOR else macho.PLATFORM_IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM_WATCHOSSIMULATOR else macho.PLATFORM_WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM_TVOSSIMULATOR else macho.PLATFORM_TVOS, - else => unreachable, - }, - .minos = version, - .sdk = version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = macho.TOOL_LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd }); - } - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = 
@sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - self.load_commands_dirty = true; - } - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmd = macho.LC_UUID, - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); - self.load_commands_dirty = true; - } - if (self.code_signature_cmd_index == null and self.requires_adhoc_codesig) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; - } - if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - })) { - const import_sym_index = @intCast(u32, self.undefs.items.len); - const n_strx = try self.makeString("dyld_stub_binder"); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = import_sym_index, - }); - const got_key = GotIndirectionKey{ - .where = .undef, - .where_index = import_sym_index, - }; - const got_index = @intCast(u32, self.got_entries.items.len); - try self.got_entries.append(self.base.allocator, got_key); - try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index); - try self.writeGotEntry(got_index); - self.binding_info_dirty = true; - } - if (self.stub_helper_stubs_start_off == null) { - try self.writeStubHelperPreamble(); - } -} - -fn allocateTextBlock(self: 
*MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = &text_segment.sections.items[self.text_section_index.?]; - const new_block_ideal_capacity = padToIdeal(new_block_size); - - // We use these to indicate our intention to update metadata, placing the new block, - // and possibly removing a free list node. - // It would be simpler to do it inside the for loop below, but that would cause a - // problem if an error was returned later in the function. So this action - // is actually carried out at the end of the function, when errors are no longer possible. - var block_placement: ?*TextBlock = null; - var free_list_removal: ?usize = null; - - // First we look for an appropriately sized free list node. - // The list is unordered. We'll just take the first thing that works. - const vaddr = blk: { - var i: usize = 0; - while (i < self.text_block_free_list.items.len) { - const big_block = self.text_block_free_list.items[i]; - // We now have a pointer to a live text block that has too much capacity. - // Is it enough that we could fit this new text block? - const sym = self.locals.items[big_block.local_sym_index]; - const capacity = big_block.capacity(self.*); - const ideal_capacity = padToIdeal(capacity); - const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity; - const capacity_end_vaddr = sym.n_value + capacity; - const new_start_vaddr_unaligned = capacity_end_vaddr - new_block_ideal_capacity; - const new_start_vaddr = mem.alignBackwardGeneric(u64, new_start_vaddr_unaligned, alignment); - if (new_start_vaddr < ideal_capacity_end_vaddr) { - // Additional bookkeeping here to notice if this free list node - // should be deleted because the block that it points to has grown to take up - // more of the extra capacity. 
- if (!big_block.freeListEligible(self.*)) { - const bl = self.text_block_free_list.swapRemove(i); - bl.deinit(self.base.allocator); - } else { - i += 1; - } - continue; - } - // At this point we know that we will place the new block here. But the - // remaining question is whether there is still yet enough capacity left - // over for there to still be a free list node. - const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; - const keep_free_list_node = remaining_capacity >= min_text_capacity; - - // Set up the metadata to be updated, after errors are no longer possible. - block_placement = big_block; - if (!keep_free_list_node) { - free_list_removal = i; - } - break :blk new_start_vaddr; - } else if (self.last_text_block) |last| { - const last_symbol = self.locals.items[last.local_sym_index]; - // TODO We should pad out the excess capacity with NOPs. For executables, - // no padding seems to be OK, but it will probably not be for objects. - const ideal_capacity = padToIdeal(last.size); - const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; - const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); - block_placement = last; - break :blk new_start_vaddr; - } else { - break :blk text_section.addr; - } - }; - - const expand_text_section = block_placement == null or block_placement.?.next == null; - if (expand_text_section) { - const needed_size = (vaddr + new_block_size) - text_section.addr; - assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. - - self.last_text_block = text_block; - text_section.size = needed_size; - self.load_commands_dirty = true; // TODO Make more granular. 
- - if (self.d_sym) |*ds| { - const debug_text_seg = &ds.load_commands.items[ds.text_segment_cmd_index.?].Segment; - const debug_text_sect = &debug_text_seg.sections.items[ds.text_section_index.?]; - debug_text_sect.size = needed_size; - ds.load_commands_dirty = true; - } - } - text_block.size = new_block_size; - - if (text_block.prev) |prev| { - prev.next = text_block.next; - } - if (text_block.next) |next| { - next.prev = text_block.prev; - } - - if (block_placement) |big_block| { - text_block.prev = big_block; - text_block.next = big_block.next; - big_block.next = text_block; - } else { - text_block.prev = null; - text_block.next = null; - } - if (free_list_removal) |i| { - _ = self.text_block_free_list.swapRemove(i); - } - - return vaddr; -} - -pub fn addExternFn(self: *MachO, name: []const u8) !u32 { - const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); - defer self.base.allocator.free(sym_name); - - if (self.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{ - .bytes = &self.strtab, - })) |n_strx| { - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - return resolv.where_index; - } - - log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name}); - const import_sym_index = @intCast(u32, self.undefs.items.len); - const n_strx = try self.makeString(sym_name); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = import_sym_index, - }); - - const stubs_index = @intCast(u32, self.stubs.items.len); - try self.stubs.append(self.base.allocator, import_sym_index); - try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index); - - // TODO discuss this. 
The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, - // which obviously doesn't include file writing op errors. So instead of trying to write the stub - // entry right here and now, queue it up and dispose of when updating decl. - try self.pending_updates.append(self.base.allocator, .{ - .kind = .stub, - .index = stubs_index, - }); - - return import_sym_index; -} - -const NextSegmentAddressAndOffset = struct { - address: u64, - offset: u64, -}; - -fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { - var prev_segment_idx: ?usize = null; // We use optional here for safety. - for (self.load_commands.items) |cmd, i| { - if (cmd == .Segment) { - prev_segment_idx = i; - } - } - const prev_segment = self.load_commands.items[prev_segment_idx.?].Segment; - const address = prev_segment.inner.vmaddr + prev_segment.inner.vmsize; - const offset = prev_segment.inner.fileoff + prev_segment.inner.filesize; - return .{ - .address = address, - .offset = offset, - }; -} - -fn allocatedSizeLinkedit(self: *MachO, start: u64) u64 { - assert(start > 0); - var min_pos: u64 = std.math.maxInt(u64); - - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. 
- if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; - if (dyld_info.bind_off > start and dyld_info.bind_off < min_pos) min_pos = dyld_info.bind_off; - if (dyld_info.weak_bind_off > start and dyld_info.weak_bind_off < min_pos) min_pos = dyld_info.weak_bind_off; - if (dyld_info.lazy_bind_off > start and dyld_info.lazy_bind_off < min_pos) min_pos = dyld_info.lazy_bind_off; - if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; - } - - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - if (fstart.dataoff > start and fstart.dataoff < min_pos) min_pos = fstart.dataoff; - } - - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - if (dic.dataoff > start and dic.dataoff < min_pos) min_pos = dic.dataoff; - } - - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - if (dysymtab.indirectsymoff > start and dysymtab.indirectsymoff < min_pos) min_pos = dysymtab.indirectsymoff; - // TODO Handle more dynamic symbol table sections. 
- } - - if (self.symtab_cmd_index) |idx| { - const symtab = self.load_commands.items[idx].Symtab; - if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; - if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; - } - - return min_pos - start; -} -inline fn checkForCollision(start: u64, end: u64, off: u64, size: u64) ?u64 { - const increased_size = padToIdeal(size); - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; - } - return null; -} - -fn detectAllocCollisionLinkedit(self: *MachO, start: u64, size: u64) ?u64 { - const end = start + padToIdeal(size); - - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. - if (self.dyld_info_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (checkForCollision(start, end, dyld_info.rebase_off, dyld_info.rebase_size)) |pos| { - return pos; - } - // Binding info - if (checkForCollision(start, end, dyld_info.bind_off, dyld_info.bind_size)) |pos| { - return pos; - } - // Weak binding info - if (checkForCollision(start, end, dyld_info.weak_bind_off, dyld_info.weak_bind_size)) |pos| { - return pos; - } - // Lazy binding info - if (checkForCollision(start, end, dyld_info.lazy_bind_off, dyld_info.lazy_bind_size)) |pos| { - return pos; - } - // Export info - if (checkForCollision(start, end, dyld_info.export_off, dyld_info.export_size)) |pos| { - return pos; - } - } - - if (self.function_starts_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const fstart = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, fstart.dataoff, fstart.datasize)) |pos| { - return pos; - } - } - - if (self.data_in_code_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dic = 
self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, dic.dataoff, dic.datasize)) |pos| { - return pos; - } - } - - if (self.dysymtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dysymtab = self.load_commands.items[idx].Dysymtab; - // Indirect symbol table - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - if (checkForCollision(start, end, dysymtab.indirectsymoff, nindirectsize)) |pos| { - return pos; - } - // TODO Handle more dynamic symbol table sections. - } - - if (self.symtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const symtab = self.load_commands.items[idx].Symtab; - // Symbol table - const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); - if (checkForCollision(start, end, symtab.symoff, symsize)) |pos| { - return pos; - } - // String table - if (checkForCollision(start, end, symtab.stroff, symtab.strsize)) |pos| { - return pos; - } - } - - return null; -} - -fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, start: ?u64) u64 { - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - var st: u64 = start orelse linkedit.inner.fileoff; - while (self.detectAllocCollisionLinkedit(st, object_size)) |item_end| { - st = mem.alignForwardGeneric(u64, item_end, min_alignment); - } - return st; -} - -fn writeGotEntry(self: *MachO, index: usize) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[self.got_section_index.?]; - const off = sect.offset + @sizeOf(u64) * index; - - if (self.got_entries_count_dirty) { - // TODO relocate. 
- self.got_entries_count_dirty = false; - } - - const got_entry = self.got_entries.items[index]; - const sym = switch (got_entry.where) { - .local => self.locals.items[got_entry.where_index], - .undef => self.undefs.items[got_entry.where_index], - }; - log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ - off, - sym.n_value, - self.getString(sym.n_strx), - }); - try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); -} - -fn writeLazySymbolPointer(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size; - const end = stub_helper.addr + stub_off - stub_helper.offset; - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeIntLittle(u64, &buf, end); - const off = la_symbol_ptr.offset + index * @sizeOf(u64); - log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); - try self.base.file.?.pwriteAll(&buf, off); -} - -fn writeStubHelperPreamble(self: *MachO) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr; - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - self.stub_helper_stubs_start_off = stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr; - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], 
aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x17, pages - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - - // stp x16, x17, [sp, #-16]! 
- mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.stp( - .x16, - .x17, - aarch64.Register.sp, - aarch64.Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32()); - - binder_blk_outer: { - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr; - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Jump is too big, replace ldr with adrp and ldr(register). 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x16, pages - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - // ldr x16, x16, offset - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - - // br x16 - mem.writeIntLittle(u32, code[20..24], aarch64.Instruction.br(.x16).toU32()); - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - self.stub_helper_stubs_start_off = stub_helper.offset + code.len; - }, - else => unreachable, - } -} - -fn writeStub(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_off = stubs.offset + index * stubs.reserved2; - const stub_addr = stubs.addr + index * stubs.reserved2; - const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); - - log.debug("writing stub at 0x{x}", .{stub_off}); - - var code = try self.base.allocator.alloc(u8, stubs.reserved2); - defer self.base.allocator.free(code); - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - assert(la_ptr_addr >= stub_addr + stubs.reserved2); - const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); - // jmp - code[0] = 0xff; - code[1] = 0x25; - mem.writeIntLittle(u32, code[2..][0..4], displacement); - }, - .aarch64 => { - assert(la_ptr_addr >= stub_addr); - outer: { - const this_addr = stub_addr; - const 
target_addr = la_ptr_addr; - inner: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // ldr x16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :outer; - } - inner: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // ldr x16, literal - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :outer; - } - // Use adrp followed by ldr(register). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x16, pages - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - // ldr x16, x16, offset - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - }, - else => unreachable, + if (self.symtab_cmd_index == null) { + self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Symtab = .{ + .cmd = macho.LC_SYMTAB, + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }, + }); + self.load_commands_dirty = true; } - try 
self.base.file.?.pwriteAll(code, stub_off); -} - -fn writeStubInStubHelper(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - - var code = try self.base.allocator.alloc(u8, stub_size); - defer self.base.allocator.free(code); - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, - ); - // pushq - code[0] = 0x68; - mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - // jmpq - code[5] = 0xe9; - mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); - }, - .aarch64 => { - const literal = blk: { - const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); - break :blk try math.cast(u18, div_res); - }; - // ldr w16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = literal, - }).toU32()); - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); - // b disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
- mem.writeIntLittle(u32, code[8..12], 0x0); - }, - else => unreachable, + if (self.dysymtab_cmd_index == null) { + self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Dysymtab = .{ + .cmd = macho.LC_DYSYMTAB, + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }, + }); + self.load_commands_dirty = true; } - try self.base.file.?.pwriteAll(code, stub_off); -} - -fn relocateSymbolTable(self: *MachO) !void { - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.locals.items.len; - const nglobals = self.globals.items.len; - const nundefs = self.undefs.items.len; - const nsyms = nlocals + nglobals + nundefs; - - if (symtab.nsyms < nsyms) { - const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > self.allocatedSizeLinkedit(symtab.symoff)) { - // Move the entire symbol table to a new location - const new_symoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.nlist_64), null); - const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); - - log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ - symtab.symoff, - symtab.symoff + existing_size, - new_symoff, - new_symoff + existing_size, - }); - const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size); - if (amt != existing_size) return error.InputOutput; - symtab.symoff = @intCast(u32, new_symoff); - self.strtab_needs_relocation = true; - } - symtab.nsyms = @intCast(u32, nsyms); + if (self.dylinker_cmd_index == null) { + self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); + 
const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + mem.lenZ(default_dyld_path), + @sizeOf(u64), + )); + var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ + .cmd = macho.LC_LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); + mem.set(u8, dylinker_cmd.data, 0); + mem.copy(u8, dylinker_cmd.data, mem.spanZ(default_dyld_path)); + try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); self.load_commands_dirty = true; } -} - -fn writeLocalSymbol(self: *MachO, index: usize) !void { - const tracy = trace(@src()); - defer tracy.end(); - try self.relocateSymbolTable(); - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; - log.debug("writing local symbol {} at 0x{x}", .{ index, off }); - try self.base.file.?.pwriteAll(mem.asBytes(&self.locals.items[index]), off); -} - -fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - try self.relocateSymbolTable(); - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.locals.items.len; - const nglobals = self.globals.items.len; - const nundefs = self.undefs.items.len; - - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - - const globals_off = locals_off + locals_size; - const globals_size = nglobals * @sizeOf(macho.nlist_64); - log.debug("writing global symbols from 0x{x} to 0x{x}", .{ globals_off, globals_size + globals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), globals_off); - - const undefs_off = globals_off + globals_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing extern symbols from 0x{x} to 0x{x}", .{ 
undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = @intCast(u32, nlocals); - dysymtab.nextdefsym = @intCast(u32, nglobals); - dysymtab.iundefsym = @intCast(u32, nlocals + nglobals); - dysymtab.nundefsym = @intCast(u32, nundefs); - self.load_commands_dirty = true; -} - -fn writeIndirectSymbolTable(self: *MachO) !void { - // TODO figure out a way not to rewrite the table every time if - // no new undefs are not added. - const tracy = trace(@src()); - defer tracy.end(); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_seg.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - - const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); - const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); - const nindirectsyms = nstubs * 2 + ngot_entries; - const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32)); - - if (needed_size > allocated_size) { - dysymtab.nindirectsyms = 0; - dysymtab.indirectsymoff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, @sizeOf(u32), null)); + if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { + self.main_cmd_index = @intCast(u16, self.load_commands.items.len); + try 
self.load_commands.append(self.base.allocator, .{ + .Main = .{ + .cmd = macho.LC_MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = 0x0, + .stacksize = 0, + }, + }); + self.load_commands_dirty = true; } - dysymtab.nindirectsyms = nindirectsyms; - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buf); - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - stubs.reserved1 = 0; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { + self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); + const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); + defer self.base.allocator.free(install_name); + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + install_name, + 2, + 0x10000, // TODO forward user-provided versions + 0x10000, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.load_commands_dirty = true; } - got.reserved1 = nstubs; - for (self.got_entries.items) |entry| { - switch (entry.where) { - .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); - }, - .local => { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + if (self.source_version_cmd_index == null) { + self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .SourceVersion = .{ + .cmd = macho.LC_SOURCE_VERSION, + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, }, - } + }); + 
self.load_commands_dirty = true; } - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + if (self.build_version_cmd_index == null) { + self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), + @sizeOf(u64), + )); + const ver = self.base.options.target.os.version_range.semver.min; + const version = ver.major << 16 | ver.minor << 8 | ver.patch; + const is_simulator_abi = self.base.options.target.abi == .simulator; + var cmd = commands.emptyGenericCommandWithData(macho.build_version_command{ + .cmd = macho.LC_BUILD_VERSION, + .cmdsize = cmdsize, + .platform = switch (self.base.options.target.os.tag) { + .macos => macho.PLATFORM_MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM_IOSSIMULATOR else macho.PLATFORM_IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM_WATCHOSSIMULATOR else macho.PLATFORM_WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM_TVOSSIMULATOR else macho.PLATFORM_TVOS, + else => unreachable, + }, + .minos = version, + .sdk = version, + .ntools = 1, + }); + const ld_ver = macho.build_tool_version{ + .tool = macho.TOOL_LD, + .version = 0x0, + }; + cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); + mem.set(u8, cmd.data, 0); + mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); + try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd }); + self.load_commands_dirty = true; } - try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); - self.load_commands_dirty = true; -} - -fn writeDices(self: *MachO) !void { - if (!self.has_dices) return; - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const fileoff = 
seg.inner.fileoff + seg.inner.filesize; - - var buf = std.ArrayList(u8).init(self.base.allocator); - defer buf.deinit(); - - var block: *TextBlock = self.blocks.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; - - while (block.prev) |prev| { - block = prev; + if (self.uuid_cmd_index == null) { + self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); + var uuid_cmd: macho.uuid_command = .{ + .cmd = macho.LC_UUID, + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_cmd.uuid); + try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); + self.load_commands_dirty = true; } - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; - - while (true) { - if (block.dices.items.len > 0) { - const sym = self.locals.items[block.local_sym_index]; - const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); - - try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (block.dices.items) |dice| { - const rebased_dice = macho.data_in_code_entry{ - .offset = base_off + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); - } - } - - if (block.next) |next| { - block = next; - } else break; + if (self.data_in_code_cmd_index == null) { + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + self.load_commands_dirty = true; } - const datasize = @intCast(u32, buf.items.len); - - dice_cmd.dataoff = @intCast(u32, fileoff); - dice_cmd.datasize = datasize; - seg.inner.filesize += 
datasize; - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); - - try self.base.file.?.pwriteAll(buf.items, fileoff); + self.cold_start = true; } -fn writeCodeSignaturePadding(self: *MachO) !void { - // TODO figure out how not to rewrite padding every single time. - const tracy = trace(@src()); - defer tracy.end(); - - const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - const fileoff = linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize; - const needed_size = CodeSignature.calcCodeSignaturePaddingSize( - self.base.options.emit.?.sub_path, - fileoff, - self.page_size, - ); - code_sig_cmd.dataoff = @intCast(u32, fileoff); - code_sig_cmd.datasize = needed_size; +const AllocateSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; - // Advance size of __LINKEDIT segment - linkedit_segment.inner.filesize += needed_size; - if (linkedit_segment.inner.vmsize < linkedit_segment.inner.filesize) { - linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, linkedit_segment.inner.filesize, self.page_size); - } - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. 
- try self.base.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); - self.load_commands_dirty = true; -} +fn allocateSection( + self: *MachO, + segment_id: u16, + sectname: []const u8, + size: u64, + alignment: u32, + opts: AllocateSectionOpts, +) !u16 { + const seg = &self.load_commands.items[segment_id].Segment; + var sect = macho.section_64{ + .sectname = makeStaticString(sectname), + .segname = seg.inner.segname, + .size = @intCast(u32, size), + .@"align" = alignment, + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }; -fn writeCodeSignature(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) self.header_pad else null; + const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ + commands.segmentName(sect), + commands.sectionName(sect), + off, + off + size, + }); - var code_sig: CodeSignature = .{}; - defer code_sig.deinit(self.base.allocator); + sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.offset = @intCast(u32, off); - try code_sig.calcAdhocSignature( - self.base.allocator, - self.base.file.?, - self.base.options.emit.?.sub_path, - text_segment.inner, - code_sig_cmd, - self.base.options.output_mode, - self.page_size, - ); + const index = @intCast(u16, seg.sections.items.len); + try seg.sections.append(self.base.allocator, sect); + seg.inner.cmdsize += @sizeOf(macho.section_64); + seg.inner.nsects += 1; - var buffer = try self.base.allocator.alloc(u8, code_sig.size()); - defer self.base.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try 
code_sig.write(stream.writer()); + const match = MatchingSection{ + .seg = segment_id, + .sect = index, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); - log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); + self.load_commands_dirty = true; + self.sections_order_dirty = true; - try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); + return index; } -fn writeExportInfo(self: *MachO) !void { - if (!self.export_info_dirty) return; - if (self.globals.items.len == 0) return; +fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u64) u64 { + const seg = self.load_commands.items[segment_id].Segment; + if (seg.sections.items.len == 0) { + return if (start) |v| v else seg.inner.fileoff; + } + const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const final_off = last_sect.offset + padToIdeal(last_sect.size); + return mem.alignForwardGeneric(u64, final_off, alignment); +} +fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { const tracy = trace(@src()); defer tracy.end(); - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; + const alignment = try math.powi(u32, 2, sect.@"align"); + const max_size = self.allocatedSize(match.seg, sect.offset); + const ideal_size = padToIdeal(new_size); + const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("writing export trie", .{}); + if (needed_size > max_size) blk: { + log.debug(" (need to grow!)", .{}); + // Need to move all sections below in file and address spaces. + const offset_amt = offset: { + const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); + }; - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); + // Before we commit to this, check if the segment needs to grow too. + // We assume that each section header is growing linearly with the increasing + // file offset / virtual memory address space. + const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const last_sect_off = last_sect.offset + last_sect.size; + const seg_off = seg.inner.fileoff + seg.inner.filesize; + + if (last_sect_off + offset_amt > seg_off) { + // Need to grow segment first. + log.debug(" (need to grow segment first)", .{}); + const spill_size = (last_sect_off + offset_amt) - seg_off; + const seg_offset_amt = mem.alignForwardGeneric(u64, spill_size, self.page_size); + seg.inner.filesize += seg_offset_amt; + seg.inner.vmsize += seg_offset_amt; + + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + seg.inner.segname, + seg.inner.fileoff, + seg.inner.fileoff + seg.inner.filesize, + seg.inner.vmaddr, + seg.inner.vmaddr + seg.inner.vmsize, + }); - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - try trie.finalize(self.base.allocator); + // TODO We should probably nop the expanded by distance, or put 0s. + + // TODO copyRangeAll doesn't automatically extend the file on macOS. 
+ const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const new_filesize = seg_offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; + try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); + + var next: usize = match.seg + 1; + while (next < self.linkedit_segment_cmd_index.? + 1) : (next += 1) { + const next_seg = &self.load_commands.items[next].Segment; + _ = try self.base.file.?.copyRangeAll( + next_seg.inner.fileoff, + self.base.file.?, + next_seg.inner.fileoff + seg_offset_amt, + next_seg.inner.filesize, + ); + next_seg.inner.fileoff += seg_offset_amt; + next_seg.inner.vmaddr += seg_offset_amt; + + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + next_seg.inner.segname, + next_seg.inner.fileoff, + next_seg.inner.fileoff + next_seg.inner.filesize, + next_seg.inner.vmaddr, + next_seg.inner.vmaddr + next_seg.inner.vmsize, + }); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.base.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); + for (next_seg.sections.items) |*moved_sect, moved_sect_id| { + moved_sect.offset += @intCast(u32, seg_offset_amt); + moved_sect.addr += seg_offset_amt; + + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + moved_sect.offset, + moved_sect.offset + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.export_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); + try self.allocateLocalSymbols(.{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), + }, 
@intCast(i64, seg_offset_amt)); + } + } + } + + if (match.sect + 1 >= seg.sections.items.len) break :blk; + + // We have enough space to expand within the segment, so move all sections by + // the required amount and update their header offsets. + const next_sect = seg.sections.items[match.sect + 1]; + const total_size = last_sect_off - next_sect.offset; + _ = try self.base.file.?.copyRangeAll( + next_sect.offset, + self.base.file.?, + next_sect.offset + offset_amt, + total_size, + ); + + var next = match.sect + 1; + while (next < seg.sections.items.len) : (next += 1) { + const moved_sect = &seg.sections.items[next]; + moved_sect.offset += @intCast(u32, offset_amt); + moved_sect.addr += offset_amt; + + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + moved_sect.offset, + moved_sect.offset + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); - if (needed_size > allocated_size) { - dyld_info.export_off = 0; - dyld_info.export_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
+ try self.allocateLocalSymbols(.{ + .seg = match.seg, + .sect = next, + }, @intCast(i64, offset_amt)); + } } - dyld_info.export_size = @intCast(u32, needed_size); - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); +} - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); - self.load_commands_dirty = true; - self.export_info_dirty = false; +fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { + const seg = self.load_commands.items[segment_id].Segment; + assert(start >= seg.inner.fileoff); + var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; + if (start > min_pos) return 0; + for (seg.sections.items) |section| { + if (section.offset <= start) continue; + if (section.offset < min_pos) min_pos = section.offset; + } + return min_pos - start; } -fn writeRebaseInfoTable(self: *MachO) !void { - if (!self.rebase_info_dirty) return; +fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u32 { + const seg = self.load_commands.items[segment_id].Segment; + var max_alignment: u32 = 1; + var next = start_sect_id; + while (next < seg.sections.items.len) : (next += 1) { + const sect = seg.sections.items[next]; + const alignment = try math.powi(u32, 2, sect.@"align"); + max_alignment = math.max(max_alignment, alignment); + } + return max_alignment; +} +fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { const tracy = trace(@src()); defer tracy.end(); - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + var free_list = self.atom_free_lists.get(match).?; + const needs_padding = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?; + const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; + // We use these to indicate our intention to update metadata, placing the new atom, + // and possibly removing a free list node. + // It would be simpler to do it inside the for loop below, but that would cause a + // problem if an error was returned later in the function. So this action + // is actually carried out at the end of the function, when errors are no longer possible. + var atom_placement: ?*Atom = null; + var free_list_removal: ?usize = null; - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); + // First we look for an appropriately sized free list node. + // The list is unordered. We'll just take the first thing that works. + var vaddr = blk: { + var i: usize = 0; + while (i < free_list.items.len) { + const big_atom = free_list.items[i]; + // We now have a pointer to a live atom that has too much capacity. + // Is it enough that we could fit this new atom? 
+ const sym = self.locals.items[big_atom.local_sym_index]; + const capacity = big_atom.capacity(self.*); + const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; + const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity; + const capacity_end_vaddr = sym.n_value + capacity; + const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; + const new_start_vaddr = mem.alignBackwardGeneric(u64, new_start_vaddr_unaligned, alignment); + if (new_start_vaddr < ideal_capacity_end_vaddr) { + // Additional bookkeeping here to notice if this free list node + // should be deleted because the atom that it points to has grown to take up + // more of the extra capacity. + if (!big_atom.freeListEligible(self.*)) { + const bl = free_list.swapRemove(i); + bl.deinit(self.base.allocator); + } else { + i += 1; } + continue; + } + // At this point we know that we will place the new atom here. But the + // remaining question is whether there is still yet enough capacity left + // over for there to still be a free list node. + const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; + const keep_free_list_node = remaining_capacity >= min_text_capacity; - if (block.prev) |prev| { - block = prev; - } else break; + // Set up the metadata to be updated, after errors are no longer possible. 
+ atom_placement = big_atom; + if (!keep_free_list_node) { + free_list_removal = i; } + break :blk new_start_vaddr; + } else if (self.atoms.get(match)) |last| { + const last_symbol = self.locals.items[last.local_sym_index]; + const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; + const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; + const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); + atom_placement = last; + break :blk new_start_vaddr; + } else { + break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); } - } - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + }; - for (self.got_entries.items) |entry, i| { - if (entry.where == .undef) continue; + const expand_section = atom_placement == null or atom_placement.?.next == null; + if (expand_section) { + const needed_size = @intCast(u32, (vaddr + new_atom_size) - sect.addr); + try self.growSection(match, needed_size); + _ = try self.atoms.put(self.base.allocator, match, atom); + sect.size = needed_size; + self.load_commands_dirty = true; + } + const align_pow = @intCast(u32, math.log2(alignment)); + if (sect.@"align" < align_pow) { + sect.@"align" = align_pow; + self.load_commands_dirty = true; + } + atom.size = new_atom_size; + atom.alignment = align_pow; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; } - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const 
segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - for (self.stubs.items) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } + if (atom_placement) |big_atom| { + atom.prev = big_atom; + atom.next = big_atom.next; + big_atom.next = atom; + } else { + atom.prev = null; + atom.next = null; + } + if (free_list_removal) |i| { + _ = free_list.swapRemove(i); } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); + return vaddr; +} - const size = try bind.rebaseInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); +pub fn addExternFn(self: *MachO, name: []const u8) !u32 { + const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); + defer self.base.allocator.free(sym_name); - var stream = std.io.fixedBufferStream(buffer); - try bind.writeRebaseInfo(pointers.items, stream.writer()); + if (self.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{ + .bytes = &self.strtab, + })) |n_strx| { + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; + return resolv.where_index; + } - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); + log.debug("adding new extern function '{s}'", .{sym_name}); + const sym_index = @intCast(u32, self.undefs.items.len); + const n_strx = try self.makeString(sym_name); + try self.undefs.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ + .where = .undef, + .where_index = sym_index, + }); + try 
self.unresolved.putNoClobber(self.base.allocator, sym_index, .stub); - if (needed_size > allocated_size) { - dyld_info.rebase_off = 0; - dyld_info.rebase_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. - } + return sym_index; +} - dyld_info.rebase_size = @intCast(u32, needed_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); +const NextSegmentAddressAndOffset = struct { + address: u64, + offset: u64, +}; - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - self.load_commands_dirty = true; - self.rebase_info_dirty = false; +fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { + var prev_segment_idx: ?usize = null; // We use optional here for safety. + for (self.load_commands.items) |cmd, i| { + if (cmd == .Segment) { + prev_segment_idx = i; + } + } + const prev_segment = self.load_commands.items[prev_segment_idx.?].Segment; + const address = prev_segment.inner.vmaddr + prev_segment.inner.vmsize; + const offset = prev_segment.inner.fileoff + prev_segment.inner.filesize; + return .{ + .address = address, + .offset = offset, + }; } -fn writeBindInfoTable(self: *MachO) !void { - if (!self.binding_info_dirty) return; +fn updateSectionOrdinals(self: *MachO) !void { + if (!self.sections_order_dirty) return; const tracy = trace(@src()); defer tracy.end(); - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); + var ordinal_remap = std.AutoHashMap(u8, u8).init(self.base.allocator); + defer ordinal_remap.deinit(); + var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const 
segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + var new_ordinal: u8 = 0; + for (self.load_commands.items) |lc, lc_id| { + if (lc != .Segment) break; - for (self.got_entries.items) |entry, i| { - if (entry.where == .local) continue; - - const sym = self.undefs.items[entry.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); + for (lc.Segment.sections.items) |_, sect_id| { + const match = MatchingSection{ + .seg = @intCast(u16, lc_id), + .sect = @intCast(u16, sect_id), + }; + const old_ordinal = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + new_ordinal += 1; + try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); + try ordinals.putNoClobber(self.base.allocator, match, {}); } } + for (self.locals.items) |*sym| { + if (sym.n_sect == 0) continue; + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } + for (self.globals.items) |*sym| { + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } + + self.section_ordinals.deinit(self.base.allocator); + self.section_ordinals = ordinals; +} + +fn writeDyldInfoData(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var rebase_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer rebase_pointers.deinit(); + var bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer bind_pointers.deinit(); + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer lazy_bind_pointers.deinit(); + { - var it = self.blocks.iterator(); + var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; + var atom: *Atom = entry.value_ptr.*; if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable const seg = self.load_commands.items[match.seg].Segment; while (true) { - const sym = self.locals.items[block.local_sym_index]; + const sym = self.locals.items[atom.local_sym_index]; const base_offset = sym.n_value - seg.inner.vmaddr; - for (block.bindings.items) |binding| { + for (atom.rebases.items) |offset| { + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = match.seg, + }); + } + + for (atom.bindings.items) |binding| { + const bind_sym = self.undefs.items[binding.local_sym_index]; + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), + .name = self.getString(bind_sym.n_strx), + }); + } + + for (atom.lazy_bindings.items) |binding| { const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ + try lazy_bind_pointers.append(.{ .offset = binding.offset + base_offset, .segment_id = match.seg, .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), @@ -5529,94 +4369,105 @@ fn writeBindInfoTable(self: *MachO) !void { }); } - if (block.prev) |prev| { - block = prev; + if (atom.prev) |prev| { + atom = prev; } else break; } } } - const size = try bind.bindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeBindInfo(pointers.items, stream.writer()); + var trie: Trie = .{}; + defer trie.deinit(self.base.allocator); - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.bind_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); + { + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
+ log.debug("generating export trie", .{}); + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const base_address = text_segment.inner.vmaddr; + + for (self.globals.items) |sym| { + const sym_name = self.getString(sym.n_strx); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + + try trie.put(self.base.allocator, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } - if (needed_size > allocated_size) { - dyld_info.bind_off = 0; - dyld_info.bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. + try trie.finalize(self.base.allocator); } - dyld_info.bind_size = @intCast(u32, needed_size); - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); - self.load_commands_dirty = true; - self.binding_info_dirty = false; -} - -fn writeLazyBindInfoTable(self: *MachO) !void { - if (!self.lazy_binding_info_dirty) return; + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + const bind_size = try bind.bindInfoSize(bind_pointers.items); + const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + const export_size = trie.size; - const tracy = trace(@src()); - defer tracy.end(); + dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); + dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, rebase_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.rebase_size; - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); + 
dyld_info.bind_off = dyld_info.rebase_off + dyld_info.rebase_size; + dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, bind_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.bind_size; - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - - for (self.stubs.items) |import_id, i| { - const sym = self.undefs.items[import_id]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } + dyld_info.lazy_bind_off = dyld_info.bind_off + dyld_info.bind_size; + dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, lazy_bind_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.lazy_bind_size; - const size = try bind.lazyBindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); + dyld_info.export_off = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; + dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, export_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.export_size; + + const needed_size = dyld_info.rebase_size + dyld_info.bind_size + dyld_info.lazy_bind_size + dyld_info.export_size; + var buffer = try self.base.allocator.alloc(u8, needed_size); defer self.base.allocator.free(buffer); + mem.set(u8, buffer, 0); var stream = std.io.fixedBufferStream(buffer); - try bind.writeLazyBindInfo(pointers.items, stream.writer()); + const writer = stream.writer(); - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = 
self.allocatedSizeLinkedit(dyld_info.lazy_bind_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); + try bind.writeRebaseInfo(rebase_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.rebase_size) - @intCast(i64, rebase_size)); - if (needed_size > allocated_size) { - dyld_info.lazy_bind_off = 0; - dyld_info.lazy_bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. - } + try bind.writeBindInfo(bind_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.bind_size) - @intCast(i64, bind_size)); - dyld_info.lazy_bind_size = @intCast(u32, needed_size); - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); + try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.lazy_bind_size) - @intCast(i64, lazy_bind_size)); - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); + _ = try trie.write(writer); + + log.debug("writing dyld info from 0x{x} to 0x{x}", .{ + dyld_info.rebase_off, + dyld_info.rebase_off + needed_size, + }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); + try self.populateLazyBindOffsetsInStubHelper( + buffer[dyld_info.rebase_size + dyld_info.bind_size ..][0..dyld_info.lazy_bind_size], + ); self.load_commands_dirty = true; - self.lazy_binding_info_dirty = false; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - if (self.stubs.items.len == 0) return; + const last_atom = self.atoms.get(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }) orelse return; + if (last_atom == self.stub_helper_preamble_atom.?) 
return; + + // Because we insert lazy binding opcodes in reverse order (from last to the first atom), + // we need reverse the order of atom traversal here as well. + // TODO figure out a less error prone mechanims for this! + var atom = last_atom; + while (atom.prev) |prev| { + atom = prev; + } + atom = atom.next.?; var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); @@ -5661,50 +4512,245 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => {}, } } - assert(self.stubs.items.len <= offsets.items.len); - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const off: u4 = switch (self.base.options.target.cpu.arch) { + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.stub_helper_section_index.?]; + const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; - for (self.stubs.items) |_, index| { - const placeholder_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size + off; - mem.writeIntLittle(u32, &buf, offsets.items[index]); - try self.base.file.?.pwriteAll(&buf, placeholder_off); + _ = offsets.pop(); + while (offsets.popOrNull()) |bind_offset| { + const sym = self.locals.items[atom.local_sym_index]; + const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; + mem.writeIntLittle(u32, &buf, bind_offset); + log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ + bind_offset, + self.getString(sym.n_strx), + file_offset, + }); + try self.base.file.?.pwriteAll(&buf, file_offset); + + if (atom.next) |next| { + atom = next; + } else break; } } -fn writeStringTable(self: *MachO) !void { - if (!self.strtab_dirty) return; +fn writeDices(self: *MachO) !void { + if (!self.has_dices) return; + + const tracy = trace(@src()); + defer tracy.end(); + + var buf = std.ArrayList(u8).init(self.base.allocator); + defer buf.deinit(); + + var atom: *Atom = self.atoms.get(.{ + .seg = self.text_segment_cmd_index orelse return, + .sect = self.text_section_index orelse return, + }) orelse return; + + while (atom.prev) |prev| { + atom = prev; + } + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_sect = text_seg.sections.items[self.text_section_index.?]; + + while (true) { + if (atom.dices.items.len > 0) { + const sym = self.locals.items[atom.local_sym_index]; + const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); + + try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (atom.dices.items) |dice| { + const rebased_dice = macho.data_in_code_entry{ + .offset = base_off + dice.offset, + .length = dice.length, + .kind = dice.kind, + }; + buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + } + } + + if (atom.next) |next| { + atom = next; + } else break; + } + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const 
dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const needed_size = @intCast(u32, buf.items.len); + dice_cmd.dataoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dice_cmd.datasize = needed_size; + seg.inner.filesize += needed_size; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ + dice_cmd.dataoff, + dice_cmd.dataoff + dice_cmd.datasize, + }); + + try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); + self.load_commands_dirty = true; +} + +fn writeSymbolTable(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64)); + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + + var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer locals.deinit(); + try locals.appendSlice(self.locals.items); + + if (self.has_stabs) { + for (self.objects.items) |object| { + if (object.debug_info == null) continue; + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); + + for (object.atoms.items) |atom| { + if (atom.stab) |stab| { + const nlists = try stab.asNlists(atom.local_sym_index, self); + defer self.base.allocator.free(nlists); + try 
locals.appendSlice(nlists); + } else { + for (atom.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } + } + } - if (needed_size > allocated_size or self.strtab_needs_relocation) { - symtab.strsize = 0; - symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, symtab.symoff)); - self.strtab_needs_relocation = false; + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } } - symtab.strsize = @intCast(u32, needed_size); - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + const nlocals = locals.items.len; + const nexports = self.globals.items.len; + const nundefs = self.undefs.items.len; + + const locals_off = symtab.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); + + symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + 
// Update dynamic symbol table. + const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym = @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); + + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const nstubs = @intCast(u32, self.stubs_map.keys().len); + const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); + + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); + } + + got.reserved1 = nstubs; + for (self.got_entries_map.keys()) |key| { + switch (key.where) { + .undef => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); + }, + .local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + } + } + + 
la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); + } + + try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); self.load_commands_dirty = true; - self.strtab_dirty = false; } -fn writeStringTableZld(self: *MachO) !void { +fn writeStringTable(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); @@ -5719,55 +4765,81 @@ fn writeStringTableZld(self: *MachO) !void { // This is potentially the last section, so we need to pad it out. try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); } + self.load_commands_dirty = true; } -fn updateLinkeditSegmentSizes(self: *MachO) !void { - if (!self.load_commands_dirty) return; +fn writeLinkeditSegment(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.filesize = 0; + + try self.writeDyldInfoData(); + try self.writeDices(); + try self.writeSymbolTable(); + try self.writeStringTable(); + + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); +} +fn writeCodeSignaturePadding(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - // Now, we are in position to update __LINKEDIT segment sizes. - // TODO Add checkpointing so that we don't have to do this every single time. 
const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - var final_offset = linkedit_segment.inner.fileoff; - - if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - final_offset = std.math.max(final_offset, dyld_info.rebase_off + dyld_info.rebase_size); - final_offset = std.math.max(final_offset, dyld_info.bind_off + dyld_info.bind_size); - final_offset = std.math.max(final_offset, dyld_info.weak_bind_off + dyld_info.weak_bind_size); - final_offset = std.math.max(final_offset, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size); - final_offset = std.math.max(final_offset, dyld_info.export_off + dyld_info.export_size); - } - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - final_offset = std.math.max(final_offset, fstart.dataoff + fstart.datasize); - } - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - final_offset = std.math.max(final_offset, dic.dataoff + dic.datasize); - } - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - final_offset = std.math.max(final_offset, dysymtab.indirectsymoff + nindirectsize); - // TODO Handle more dynamic symbol table sections. 
- } - if (self.symtab_cmd_index) |idx| { - const symtab = self.load_commands.items[idx].Symtab; - const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); - final_offset = std.math.max(final_offset, symtab.symoff + symsize); - final_offset = std.math.max(final_offset, symtab.stroff + symtab.strsize); - } - - const filesize = final_offset - linkedit_segment.inner.fileoff; - linkedit_segment.inner.filesize = filesize; - linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, filesize, self.page_size); - try self.base.file.?.pwriteAll(&[_]u8{0}, final_offset); + const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + const fileoff = linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize; + const needed_size = CodeSignature.calcCodeSignaturePaddingSize( + self.base.options.emit.?.sub_path, + fileoff, + self.page_size, + ); + code_sig_cmd.dataoff = @intCast(u32, fileoff); + code_sig_cmd.datasize = needed_size; + + // Advance size of __LINKEDIT segment + linkedit_segment.inner.filesize += needed_size; + if (linkedit_segment.inner.vmsize < linkedit_segment.inner.filesize) { + linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, linkedit_segment.inner.filesize, self.page_size); + } + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. 
+ try self.base.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); self.load_commands_dirty = true; } +fn writeCodeSignature(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + + var code_sig: CodeSignature = .{}; + defer code_sig.deinit(self.base.allocator); + + try code_sig.calcAdhocSignature( + self.base.allocator, + self.base.file.?, + self.base.options.emit.?.sub_path, + text_segment.inner, + code_sig_cmd, + self.base.options.output_mode, + self.page_size, + ); + + var buffer = try self.base.allocator.alloc(u8, code_sig.size()); + defer self.base.allocator.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + try code_sig.write(stream.writer()); + + log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); + + try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); +} + /// Writes all load commands and section headers. 
fn writeLoadCommands(self: *MachO) !void { if (!self.load_commands_dirty) return; @@ -5844,6 +4916,13 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { std.math.maxInt(@TypeOf(actual_size)); } +pub fn makeStaticString(bytes: []const u8) [16]u8 { + var buf = [_]u8{0} ** 16; + assert(bytes.len <= buf.len); + mem.copy(u8, &buf, bytes); + return buf; +} + pub fn makeString(self: *MachO, string: []const u8) !u32 { const gop = try self.strtab_dir.getOrPutContextAdapted(self.base.allocator, @as([]const u8, string), StringIndexAdapter{ .bytes = &self.strtab, diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig @@ -0,0 +1,1324 @@ +const Atom = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const aarch64 = @import("../../codegen/aarch64.zig"); +const assert = std.debug.assert; +const commands = @import("commands.zig"); +const log = std.log.scoped(.text_block); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const StringIndexAdapter = std.hash_map.StringIndexAdapter; + +/// Each decl always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. 
+local_sym_index: u32, + +/// List of symbol aliases pointing to the same atom via different nlists +aliases: std.ArrayListUnmanaged(u32) = .{}, + +/// List of symbols contained within this atom +contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// Code (may be non-relocated) this atom represents +code: std.ArrayListUnmanaged(u8) = .{}, + +/// Size and alignment of this atom +/// Unlike in Elf, we need to store the size of this symbol as part of +/// the atom since macho.nlist_64 lacks this information. +size: u64, + +/// Alignment of this atom as a power of 2. +/// For instance, alignment of 0 should be read as 2^0 = 1 byte aligned. +alignment: u32, + +/// List of relocations belonging to this atom. +relocs: std.ArrayListUnmanaged(Relocation) = .{}, + +/// List of offsets contained within this atom that need rebasing by the dynamic +/// loader in presence of ASLR. +rebases: std.ArrayListUnmanaged(u64) = .{}, + +/// List of offsets contained within this atom that will be dynamically bound +/// by the dynamic loader and contain pointers to resolved (at load time) extern +/// symbols (aka proxies aka imports) +bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of lazy bindings +lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of data-in-code entries. This is currently specific to x86_64 only. +dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +/// Stab entry for this atom. This is currently specific to a binary created +/// by linking object files in a traditional sense - in incremental sense, we +/// bypass stabs altogether to produce dSYM bundle directly with fully relocated +/// DWARF sections. +stab: ?Stab = null, + +/// Points to the previous and next neighbours +next: ?*Atom, +prev: ?*Atom, + +/// Previous/next linked list pointers. +/// This is the linked list node for this Decl's corresponding .debug_info tag. 
+dbg_info_prev: ?*Atom, +dbg_info_next: ?*Atom, +/// Offset into .debug_info pointing to the tag for this Decl. +dbg_info_off: u32, +/// Size of the .debug_info tag for this Decl, not including padding. +dbg_info_len: u32, + +dirty: bool = true, + +pub const SymbolAtOffset = struct { + local_sym_index: u32, + offset: u64, + stab: ?Stab = null, + + pub fn format( + self: SymbolAtOffset, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset }); + if (self.stab) |stab| { + try std.fmt.format(writer, ", .stab = {any}", .{stab}); + } + try std.fmt.format(writer, " }}", .{}); + } +}; + +pub const Stab = union(enum) { + function: u64, + static, + global, + + pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); + defer nlists.deinit(); + + const sym = macho_file.locals.items[local_sym_index]; + switch (stab) { + .function => |size| { + try nlists.ensureUnusedCapacity(4); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = size, + }); + }, + .global => { + try nlists.append(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_GSYM, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + }, + .static => { + try nlists.append(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + 
.n_value = sym.n_value, + }); + }, + } + + return nlists.toOwnedSlice(); + } +}; + +pub const Relocation = struct { + /// Offset within the atom's code buffer. + /// Note relocation size can be inferred by relocation's kind. + offset: u32, + + where: enum { + local, + undef, + }, + + where_index: u32, + + payload: union(enum) { + unsigned: Unsigned, + branch: Branch, + page: Page, + page_off: PageOff, + pointer_to_got: PointerToGot, + signed: Signed, + load: Load, + }, + + const ResolveArgs = struct { + block: *Atom, + offset: u32, + source_addr: u64, + target_addr: u64, + macho_file: *MachO, + }; + + pub const Unsigned = struct { + subtractor: ?u32, + + /// Addend embedded directly in the relocation slot + addend: i64, + + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub fn resolve(self: Unsigned, args: ResolveArgs) !void { + const result = blk: { + if (self.subtractor) |subtractor| { + const sym = args.macho_file.locals.items[subtractor]; + break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; + } else { + break :blk @intCast(i64, args.target_addr) + self.addend; + } + }; + + if (self.is_64bit) { + mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } + } + + pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Unsigned {{ ", .{}); + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Branch = struct { 
+ arch: Arch, + + pub fn resolve(self: Branch, args: ResolveArgs) !void { + switch (self.arch) { + .aarch64 => { + const displacement = math.cast( + i28, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + ) catch |err| switch (err) { + error.Overflow => { + log.err("jump too big to encode as i28 displacement value", .{}); + log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ + args.target_addr, + args.source_addr, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + }); + log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); + return error.TODOImplementBranchIslands; + }, + }; + const code = args.block.code.items[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .x86_64 => { + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + }, + else => return error.UnsupportedCpuArchitecture, + } + } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "Branch {{}}", .{}); + } + }; + + pub const Page = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + + pub fn resolve(self: Page, args: ResolveArgs) !void { + const target_addr = args.target_addr + self.addend; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + const code 
= args.block.code.items[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, code, inst.toU32()); + } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Page {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PageOff = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + op_kind: ?OpKind = null, + + pub const OpKind = enum { + arithmetic, + load, + }; + + pub fn resolve(self: PageOff, args: ResolveArgs) !void { + const code = args.block.code.items[args.offset..][0..4]; + + switch (self.kind) { + .page => { + const target_addr = args.target_addr + self.addend; + const narrowed = @truncate(u12, target_addr); + + const op_kind = self.op_kind orelse unreachable; + var inst: aarch64.Instruction = blk: { + switch (op_kind) { + .arithmetic => { + break :blk .{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + }, + .load => { + break :blk .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + }, + } + }; + + if (op_kind == .arithmetic) { + inst.add_subtract_immediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.load_store_register.size == 0) { + if (inst.load_store_register.v == 1) { + // 
128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. + break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.load_store_register.offset = offset; + } + + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .got => { + const narrowed = @truncate(u12, args.target_addr); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const offset = try math.divExact(u12, narrowed, 8); + inst.load_store_register.offset = offset; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .tlvp => { + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = @truncate(u1, inst.size), + }; + } + }; + const narrowed = @truncate(u12, args.target_addr); + var inst = aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = narrowed, + .sh = 0, + .s = 0, + .op = 0, + .sf = reg_info.size, + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + } + } + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PageOff {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try 
std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PointerToGot = struct { + pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { + const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result)); + } + + pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "PointerToGot {{}}", .{}); + } + }; + + pub const Signed = struct { + addend: i64, + correction: u3, + + pub fn resolve(self: Signed, args: ResolveArgs) !void { + const target_addr = @intCast(i64, args.target_addr) + self.addend; + const displacement = try math.cast( + i32, + target_addr - @intCast(i64, args.source_addr + self.correction + 4), + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Signed {{ ", .{}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Load = struct { + kind: enum { + got, + tlvp, + }, + addend: i32 = 0, + + pub fn resolve(self: Load, args: ResolveArgs) !void { + if (self.kind == .tlvp) { + // We need to rewrite the opcode from movq to leaq. 
+ args.block.code.items[args.offset - 2] = 0x8d; + } + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Load {{ ", .{}); + try std.fmt.format(writer, "{s}, ", .{self.kind}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub fn resolve(self: Relocation, args: ResolveArgs) !void { + switch (self.payload) { + .unsigned => |unsigned| try unsigned.resolve(args), + .branch => |branch| try branch.resolve(args), + .page => |page| try page.resolve(args), + .page_off => |page_off| try page_off.resolve(args), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), + .signed => |signed| try signed.resolve(args), + .load => |load| try load.resolve(args), + } + } + + pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Relocation {{ ", .{}); + try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); + try std.fmt.format(writer, ".where = {}, ", .{self.where}); + try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index}); + + switch (self.payload) { + .unsigned => |unsigned| try unsigned.format(fmt, options, writer), + .branch => |branch| try branch.format(fmt, options, writer), + .page => |page| try page.format(fmt, options, writer), + .page_off => |page_off| try page_off.format(fmt, options, writer), + .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), + .signed => |signed| try signed.format(fmt, options, writer), + .load => |load| try load.format(fmt, options, writer), + } + + try 
std.fmt.format(writer, "}}", .{}); + } +}; + +pub const empty = Atom{ + .local_sym_index = 0, + .size = 0, + .alignment = 0, + .prev = null, + .next = null, + .dbg_info_prev = null, + .dbg_info_next = null, + .dbg_info_off = undefined, + .dbg_info_len = undefined, +}; + +pub fn deinit(self: *Atom, allocator: *Allocator) void { + self.dices.deinit(allocator); + self.lazy_bindings.deinit(allocator); + self.bindings.deinit(allocator); + self.rebases.deinit(allocator); + self.relocs.deinit(allocator); + self.contained.deinit(allocator); + self.aliases.deinit(allocator); + self.code.deinit(allocator); +} + +pub fn clearRetainingCapacity(self: *Atom) void { + self.dices.clearRetainingCapacity(); + self.lazy_bindings.clearRetainingCapacity(); + self.bindings.clearRetainingCapacity(); + self.rebases.clearRetainingCapacity(); + self.relocs.clearRetainingCapacity(); + self.contained.clearRetainingCapacity(); + self.aliases.clearRetainingCapacity(); + self.code.clearRetainingCapacity(); +} + +/// Returns how much room there is to grow in virtual address space. +/// File offset relocation happens transparently, so it is not included in +/// this calculation. +pub fn capacity(self: Atom, macho_file: MachO) u64 { + const self_sym = macho_file.locals.items[self.local_sym_index]; + if (self.next) |next| { + const next_sym = macho_file.locals.items[next.local_sym_index]; + return next_sym.n_value - self_sym.n_value; + } else { + // We are the last atom. + // The capacity is limited only by virtual address space. + return std.math.maxInt(u64) - self_sym.n_value; + } +} + +pub fn freeListEligible(self: Atom, macho_file: MachO) bool { + // No need to keep a free list node for the last atom. 
+ const next = self.next orelse return false; + const self_sym = macho_file.locals.items[self.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; + const cap = next_sym.n_value - self_sym.n_value; + const ideal_cap = MachO.padToIdeal(self.size); + if (cap <= ideal_cap) return false; + const surplus = cap - ideal_cap; + return surplus >= MachO.min_text_capacity; +} + +const RelocContext = struct { + base_addr: u64 = 0, + base_offset: u64 = 0, + allocator: *Allocator, + object: *Object, + macho_file: *MachO, + parsed_atoms: *Object.ParsedAtoms, +}; + +fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { + var parsed_rel = Relocation{ + .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset), + .where = undefined, + .where_index = undefined, + .payload = undefined, + }; + + if (rel.r_extern == 0) { + const sect_id = @intCast(u16, rel.r_symbolnum - 1); + + const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: { + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const sect = seg.sections.items[sect_id]; + const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; + const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); + const sym_name = try std.fmt.allocPrint(context.allocator, "l_{s}_{s}_{s}", .{ + context.object.name, + commands.segmentName(sect), + commands.sectionName(sect), + }); + defer context.allocator.free(sym_name); + + try context.macho_file.locals.append(context.allocator, .{ + .n_strx = try context.macho_file.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? 
+ 1), + .n_desc = 0, + .n_value = 0, + }); + try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + + parsed_rel.where = .local; + parsed_rel.where_index = local_sym_index; + } else { + const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym_name = context.object.getString(sym.n_strx); + + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { + const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + parsed_rel.where = .local; + parsed_rel.where_index = where_index; + } else { + const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{ + .bytes = &context.macho_file.strtab, + }) orelse unreachable; + const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; + switch (resolv.where) { + .global => { + parsed_rel.where = .local; + parsed_rel.where_index = resolv.local_sym_index; + }, + .undef => { + parsed_rel.where = .undef; + parsed_rel.where_index = resolv.where_index; + }, + } + } + } + + return parsed_rel; +} + +pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); + var it = RelocIterator{ + .buffer = filtered_relocs, + }; + + var addend: u32 = 0; + var subtractor: ?u32 = null; + const arch = context.macho_file.base.options.target.cpu.arch; + + while (it.next()) |rel| { + if (isAddend(rel, arch)) { + // Addend is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(addend == 0); // Oh no, addend was not reset! + addend = rel.r_symbolnum; + + // Verify ADDEND is followed by a PAGE21 or PAGEOFF12. 
+ const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + continue; + } + + if (isSubtractor(rel, arch)) { + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(subtractor == null); // Oh no, subtractor was not reset! + assert(rel.r_extern == 1); + const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym_name = context.object.getString(sym.n_strx); + + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { + const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + subtractor = where_index; + } else { + const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{ + .bytes = &context.macho_file.strtab, + }) orelse unreachable; + const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; + assert(resolv.where == .global); + subtractor = resolv.local_sym_index; + } + + // Verify SUBTRACTOR is followed by UNSIGNED. 
+ switch (arch) { + .aarch64 => { + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + .x86_64 => { + const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + else => unreachable, + } + continue; + } + + var parsed_rel = try initRelocFromObject(rel, context); + + switch (arch) { + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + switch (rel_type) { + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + .ARM64_RELOC_BRANCH26 => { + self.parseBranch(rel, &parsed_rel, context); + }, + .ARM64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, context); + subtractor = null; + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + self.parsePage(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGE21) + addend = 0; + }, + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + self.parsePageOff(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGEOFF12) + addend = 0; + }, + .ARM64_RELOC_POINTER_TO_GOT => { + self.parsePointerToGot(rel, &parsed_rel); + }, + } + }, + .x86_64 => { + switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_SUBTRACTOR => unreachable, + .X86_64_RELOC_BRANCH => { + self.parseBranch(rel, &parsed_rel, context); + }, + .X86_64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, context); + subtractor = null; + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + self.parseSigned(rel, &parsed_rel, 
context); + }, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + self.parseLoad(rel, &parsed_rel); + }, + } + }, + else => unreachable, + } + + try self.relocs.append(context.allocator, parsed_rel); + + const is_via_got = switch (parsed_rel.payload) { + .pointer_to_got => true, + .load => |load| load.kind == .got, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + else => false, + }; + + if (is_via_got) blk: { + const key = MachO.GotIndirectionKey{ + .where = switch (parsed_rel.where) { + .local => .local, + .undef => .undef, + }, + .where_index = parsed_rel.where_index, + }; + if (context.macho_file.got_entries_map.contains(key)) break :blk; + + const atom = try context.macho_file.createGotAtom(key); + try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom); + const match = MachO.MatchingSection{ + .seg = context.macho_file.data_const_segment_cmd_index.?, + .sect = context.macho_file.got_section_index.?, + }; + + if (context.parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try context.parsed_atoms.putNoClobber(match, atom); + } + } else if (parsed_rel.payload == .unsigned) { + switch (parsed_rel.where) { + .undef => { + try self.bindings.append(context.allocator, .{ + .local_sym_index = parsed_rel.where_index, + .offset = parsed_rel.offset, + }); + }, + .local => { + const source_sym = context.macho_file.locals.items[self.local_sym_index]; + const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; + const seg = context.macho_file.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + const should_rebase = rebase: { + if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable 
should be enough here. + const is_right_segment = blk: { + if (context.macho_file.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + if (context.macho_file.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + break :blk false; + }; + + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; + } + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(context.allocator, parsed_rel.offset); + } + }, + } + } else if (parsed_rel.payload == .branch) blk: { + if (parsed_rel.where != .undef) break :blk; + if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; + + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); + const laptr_atom = try context.macho_file.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + parsed_rel.where_index, + ); + const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); + try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); + // TODO clean this up! 
+ if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + })) |last| { + last.*.next = stub_helper_atom; + stub_helper_atom.prev = last.*; + last.* = stub_helper_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }, stub_helper_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + })) |last| { + last.*.next = stub_atom; + stub_atom.prev = last.*; + last.* = stub_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }, stub_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + })) |last| { + last.*.next = laptr_atom; + laptr_atom.prev = last.*; + last.* = laptr_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }, laptr_atom); + } + } + } +} + +fn isAddend(rel: macho.relocation_info, arch: Arch) bool { + if (arch != .aarch64) return false; + return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; +} + +fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { + return switch (arch) { + .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, + .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, + else => unreachable, + }; +} + +fn parseUnsigned( + self: Atom, + rel: macho.relocation_info, + out: *Relocation, + subtractor: ?u32, + context: RelocContext, +) void { + assert(rel.r_pcrel == 0); + + const is_64bit: bool = 
switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + + var addend: i64 = if (is_64bit) + mem.readIntLittle(i64, self.code.items[out.offset..][0..8]) + else + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); + + if (rel.r_extern == 0) { + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend -= @intCast(i64, target_sect_base_addr); + } + + out.payload = .{ + .unsigned = .{ + .subtractor = subtractor, + .is_64bit = is_64bit, + .addend = addend, + }, + }; +} + +fn parseBranch(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .branch = .{ + .arch = context.macho_file.base.options.target.cpu.arch, + }, + }; +} + +fn parsePage(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .page = .{ + .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +fn parsePageOff(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + const op_kind: ?Relocation.PageOff.OpKind = blk: { + if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4])) + .arithmetic + else + .load; + break :blk op_kind; + }; + + out.payload = .{ + .page_off = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .page, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, + 
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, + else => unreachable, + }, + .addend = addend, + .op_kind = op_kind, + }, + }; +} + +fn parsePointerToGot(self: Atom, rel: macho.relocation_info, out: *Relocation) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .pointer_to_got = .{}, + }; +} + +fn parseSigned(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const correction: u3 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr); + } + + out.payload = .{ + .signed = .{ + .correction = correction, + .addend = addend, + }, + }; +} + +fn parseLoad(self: Atom, rel: macho.relocation_info, out: *Relocation) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + else + 0; + + out.payload = .{ + .load = .{ + .kind = switch (rel_type) { + .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.relocs.items) |rel| { + 
log.debug("relocating {}", .{rel}); + + const source_addr = blk: { + const sym = macho_file.locals.items[self.local_sym_index]; + break :blk sym.n_value + rel.offset; + }; + const target_addr = blk: { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { + const atom = macho_file.got_entries_map.get(.{ + .where = switch (rel.where) { + .local => .local, + .undef => .undef, + }, + .where_index = rel.where_index, + }) orelse { + const sym = switch (rel.where) { + .local => macho_file.locals.items[rel.where_index], + .undef => macho_file.undefs.items[rel.where_index], + }; + log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk macho_file.locals.items[atom.local_sym_index].n_value; + } + + switch (rel.where) { + .local => { + const sym = macho_file.locals.items[rel.where_index]; + const is_tlv = is_tlv: { + const source_sym = macho_file.locals.items[self.local_sym_index]; + const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; + const seg = macho_file.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_tlv) { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; + const base_address = inner: { + if (macho_file.tlv_data_section_index) |i| { + 
break :inner seg.sections.items[i].addr; + } else if (macho_file.tlv_bss_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :blk sym.n_value - base_address; + } + + break :blk sym.n_value; + }, + .undef => { + const atom = macho_file.stubs_map.get(rel.where_index) orelse { + // TODO this is required for incremental when we don't have every symbol + // resolved when creating relocations. In this case, we will insert a branch + // reloc to an undef symbol which may happen to be defined within the binary. + // Then, the undef we point at will be a null symbol (free symbol) which we + // should remove/repurpose. To circumvent this (for now), we check if the symbol + // we point to is garbage, and if so we fall back to symbol resolver to find by name. + const n_strx = macho_file.undefs.items[rel.where_index].n_strx; + if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: { + if (resolv.where != .global) break :inner; + break :blk macho_file.globals.items[resolv.where_index].n_value; + } + + // TODO verify in TextBlock that the symbol is indeed dynamically bound. + break :blk 0; // Dynamically bound by dyld. 
+ }; + + break :blk macho_file.locals.items[atom.local_sym_index].n_value; + }, + } + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); + + try rel.resolve(.{ + .block = self, + .offset = rel.offset, + .source_addr = source_addr, + .target_addr = target_addr, + .macho_file = macho_file, + }); + } +} + +pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "TextBlock {{ ", .{}); + try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); + try std.fmt.format(writer, ".aliases = {any}, ", .{self.aliases.items}); + try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items}); + try std.fmt.format(writer, ".code = {*}, ", .{self.code.items}); + try std.fmt.format(writer, ".size = {d}, ", .{self.size}); + try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment}); + try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items}); + try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items}); + try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items}); + try std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items}); + if (self.stab) |stab| { + try std.fmt.format(writer, ".stab = {any}, ", .{stab}); + } + try std.fmt.format(writer, "}}", .{}); +} + +const RelocIterator = struct { + buffer: []const macho.relocation_info, + index: i32 = -1, + + pub fn next(self: *RelocIterator) ?macho.relocation_info { + self.index += 1; + if (self.index < self.buffer.len) { + return self.buffer[@intCast(u32, self.index)]; + } + return null; + } + + pub fn peek(self: RelocIterator) macho.relocation_info { + assert(self.index + 1 < self.buffer.len); + return self.buffer[@intCast(u32, self.index + 1)]; + } +}; + +fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { + const Predicate 
= struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig @@ -5,25 +5,25 @@ const assert = std.debug.assert; const fs = std.fs; const log = std.log.scoped(.dsym); const macho = std.macho; +const math = std.math; const mem = std.mem; const DW = std.dwarf; const leb = std.leb; const Allocator = mem.Allocator; const build_options = @import("build_options"); +const commands = @import("commands.zig"); const trace = @import("../../tracy.zig").trace; +const LoadCommand = commands.LoadCommand; const Module = @import("../../Module.zig"); const Type = @import("../../type.zig").Type; const link = @import("../../link.zig"); const MachO = @import("../MachO.zig"); -const SrcFn = MachO.SrcFn; const TextBlock = MachO.TextBlock; -const padToIdeal = MachO.padToIdeal; - -const commands = @import("commands.zig"); -const emptyHeader = commands.emptyHeader; -const LoadCommand = commands.LoadCommand; const SegmentCommand = commands.SegmentCommand; +const SrcFn = MachO.SrcFn; +const makeStaticString = MachO.makeStaticString; +const padToIdeal = MachO.padToIdeal; const page_size: u16 = 0x1000; @@ -188,105 +188,84 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); try self.load_commands.append(allocator, .{ - .Segment = SegmentCommand.empty("__DWARF", .{ - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = off, - .filesize = 
needed_size, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = off, + .filesize = needed_size, + }, + }, }); self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_str_section_index = @intCast(u16, dwarf_segment.sections.items.len); assert(self.debug_string_table.items.len == 0); - - try dwarf_segment.addSection(allocator, "__debug_str", .{ - .addr = dwarf_segment.inner.vmaddr, - .size = @intCast(u32, self.debug_string_table.items.len), - .offset = @intCast(u32, dwarf_segment.inner.fileoff), - .@"align" = 1, - }); - self.load_commands_dirty = true; + self.debug_str_section_index = try self.allocateSection( + "__debug_str", + @intCast(u32, self.debug_string_table.items.len), + 0, + ); self.debug_string_table_dirty = true; } if (self.debug_info_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_info_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - const file_size_hint = 200; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_info", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_info_section_index = try self.allocateSection("__debug_info", 200, 0); self.debug_info_header_dirty = true; } if (self.debug_abbrev_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_abbrev_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - 
const file_size_hint = 128; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_abbrev", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_abbrev_section_index = try self.allocateSection("__debug_abbrev", 128, 0); self.debug_abbrev_section_dirty = true; } if (self.debug_aranges_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_aranges_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - const file_size_hint = 160; - const p_align = 16; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_aranges", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_aranges_section_index = try self.allocateSection("__debug_aranges", 160, 4); self.debug_aranges_section_dirty = true; } if (self.debug_line_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_line_section_index = @intCast(u16, dwarf_segment.sections.items.len); + self.debug_line_section_index = try self.allocateSection("__debug_line", 250, 0); + self.debug_line_header_dirty = true; + } +} - const file_size_hint = 250; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); +fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: 
u64, alignment: u16) !u16 { + const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + var sect = macho.section_64{ + .sectname = makeStaticString(sectname), + .segname = seg.inner.segname, + .size = @intCast(u32, size), + .@"align" = alignment, + }; + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const off = seg.findFreeSpace(size, alignment_pow_2, null); - log.debug("found __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand - try dwarf_segment.addSection(allocator, "__debug_line", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; - self.debug_line_header_dirty = true; - } + log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ + commands.segmentName(sect), + commands.sectionName(sect), + off, + off + size, + }); + + sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.offset = @intCast(u32, off); + + const index = @intCast(u16, seg.sections.items.len); + try seg.sections.append(self.base.base.allocator, sect); + seg.inner.cmdsize += @sizeOf(macho.section_64); + seg.inner.nsects += 1; + + // TODO + // const match = MatchingSection{ + // .seg = segment_id, + // .sect = index, + // }; + // _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + + self.load_commands_dirty = true; + + return index; } pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Options) !void { @@ -614,15 +593,18 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { } fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !SegmentCommand { - var cmd = SegmentCommand.empty("", .{ - .cmdsize = base_cmd.inner.cmdsize, - .vmaddr = 
base_cmd.inner.vmaddr, - .vmsize = base_cmd.inner.vmsize, - .maxprot = base_cmd.inner.maxprot, - .initprot = base_cmd.inner.initprot, - .nsects = base_cmd.inner.nsects, - .flags = base_cmd.inner.flags, - }); + var cmd = SegmentCommand{ + .inner = .{ + .segname = undefined, + .cmdsize = base_cmd.inner.cmdsize, + .vmaddr = base_cmd.inner.vmaddr, + .vmsize = base_cmd.inner.vmsize, + .maxprot = base_cmd.inner.maxprot, + .initprot = base_cmd.inner.initprot, + .nsects = base_cmd.inner.nsects, + .flags = base_cmd.inner.flags, + }, + }; mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); try cmd.sections.ensureCapacity(allocator, cmd.inner.nsects); @@ -692,7 +674,7 @@ fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { } fn writeHeader(self: *DebugSymbols) !void { - var header = emptyHeader(.{ + var header = commands.emptyHeader(.{ .filetype = macho.MH_DSYM, }); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig @@ -1,6 +1,7 @@ const Object = @This(); const std = @import("std"); +const build_options = @import("build_options"); const assert = std.debug.assert; const dwarf = std.dwarf; const fs = std.fs; @@ -13,11 +14,12 @@ const sort = std.sort; const commands = @import("commands.zig"); const segmentName = commands.segmentName; const sectionName = commands.sectionName; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); const LoadCommand = commands.LoadCommand; const MachO = @import("../MachO.zig"); -const TextBlock = @import("TextBlock.zig"); file: fs.File, name: []const u8, @@ -54,7 +56,7 @@ tu_name: ?[]const u8 = null, tu_comp_dir: ?[]const u8 = null, mtime: ?u64 = null, -text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, +atoms: std.ArrayListUnmanaged(*Atom) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, // TODO symbol mapping and its inverse can probably be simple arrays @@ -62,6 +64,8 @@ sections_as_symbols: 
std.AutoHashMapUnmanaged(u16, u32) = .{}, symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, +analyzed: bool = false, + const DebugInfo = struct { inner: dwarf.DwarfInfo, debug_info: []u8, @@ -134,7 +138,7 @@ pub fn deinit(self: *Object, allocator: *Allocator) void { self.data_in_code_entries.deinit(allocator); self.symtab.deinit(allocator); self.strtab.deinit(allocator); - self.text_blocks.deinit(allocator); + self.atoms.deinit(allocator); self.sections_as_symbols.deinit(allocator); self.symbol_mapping.deinit(allocator); self.reverse_symbol_mapping.deinit(allocator); @@ -316,16 +320,17 @@ const Context = struct { object: *Object, macho_file: *MachO, match: MachO.MatchingSection, + parsed_atoms: *ParsedAtoms, }; -const TextBlockParser = struct { +const AtomParser = struct { section: macho.section_64, code: []u8, relocs: []macho.relocation_info, nlists: []NlistWithIndex, index: u32 = 0, - fn peek(self: TextBlockParser) ?NlistWithIndex { + fn peek(self: AtomParser) ?NlistWithIndex { return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; } @@ -339,9 +344,12 @@ const TextBlockParser = struct { } } - pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock { + pub fn next(self: *AtomParser, context: Context) !?*Atom { if (self.index == self.nlists.len) return null; + const tracy = trace(@src()); + defer tracy.end(); + var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); defer aliases.deinit(); @@ -364,12 +372,12 @@ const TextBlockParser = struct { } if (aliases.items.len > 1) { - // Bubble-up senior symbol as the main link to the text block. + // Bubble-up senior symbol as the main link to the atom. 
sort.sort( NlistWithIndex, aliases.items, context, - TextBlockParser.lessThanBySeniority, + AtomParser.lessThanBySeniority, ); } @@ -389,12 +397,12 @@ const TextBlockParser = struct { else max_align; - const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: { + const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: { // TODO there has to be a better to handle this. for (di.inner.func_list.items) |func| { if (func.pc_range) |range| { if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) { - break :blk TextBlock.Stab{ + break :blk Atom.Stab{ .function = range.end - range.start, }; } @@ -405,28 +413,31 @@ const TextBlockParser = struct { break :blk .static; } else null; - const block = try context.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = senior_nlist.index; - block.stab = stab; - block.size = size; - block.alignment = actual_align; - try context.macho_file.managed_blocks.append(context.allocator, block); + const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); + atom.stab = stab; - try block.code.appendSlice(context.allocator, code); + const is_zerofill = blk: { + const section_type = commands.sectionType(self.section); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + if (!is_zerofill) { + mem.copy(u8, atom.code.items, code); + } - try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); + try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); for (aliases.items) |alias| { - block.aliases.appendAssumeCapacity(alias.index); + atom.aliases.appendAssumeCapacity(alias.index); const sym = &context.macho_file.locals.items[alias.index]; sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? 
+ 1); } - try block.parseRelocs(self.relocs, .{ - .base_addr = start_addr, + try atom.parseRelocs(self.relocs, .{ + .base_addr = self.section.addr, + .base_offset = start_addr, .allocator = context.allocator, .object = context.object, .macho_file = context.macho_file, + .parsed_atoms = context.parsed_atoms, }); if (context.macho_file.has_dices) { @@ -435,10 +446,10 @@ const TextBlockParser = struct { senior_nlist.nlist.n_value, senior_nlist.nlist.n_value + size, ); - try block.dices.ensureTotalCapacity(context.allocator, dices.len); + try atom.dices.ensureTotalCapacity(context.allocator, dices.len); for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ + atom.dices.appendAssumeCapacity(.{ .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value), .length = dice.length, .kind = dice.kind, @@ -448,16 +459,22 @@ const TextBlockParser = struct { self.index += 1; - return block; + return atom; } }; -pub fn parseTextBlocks( +pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom); + +pub fn parseIntoAtoms( self: *Object, allocator: *Allocator, object_id: u16, macho_file: *MachO, -) !void { +) !ParsedAtoms { + const tracy = trace(@src()); + defer tracy.end(); + + var parsed_atoms = ParsedAtoms.init(allocator); const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.debug("analysing {s}", .{self.name}); @@ -498,7 +515,7 @@ pub fn parseTextBlocks( for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("putting section '{s},{s}' as a TextBlock", .{ + log.debug("putting section '{s},{s}' as an Atom", .{ segmentName(sect), sectionName(sect), }); @@ -523,14 +540,17 @@ pub fn parseTextBlocks( // Symbols within this section only. const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); + // TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense + // to do this in a standalone pass after we parse the sections as atoms. 
// In release mode, if the object file was generated with dead code stripping optimisations, // note it now and parse sections as atoms. - const is_splittable = blk: { - if (macho_file.base.options.optimize_mode == .Debug) break :blk false; - break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - }; + // const is_splittable = blk: { + // if (macho_file.base.options.optimize_mode == .Debug) break :blk false; + // break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + // }; + const is_splittable = false; - macho_file.has_dices = blk: { + macho_file.has_dices = macho_file.has_dices or blk: { if (self.text_section_index) |index| { if (index != id) break :blk false; if (self.data_in_code_entries.items.len == 0) break :blk false; @@ -541,12 +561,12 @@ pub fn parseTextBlocks( macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; next: { - if (is_splittable) blocks: { - if (filtered_nlists.len == 0) break :blocks; + if (is_splittable) atoms: { + if (filtered_nlists.len == 0) break :atoms; // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) - // as a temporary symbol and insert the matching TextBlock. + // as a temporary symbol and insert the matching Atom. 
const first_nlist = filtered_nlists[0].nlist; if (first_nlist.n_value > sect.addr) { const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{ @@ -556,44 +576,45 @@ pub fn parseTextBlocks( }); defer allocator.free(sym_name); - const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); try macho_file.locals.append(allocator, .{ .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, - .n_value = sect.addr, + .n_value = 0, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); - break :blk block_local_sym_index; + try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index); + break :blk atom_local_sym_index; }; + const atom_code = code[0 .. first_nlist.n_value - sect.addr]; + const atom_size = atom_code.len; + const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align"); - const block_code = code[0 .. 
first_nlist.n_value - sect.addr]; - const block_size = block_code.len; - - const block = try allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = block_local_sym_index; - block.size = block_size; - block.alignment = sect.@"align"; - try macho_file.managed_blocks.append(allocator, block); - - try block.code.appendSlice(allocator, block_code); + const is_zerofill = blk: { + const section_type = commands.sectionType(sect); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + if (!is_zerofill) { + mem.copy(u8, atom.code.items, atom_code); + } - try block.parseRelocs(relocs, .{ - .base_addr = 0, + try atom.parseRelocs(relocs, .{ + .base_addr = sect.addr, + .base_offset = 0, .allocator = allocator, .object = self, .macho_file = macho_file, + .parsed_atoms = &parsed_atoms, }); if (macho_file.has_dices) { - const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); - try block.dices.ensureTotalCapacity(allocator, dices.len); + const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size); + try atom.dices.ensureTotalCapacity(allocator, dices.len); for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ + atom.dices.appendAssumeCapacity(.{ .offset = dice.offset - try math.cast(u32, sect.addr), .length = dice.length, .kind = dice.kind, @@ -601,29 +622,17 @@ pub fn parseTextBlocks( } } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &macho_file.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (macho_file.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try macho_file.blocks.putNoClobber(allocator, match, block); + try parsed_atoms.putNoClobber(match, atom); } - - try self.text_blocks.append(allocator, block); + try self.atoms.append(allocator, atom); } - var parser = TextBlockParser{ + var parser = AtomParser{ .section = sect, .code = code, .relocs = relocs, @@ -635,10 +644,11 @@ pub fn parseTextBlocks( .object = self, .macho_file = macho_file, .match = match, - })) |block| { - const sym = macho_file.locals.items[block.local_sym_index]; + .parsed_atoms = &parsed_atoms, + })) |atom| { + const sym = macho_file.locals.items[atom.local_sym_index]; const is_ext = blk: { - const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable; + const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable; break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]); }; if (is_ext) { @@ -662,38 +672,26 @@ pub fn parseTextBlocks( // In x86_64 relocs, it can so happen that the compiler refers to the same // atom by both the actual assigned symbol and the start of the section. In this // case, we need to link the two together so add an alias. - try block.aliases.append(allocator, alias); + try atom.aliases.append(allocator, alias); } } - // Update target section's metadata - // TODO should we update segment's size here too? 
- // How does it tie with incremental space allocs? - const tseg = &macho_file.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (macho_file.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try macho_file.blocks.putNoClobber(allocator, match, block); + try parsed_atoms.putNoClobber(match, atom); } - - try self.text_blocks.append(allocator, block); + try self.atoms.append(allocator, atom); } break :next; } - // Since there is no symbol to refer to this block, we create + // Since there is no symbol to refer to this atom, we create // a temp one, unless we already did that when working out the relocations - // of other text blocks. + // of other atoms. const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{ self.name, segmentName(sect), @@ -701,41 +699,43 @@ pub fn parseTextBlocks( }); defer allocator.free(sym_name); - const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); try macho_file.locals.append(allocator, .{ .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? 
+ 1), .n_desc = 0, - .n_value = sect.addr, + .n_value = 0, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); - break :blk block_local_sym_index; + try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index); + break :blk atom_local_sym_index; }; + const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align"); - const block = try allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = block_local_sym_index; - block.size = sect.size; - block.alignment = sect.@"align"; - try macho_file.managed_blocks.append(allocator, block); - - try block.code.appendSlice(allocator, code); + const is_zerofill = blk: { + const section_type = commands.sectionType(sect); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + if (!is_zerofill) { + mem.copy(u8, atom.code.items, code); + } - try block.parseRelocs(relocs, .{ - .base_addr = 0, + try atom.parseRelocs(relocs, .{ + .base_addr = sect.addr, + .base_offset = 0, .allocator = allocator, .object = self, .macho_file = macho_file, + .parsed_atoms = &parsed_atoms, }); if (macho_file.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); - try block.dices.ensureTotalCapacity(allocator, dices.len); + try atom.dices.ensureTotalCapacity(allocator, dices.len); for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ + atom.dices.appendAssumeCapacity(.{ .offset = dice.offset - try math.cast(u32, sect.addr), .length = dice.length, .kind = dice.kind, @@ -743,12 +743,12 @@ pub fn parseTextBlocks( } } - // Since this is block gets a helper local temporary symbol that didn't exist + // Since this is atom gets a helper local temporary symbol that didn't exist // in the object file which encompasses the entire section, we need traverse // the filtered symbols and note which symbol is contained within so that // we can properly 
allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. - try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len); + try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len); for (filtered_nlists) |nlist_with_index| { const nlist = nlist_with_index.nlist; @@ -756,12 +756,12 @@ pub fn parseTextBlocks( const local = &macho_file.locals.items[local_sym_index]; local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1); - const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { + const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { // TODO there has to be a better to handle this. for (di.inner.func_list.items) |func| { if (func.pc_range) |range| { if (nlist.n_value >= range.start and nlist.n_value < range.end) { - break :blk TextBlock.Stab{ + break :blk Atom.Stab{ .function = range.end - range.start, }; } @@ -772,35 +772,25 @@ pub fn parseTextBlocks( break :blk .static; } else null; - block.contained.appendAssumeCapacity(.{ + atom.contained.appendAssumeCapacity(.{ .local_sym_index = local_sym_index, .offset = nlist.n_value - sect.addr, .stab = stab, }); } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &macho_file.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (macho_file.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try macho_file.blocks.putNoClobber(allocator, match, block); + try parsed_atoms.putNoClobber(match, atom); } - - try self.text_blocks.append(allocator, block); + try self.atoms.append(allocator, atom); } } + + return parsed_atoms; } fn parseSymtab(self: *Object, allocator: *Allocator) !void { diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig @@ -1,1221 +0,0 @@ -const TextBlock = @This(); - -const std = @import("std"); -const aarch64 = @import("../../codegen/aarch64.zig"); -const assert = std.debug.assert; -const commands = @import("commands.zig"); -const log = std.log.scoped(.text_block); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; - -const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; -const MachO = @import("../MachO.zig"); -const Object = @import("Object.zig"); -const StringIndexAdapter = std.hash_map.StringIndexAdapter; - -/// Each decl always gets a local symbol with the fully qualified name. -/// The vaddr and size are found here directly. -/// The file offset is found by computing the vaddr offset from the section vaddr -/// the symbol references, and adding that to the file offset of the section. -/// If this field is 0, it means the codegen size = 0 and there is no symbol or -/// offset table entry. 
-local_sym_index: u32, - -/// List of symbol aliases pointing to the same block via different nlists -aliases: std.ArrayListUnmanaged(u32) = .{}, - -/// List of symbols contained within this block -contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, - -/// Code (may be non-relocated) this block represents -code: std.ArrayListUnmanaged(u8) = .{}, - -/// Size and alignment of this text block -/// Unlike in Elf, we need to store the size of this symbol as part of -/// the TextBlock since macho.nlist_64 lacks this information. -size: u64, -alignment: u32, - -relocs: std.ArrayListUnmanaged(Relocation) = .{}, - -/// List of offsets contained within this block that need rebasing by the dynamic -/// loader in presence of ASLR -rebases: std.ArrayListUnmanaged(u64) = .{}, - -/// List of offsets contained within this block that will be dynamically bound -/// by the dynamic loader and contain pointers to resolved (at load time) extern -/// symbols (aka proxies aka imports) -bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, - -/// List of data-in-code entries. This is currently specific to x86_64 only. -dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, - -/// Stab entry for this block. This is currently specific to a binary created -/// by linking object files in a traditional sense - in incremental sense, we -/// bypass stabs altogether to produce dSYM bundle directly with fully relocated -/// DWARF sections. -stab: ?Stab = null, - -/// Points to the previous and next neighbours -next: ?*TextBlock, -prev: ?*TextBlock, - -/// Previous/next linked list pointers. -/// This is the linked list node for this Decl's corresponding .debug_info tag. -dbg_info_prev: ?*TextBlock, -dbg_info_next: ?*TextBlock, -/// Offset into .debug_info pointing to the tag for this Decl. -dbg_info_off: u32, -/// Size of the .debug_info tag for this Decl, not including padding. 
-dbg_info_len: u32, - -pub const SymbolAtOffset = struct { - local_sym_index: u32, - offset: u64, - stab: ?Stab = null, - - pub fn format( - self: SymbolAtOffset, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset }); - if (self.stab) |stab| { - try std.fmt.format(writer, ", .stab = {any}", .{stab}); - } - try std.fmt.format(writer, " }}", .{}); - } -}; - -pub const Stab = union(enum) { - function: u64, - static, - global, - - pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); - defer nlists.deinit(); - - const sym = macho_file.locals.items[local_sym_index]; - switch (stab) { - .function => |size| { - try nlists.ensureUnusedCapacity(4); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = size, - }); - }, - .global => { - try nlists.append(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try nlists.append(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - }, - } - - return nlists.toOwnedSlice(); - } -}; - -pub const Relocation = struct { - /// Offset within the `block`s code buffer. - /// Note relocation size can be inferred by relocation's kind. 
- offset: u32, - - where: enum { - local, - undef, - }, - - where_index: u32, - - payload: union(enum) { - unsigned: Unsigned, - branch: Branch, - page: Page, - page_off: PageOff, - pointer_to_got: PointerToGot, - signed: Signed, - load: Load, - }, - - const ResolveArgs = struct { - block: *TextBlock, - offset: u32, - source_addr: u64, - target_addr: u64, - macho_file: *MachO, - }; - - pub const Unsigned = struct { - subtractor: ?u32, - - /// Addend embedded directly in the relocation slot - addend: i64, - - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, - - pub fn resolve(self: Unsigned, args: ResolveArgs) !void { - const result = blk: { - if (self.subtractor) |subtractor| { - const sym = args.macho_file.locals.items[subtractor]; - break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; - } else { - break :blk @intCast(i64, args.target_addr) + self.addend; - } - }; - - if (self.is_64bit) { - mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result)); - } else { - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); - } - } - - pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Unsigned {{ ", .{}); - if (self.subtractor) |sub| { - try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - const length: usize = if (self.is_64bit) 8 else 4; - try std.fmt.format(writer, ".length = {}, ", .{length}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Branch = struct { - arch: Arch, - - pub fn resolve(self: Branch, args: ResolveArgs) !void { - switch (self.arch) { - .aarch64 => { - const displacement = try math.cast( - i28, - @intCast(i64, args.target_addr) - @intCast(i64, 
args.source_addr), - ); - const code = args.block.code.items[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, - ); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); - }, - else => return error.UnsupportedCpuArchitecture, - } - } - - pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "Branch {{}}", .{}); - } - }; - - pub const Page = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: u32 = 0, - - pub fn resolve(self: Page, args: ResolveArgs) !void { - const target_addr = args.target_addr + self.addend; - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - const code = args.block.code.items[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, code, inst.toU32()); - } - - pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Page {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try 
std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp", .{}); - }, - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PageOff = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: u32 = 0, - op_kind: ?OpKind = null, - - pub const OpKind = enum { - arithmetic, - load, - }; - - pub fn resolve(self: PageOff, args: ResolveArgs) !void { - const code = args.block.code.items[args.offset..][0..4]; - - switch (self.kind) { - .page => { - const target_addr = args.target_addr + self.addend; - const narrowed = @truncate(u12, target_addr); - - const op_kind = self.op_kind orelse unreachable; - var inst: aarch64.Instruction = blk: { - switch (op_kind) { - .arithmetic => { - break :blk .{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code), - }; - }, - .load => { - break :blk .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - }, - } - }; - - if (op_kind == .arithmetic) { - inst.add_subtract_immediate.imm12 = narrowed; - } else { - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. 
- break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - } - - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .got => { - const narrowed = @truncate(u12, args.target_addr); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .tlvp => { - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - const reg_info: RegInfo = blk: { - if (isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - .rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = @truncate(u1, inst.size), - }; - } - }; - const narrowed = @truncate(u12, args.target_addr); - var inst = aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = narrowed, - .sh = 0, - .s = 0, - .op = 0, - .sf = reg_info.size, - }, - }; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - } - } - - pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "PageOff {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp, ", .{}); - }, - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try 
std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PointerToGot = struct { - pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { - const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result)); - } - - pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "PointerToGot {{}}", .{}); - } - }; - - pub const Signed = struct { - addend: i64, - correction: i4, - - pub fn resolve(self: Signed, args: ResolveArgs) !void { - const target_addr = @intCast(i64, args.target_addr) + self.addend; - const displacement = try math.cast( - i32, - target_addr - @intCast(i64, args.source_addr) - self.correction - 4, - ); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Signed {{ ", .{}); - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Load = struct { - kind: enum { - got, - tlvp, - }, - addend: i32 = 0, - - pub fn resolve(self: Load, args: ResolveArgs) !void { - if (self.kind == .tlvp) { - // We need to rewrite the opcode from movq to leaq. 
- args.block.code.items[args.offset - 2] = 0x8d; - } - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, - ); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Load {{ ", .{}); - try std.fmt.format(writer, "{s}, ", .{self.kind}); - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub fn resolve(self: Relocation, args: ResolveArgs) !void { - switch (self.payload) { - .unsigned => |unsigned| try unsigned.resolve(args), - .branch => |branch| try branch.resolve(args), - .page => |page| try page.resolve(args), - .page_off => |page_off| try page_off.resolve(args), - .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), - .signed => |signed| try signed.resolve(args), - .load => |load| try load.resolve(args), - } - } - - pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try std.fmt.format(writer, "Relocation {{ ", .{}); - try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); - try std.fmt.format(writer, ".where = {}, ", .{self.where}); - try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index}); - - switch (self.payload) { - .unsigned => |unsigned| try unsigned.format(fmt, options, writer), - .branch => |branch| try branch.format(fmt, options, writer), - .page => |page| try page.format(fmt, options, writer), - .page_off => |page_off| try page_off.format(fmt, options, writer), - .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), - .signed => |signed| try signed.format(fmt, options, writer), - .load => |load| try load.format(fmt, options, writer), - } - - try 
std.fmt.format(writer, "}}", .{}); - } -}; - -pub const empty = TextBlock{ - .local_sym_index = 0, - .size = 0, - .alignment = 0, - .prev = null, - .next = null, - .dbg_info_prev = null, - .dbg_info_next = null, - .dbg_info_off = undefined, - .dbg_info_len = undefined, -}; - -pub fn deinit(self: *TextBlock, allocator: *Allocator) void { - self.dices.deinit(allocator); - self.bindings.deinit(allocator); - self.rebases.deinit(allocator); - self.relocs.deinit(allocator); - self.contained.deinit(allocator); - self.aliases.deinit(allocator); - self.code.deinit(allocator); -} - -/// Returns how much room there is to grow in virtual address space. -/// File offset relocation happens transparently, so it is not included in -/// this calculation. -pub fn capacity(self: TextBlock, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; - if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last block. - // The capacity is limited only by virtual address space. - return std.math.maxInt(u64) - self_sym.n_value; - } -} - -pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. 
- const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = MachO.padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= MachO.min_text_capacity; -} - -const RelocContext = struct { - base_addr: u64 = 0, - allocator: *Allocator, - object: *Object, - macho_file: *MachO, -}; - -fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { - var parsed_rel = Relocation{ - .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_addr), - .where = undefined, - .where_index = undefined, - .payload = undefined, - }; - - if (rel.r_extern == 0) { - const sect_id = @intCast(u16, rel.r_symbolnum - 1); - - const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; - const sect = seg.sections.items[sect_id]; - const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; - const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); - const sym_name = try std.fmt.allocPrint(context.allocator, "l_{s}_{s}_{s}", .{ - context.object.name, - commands.segmentName(sect), - commands.sectionName(sect), - }); - defer context.allocator.free(sym_name); - - try context.macho_file.locals.append(context.allocator, .{ - .n_strx = try context.macho_file.makeString(sym_name), - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? 
+ 1), - .n_desc = 0, - .n_value = sect.addr, - }); - try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); - break :blk local_sym_index; - }; - - parsed_rel.where = .local; - parsed_rel.where_index = local_sym_index; - } else { - const sym = context.object.symtab.items[rel.r_symbolnum]; - const sym_name = context.object.getString(sym.n_strx); - - if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { - const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - parsed_rel.where = .local; - parsed_rel.where_index = where_index; - } else { - const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{ - .bytes = &context.macho_file.strtab, - }) orelse unreachable; - const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; - switch (resolv.where) { - .global => { - parsed_rel.where = .local; - parsed_rel.where_index = resolv.local_sym_index; - }, - .undef => { - parsed_rel.where = .undef; - parsed_rel.where_index = resolv.where_index; - }, - } - } - } - - return parsed_rel; -} - -pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void { - const filtered_relocs = filterRelocs(relocs, context.base_addr, context.base_addr + self.size); - var it = RelocIterator{ - .buffer = filtered_relocs, - }; - - var addend: u32 = 0; - var subtractor: ?u32 = null; - const arch = context.macho_file.base.options.target.cpu.arch; - - while (it.next()) |rel| { - if (isAddend(rel, arch)) { - // Addend is not a relocation with effect on the TextBlock, so - // parse it and carry on. - assert(addend == 0); // Oh no, addend was not reset! - addend = rel.r_symbolnum; - - // Verify ADDEND is followed by a PAGE21 or PAGEOFF12. 
- const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); - return error.UnexpectedRelocationType; - }, - } - continue; - } - - if (isSubtractor(rel, arch)) { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. - assert(subtractor == null); // Oh no, subtractor was not reset! - assert(rel.r_extern == 1); - const sym = context.object.symtab.items[rel.r_symbolnum]; - const sym_name = context.object.getString(sym.n_strx); - - if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { - const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - subtractor = where_index; - } else { - const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{ - .bytes = &context.macho_file.strtab, - }) orelse unreachable; - const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; - assert(resolv.where == .global); - subtractor = resolv.local_sym_index; - } - - // Verify SUBTRACTOR is followed by UNSIGNED. 
- switch (arch) { - .aarch64 => { - const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); - if (next != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - }, - .x86_64 => { - const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); - if (next != .X86_64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - }, - else => unreachable, - } - continue; - } - - var parsed_rel = try initRelocFromObject(rel, context); - - switch (arch) { - .aarch64 => { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - switch (rel_type) { - .ARM64_RELOC_ADDEND => unreachable, - .ARM64_RELOC_SUBTRACTOR => unreachable, - .ARM64_RELOC_BRANCH26 => { - self.parseBranch(rel, &parsed_rel, context); - }, - .ARM64_RELOC_UNSIGNED => { - self.parseUnsigned(rel, &parsed_rel, subtractor, context); - subtractor = null; - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - self.parsePage(rel, &parsed_rel, addend); - if (rel_type == .ARM64_RELOC_PAGE21) - addend = 0; - }, - .ARM64_RELOC_PAGEOFF12, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => { - self.parsePageOff(rel, &parsed_rel, addend); - if (rel_type == .ARM64_RELOC_PAGEOFF12) - addend = 0; - }, - .ARM64_RELOC_POINTER_TO_GOT => { - self.parsePointerToGot(rel, &parsed_rel); - }, - } - }, - .x86_64 => { - switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_SUBTRACTOR => unreachable, - .X86_64_RELOC_BRANCH => { - self.parseBranch(rel, &parsed_rel, context); - }, - .X86_64_RELOC_UNSIGNED => { - self.parseUnsigned(rel, &parsed_rel, subtractor, context); - subtractor = null; - }, - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - self.parseSigned(rel, &parsed_rel, 
context); - }, - .X86_64_RELOC_GOT_LOAD, - .X86_64_RELOC_GOT, - .X86_64_RELOC_TLV, - => { - self.parseLoad(rel, &parsed_rel); - }, - } - }, - else => unreachable, - } - - try self.relocs.append(context.allocator, parsed_rel); - - const is_via_got = switch (parsed_rel.payload) { - .pointer_to_got => true, - .load => |load| load.kind == .got, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - else => false, - }; - - if (is_via_got) blk: { - const key = MachO.GotIndirectionKey{ - .where = switch (parsed_rel.where) { - .local => .local, - .undef => .undef, - }, - .where_index = parsed_rel.where_index, - }; - if (context.macho_file.got_entries_map.contains(key)) break :blk; - - const got_index = @intCast(u32, context.macho_file.got_entries.items.len); - try context.macho_file.got_entries.append(context.allocator, key); - try context.macho_file.got_entries_map.putNoClobber(context.allocator, key, got_index); - } else if (parsed_rel.payload == .unsigned) { - switch (parsed_rel.where) { - .undef => { - try self.bindings.append(context.allocator, .{ - .local_sym_index = parsed_rel.where_index, - .offset = parsed_rel.offset, - }); - }, - .local => { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = context.macho_file.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = commands.sectionType(sect); - - const should_rebase = rebase: { - if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. 
- const is_right_segment = blk: { - if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } - } - if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; - } - - break :rebase true; - }; - - if (should_rebase) { - try self.rebases.append(context.allocator, parsed_rel.offset); - } - }, - } - } else if (parsed_rel.payload == .branch) blk: { - if (parsed_rel.where != .undef) break :blk; - if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; - - const stubs_index = @intCast(u32, context.macho_file.stubs.items.len); - try context.macho_file.stubs.append(context.allocator, parsed_rel.where_index); - try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stubs_index); - } - } -} - -fn isAddend(rel: macho.relocation_info, arch: Arch) bool { - if (arch != .aarch64) return false; - return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; -} - -fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { - return switch (arch) { - .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, - .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, - else => unreachable, - }; -} - -fn parseUnsigned( - self: TextBlock, - rel: macho.relocation_info, - out: *Relocation, - subtractor: ?u32, - context: RelocContext, -) void { - assert(rel.r_pcrel == 0); - - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - - var addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.code.items[out.offset..][0..8]) - else - 
mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); - - if (rel.r_extern == 0) { - assert(out.where == .local); - const target_sym = context.macho_file.locals.items[out.where_index]; - addend -= @intCast(i64, target_sym.n_value); - } - - out.payload = .{ - .unsigned = .{ - .subtractor = subtractor, - .is_64bit = is_64bit, - .addend = addend, - }, - }; -} - -fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { - _ = self; - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - out.payload = .{ - .branch = .{ - .arch = context.macho_file.base.options.target.cpu.arch, - }, - }; -} - -fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { - _ = self; - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - out.payload = .{ - .page = .{ - .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_PAGE21 => .page, - .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; -} - -fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const op_kind: ?Relocation.PageOff.OpKind = blk: { - if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; - const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4])) - .arithmetic - else - .load; - break :blk op_kind; - }; - - out.payload = .{ - .page_off = .{ - .kind = switch (rel_type) { - .ARM64_RELOC_PAGEOFF12 => .page, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, - else => unreachable, - }, - .addend = addend, - .op_kind = op_kind, - }, - }; -} - -fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { - _ = self; - assert(rel.r_pcrel 
== 1); - assert(rel.r_length == 2); - - out.payload = .{ - .pointer_to_got = .{}, - }; -} - -fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const correction: i4 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; - - if (rel.r_extern == 0) { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const target_sym = switch (out.where) { - .local => context.macho_file.locals.items[out.where_index], - .undef => context.macho_file.undefs.items[out.where_index], - }; - addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); - } - - out.payload = .{ - .signed = .{ - .correction = correction, - .addend = addend, - }, - }; -} - -fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) - mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) - else - 0; - - out.payload = .{ - .load = .{ - .kind = switch (rel_type) { - .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, - .X86_64_RELOC_TLV => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; -} - -pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { - for (self.relocs.items) |rel| { - log.debug("relocating {}", .{rel}); - - const source_addr = blk: { - const sym = macho_file.locals.items[self.local_sym_index]; - break :blk sym.n_value + rel.offset; - }; - const target_addr = blk: { - const is_via_got = switch 
(rel.payload) { - .pointer_to_got => true, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - .load => |load| load.kind == .got, - else => false, - }; - - if (is_via_got) { - const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ - .where = switch (rel.where) { - .local => .local, - .undef => .undef, - }, - .where_index = rel.where_index, - }) orelse { - const sym = switch (rel.where) { - .local => macho_file.locals.items[rel.where_index], - .undef => macho_file.undefs.items[rel.where_index], - }; - log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - break :blk got.addr + got_index * @sizeOf(u64); - } - - switch (rel.where) { - .local => { - const sym = macho_file.locals.items[rel.where_index]; - const is_tlv = is_tlv: { - const source_sym = macho_file.locals.items[self.local_sym_index]; - const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = macho_file.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; - const base_address = inner: { - if (macho_file.tlv_data_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else if 
(macho_file.tlv_bss_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :blk sym.n_value - base_address; - } - - break :blk sym.n_value; - }, - .undef => { - const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse { - // TODO verify in TextBlock that the symbol is indeed dynamically bound. - break :blk 0; // Dynamically bound by dyld. - }; - const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[macho_file.stubs_section_index.?]; - break :blk stubs.addr + stubs_index * stubs.reserved2; - }, - } - }; - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); - - try rel.resolve(.{ - .block = self, - .offset = rel.offset, - .source_addr = source_addr, - .target_addr = target_addr, - .macho_file = macho_file, - }); - } -} - -pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "TextBlock {{ ", .{}); - try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); - try std.fmt.format(writer, ".aliases = {any}, ", .{self.aliases.items}); - try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items}); - try std.fmt.format(writer, ".code = {*}, ", .{self.code.items}); - try std.fmt.format(writer, ".size = {d}, ", .{self.size}); - try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment}); - try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items}); - try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items}); - try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items}); - try 
std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items}); - if (self.stab) |stab| { - try std.fmt.format(writer, ".stab = {any}, ", .{stab}); - } - try std.fmt.format(writer, "}}", .{}); -} - -const RelocIterator = struct { - buffer: []const macho.relocation_info, - index: i32 = -1, - - pub fn next(self: *RelocIterator) ?macho.relocation_info { - self.index += 1; - if (self.index < self.buffer.len) { - return self.buffer[@intCast(u32, self.index)]; - } - return null; - } - - pub fn peek(self: RelocIterator) macho.relocation_info { - assert(self.index + 1 < self.buffer.len); - return self.buffer[@intCast(u32, self.index + 1)]; - } -}; - -fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { - const Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; - } - }; - - const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); - - return relocs[start..end]; -} - -inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} diff --git a/src/link/MachO/bind.zig b/src/link/MachO/bind.zig @@ -9,15 +9,6 @@ pub const Pointer = struct { name: ?[]const u8 = null, }; -pub fn pointerCmp(context: void, a: Pointer, b: Pointer) bool { - _ = context; - if (a.segment_id < b.segment_id) return true; - if (a.segment_id == b.segment_id) { - return a.offset < b.offset; - } - return false; -} - pub fn rebaseInfoSize(pointers: []const Pointer) !u64 { var stream = std.io.countingWriter(std.io.null_writer); var writer = stream.writer(); diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig @@ -9,6 +9,7 @@ const assert = std.debug.assert; const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); +const 
makeStaticString = MachO.makeStaticString; const padToIdeal = MachO.padToIdeal; pub const HeaderArgs = struct { @@ -217,75 +218,6 @@ pub const SegmentCommand = struct { inner: macho.segment_command_64, sections: std.ArrayListUnmanaged(macho.section_64) = .{}, - const SegmentOptions = struct { - cmdsize: u32 = @sizeOf(macho.segment_command_64), - vmaddr: u64 = 0, - vmsize: u64 = 0, - fileoff: u64 = 0, - filesize: u64 = 0, - maxprot: macho.vm_prot_t = macho.VM_PROT_NONE, - initprot: macho.vm_prot_t = macho.VM_PROT_NONE, - nsects: u32 = 0, - flags: u32 = 0, - }; - - pub fn empty(comptime segname: []const u8, opts: SegmentOptions) SegmentCommand { - return .{ - .inner = .{ - .cmd = macho.LC_SEGMENT_64, - .cmdsize = opts.cmdsize, - .segname = makeStaticString(segname), - .vmaddr = opts.vmaddr, - .vmsize = opts.vmsize, - .fileoff = opts.fileoff, - .filesize = opts.filesize, - .maxprot = opts.maxprot, - .initprot = opts.initprot, - .nsects = opts.nsects, - .flags = opts.flags, - }, - }; - } - - const SectionOptions = struct { - addr: u64 = 0, - size: u64 = 0, - offset: u32 = 0, - @"align": u32 = 0, - reloff: u32 = 0, - nreloc: u32 = 0, - flags: u32 = macho.S_REGULAR, - reserved1: u32 = 0, - reserved2: u32 = 0, - reserved3: u32 = 0, - }; - - pub fn addSection( - self: *SegmentCommand, - alloc: *Allocator, - comptime sectname: []const u8, - opts: SectionOptions, - ) !void { - var section = macho.section_64{ - .sectname = makeStaticString(sectname), - .segname = undefined, - .addr = opts.addr, - .size = opts.size, - .offset = opts.offset, - .@"align" = opts.@"align", - .reloff = opts.reloff, - .nreloc = opts.nreloc, - .flags = opts.flags, - .reserved1 = opts.reserved1, - .reserved2 = opts.reserved2, - .reserved3 = opts.reserved3, - }; - mem.copy(u8, &section.segname, &self.inner.segname); - try self.sections.append(alloc, section); - self.inner.cmdsize += @sizeOf(macho.section_64); - self.inner.nsects += 1; - } - pub fn read(alloc: *Allocator, reader: anytype) 
!SegmentCommand { const inner = try reader.readStruct(macho.segment_command_64); var segment = SegmentCommand{ @@ -314,10 +246,8 @@ pub const SegmentCommand = struct { } pub fn allocatedSize(self: SegmentCommand, start: u64) u64 { - assert(start > 0); - if (start == self.inner.fileoff) - return 0; - var min_pos: u64 = std.math.maxInt(u64); + assert(start >= self.inner.fileoff); + var min_pos: u64 = self.inner.fileoff + self.inner.filesize; for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; @@ -337,12 +267,12 @@ pub const SegmentCommand = struct { return null; } - pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 { - var st: u64 = if (start) |v| v else self.inner.fileoff; - while (self.detectAllocCollision(st, object_size)) |item_end| { - st = mem.alignForwardGeneric(u64, item_end, min_alignment); + pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u64, start: ?u64) u64 { + var offset: u64 = if (start) |v| v else self.inner.fileoff; + while (self.detectAllocCollision(offset, object_size)) |item_end| { + offset = mem.alignForwardGeneric(u64, item_end, min_alignment); } - return st; + return offset; } fn eql(self: SegmentCommand, other: SegmentCommand) bool { @@ -427,13 +357,6 @@ pub fn createLoadDylibCommand( return dylib_cmd; } -fn makeStaticString(bytes: []const u8) [16]u8 { - var buf = [_]u8{0} ** 16; - assert(bytes.len <= buf.len); - mem.copy(u8, &buf, bytes); - return buf; -} - fn parseName(name: *const [16]u8) []const u8 { const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; return name[0..len]; @@ -514,17 +437,14 @@ test "read-write segment command" { }; var cmd = SegmentCommand{ .inner = .{ - .cmd = macho.LC_SEGMENT_64, .cmdsize = 152, .segname = makeStaticString("__TEXT"), .vmaddr = 4294967296, .vmsize = 294912, - .fileoff = 0, .filesize = 294912, .maxprot = macho.VM_PROT_READ | 
macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE, .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ, .nsects = 1, - .flags = 0, }, }; try cmd.sections.append(gpa, .{ @@ -534,12 +454,7 @@ test "read-write segment command" { .size = 448, .offset = 16384, .@"align" = 2, - .reloff = 0, - .nreloc = 0, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved1 = 0, - .reserved2 = 0, - .reserved3 = 0, }); defer cmd.deinit(gpa); try testRead(gpa, in_buffer, LoadCommand{ .Segment = cmd }); diff --git a/test/stage2/darwin.zig b/test/stage2/darwin.zig @@ -27,8 +27,8 @@ pub fn addCases(ctx: *TestContext) !void { // Regular old hello world case.addCompareOutput( - \\extern "c" fn write(usize, usize, usize) usize; - \\extern "c" fn exit(usize) noreturn; + \\extern fn write(usize, usize, usize) usize; + \\extern fn exit(usize) noreturn; \\ \\pub export fn main() noreturn { \\ print(); @@ -47,8 +47,8 @@ pub fn addCases(ctx: *TestContext) !void { // Print it 4 times and force growth and realloc. case.addCompareOutput( - \\extern "c" fn write(usize, usize, usize) usize; - \\extern "c" fn exit(usize) noreturn; + \\extern fn write(usize, usize, usize) usize; + \\extern fn exit(usize) noreturn; \\ \\pub export fn main() noreturn { \\ print(); @@ -74,8 +74,8 @@ pub fn addCases(ctx: *TestContext) !void { // Print it once, and change the message. case.addCompareOutput( - \\extern "c" fn write(usize, usize, usize) usize; - \\extern "c" fn exit(usize) noreturn; + \\extern fn write(usize, usize, usize) usize; + \\extern fn exit(usize) noreturn; \\ \\pub export fn main() noreturn { \\ print(); @@ -94,8 +94,8 @@ pub fn addCases(ctx: *TestContext) !void { // Now we print it twice. 
case.addCompareOutput( - \\extern "c" fn write(usize, usize, usize) usize; - \\extern "c" fn exit(usize) noreturn; + \\extern fn write(usize, usize, usize) usize; + \\extern fn exit(usize) noreturn; \\ \\pub export fn main() noreturn { \\ print(); @@ -121,7 +121,7 @@ pub fn addCases(ctx: *TestContext) !void { // This test case also covers an infrequent scenarion where the string table *may* be relocated // into the position preceeding the symbol table which results in a dyld error. case.addCompareOutput( - \\extern "c" fn exit(usize) noreturn; + \\extern fn exit(usize) noreturn; \\ \\pub export fn main() noreturn { \\ exit(0); @@ -131,8 +131,8 @@ pub fn addCases(ctx: *TestContext) !void { ); case.addCompareOutput( - \\extern "c" fn exit(usize) noreturn; - \\extern "c" fn write(usize, usize, usize) usize; + \\extern fn exit(usize) noreturn; + \\extern fn write(usize, usize, usize) usize; \\ \\pub export fn main() noreturn { \\ _ = write(1, @ptrToInt("Hey!\n"), 5);