Merge pull request #20032 from ziglang/macho-literals

link/macho: implement logic for merging literals
2024-05-23 14:21:31 +02:00
parent 9be8a9000f d31eb744ce
commit fb88cfdf6a
9 changed files with 1082 additions and 107 deletions
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -539,6 +539,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node

    try self.convertTentativeDefinitions();
    try self.createObjcSections();
+    try self.dedupLiterals();
    try self.claimUnresolved();

    if (self.base.gc_sections) {
@@ -1491,6 +1492,33 @@ fn createObjcSections(self: *MachO) !void {
        const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?;
        const selrefs_index = try internal.addObjcMsgsendSections(name, self);
        try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self);
+        sym.flags.objc_stubs = true;
+    }
+}
+
+/// Merges duplicate literals across all linked inputs.
+///
+/// Runs in two passes over the Zig object (if any), every regular object and
+/// the internal object (if any), in that order: first each input registers
+/// its literals in a shared `LiteralPool`, then each input is asked to
+/// deduplicate against the filled pool. The pool is freed before returning.
+pub fn dedupLiterals(self: *MachO) !void {
+    const allocator = self.base.comp.gpa;
+    var pool: LiteralPool = .{};
+    defer pool.deinit(allocator);
+
+    // Pass 1: populate the pool.
+    if (self.getZigObject()) |zig_object| try zig_object.resolveLiterals(&pool, self);
+    for (self.objects.items) |file_index| {
+        const file = self.getFile(file_index).?;
+        try file.object.resolveLiterals(&pool, self);
+    }
+    if (self.getInternalObject()) |internal| try internal.resolveLiterals(&pool, self);
+
+    // Pass 2: let every input deduplicate against the pool.
+    if (self.getZigObject()) |zig_object| zig_object.dedupLiterals(pool, self);
+    for (self.objects.items) |file_index| {
+        const file = self.getFile(file_index).?;
+        file.object.dedupLiterals(pool, self);
+    }
+    if (self.getInternalObject()) |internal| {
+        internal.dedupLiterals(pool, self);
    }
 }

@@ -1728,20 +1756,18 @@ fn initOutputSections(self: *MachO) !void {
            atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self);
        }
    }
-    if (self.text_sect_index == null) {
-        self.text_sect_index = try self.addSection("__TEXT", "__text", .{
-            .alignment = switch (self.getTarget().cpu.arch) {
-                .x86_64 => 0,
-                .aarch64 => 2,
-                else => unreachable,
-            },
-            .flags = macho.S_REGULAR |
-                macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
-        });
-    }
-    if (self.data_sect_index == null) {
-        self.data_sect_index = try self.addSection("__DATA", "__data", .{});
-    }
+    self.text_sect_index = self.getSectionByName("__TEXT", "__text") orelse
+        try self.addSection("__TEXT", "__text", .{
+        .alignment = switch (self.getTarget().cpu.arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable,
+        },
+        .flags = macho.S_REGULAR |
+            macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+    });
+    self.data_sect_index = self.getSectionByName("__DATA", "__data") orelse
+        try self.addSection("__DATA", "__data", .{});
 }

 fn initSyntheticSections(self: *MachO) !void {
@@ -4387,6 +4413,87 @@ const Section = struct {
    last_atom_index: Atom.Index = 0,
 };

+/// Content-addressed pool used to merge identical literals.
+///
+/// The bytes of every unique literal live in `data`. `keys[i]` records the
+/// byte range and literal type of the i-th unique literal, and `values[i]` is
+/// the atom index slot handed back to the caller for that literal. `table`
+/// stores no keys itself; lookups go through `Adapter`, which maps a table
+/// index back into `keys`.
+pub const LiteralPool = struct {
+    table: std.AutoArrayHashMapUnmanaged(void, void) = .{},
+    keys: std.ArrayListUnmanaged(Key) = .{},
+    values: std.ArrayListUnmanaged(Atom.Index) = .{},
+    data: std.ArrayListUnmanaged(u8) = .{},
+
+    /// Releases all memory owned by the pool.
+    pub fn deinit(lp: *LiteralPool, allocator: Allocator) void {
+        lp.table.deinit(allocator);
+        lp.keys.deinit(allocator);
+        lp.values.deinit(allocator);
+        lp.data.deinit(allocator);
+    }
+
+    /// Returns the atom stored for the pool entry `index`.
+    /// Asserts that `index` is in bounds; the stored atom index must resolve.
+    pub fn getAtom(lp: LiteralPool, index: Index, macho_file: *MachO) *Atom {
+        assert(index < lp.values.items.len);
+        return macho_file.getAtom(lp.values.items[index]).?;
+    }
+
+    const InsertResult = struct {
+        /// True when an equal literal of the same type was already in the pool.
+        found_existing: bool,
+        /// Pool index of the (existing or newly created) entry.
+        index: Index,
+        /// Points into `values.items`. For a new entry the slot is
+        /// uninitialized and must be written by the caller. The pointer is
+        /// invalidated by the next `insert` that grows `values`.
+        atom: *Atom.Index,
+    };
+
+    /// Looks up `string` (tagged with the literal section `type`) in the pool,
+    /// adding a new entry when no equal literal of that type exists yet.
+    pub fn insert(lp: *LiteralPool, allocator: Allocator, @"type": u8, string: []const u8) !InsertResult {
+        const size: u32 = @intCast(string.len);
+        // Reserve everything up front so that an allocation failure after the
+        // table insertion cannot leave `table`, `keys` and `values` with
+        // different lengths.
+        try lp.data.ensureUnusedCapacity(allocator, size);
+        try lp.keys.ensureUnusedCapacity(allocator, 1);
+        try lp.values.ensureUnusedCapacity(allocator, 1);
+        const off: u32 = @intCast(lp.data.items.len);
+        // The candidate bytes have to live in `data` for the adapted lookup.
+        lp.data.appendSliceAssumeCapacity(string);
+        errdefer lp.data.shrinkRetainingCapacity(off);
+        const adapter = Adapter{ .lp = lp };
+        const key = Key{ .off = off, .size = size, .seed = @"type" };
+        const gop = try lp.table.getOrPutAdapted(allocator, key, adapter);
+        if (gop.found_existing) {
+            // The existing entry refers to its own, earlier copy of the bytes;
+            // drop the speculative copy instead of keeping it around.
+            lp.data.shrinkRetainingCapacity(off);
+        } else {
+            lp.keys.appendAssumeCapacity(key);
+            _ = lp.values.addOneAssumeCapacity();
+        }
+        return .{
+            .found_existing = gop.found_existing,
+            .index = @intCast(gop.index),
+            .atom = &lp.values.items[gop.index],
+        };
+    }
+
+    const Key = struct {
+        /// Offset of the literal bytes in `LiteralPool.data`.
+        off: u32,
+        /// Length of the literal in bytes.
+        size: u32,
+        /// Literal section type; used as the hash seed and compared in `eql`.
+        seed: u8,
+
+        fn getData(key: Key, lp: *const LiteralPool) []const u8 {
+            return lp.data.items[key.off..][0..key.size];
+        }
+
+        fn eql(key: Key, other: Key, lp: *const LiteralPool) bool {
+            // Literals of different types must never be merged, even when their
+            // bytes match and their seeded hashes happen to collide.
+            if (key.seed != other.seed) return false;
+            return mem.eql(u8, key.getData(lp), other.getData(lp));
+        }
+
+        fn hash(key: Key, lp: *const LiteralPool) u32 {
+            const data = key.getData(lp);
+            return @truncate(Hash.hash(key.seed, data));
+        }
+    };
+
+    /// Hash-map adapter that resolves table indices to entries of `keys`.
+    const Adapter = struct {
+        lp: *const LiteralPool,
+
+        pub fn eql(ctx: @This(), key: Key, b_void: void, b_map_index: usize) bool {
+            _ = b_void;
+            const other = ctx.lp.keys.items[b_map_index];
+            return key.eql(other, ctx.lp);
+        }
+
+        pub fn hash(ctx: @This(), key: Key) u32 {
+            return key.hash(ctx.lp);
+        }
+    };
+
+    pub const Index = u32;
+};
+
 const HotUpdateState = struct {
    mach_task: ?std.c.MachTask = null,
 };
@@ -4738,6 +4845,7 @@ const Dylib = @import("MachO/Dylib.zig");
 const ExportTrieSection = synthetic.ExportTrieSection;
 const File = @import("MachO/file.zig").File;
 const GotSection = synthetic.GotSection;
+const Hash = std.hash.Wyhash;
 const Indsymtab = synthetic.Indsymtab;
 const InternalObject = @import("MachO/InternalObject.zig");
 const ObjcStubsSection = synthetic.ObjcStubsSection;
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -113,12 +113,18 @@ pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk {
    return macho_file.getThunk(extra.thunk);
 }

+/// Returns the `literal_index` stored in this atom's extra data, or null when
+/// the atom's `literal_pool` flag is not set.
+pub fn getLiteralPoolIndex(self: Atom, macho_file: *MachO) ?MachO.LiteralPool.Index {
+    return if (self.flags.literal_pool)
+        self.getExtra(macho_file).?.literal_index
+    else
+        null;
+}
+
 const AddExtraOpts = struct {
    thunk: ?u32 = null,
    rel_index: ?u32 = null,
    rel_count: ?u32 = null,
    unwind_index: ?u32 = null,
    unwind_count: ?u32 = null,
+    literal_index: ?u32 = null,
 };

 pub fn addExtra(atom: *Atom, opts: AddExtraOpts, macho_file: *MachO) !void {
@@ -143,6 +149,16 @@ pub inline fn setExtra(atom: Atom, extra: Extra, macho_file: *MachO) void {
 }

 pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
+    if (macho_file.base.isRelocatable()) {
+        const osec = macho_file.getSectionByName(sect.segName(), sect.sectName()) orelse
+            try macho_file.addSection(
+            sect.segName(),
+            sect.sectName(),
+            .{ .flags = sect.flags },
+        );
+        return osec;
+    }
+
    const segname, const sectname, const flags = blk: {
        if (sect.isCode()) break :blk .{
            "__TEXT",
@@ -200,18 +216,11 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
            else => break :blk .{ sect.segName(), sect.sectName(), sect.flags },
        }
    };
-    const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection(
+    return macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection(
        segname,
        sectname,
        .{ .flags = flags },
    );
-    if (mem.eql(u8, segname, "__TEXT") and mem.eql(u8, sectname, "__text")) {
-        macho_file.text_sect_index = osec;
-    }
-    if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) {
-        macho_file.data_sect_index = osec;
-    }
-    return osec;
 }

 /// Returns how much room there is to grow in virtual address space.
@@ -651,6 +660,19 @@ fn resolveRelocInner(
    // Address of the __got_zig table entry if any.
    const ZIG_GOT = @as(i64, @intCast(rel.getZigGotTargetAddress(macho_file)));

+    const divExact = struct {
+        fn divExact(atom: Atom, r: Relocation, num: u12, den: u12, ctx: *MachO) !u12 {
+            return math.divExact(u12, num, den) catch {
+                try ctx.reportParseError2(atom.getFile(ctx).getIndex(), "{s}: unexpected remainder when resolving {s} at offset 0x{x}", .{
+                    atom.getName(ctx),
+                    r.fmtPretty(ctx.getTarget().cpu.arch),
+                    r.offset,
+                });
+                return error.UnexpectedRemainder;
+            };
+        }
+    }.divExact;
+
    switch (rel.tag) {
        .local => relocs_log.debug("  {x}<+{d}>: {s}: [=> {x}] atom({d})", .{
            P,
@@ -822,12 +844,12 @@ fn resolveRelocInner(
                };
                inst.load_store_register.offset = switch (inst.load_store_register.size) {
                    0 => if (inst.load_store_register.v == 1)
-                        try math.divExact(u12, @truncate(target), 16)
+                        try divExact(self, rel, @truncate(target), 16, macho_file)
                    else
                        @truncate(target),
-                    1 => try math.divExact(u12, @truncate(target), 2),
-                    2 => try math.divExact(u12, @truncate(target), 4),
-                    3 => try math.divExact(u12, @truncate(target), 8),
+                    1 => try divExact(self, rel, @truncate(target), 2, macho_file),
+                    2 => try divExact(self, rel, @truncate(target), 4, macho_file),
+                    3 => try divExact(self, rel, @truncate(target), 8, macho_file),
                };
                try writer.writeInt(u32, inst.toU32(), .little);
            }
@@ -838,7 +860,7 @@ fn resolveRelocInner(
            assert(rel.meta.length == 2);
            assert(!rel.meta.pcrel);
            const target = math.cast(u64, G + A) orelse return error.Overflow;
-            aarch64.writeLoadStoreRegInst(try math.divExact(u12, @truncate(target), 8), code[rel_offset..][0..4]);
+            aarch64.writeLoadStoreRegInst(try divExact(self, rel, @truncate(target), 8, macho_file), code[rel_offset..][0..4]);
        },

        .tlvp_pageoff => {
@@ -890,7 +912,7 @@ fn resolveRelocInner(
                .load_store_register = .{
                    .rt = reg_info.rd,
                    .rn = reg_info.rn,
-                    .offset = try math.divExact(u12, @truncate(target), 8),
+                    .offset = try divExact(self, rel, @truncate(target), 8, macho_file),
                    .opc = 0b01,
                    .op1 = 0b01,
                    .v = 0,
@@ -1174,7 +1196,7 @@ pub const Flags = packed struct {
    /// Specifies whether this atom is alive or has been garbage collected.
    alive: bool = true,

-    /// Specifies if the atom has been visited during garbage collection.
+    /// Specifies if this atom has been visited during garbage collection.
    visited: bool = false,

    /// Whether this atom has a range extension thunk.
@@ -1185,6 +1207,9 @@ pub const Flags = packed struct {

    /// Whether this atom has any unwind records.
    unwind: bool = false,
+
+    /// Whether this atom has a LiteralPool entry.
+    literal_pool: bool = false,
 };

 pub const Extra = struct {
@@ -1202,6 +1227,9 @@ pub const Extra = struct {

    /// Count of relocations belonging to this atom.
    unwind_count: u32 = 0,
+
+    /// Index into LiteralPool entry for this atom.
+    literal_index: u32 = 0,
 };

 pub const Alignment = @import("../../InternPool.zig").Alignment;
--- a/src/link/MachO/InternalObject.zig
+++ b/src/link/MachO/InternalObject.zig
@@ -3,7 +3,6 @@ index: File.Index,
 sections: std.MultiArrayList(Section) = .{},
 atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
 symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
-strtab: std.ArrayListUnmanaged(u8) = .{},

 objc_methnames: std.ArrayListUnmanaged(u8) = .{},
 objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64),
@@ -18,7 +17,6 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void {
    self.sections.deinit(allocator);
    self.atoms.deinit(allocator);
    self.symbols.deinit(allocator);
-    self.strtab.deinit(allocator);
    self.objc_methnames.deinit(allocator);
 }

@@ -38,9 +36,9 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO)
 }

 /// Creates fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs.
-pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 {
+pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !Atom.Index {
    const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file);
-    return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file);
+    return try self.addObjcSelrefsSection(methname_atom_index, macho_file);
 }

 fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index {
@@ -48,11 +46,8 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil
    const atom_index = try macho_file.addAtom();
    try self.atoms.append(gpa, atom_index);

-    const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname});
-    defer gpa.free(name);
    const atom = macho_file.getAtom(atom_index).?;
    atom.atom_index = atom_index;
-    atom.name = try self.addString(gpa, name);
    atom.file = self.index;
    atom.size = methname.len + 1;
    atom.alignment = .@"1";
@@ -72,21 +67,13 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil
    return atom_index;
 }

-fn addObjcSelrefsSection(
-    self: *InternalObject,
-    methname: []const u8,
-    methname_atom_index: Atom.Index,
-    macho_file: *MachO,
-) !Atom.Index {
+fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, macho_file: *MachO) !Atom.Index {
    const gpa = macho_file.base.comp.gpa;
    const atom_index = try macho_file.addAtom();
    try self.atoms.append(gpa, atom_index);

-    const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname});
-    defer gpa.free(name);
    const atom = macho_file.getAtom(atom_index).?;
    atom.atom_index = atom_index;
-    atom.name = try self.addString(gpa, name);
    atom.file = self.index;
    atom.size = @sizeOf(u64);
    atom.alignment = .@"8";
@@ -122,6 +109,102 @@ fn addObjcSelrefsSection(
    return atom_index;
 }

+/// Registers the literal atoms of this object in `lp`.
+///
+/// C-string and fixed-size literal sections are keyed by their section bytes.
+/// Pointer-literal sections are keyed by the bytes of the atom targeted by
+/// their single local relocation, starting at the relocation addend. When a
+/// key is new, this atom is recorded for it. Every visited literal atom gets
+/// its `literal_pool` flag set and the pool index stored in its extra data.
+pub fn resolveLiterals(self: InternalObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void {
+    const gpa = macho_file.base.comp.gpa;
+
+    // Scratch space for the bytes of pointer-literal targets; reused per atom.
+    var scratch = std.ArrayList(u8).init(gpa);
+    defer scratch.deinit();
+
+    const slice = self.sections.slice();
+    for (slice.items(.header), self.atoms.items, 0..) |header, atom_index, sect_id| {
+        if (Object.isCstringLiteral(header) or Object.isFixedSizeLiteral(header)) {
+            const bytes = try self.getSectionData(@intCast(sect_id));
+            const atom = macho_file.getAtom(atom_index).?;
+            const entry = try lp.insert(gpa, header.type(), bytes);
+            if (!entry.found_existing) entry.atom.* = atom_index;
+            atom.flags.literal_pool = true;
+            try atom.addExtra(.{ .literal_index = entry.index }, macho_file);
+            continue;
+        }
+        if (!Object.isPtrLiteral(header)) continue;
+
+        const atom = macho_file.getAtom(atom_index).?;
+        const relocs = atom.getRelocs(macho_file);
+        assert(relocs.len == 1);
+        const rel = relocs[0];
+        assert(rel.tag == .local);
+        const pointee = macho_file.getAtom(rel.target).?;
+        const addend = std.math.cast(u32, rel.addend) orelse return error.Overflow;
+        const pointee_size = std.math.cast(usize, pointee.size) orelse return error.Overflow;
+        // `scratch` is empty here, so this grows it to exactly `pointee_size`.
+        try scratch.resize(pointee_size);
+        try pointee.getData(macho_file, scratch.items);
+        const entry = try lp.insert(gpa, header.type(), scratch.items[addend..]);
+        scratch.clearRetainingCapacity();
+        if (!entry.found_existing) entry.atom.* = atom_index;
+        atom.flags.literal_pool = true;
+        try atom.addExtra(.{ .literal_index = entry.index }, macho_file);
+    }
+}
+
+/// Redirects references to duplicate literal atoms to the atom recorded for
+/// them in `lp`.
+///
+/// For every alive atom with relocations, a local relocation whose target is
+/// a duplicate is retargeted, and an extern relocation's target symbol is
+/// moved to the pool atom. Symbols flagged `objc_stubs` get their
+/// `objc_selrefs` extra updated the same way. In each case the pool atom's
+/// alignment is raised to at least the duplicate's and the duplicate is
+/// marked dead.
+pub fn dedupLiterals(self: InternalObject, lp: MachO.LiteralPool, macho_file: *MachO) void {
+    for (self.atoms.items) |atom_index| {
+        const atom = macho_file.getAtom(atom_index) orelse continue;
+        if (!(atom.flags.alive and atom.flags.relocs)) continue;
+
+        const atom_extra = atom.getExtra(macho_file).?;
+        const section_relocs = self.sections.items(.relocs)[atom.n_sect].items;
+        const atom_relocs = section_relocs[atom_extra.rel_index..][0..atom_extra.rel_count];
+
+        for (atom_relocs) |*rel| {
+            switch (rel.tag) {
+                .local => {
+                    const target = macho_file.getAtom(rel.target).?;
+                    const lp_index = target.getLiteralPoolIndex(macho_file) orelse continue;
+                    const pooled = lp.getAtom(lp_index, macho_file);
+                    if (target.atom_index == pooled.atom_index) continue;
+                    pooled.alignment = pooled.alignment.max(target.alignment);
+                    target.flags.alive = false;
+                    rel.target = pooled.atom_index;
+                },
+                .@"extern" => {
+                    const target_sym = rel.getTargetSymbol(macho_file);
+                    const target_atom = target_sym.getAtom(macho_file) orelse continue;
+                    const lp_index = target_atom.getLiteralPoolIndex(macho_file) orelse continue;
+                    const pooled = lp.getAtom(lp_index, macho_file);
+                    if (target_atom.atom_index == pooled.atom_index) continue;
+                    pooled.alignment = pooled.alignment.max(target_atom.alignment);
+                    target_atom.flags.alive = false;
+                    target_sym.atom = pooled.atom_index;
+                },
+            }
+        }
+    }
+
+    for (self.symbols.items) |sym_index| {
+        const sym = macho_file.getSymbol(sym_index);
+        if (!sym.flags.objc_stubs) continue;
+        var sym_extra = sym.getExtra(macho_file).?;
+        const selrefs_atom = macho_file.getAtom(sym_extra.objc_selrefs).?;
+        const lp_index = selrefs_atom.getLiteralPoolIndex(macho_file) orelse continue;
+        const pooled = lp.getAtom(lp_index, macho_file);
+        if (selrefs_atom.atom_index == pooled.atom_index) continue;
+        pooled.alignment = pooled.alignment.max(selrefs_atom.alignment);
+        selrefs_atom.flags.alive = false;
+        sym_extra.objc_selrefs = pooled.atom_index;
+        sym.setExtra(sym_extra, macho_file);
+    }
+}
+
 pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void {
    for (self.symbols.items) |sym_index| {
        const sym = macho_file.getSymbol(sym_index);
@@ -167,18 +250,23 @@ fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8,
    return n_sect;
 }

-pub fn getAtomData(self: *const InternalObject, atom: Atom, buffer: []u8) !void {
-    assert(buffer.len == atom.size);
+fn getSectionData(self: *const InternalObject, index: u32) error{Overflow}![]const u8 {
    const slice = self.sections.slice();
-    const sect = slice.items(.header)[atom.n_sect];
-    const extra = slice.items(.extra)[atom.n_sect];
-    const data = if (extra.is_objc_methname) blk: {
+    assert(index < slice.items(.header).len);
+    const sect = slice.items(.header)[index];
+    const extra = slice.items(.extra)[index];
+    if (extra.is_objc_methname) {
        const size = std.math.cast(usize, sect.size) orelse return error.Overflow;
-        break :blk self.objc_methnames.items[sect.offset..][0..size];
+        return self.objc_methnames.items[sect.offset..][0..size];
    } else if (extra.is_objc_selref)
-        &self.objc_selrefs
+        return &self.objc_selrefs
    else
        @panic("ref to non-existent section");
+}
+
+pub fn getAtomData(self: *const InternalObject, atom: Atom, buffer: []u8) error{Overflow}!void {
+    assert(buffer.len == atom.size);
+    const data = try self.getSectionData(atom.n_sect);
    const off = std.math.cast(usize, atom.off) orelse return error.Overflow;
    const size = std.math.cast(usize, atom.size) orelse return error.Overflow;
    @memcpy(buffer, data[off..][0..size]);
@@ -191,17 +279,11 @@ pub fn getAtomRelocs(self: *const InternalObject, atom: Atom, macho_file: *MachO
    return relocs.items[extra.rel_index..][0..extra.rel_count];
 }

-fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 {
-    const off: u32 = @intCast(self.strtab.items.len);
-    try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
-    self.strtab.appendSliceAssumeCapacity(name);
-    self.strtab.appendAssumeCapacity(0);
-    return off;
-}
-
 pub fn getString(self: InternalObject, off: u32) [:0]const u8 {
-    assert(off < self.strtab.items.len);
-    return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
+    _ = self;
+    _ = off;
+    // We don't have any local strings for synthetic atoms.
+    return "";
 }

 pub fn asFile(self: *InternalObject) File {
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -208,7 +208,9 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
        try self.initSections(nlists.items, macho_file);
    }

-    try self.initLiteralSections(macho_file);
+    try self.initCstringLiterals(macho_file);
+    try self.initFixedSizeLiterals(macho_file);
+    try self.initPointerLiterals(macho_file);
    try self.linkNlistToAtom(macho_file);

    try self.sortAtoms(macho_file);
@@ -263,25 +265,33 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
    }
 }

-inline fn isLiteral(sect: macho.section_64) bool {
+pub fn isCstringLiteral(sect: macho.section_64) bool {
+    return sect.type() == macho.S_CSTRING_LITERALS;
+}
+
+pub fn isFixedSizeLiteral(sect: macho.section_64) bool {
    return switch (sect.type()) {
-        macho.S_CSTRING_LITERALS,
        macho.S_4BYTE_LITERALS,
        macho.S_8BYTE_LITERALS,
        macho.S_16BYTE_LITERALS,
-        macho.S_LITERAL_POINTERS,
        => true,
        else => false,
    };
 }

+pub fn isPtrLiteral(sect: macho.section_64) bool {
+    return sect.type() == macho.S_LITERAL_POINTERS;
+}
+
 fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
    const tracy = trace(@src());
    defer tracy.end();
    const gpa = macho_file.base.comp.gpa;
    const slice = self.sections.slice();
    for (slice.items(.header), slice.items(.subsections), 0..) |sect, *subsections, n_sect| {
-        if (isLiteral(sect)) continue;
+        if (isCstringLiteral(sect)) continue;
+        if (isFixedSizeLiteral(sect)) continue;
+        if (isPtrLiteral(sect)) continue;

        const nlist_start = for (nlists, 0..) |nlist, i| {
            if (nlist.nlist.n_sect - 1 == n_sect) break i;
@@ -352,7 +362,9 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
    try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len);

    for (slice.items(.header), 0..) |sect, n_sect| {
-        if (isLiteral(sect)) continue;
+        if (isCstringLiteral(sect)) continue;
+        if (isFixedSizeLiteral(sect)) continue;
+        if (isPtrLiteral(sect)) continue;

        const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() });
        defer gpa.free(name);
@@ -393,6 +405,220 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
    }
 }

+/// Splits every C-string literal section into one atom per NUL-terminated
+/// string and records each atom as a subsection at the string's offset.
+/// Reports a parse error and returns `error.MalformedObject` when the
+/// remaining section bytes contain no NUL terminator.
+fn initCstringLiterals(self: *Object, macho_file: *MachO) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    const gpa = macho_file.base.comp.gpa;
+    const slice = self.sections.slice();
+
+    for (slice.items(.header), 0..) |sect, n_sect| {
+        if (!isCstringLiteral(sect)) continue;
+
+        const bytes = try self.getSectionData(@intCast(n_sect), macho_file);
+        defer gpa.free(bytes);
+
+        var start: u32 = 0;
+        while (start < bytes.len) {
+            // Locate the terminator of the string beginning at `start`.
+            const nul_pos = mem.indexOfScalarPos(u8, bytes, start, 0) orelse {
+                try macho_file.reportParseError2(
+                    self.index,
+                    "string not null terminated in '{s},{s}'",
+                    .{ sect.segName(), sect.sectName() },
+                );
+                return error.MalformedObject;
+            };
+            // The atom covers the string including its terminator.
+            const end: u32 = @intCast(nul_pos + 1);
+
+            const atom_index = try self.addAtom(.{
+                .name = 0,
+                .n_sect = @intCast(n_sect),
+                .off = start,
+                .size = end - start,
+                .alignment = sect.@"align",
+            }, macho_file);
+            try slice.items(.subsections)[n_sect].append(gpa, .{
+                .atom = atom_index,
+                .off = start,
+            });
+
+            start = end;
+        }
+    }
+}
+
+/// Splits every 4-, 8- and 16-byte literal section into one atom per record
+/// and records each atom as a subsection at the record's offset.
+/// Reports a parse error and returns `error.MalformedObject` when the section
+/// size is not a multiple of the record size.
+fn initFixedSizeLiterals(self: *Object, macho_file: *MachO) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    const gpa = macho_file.base.comp.gpa;
+    const slice = self.sections.slice();
+
+    for (slice.items(.header), 0..) |sect, n_sect| {
+        if (!isFixedSizeLiteral(sect)) continue;
+
+        const record_size: u8 = switch (sect.type()) {
+            macho.S_4BYTE_LITERALS => 4,
+            macho.S_8BYTE_LITERALS => 8,
+            macho.S_16BYTE_LITERALS => 16,
+            else => unreachable,
+        };
+        if (sect.size % record_size != 0) {
+            try macho_file.reportParseError2(
+                self.index,
+                "size not multiple of record size in '{s},{s}'",
+                .{ sect.segName(), sect.sectName() },
+            );
+            return error.MalformedObject;
+        }
+
+        const subsections = &slice.items(.subsections)[n_sect];
+        var offset: u32 = 0;
+        while (offset < sect.size) {
+            const atom_index = try self.addAtom(.{
+                .name = 0,
+                .n_sect = @intCast(n_sect),
+                .off = offset,
+                .size = record_size,
+                .alignment = sect.@"align",
+            }, macho_file);
+            try subsections.append(gpa, .{ .atom = atom_index, .off = offset });
+            offset += record_size;
+        }
+    }
+}
+
+/// Splits every pointer-literal section into one 8-byte atom per pointer and
+/// records each atom as a subsection at the pointer's offset.
+/// Reports a parse error and returns `error.MalformedObject` when the section
+/// size is not a multiple of 8.
+fn initPointerLiterals(self: *Object, macho_file: *MachO) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    const gpa = macho_file.base.comp.gpa;
+    const slice = self.sections.slice();
+
+    for (slice.items(.header), 0..) |sect, n_sect| {
+        if (!isPtrLiteral(sect)) continue;
+
+        const ptr_size: u8 = 8;
+        if (sect.size % ptr_size != 0) {
+            try macho_file.reportParseError2(
+                self.index,
+                "size not multiple of record size in '{s},{s}'",
+                .{ sect.segName(), sect.sectName() },
+            );
+            return error.MalformedObject;
+        }
+        const ptr_count = math.cast(usize, @divExact(sect.size, ptr_size)) orelse return error.Overflow;
+
+        const subsections = &slice.items(.subsections)[n_sect];
+        for (0..ptr_count) |ptr_index| {
+            const offset: u32 = @as(u32, @intCast(ptr_index)) * ptr_size;
+            const atom_index = try self.addAtom(.{
+                .name = 0,
+                .n_sect = @intCast(n_sect),
+                .off = offset,
+                .size = ptr_size,
+                .alignment = sect.@"align",
+            }, macho_file);
+            try subsections.append(gpa, .{ .atom = atom_index, .off = offset });
+        }
+    }
+}
+
+/// Registers the literal atoms of this object in `lp`.
+///
+/// Atoms in C-string and fixed-size literal sections are keyed by their own
+/// bytes. Atoms in pointer-literal sections are keyed by the bytes of the atom
+/// their single relocation resolves to (directly for local relocations, via
+/// the target symbol's atom for extern ones), starting at the relocation
+/// addend. When a key is new, this atom is recorded for it. Every visited atom
+/// gets its `literal_pool` flag set and the pool index stored in its extra data.
+pub fn resolveLiterals(self: Object, lp: *MachO.LiteralPool, macho_file: *MachO) !void {
+    const gpa = macho_file.base.comp.gpa;
+
+    // Scratch space for the bytes of pointer-literal targets; reused per atom.
+    var scratch = std.ArrayList(u8).init(gpa);
+    defer scratch.deinit();
+
+    const slice = self.sections.slice();
+    for (slice.items(.header), slice.items(.subsections), 0..) |header, subs, n_sect| {
+        if (isCstringLiteral(header) or isFixedSizeLiteral(header)) {
+            const sect_bytes = try self.getSectionData(@intCast(n_sect), macho_file);
+            defer gpa.free(sect_bytes);
+
+            for (subs.items) |sub| {
+                const atom = macho_file.getAtom(sub.atom).?;
+                const off = math.cast(usize, atom.off) orelse return error.Overflow;
+                const len = math.cast(usize, atom.size) orelse return error.Overflow;
+                const entry = try lp.insert(gpa, header.type(), sect_bytes[off..][0..len]);
+                if (!entry.found_existing) entry.atom.* = sub.atom;
+                atom.flags.literal_pool = true;
+                try atom.addExtra(.{ .literal_index = entry.index }, macho_file);
+            }
+            continue;
+        }
+        if (!isPtrLiteral(header)) continue;
+
+        for (subs.items) |sub| {
+            const atom = macho_file.getAtom(sub.atom).?;
+            const relocs = atom.getRelocs(macho_file);
+            assert(relocs.len == 1);
+            const rel = relocs[0];
+            const pointee_index = switch (rel.tag) {
+                .local => rel.target,
+                .@"extern" => rel.getTargetSymbol(macho_file).atom,
+            };
+            const addend = math.cast(u32, rel.addend) orelse return error.Overflow;
+            const pointee = macho_file.getAtom(pointee_index).?;
+            const pointee_size = math.cast(usize, pointee.size) orelse return error.Overflow;
+            // `scratch` is empty here, so this grows it to exactly `pointee_size`.
+            try scratch.resize(pointee_size);
+            try pointee.getData(macho_file, scratch.items);
+            const entry = try lp.insert(gpa, header.type(), scratch.items[addend..]);
+            scratch.clearRetainingCapacity();
+            if (!entry.found_existing) entry.atom.* = sub.atom;
+            atom.flags.literal_pool = true;
+            try atom.addExtra(.{ .literal_index = entry.index }, macho_file);
+        }
+    }
+}
+
+/// Redirects references to duplicate literal atoms to the atom recorded for
+/// them in `lp`.
+///
+/// For every alive atom with relocations, a local relocation whose target is
+/// a duplicate is retargeted, and an extern relocation's target symbol is
+/// moved to the pool atom. In both cases the pool atom's alignment is raised
+/// to at least the duplicate's and the duplicate is marked dead.
+pub fn dedupLiterals(self: Object, lp: MachO.LiteralPool, macho_file: *MachO) void {
+    for (self.atoms.items) |atom_index| {
+        const atom = macho_file.getAtom(atom_index) orelse continue;
+        if (!(atom.flags.alive and atom.flags.relocs)) continue;
+
+        const atom_extra = atom.getExtra(macho_file).?;
+        const section_relocs = self.sections.items(.relocs)[atom.n_sect].items;
+        const atom_relocs = section_relocs[atom_extra.rel_index..][0..atom_extra.rel_count];
+
+        for (atom_relocs) |*rel| {
+            switch (rel.tag) {
+                .local => {
+                    const target = macho_file.getAtom(rel.target).?;
+                    const lp_index = target.getLiteralPoolIndex(macho_file) orelse continue;
+                    const pooled = lp.getAtom(lp_index, macho_file);
+                    if (target.atom_index == pooled.atom_index) continue;
+                    pooled.alignment = pooled.alignment.max(target.alignment);
+                    target.flags.alive = false;
+                    rel.target = pooled.atom_index;
+                },
+                .@"extern" => {
+                    const target_sym = rel.getTargetSymbol(macho_file);
+                    const target_atom = target_sym.getAtom(macho_file) orelse continue;
+                    const lp_index = target_atom.getLiteralPoolIndex(macho_file) orelse continue;
+                    const pooled = lp.getAtom(lp_index, macho_file);
+                    if (target_atom.atom_index == pooled.atom_index) continue;
+                    pooled.alignment = pooled.alignment.max(target_atom.alignment);
+                    target_atom.flags.alive = false;
+                    target_sym.atom = pooled.atom_index;
+                },
+            }
+        }
+    }
+}
+
 const AddAtomArgs = struct {
    name: u32,
    n_sect: u8,
@@ -416,34 +642,6 @@ fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index {
    return atom_index;
 }

-fn initLiteralSections(self: *Object, macho_file: *MachO) !void {
-    const tracy = trace(@src());
-    defer tracy.end();
-    // TODO here we should split into equal-sized records, hash the contents, and then
-    // deduplicate - ICF.
-    // For now, we simply cover each literal section with one large atom.
-    const gpa = macho_file.base.comp.gpa;
-    const slice = self.sections.slice();
-
-    try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len);
-
-    for (slice.items(.header), 0..) |sect, n_sect| {
-        if (!isLiteral(sect)) continue;
-
-        const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() });
-        defer gpa.free(name);
-
-        const atom_index = try self.addAtom(.{
-            .name = try self.addString(gpa, name),
-            .n_sect = @intCast(n_sect),
-            .off = 0,
-            .size = sect.size,
-            .alignment = sect.@"align",
-        }, macho_file);
-        try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 });
-    }
-}
-
 pub fn findAtom(self: Object, addr: u64) ?Atom.Index {
    const tracy = trace(@src());
    defer tracy.end();
@@ -1369,7 +1567,10 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void {
        const name = sym.getName(macho_file);
        // TODO in -r mode, we actually want to merge symbol names and emit only one
        // work it out when emitting relocs
-        if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.base.isObject()) continue;
+        if (name.len > 0 and
+            (name[0] == 'L' or name[0] == 'l' or
+            mem.startsWith(u8, name, "_OBJC_SELECTOR_REFERENCES_")) and
+            !macho_file.base.isObject()) continue;
        sym.flags.output_symtab = true;
        if (sym.isLocal()) {
            try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file);
--- a/src/link/MachO/Relocation.zig
+++ b/src/link/MachO/Relocation.zig
@@ -60,6 +60,59 @@ pub fn lessThan(ctx: void, lhs: Relocation, rhs: Relocation) bool {
    return lhs.offset < rhs.offset;
 }

+const FormatCtx = struct { Relocation, std.Target.Cpu.Arch }; // tuple consumed by `formatPretty`: the relocation first, then the CPU architecture
+
+pub fn fmtPretty(rel: Relocation, cpu_arch: std.Target.Cpu.Arch) std.fmt.Formatter(formatPretty) {
+    return .{ .data = .{ rel, cpu_arch } }; // bundle both values into the tuple that `formatPretty` destructures
+}
+
+/// Writes the relocation type name of the relocation in `ctx` to `writer`, e.g.
+/// `X86_64_RELOC_SIGNED` or `ARM64_RELOC_PAGE21`. The format string and the format
+/// options are ignored. The architecture in `ctx` is consulted only for the kinds
+/// that have both an x86_64 and an aarch64 name.
+fn formatPretty(
+    ctx: FormatCtx,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = unused_fmt_string;
+    _ = options;
+    // Chooses between the two spellings of a kind shared by both architectures.
+    const pick = struct {
+        fn pick(target_arch: std.Target.Cpu.Arch, x86_64_name: []const u8, aarch64_name: []const u8) []const u8 {
+            return switch (target_arch) {
+                .x86_64 => x86_64_name,
+                .aarch64 => aarch64_name,
+                else => unreachable,
+            };
+        }
+    }.pick;
+
+    const reloc, const arch = ctx;
+    const text: []const u8 = switch (reloc.type) {
+        .signed => "X86_64_RELOC_SIGNED",
+        .signed1 => "X86_64_RELOC_SIGNED_1",
+        .signed2 => "X86_64_RELOC_SIGNED_2",
+        .signed4 => "X86_64_RELOC_SIGNED_4",
+        .got_load => "X86_64_RELOC_GOT_LOAD",
+        .tlv => "X86_64_RELOC_TLV",
+        .zig_got_load => "ZIG_GOT_LOAD",
+        .page => "ARM64_RELOC_PAGE21",
+        .pageoff => "ARM64_RELOC_PAGEOFF12",
+        .got_load_page => "ARM64_RELOC_GOT_LOAD_PAGE21",
+        .got_load_pageoff => "ARM64_RELOC_GOT_LOAD_PAGEOFF12",
+        .tlvp_page => "ARM64_RELOC_TLVP_LOAD_PAGE21",
+        .tlvp_pageoff => "ARM64_RELOC_TLVP_LOAD_PAGEOFF12",
+        .branch => pick(arch, "X86_64_RELOC_BRANCH", "ARM64_RELOC_BRANCH26"),
+        .got => pick(arch, "X86_64_RELOC_GOT", "ARM64_RELOC_POINTER_TO_GOT"),
+        .subtractor => pick(arch, "X86_64_RELOC_SUBTRACTOR", "ARM64_RELOC_SUBTRACTOR"),
+        .unsigned => pick(arch, "X86_64_RELOC_UNSIGNED", "ARM64_RELOC_UNSIGNED"),
+    };
+    try writer.writeAll(text);
+}
+
 pub const Type = enum {
    // x86_64
    /// RIP-relative displacement (X86_64_RELOC_SIGNED)
--- a/src/link/MachO/Symbol.zig
+++ b/src/link/MachO/Symbol.zig
@@ -14,8 +14,8 @@ file: File.Index = 0,
 /// Use `getAtom` to get the pointer to the atom.
 atom: Atom.Index = 0,

-/// Assigned output section index for this atom.
-out_n_sect: u16 = 0,
+/// Assigned output section index for this symbol.
+out_n_sect: u8 = 0,

 /// Index of the source nlist this symbol references.
 /// Use `getNlist` to pull the nlist from the relevant file.
--- a/src/link/MachO/ZigObject.zig
+++ b/src/link/MachO/ZigObject.zig
@@ -314,6 +314,20 @@ pub fn checkDuplicates(self: *ZigObject, dupes: anytype, macho_file: *MachO) !vo
    }
 }

+pub fn resolveLiterals(self: *ZigObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void {
+    _ = self;
+    _ = lp;
+    _ = macho_file;
+    // TODO: not implemented yet; currently a no-op that ignores all of its arguments and adds nothing to `lp`.
+}
+
+pub fn dedupLiterals(self: *ZigObject, lp: MachO.LiteralPool, macho_file: *MachO) void {
+    _ = self;
+    _ = lp;
+    _ = macho_file;
+    // TODO: not implemented yet; currently a no-op that ignores all of its arguments and rewrites no relocations.
+}
+
 /// This is just a temporary helper function that allows us to re-read what we wrote to file into a buffer.
 /// We need this so that we can write to an archive.
 /// TODO implement writing ZigObject data directly to a buffer instead.
--- a/src/link/MachO/relocatable.zig
+++ b/src/link/MachO/relocatable.zig
@@ -46,6 +46,7 @@ pub fn flushObject(macho_file: *MachO, comp: *Compilation, module_obj_path: ?[]c

    try macho_file.addUndefinedGlobals();
    try macho_file.resolveSymbols();
+    try macho_file.dedupLiterals();
    markExports(macho_file);
    claimUnresolved(macho_file);
    try initOutputSections(macho_file);
@@ -542,6 +543,9 @@ fn writeAtoms(macho_file: *MachO) !void {
    const cpu_arch = macho_file.getTarget().cpu.arch;
    const slice = macho_file.sections.slice();

+    var relocs = std.ArrayList(macho.relocation_info).init(gpa);
+    defer relocs.deinit();
+
    for (slice.items(.header), slice.items(.atoms), 0..) |header, atoms, i| {
        if (atoms.items.len == 0) continue;
        if (header.isZerofill()) continue;
@@ -553,8 +557,7 @@ fn writeAtoms(macho_file: *MachO) !void {
        const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0;
        @memset(code, padding_byte);

-        var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc);
-        defer relocs.deinit();
+        try relocs.ensureTotalCapacity(header.nreloc);

        for (atoms.items) |atom_index| {
            const atom = macho_file.getAtom(atom_index).?;
@@ -572,22 +575,24 @@ fn writeAtoms(macho_file: *MachO) !void {
        // TODO scattered writes?
        try macho_file.base.file.?.pwriteAll(code, header.offset);
        try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff);
+
+        relocs.clearRetainingCapacity();
    }

    if (macho_file.getZigObject()) |zo| {
        // TODO: this is ugly; perhaps we should aggregrate before?
-        var relocs = std.AutoArrayHashMap(u8, std.ArrayList(macho.relocation_info)).init(gpa);
+        var zo_relocs = std.AutoArrayHashMap(u8, std.ArrayList(macho.relocation_info)).init(gpa);
        defer {
-            for (relocs.values()) |*list| {
+            for (zo_relocs.values()) |*list| {
                list.deinit();
            }
-            relocs.deinit();
+            zo_relocs.deinit();
        }

        for (macho_file.sections.items(.header), 0..) |header, n_sect| {
            if (header.isZerofill()) continue;
            if (!macho_file.isZigSection(@intCast(n_sect)) and !macho_file.isDebugSection(@intCast(n_sect))) continue;
-            const gop = try relocs.getOrPut(@intCast(n_sect));
+            const gop = try zo_relocs.getOrPut(@intCast(n_sect));
            if (gop.found_existing) continue;
            gop.value_ptr.* = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc);
        }
@@ -618,12 +623,12 @@ fn writeAtoms(macho_file: *MachO) !void {
                },
            };
            const file_offset = header.offset + atom.value;
-            const rels = relocs.getPtr(atom.out_n_sect).?;
+            const rels = zo_relocs.getPtr(atom.out_n_sect).?;
            try atom.writeRelocs(macho_file, code, rels);
            try macho_file.base.file.?.pwriteAll(code, file_offset);
        }

-        for (relocs.keys(), relocs.values()) |sect_id, rels| {
+        for (zo_relocs.keys(), zo_relocs.values()) |sect_id, rels| {
            const header = macho_file.sections.items(.header)[sect_id];
            assert(rels.items.len == header.nreloc);
            mem.sort(macho.relocation_info, rels.items, {}, sortReloc);
--- a/test/link/macho.zig
+++ b/test/link/macho.zig
@@ -38,6 +38,10 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step {
    macho_step.dependOn(testLayout(b, .{ .target = default_target }));
    macho_step.dependOn(testLinkingStaticLib(b, .{ .target = default_target }));
    macho_step.dependOn(testLinksection(b, .{ .target = default_target }));
+    macho_step.dependOn(testMergeLiteralsX64(b, .{ .target = x86_64_target }));
+    macho_step.dependOn(testMergeLiteralsArm64(b, .{ .target = aarch64_target }));
+    macho_step.dependOn(testMergeLiteralsArm642(b, .{ .target = aarch64_target }));
+    macho_step.dependOn(testMergeLiteralsAlignment(b, .{ .target = aarch64_target }));
    macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target }));
    macho_step.dependOn(testNoDeadStrip(b, .{ .target = default_target }));
    macho_step.dependOn(testNoExportsDylib(b, .{ .target = default_target }));
@@ -81,6 +85,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step {
            macho_step.dependOn(testDeadStripDylibs(b, .{ .target = b.host }));
            macho_step.dependOn(testHeaderpad(b, .{ .target = b.host }));
            macho_step.dependOn(testLinkDirectlyCppTbd(b, .{ .target = b.host }));
+            macho_step.dependOn(testMergeLiteralsObjc(b, .{ .target = b.host }));
            macho_step.dependOn(testNeededFramework(b, .{ .target = b.host }));
            macho_step.dependOn(testObjc(b, .{ .target = b.host }));
            macho_step.dependOn(testObjcpp(b, .{ .target = b.host }));
@@ -914,6 +919,485 @@ fn testLinksection(b: *Build, opts: Options) *Step {
    return test_step;
 }

+fn testMergeLiteralsX64(b: *Build, opts: Options) *Step { // links objects that define the same "hello" string and the same 8-byte double, then checks the output's literal sections
+    const test_step = addTestStep(b, "merge-literals-x64", opts);
+
+    const a_o = addObject(b, opts, .{ .name = "a", .asm_source_bytes = 
+    \\.globl _q1
+    \\.globl _s1
+    \\
+    \\.align 4
+    \\_q1:
+    \\  lea L._q1(%rip), %rax
+    \\  mov (%rax), %xmm0
+    \\  ret
+    \\ 
+    \\.section __TEXT,__cstring,cstring_literals
+    \\l._s1:
+    \\  .asciz "hello"
+    \\
+    \\.section __TEXT,__literal8,8byte_literals
+    \\.align 8
+    \\L._q1:
+    \\  .double 1.2345
+    \\
+    \\.section __DATA,__data
+    \\.align 8
+    \\_s1:
+    \\  .quad l._s1
+    });
+
+    const b_o = addObject(b, opts, .{ .name = "b", .asm_source_bytes = 
+    \\.globl _q2
+    \\.globl _s2
+    \\.globl _s3
+    \\
+    \\.align 4
+    \\_q2:
+    \\  lea L._q2(%rip), %rax
+    \\  mov (%rax), %xmm0
+    \\  ret
+    \\ 
+    \\.section __TEXT,__cstring,cstring_literals
+    \\l._s2:
+    \\  .asciz "hello"
+    \\l._s3:
+    \\  .asciz "world"
+    \\
+    \\.section __TEXT,__literal8,8byte_literals
+    \\.align 8
+    \\L._q2:
+    \\  .double 1.2345
+    \\
+    \\.section __DATA,__data
+    \\.align 8
+    \\_s2:
+    \\   .quad l._s2
+    \\_s3:
+    \\   .quad l._s3
+    });
+
+    const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = 
+    \\#include <stdio.h>
+    \\extern double q1();
+    \\extern double q2();
+    \\extern const char* s1;
+    \\extern const char* s2;
+    \\extern const char* s3;
+    \\int main() {
+    \\  printf("%s, %s, %s, %f, %f", s1, s2, s3, q1(), q2());
+    \\  return 0;
+    \\}
+    });
+
+    const runWithChecks = struct {
+        fn runWithChecks(step: *Step, exe: *Compile) void {
+            const run = addRunArtifact(exe);
+            run.expectStdOutEqual("hello, hello, world, 1.234500, 1.234500"); // s1 and s2 print the same text; q1() and q2() yield the same double
+            step.dependOn(&run.step);
+
+            const check = exe.checkObject();
+            check.dumpSection("__TEXT,__const");
+            check.checkContains("\x8d\x97n\x12\x83\xc0\xf3?"); // little-endian bytes of the double 1.2345
+            check.dumpSection("__TEXT,__cstring");
+            check.checkContains("hello\x00world\x00%s, %s, %s, %f, %f\x00"); // one "hello" directly followed by "world" and main's format string
+            step.dependOn(&check.step);
+        }
+    }.runWithChecks;
+
+    { // objects added in the order a, b, main
+        const exe = addExecutable(b, opts, .{ .name = "main1" });
+        exe.addObject(a_o);
+        exe.addObject(b_o);
+        exe.addObject(main_o);
+        runWithChecks(test_step, exe);
+    }
+
+    { // same objects with a and b swapped
+        const exe = addExecutable(b, opts, .{ .name = "main2" });
+        exe.addObject(b_o);
+        exe.addObject(a_o);
+        exe.addObject(main_o);
+        runWithChecks(test_step, exe);
+    }
+
+    { // a, b and main are first added to the intermediate object c; only c is added to the executable
+        const c_o = addObject(b, opts, .{ .name = "c" });
+        c_o.addObject(a_o);
+        c_o.addObject(b_o);
+        c_o.addObject(main_o);
+
+        const exe = addExecutable(b, opts, .{ .name = "main3" });
+        exe.addObject(c_o);
+        runWithChecks(test_step, exe);
+    }
+
+    return test_step;
+}
+
+fn testMergeLiteralsArm64(b: *Build, opts: Options) *Step { // arm64 assembly variant: same "hello" string and same 8-byte double in two objects, then checks the output's literal sections
+    const test_step = addTestStep(b, "merge-literals-arm64", opts);
+
+    const a_o = addObject(b, opts, .{ .name = "a", .asm_source_bytes = 
+    \\.globl _q1
+    \\.globl _s1
+    \\
+    \\.align 4
+    \\_q1:
+    \\  adrp x8, L._q1@PAGE
+    \\  ldr d0, [x8, L._q1@PAGEOFF]
+    \\  ret
+    \\ 
+    \\.section __TEXT,__cstring,cstring_literals
+    \\l._s1:
+    \\  .asciz "hello"
+    \\
+    \\.section __TEXT,__literal8,8byte_literals
+    \\.align 8
+    \\L._q1:
+    \\  .double 1.2345
+    \\
+    \\.section __DATA,__data
+    \\.align 8
+    \\_s1:
+    \\  .quad l._s1
+    });
+
+    const b_o = addObject(b, opts, .{ .name = "b", .asm_source_bytes = 
+    \\.globl _q2
+    \\.globl _s2
+    \\.globl _s3
+    \\
+    \\.align 4
+    \\_q2:
+    \\  adrp x8, L._q2@PAGE
+    \\  ldr d0, [x8, L._q2@PAGEOFF]
+    \\  ret
+    \\ 
+    \\.section __TEXT,__cstring,cstring_literals
+    \\l._s2:
+    \\  .asciz "hello"
+    \\l._s3:
+    \\  .asciz "world"
+    \\
+    \\.section __TEXT,__literal8,8byte_literals
+    \\.align 8
+    \\L._q2:
+    \\  .double 1.2345
+    \\
+    \\.section __DATA,__data
+    \\.align 8
+    \\_s2:
+    \\   .quad l._s2
+    \\_s3:
+    \\   .quad l._s3
+    });
+
+    const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = 
+    \\#include <stdio.h>
+    \\extern double q1();
+    \\extern double q2();
+    \\extern const char* s1;
+    \\extern const char* s2;
+    \\extern const char* s3;
+    \\int main() {
+    \\  printf("%s, %s, %s, %f, %f", s1, s2, s3, q1(), q2());
+    \\  return 0;
+    \\}
+    });
+
+    const runWithChecks = struct {
+        fn runWithChecks(step: *Step, exe: *Compile) void {
+            const run = addRunArtifact(exe);
+            run.expectStdOutEqual("hello, hello, world, 1.234500, 1.234500"); // s1 and s2 print the same text; q1() and q2() yield the same double
+            step.dependOn(&run.step);
+
+            const check = exe.checkObject();
+            check.dumpSection("__TEXT,__const");
+            check.checkContains("\x8d\x97n\x12\x83\xc0\xf3?"); // little-endian bytes of the double 1.2345
+            check.dumpSection("__TEXT,__cstring");
+            check.checkContains("hello\x00world\x00%s, %s, %s, %f, %f\x00"); // one "hello" directly followed by "world" and main's format string
+            step.dependOn(&check.step);
+        }
+    }.runWithChecks;
+
+    { // objects added in the order a, b, main
+        const exe = addExecutable(b, opts, .{ .name = "main1" });
+        exe.addObject(a_o);
+        exe.addObject(b_o);
+        exe.addObject(main_o);
+        runWithChecks(test_step, exe);
+    }
+
+    { // same objects with a and b swapped
+        const exe = addExecutable(b, opts, .{ .name = "main2" });
+        exe.addObject(b_o);
+        exe.addObject(a_o);
+        exe.addObject(main_o);
+        runWithChecks(test_step, exe);
+    }
+
+    { // a, b and main are first added to the intermediate object c; only c is added to the executable
+        const c_o = addObject(b, opts, .{ .name = "c" });
+        c_o.addObject(a_o);
+        c_o.addObject(b_o);
+        c_o.addObject(main_o);
+
+        const exe = addExecutable(b, opts, .{ .name = "main3" });
+        exe.addObject(c_o);
+        runWithChecks(test_step, exe);
+    }
+
+    return test_step;
+}
+
+/// This particular test case will generate invalid machine code that will segfault at runtime.
+/// However, this is by design as we want to test that the linker does not panic when linking it
+/// which is also the case for the system linker and lld - linking succeeds, runtime segfaults.
+/// It should also be mentioned that runtime segfault is not due to the linker but faulty input asm.
+fn testMergeLiteralsArm642(b: *Build, opts: Options) *Step {
+    const test_step = addTestStep(b, "merge-literals-arm64-2", opts);
+
+    const a_o = addObject(b, opts, .{ .name = "a", .asm_source_bytes = 
+    \\.globl _q1
+    \\.globl _s1
+    \\
+    \\.align 4
+    \\_q1:
+    \\  adrp x0, L._q1@PAGE
+    \\  ldr x0, [x0, L._q1@PAGEOFF]
+    \\  ret
+    \\ 
+    \\.section __TEXT,__cstring,cstring_literals
+    \\_s1:
+    \\  .asciz "hello"
+    \\
+    \\.section __TEXT,__literal8,8byte_literals
+    \\.align 8
+    \\L._q1:
+    \\  .double 1.2345
+    });
+
+    const b_o = addObject(b, opts, .{ .name = "b", .asm_source_bytes = 
+    \\.globl _q2
+    \\.globl _s2
+    \\.globl _s3
+    \\
+    \\.align 4
+    \\_q2:
+    \\  adrp x0, L._q2@PAGE
+    \\  ldr x0, [x0, L._q2@PAGEOFF]
+    \\  ret
+    \\ 
+    \\.section __TEXT,__cstring,cstring_literals
+    \\_s2:
+    \\  .asciz "hello"
+    \\_s3:
+    \\  .asciz "world"
+    \\
+    \\.section __TEXT,__literal8,8byte_literals
+    \\.align 8
+    \\L._q2:
+    \\  .double 1.2345
+    });
+
+    const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = 
+    \\#include <stdio.h>
+    \\extern double q1();
+    \\extern double q2();
+    \\extern const char* s1;
+    \\extern const char* s2;
+    \\extern const char* s3;
+    \\int main() {
+    \\  printf("%s, %s, %s, %f, %f", s1, s2, s3, q1(), q2());
+    \\  return 0;
+    \\}
+    });
+
+    const exe = addExecutable(b, opts, .{ .name = "main1" });
+    exe.addObject(a_o);
+    exe.addObject(b_o);
+    exe.addObject(main_o);
+
+    const check = exe.checkObject();
+    check.dumpSection("__TEXT,__const");
+    check.checkContains("\x8d\x97n\x12\x83\xc0\xf3?"); // little-endian bytes of the double 1.2345
+    check.dumpSection("__TEXT,__cstring");
+    check.checkContains("hello\x00world\x00%s, %s, %s, %f, %f\x00"); // one "hello" directly followed by "world" and main's format string
+    test_step.dependOn(&check.step); // only the object check is registered; this test never runs the executable
+
+    return test_step;
+}
+
+fn testMergeLiteralsAlignment(b: *Build, opts: Options) *Step { // same two strings in two objects with different `.align` values; checks addresses, layout and alignment of the output __cstring
+    const test_step = addTestStep(b, "merge-literals-alignment", opts);
+
+    const a_o = addObject(b, opts, .{ .name = "a", .asm_source_bytes = 
+    \\.globl _s1
+    \\.globl _s2
+    \\
+    \\.section __TEXT,__cstring,cstring_literals
+    \\.align 3
+    \\_s1:
+    \\  .asciz "str1"
+    \\_s2:
+    \\  .asciz "str2"
+    });
+
+    const b_o = addObject(b, opts, .{ .name = "b", .asm_source_bytes = 
+    \\.globl _s3
+    \\.globl _s4
+    \\
+    \\.section __TEXT,__cstring,cstring_literals
+    \\.align 2
+    \\_s3:
+    \\  .asciz "str1"
+    \\_s4:
+    \\  .asciz "str2"
+    });
+
+    const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = 
+    \\#include <assert.h>
+    \\#include <stdint.h>
+    \\#include <stdio.h>
+    \\extern const char* s1;
+    \\extern const char* s2;
+    \\extern const char* s3;
+    \\extern const char* s4;
+    \\int main() {
+    \\  assert((uintptr_t)(&s1) % 8 == 0 && s1 == s3);
+    \\  assert((uintptr_t)(&s2) % 8 == 0 && s2 == s4);
+    \\  printf("%s%s%s%s", &s1, &s2, &s3, &s4);
+    \\  return 0;
+    \\}
+    , .c_source_flags = &.{"-Wno-format"} });
+
+    const runWithChecks = struct {
+        fn runWithChecks(step: *Step, exe: *Compile) void {
+            const run = addRunArtifact(exe);
+            run.expectStdOutEqual("str1str2str1str2");
+            step.dependOn(&run.step);
+
+            const check = exe.checkObject();
+            check.dumpSection("__TEXT,__cstring");
+            check.checkContains("str1\x00\x00\x00\x00str2\x00"); // "str1" plus its terminator and three more zero bytes, so "str2" starts 8 bytes after "str1"
+            check.checkInHeaders();
+            check.checkExact("segname __TEXT");
+            check.checkExact("sectname __cstring");
+            check.checkExact("align 3"); // the larger of the two input alignments (`.align 3` in a, `.align 2` in b)
+            step.dependOn(&check.step);
+        }
+    }.runWithChecks;
+
+    { // objects added in the order a, b, main
+        const exe = addExecutable(b, opts, .{ .name = "main1" });
+        exe.addObject(a_o);
+        exe.addObject(b_o);
+        exe.addObject(main_o);
+        runWithChecks(test_step, exe);
+    }
+
+    { // same objects with a and b swapped
+        const exe = addExecutable(b, opts, .{ .name = "main2" });
+        exe.addObject(b_o);
+        exe.addObject(a_o);
+        exe.addObject(main_o);
+        runWithChecks(test_step, exe);
+    }
+
+    return test_step;
+}
+
+fn testMergeLiteralsObjc(b: *Build, opts: Options) *Step { // two Objective-C objects that use the same string constants and selectors; checks runtime output and __objc_methname
+    const test_step = addTestStep(b, "merge-literals-objc", opts);
+
+    const main_o = addObject(b, opts, .{ .name = "main", .objc_source_bytes = 
+    \\#import <Foundation/Foundation.h>;
+    \\
+    \\extern void foo();
+    \\
+    \\int main() {
+    \\  NSString *thing = @"aaa";
+    \\
+    \\  SEL sel = @selector(lowercaseString);
+    \\  NSString *lower = (([thing respondsToSelector:sel]) ? @"YES" : @"NO");
+    \\  NSLog (@"Responds to lowercaseString: %@", lower);
+    \\  if ([thing respondsToSelector:sel]) //(lower == @"YES")
+    \\      NSLog(@"lowercaseString is: %@", [thing lowercaseString]);
+    \\
+    \\  foo();
+    \\}
+    });
+
+    const a_o = addObject(b, opts, .{ .name = "a", .objc_source_bytes = 
+    \\#import <Foundation/Foundation.h>;
+    \\
+    \\void foo() {
+    \\  NSString *thing = @"aaa";
+    \\  SEL sel = @selector(lowercaseString);
+    \\  NSString *lower = (([thing respondsToSelector:sel]) ? @"YES" : @"NO");
+    \\  NSLog (@"Responds to lowercaseString in foo(): %@", lower);
+    \\  if ([thing respondsToSelector:sel]) //(lower == @"YES")
+    \\      NSLog(@"lowercaseString in foo() is: %@", [thing lowercaseString]);
+    \\  SEL sel2 = @selector(uppercaseString);
+    \\  NSString *upper = (([thing respondsToSelector:sel2]) ? @"YES" : @"NO");
+    \\  NSLog (@"Responds to uppercaseString in foo(): %@", upper);
+    \\  if ([thing respondsToSelector:sel2]) //(upper == @"YES")
+    \\      NSLog(@"uppercaseString in foo() is: %@", [thing uppercaseString]);
+    \\}
+    });
+
+    const runWithChecks = struct {
+        fn runWithChecks(step: *Step, exe: *Compile) void {
+            const builder = step.owner;
+            const run = addRunArtifact(exe);
+            run.addCheck(.{ .expect_stderr_match = builder.dupe("Responds to lowercaseString: YES") }); // the NSLog messages from main() and foo() are matched against stderr
+            run.addCheck(.{ .expect_stderr_match = builder.dupe("lowercaseString is: aaa") });
+            run.addCheck(.{ .expect_stderr_match = builder.dupe("Responds to lowercaseString in foo(): YES") });
+            run.addCheck(.{ .expect_stderr_match = builder.dupe("lowercaseString in foo() is: aaa") });
+            run.addCheck(.{ .expect_stderr_match = builder.dupe("Responds to uppercaseString in foo(): YES") });
+            run.addCheck(.{ .expect_stderr_match = builder.dupe("uppercaseString in foo() is: AAA") });
+            step.dependOn(&run.step);
+
+            const check = exe.checkObject();
+            check.dumpSection("__TEXT,__objc_methname");
+            check.checkContains("lowercaseString\x00"); // selector name used by both main.o and a.o
+            check.dumpSection("__TEXT,__objc_methname");
+            check.checkContains("uppercaseString\x00"); // selector name used only by a.o
+            step.dependOn(&check.step);
+        }
+    }.runWithChecks;
+
+    { // objects added in the order main, a
+        const exe = addExecutable(b, opts, .{ .name = "main1" });
+        exe.addObject(main_o);
+        exe.addObject(a_o);
+        exe.root_module.linkFramework("Foundation", .{});
+        runWithChecks(test_step, exe);
+    }
+
+    { // same objects in the order a, main
+        const exe = addExecutable(b, opts, .{ .name = "main2" });
+        exe.addObject(a_o);
+        exe.addObject(main_o);
+        exe.root_module.linkFramework("Foundation", .{});
+        runWithChecks(test_step, exe);
+    }
+
+    { // a and main are first added to the intermediate object b; only b is added to the executable
+        const b_o = addObject(b, opts, .{ .name = "b" });
+        b_o.addObject(a_o);
+        b_o.addObject(main_o);
+
+        const exe = addExecutable(b, opts, .{ .name = "main3" });
+        exe.addObject(b_o);
+        exe.root_module.linkFramework("Foundation", .{});
+        runWithChecks(test_step, exe);
+    }
+
+    return test_step;
+}
+
 fn testMhExecuteHeader(b: *Build, opts: Options) *Step {
    const test_step = addTestStep(b, "mh-execute-header", opts);