Merge pull request #21720 from kubkon/macho-dwarf-v5 - zig - fork of https://codeberg.org/ziglang/zig

commit 4e09e363cd00b1dd36467ef7958d750ea09f296d (tree)
parent c013f45ad08c2c6d727bf336767e23d988f5f30b
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Tue,  3 Dec 2024 02:28:22 -0500

Merge pull request #21720 from kubkon/macho-dwarf-v5

macho: add basic handling of DWARFv5
Diffstat:
M CMakeLists.txt  | 2 +-
A src/link/MachO/Dwarf.zig  | 409 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/link/MachO/Object.zig  | 284 +++++++++++++++++++++++++++++++------------------------------------------------
D src/link/MachO/dwarf.zig  | 286 -------------------------------------------------------------------------------

4 files changed, 522 insertions(+), 459 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -611,6 +611,7 @@ set(ZIG_STAGE2_SOURCES
     src/link/MachO/Atom.zig
     src/link/MachO/CodeSignature.zig
     src/link/MachO/DebugSymbols.zig
+    src/link/MachO/Dwarf.zig
     src/link/MachO/Dylib.zig
     src/link/MachO/InternalObject.zig
     src/link/MachO/Object.zig
@@ -622,7 +623,6 @@ set(ZIG_STAGE2_SOURCES
     src/link/MachO/dyld_info/Rebase.zig
     src/link/MachO/dyld_info/Trie.zig
     src/link/MachO/dyld_info/bind.zig
-    src/link/MachO/dwarf.zig
     src/link/MachO/eh_frame.zig
     src/link/MachO/fat.zig
     src/link/MachO/file.zig
diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig
@@ -0,0 +1,409 @@
+debug_info: []u8 = &[0]u8{},
+debug_abbrev: []u8 = &[0]u8{},
+debug_str: []u8 = &[0]u8{},
+debug_str_offsets: []u8 = &[0]u8{},
+
+/// Frees the four section buffers held by `dwarf` using `allocator`.
+pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void {
+    const sections = [_][]u8{
+        dwarf.debug_info,
+        dwarf.debug_abbrev,
+        dwarf.debug_str,
+        dwarf.debug_str_offsets,
+    };
+    for (sections) |section| allocator.free(section);
+}
+
+/// Pulls an offset into __debug_str section from a __debug_str_offs section.
+/// This is new in DWARFv5 and requires the producer to specify DW_FORM_strx* (`index` arg)
+/// but also DW_AT_str_offsets_base with DW_FORM_sec_offset (`base` arg) in the opening header
+/// of a "referencing entity" such as DW_TAG_compile_unit.
+///
+/// The entry is located at `base + index * entry_size`, where `entry_size` is 4 bytes for
+/// `.dwarf32` and 8 bytes for `.dwarf64`, and is decoded as little-endian like every other
+/// integer read in this file.
+/// Returns `error.Overflow` when `base`/`index` do not fit in `usize`, when the address
+/// computation overflows, or when the entry does not lie entirely inside `debug_str_offsets`.
+fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) error{Overflow}!u64 {
+    const base_as_usize = math.cast(usize, base) orelse return error.Overflow;
+    const index_as_usize = math.cast(usize, index) orelse return error.Overflow;
+    const entry_size: usize = switch (dw_fmt) {
+        .dwarf32 => @sizeOf(u32),
+        .dwarf64 => @sizeOf(u64),
+    };
+    // `base` and `index` come straight from the input file, so use checked arithmetic and
+    // validate the resulting range before touching the buffer.
+    const scaled_index = try math.mul(usize, index_as_usize, entry_size);
+    const start = try math.add(usize, base_as_usize, scaled_index);
+    const end = try math.add(usize, start, entry_size);
+    if (end > debug_str_offsets.len) return error.Overflow;
+    return switch (dw_fmt) {
+        .dwarf32 => mem.readInt(u32, debug_str_offsets[start..][0..@sizeOf(u32)], .little),
+        .dwarf64 => mem.readInt(u64, debug_str_offsets[start..][0..@sizeOf(u64)], .little),
+    };
+}
+
+pub const InfoReader = struct {
+    ctx: Dwarf,
+    pos: usize = 0,
+
+    /// Returns the `debug_info` buffer of the reader's context; `pos` indexes into this slice.
+    fn bytes(p: InfoReader) []const u8 {
+        return p.ctx.debug_info;
+    }
+
+    /// Parses a unit header starting at the current position and leaves the cursor just past it.
+    /// An initial 32-bit length of 0xffffffff selects the 64-bit format, in which case the real
+    /// length follows as a u64. Only versions 4 and 5 are understood; any other version yields
+    /// `error.InvalidVersion`. For version 4 `unit_type` is reported as 0.
+    pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
+        const initial_length = try p.readInt(u32);
+        const dw_fmt: DwarfFormat = if (initial_length == 0xffffffff) .dwarf64 else .dwarf32;
+        const length: u64 = switch (dw_fmt) {
+            .dwarf32 => initial_length,
+            .dwarf64 => try p.readInt(u64),
+        };
+        const version = try p.readInt(Version);
+
+        var header: CompileUnitHeader = .{
+            .format = dw_fmt,
+            .length = length,
+            .version = version,
+            .debug_abbrev_offset = 0,
+            .address_size = 0,
+            .unit_type = 0,
+        };
+        switch (version) {
+            4 => {
+                header.debug_abbrev_offset = try p.readOffset(dw_fmt);
+                header.address_size = try p.readByte();
+            },
+            5 => {
+                // According to the spec, version 5 introduced the unit_type field in the header
+                // and swapped the order of debug_abbrev_offset and address_size.
+                header.unit_type = try p.readByte();
+                header.address_size = try p.readByte();
+                header.debug_abbrev_offset = try p.readOffset(dw_fmt);
+            },
+            else => return error.InvalidVersion,
+        }
+        return header;
+    }
+
+    /// Advances the cursor until it has just consumed an abbreviation code equal to `code`.
+    /// On success the cursor sits on the first attribute value of that entry.
+    /// Entries with a different code are stepped over by pulling attribute specs from
+    /// `abbrev_reader` and skipping each value according to its form.
+    /// Returns `error.UnexpectedEndOfFile` when a zero code is read or the computed end of
+    /// the unit is reached without a match.
+    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
+        const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
+        // Size of the initial-length field (4 or 12 bytes) plus the unit length.
+        // NOTE(review): this is measured from the current position; if the caller has already
+        // consumed the unit header, the bound lies past the real end of the unit - confirm.
+        const end_pos = p.pos + switch (cuh.format) {
+            .dwarf32 => @as(usize, 4),
+            .dwarf64 => 12,
+        } + cuh_length;
+        while (p.pos < end_pos) {
+            const di_code = try p.readUleb128(u64);
+            if (di_code == 0) return error.UnexpectedEndOfFile;
+            if (di_code == code) return;
+
+            // NOTE(review): `abbrev_reader` is not repositioned to the declaration matching
+            // `di_code`; presumably this is only exercised when the first entry is the one
+            // being sought - confirm against callers.
+            while (try abbrev_reader.readAttr()) |attr| {
+                try p.skip(attr.form, cuh);
+            }
+        }
+        return error.UnexpectedEndOfFile;
+    }
+
+    /// Advances the cursor past one attribute value encoded with `form`.
+    /// When skipping attributes, we don't really need to be able to handle them all
+    /// since we only ever care about the DW_TAG_compile_unit.
+    /// The indexed forms (DW_FORM_strx* / DW_FORM_addrx*) are only accepted when
+    /// `cuh.version >= 5`. Any form not listed here yields `error.UnhandledForm`.
+    pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader) !void {
+        switch (form) {
+            dw.FORM.sec_offset,
+            dw.FORM.ref_addr,
+            => {
+                _ = try p.readOffset(cuh.format);
+            },
+
+            dw.FORM.addr => {
+                _ = try p.readNBytes(cuh.address_size);
+            },
+
+            dw.FORM.block1,
+            dw.FORM.block2,
+            dw.FORM.block4,
+            dw.FORM.block,
+            => {
+                _ = try p.readBlock(form);
+            },
+
+            dw.FORM.exprloc => {
+                _ = try p.readExprLoc();
+            },
+
+            // Carries no data in __debug_info.
+            dw.FORM.flag_present => {},
+
+            dw.FORM.data1,
+            dw.FORM.ref1,
+            dw.FORM.flag,
+            dw.FORM.data2,
+            dw.FORM.ref2,
+            dw.FORM.data4,
+            dw.FORM.ref4,
+            dw.FORM.data8,
+            dw.FORM.ref8,
+            dw.FORM.ref_sig8,
+            dw.FORM.udata,
+            dw.FORM.ref_udata,
+            dw.FORM.sdata,
+            => {
+                _ = try p.readConstant(form);
+            },
+
+            dw.FORM.strp,
+            dw.FORM.string,
+            => {
+                _ = try p.readString(form, cuh);
+            },
+
+            else => if (cuh.version >= 5) switch (form) {
+                // Indexed forms store nothing but an index in __debug_info. Since we are just
+                // iterating over the data, consume the index and do not resolve it: looking the
+                // string up without the real DW_AT_str_offsets_base would read unrelated bytes
+                // of __debug_str_offs and __debug_str.
+                dw.FORM.strx,
+                dw.FORM.strx1,
+                dw.FORM.strx2,
+                dw.FORM.strx3,
+                dw.FORM.strx4,
+                dw.FORM.addrx,
+                dw.FORM.addrx1,
+                dw.FORM.addrx2,
+                dw.FORM.addrx3,
+                dw.FORM.addrx4,
+                => {
+                    _ = try p.readIndex(form);
+                },
+
+                else => return error.UnhandledForm,
+            } else return error.UnhandledForm,
+        }
+    }
+
+    /// Reads a length-prefixed block and returns its payload bytes.
+    /// The width of the length prefix is chosen by `form`, which must be one of the
+    /// DW_FORM_block* forms.
+    pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
+        switch (form) {
+            dw.FORM.block1 => return p.readNBytes(try p.readByte()),
+            dw.FORM.block2 => return p.readNBytes(try p.readInt(u16)),
+            dw.FORM.block4 => return p.readNBytes(try p.readInt(u32)),
+            dw.FORM.block => return p.readNBytes(try p.readUleb128(u64)),
+            else => unreachable,
+        }
+    }
+
+    /// Reads a ULEB128 byte count followed by that many bytes, and returns those bytes.
+    pub fn readExprLoc(p: *InfoReader) ![]const u8 {
+        const expr_len = try p.readUleb128(u64);
+        const expr = try p.readNBytes(expr_len);
+        return expr;
+    }
+
+    /// Reads a fixed-width or LEB128-encoded constant/reference selected by `form` and widens
+    /// it to u64. A signed DW_FORM_sdata value is returned as its two's-complement bit pattern.
+    /// Forms not listed yield `error.UnhandledConstantForm`.
+    pub fn readConstant(p: *InfoReader, form: Form) !u64 {
+        switch (form) {
+            dw.FORM.data1, dw.FORM.ref1, dw.FORM.flag => return try p.readByte(),
+            dw.FORM.data2, dw.FORM.ref2 => return try p.readInt(u16),
+            dw.FORM.data4, dw.FORM.ref4 => return try p.readInt(u32),
+            dw.FORM.data8, dw.FORM.ref8, dw.FORM.ref_sig8 => return try p.readInt(u64),
+            dw.FORM.udata, dw.FORM.ref_udata => return try p.readUleb128(u64),
+            dw.FORM.sdata => {
+                const signed = try p.readIleb128(i64);
+                const reinterpreted: u64 = @bitCast(signed);
+                return reinterpreted;
+            },
+            else => return error.UnhandledConstantForm,
+        }
+    }
+
+    /// Reads the index stored by a DW_FORM_strx* or DW_FORM_addrx* attribute and widens it
+    /// to u64. The 1/2/3/4-byte variants are fixed-width little-endian integers; the
+    /// unsuffixed variants are ULEB128. Any other form yields `error.UnhandledIndexForm`.
+    pub fn readIndex(p: *InfoReader, form: Form) !u64 {
+        return switch (form) {
+            dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(),
+            dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16),
+            dw.FORM.strx3, dw.FORM.addrx3 => blk: {
+                // `readInt(u24)` cannot be used here: it advances by @sizeOf(u24), which is
+                // larger than the three bytes actually stored.
+                const raw = try p.readNBytes(3);
+                break :blk mem.readInt(u24, raw[0..3], .little);
+            },
+            dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32),
+            dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64),
+            else => return error.UnhandledIndexForm,
+        };
+    }
+
+    /// Reads a string attribute encoded as DW_FORM_strp or DW_FORM_string.
+    ///
+    /// * DW_FORM_strp: reads an offset from __debug_info and returns the NUL-terminated
+    ///   string found at that offset in `debug_str`.
+    /// * DW_FORM_string: returns the NUL-terminated string stored inline and moves the cursor
+    ///   past its terminator so that the next attribute can be read.
+    ///
+    /// Returns `error.UnexpectedEndOfFile` when the string start lies outside the section or
+    /// no terminator is found before the end of the section. `form` must be one of the two
+    /// forms above. The returned slice points into the reader's context buffers.
+    pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
+        switch (form) {
+            dw.FORM.strp => {
+                const off = try p.readOffset(cuh.format);
+                const off_u = math.cast(usize, off) orelse return error.Overflow;
+                const strtab = p.ctx.debug_str;
+                // The offset comes from the input file; never read outside of __debug_str.
+                if (off_u >= strtab.len) return error.UnexpectedEndOfFile;
+                const end = mem.indexOfScalarPos(u8, strtab, off_u, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                return strtab[off_u..end :0];
+            },
+            dw.FORM.string => {
+                const data = p.bytes();
+                const start = p.pos;
+                if (start >= data.len) return error.UnexpectedEndOfFile;
+                const end = mem.indexOfScalarPos(u8, data, start, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                // Consume the terminator as well; it is part of the attribute's encoding.
+                p.pos = end + 1;
+                return data[start..end :0];
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Reads a DW_FORM_strx* index from __debug_info, looks up the corresponding entry of
+    /// `debug_str_offsets` relative to `base`, and returns the NUL-terminated string found
+    /// at the resulting offset in `debug_str`.
+    /// Returns `error.UnexpectedEndOfFile` when that offset lies outside `debug_str` or no
+    /// terminator follows it. `form` must be one of the DW_FORM_strx* forms.
+    /// The returned slice points into the reader's context buffers.
+    pub fn readStringIndexed(p: *InfoReader, form: Form, cuh: CompileUnitHeader, base: u64) ![:0]const u8 {
+        switch (form) {
+            dw.FORM.strx,
+            dw.FORM.strx1,
+            dw.FORM.strx2,
+            dw.FORM.strx3,
+            dw.FORM.strx4,
+            => {
+                const index = try p.readIndex(form);
+                const off = math.cast(
+                    usize,
+                    try getOffset(p.ctx.debug_str_offsets, base, index, cuh.format),
+                ) orelse return error.Overflow;
+                const strtab = p.ctx.debug_str;
+                // The offset comes from the input file; never read outside of __debug_str.
+                if (off >= strtab.len) return error.UnexpectedEndOfFile;
+                const end = mem.indexOfScalarPos(u8, strtab, off, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                return strtab[off..end :0];
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Returns the byte under the cursor and advances by one, or
+    /// `error.UnexpectedEndOfFile` when no byte is left.
+    pub fn readByte(p: *InfoReader) !u8 {
+        const data = p.bytes();
+        if (p.pos + 1 > data.len) return error.UnexpectedEndOfFile;
+        const byte = data[p.pos];
+        p.pos += 1;
+        return byte;
+    }
+
+    /// Returns the next `num` bytes and advances the cursor past them.
+    /// Returns `error.Overflow` when `num` does not fit in `usize` and
+    /// `error.UnexpectedEndOfFile` when fewer than `num` bytes remain.
+    pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
+        const num_usize = math.cast(usize, num) orelse return error.Overflow;
+        // `num` may come straight from the input file (e.g. a block length), so the end
+        // position is computed with checked arithmetic instead of a plain `+`.
+        const end = math.add(usize, p.pos, num_usize) catch return error.UnexpectedEndOfFile;
+        if (end > p.bytes().len) return error.UnexpectedEndOfFile;
+        const chunk = p.bytes()[p.pos..end];
+        p.pos = end;
+        return chunk;
+    }
+
+    /// Decodes a little-endian `Int` at the cursor and advances by `@sizeOf(Int)` bytes.
+    /// Returns `error.UnexpectedEndOfFile` when not enough bytes remain.
+    pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
+        const width = @sizeOf(Int);
+        const data = p.bytes();
+        if (p.pos + width > data.len) return error.UnexpectedEndOfFile;
+        const value = mem.readInt(Int, data[p.pos..][0..width], .little);
+        p.pos += width;
+        return value;
+    }
+
+    /// Reads a section offset whose width depends on `dw_fmt`: a u32 for `.dwarf32` and a
+    /// u64 for `.dwarf64`. The result is always widened to u64.
+    pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
+        switch (dw_fmt) {
+            .dwarf32 => {
+                const narrow = try p.readInt(u32);
+                return narrow;
+            },
+            .dwarf64 => {
+                const wide = try p.readInt(u64);
+                return wide;
+            },
+        }
+    }
+
+    /// Decodes an unsigned LEB128 value of type `Type` at the cursor and advances the cursor
+    /// by the number of bytes the decoder consumed. If decoding fails, the error is returned
+    /// before the cursor is moved.
+    pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
+        // Decode from a view of the remaining bytes; the counting reader tells us how many
+        // bytes the variable-length encoding occupied.
+        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
+        var creader = std.io.countingReader(stream.reader());
+        const value: Type = try leb.readUleb128(Type, creader.reader());
+        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
+        return value;
+    }
+
+    /// Decodes a signed LEB128 value of type `Type` at the cursor and advances the cursor
+    /// by the number of bytes the decoder consumed. If decoding fails, the error is returned
+    /// before the cursor is moved.
+    pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
+        // Decode from a view of the remaining bytes; the counting reader tells us how many
+        // bytes the variable-length encoding occupied.
+        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
+        var creader = std.io.countingReader(stream.reader());
+        const value: Type = try leb.readIleb128(Type, creader.reader());
+        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
+        return value;
+    }
+
+    /// Moves the cursor to absolute position `off`.
+    /// Only checks that `off` fits in `usize`; it is not compared against the buffer length
+    /// here, so an out-of-range position surfaces on the next read.
+    pub fn seekTo(p: *InfoReader, off: u64) !void {
+        p.pos = math.cast(usize, off) orelse return error.Overflow;
+    }
+};
+
+/// Cursor over the `debug_abbrev` bytes of a `Dwarf` context.
+pub const AbbrevReader = struct {
+    ctx: Dwarf,
+    pos: usize = 0,
+
+    /// Returns the `debug_abbrev` buffer that `pos` indexes into.
+    fn bytes(p: AbbrevReader) []const u8 {
+        return p.ctx.debug_abbrev;
+    }
+
+    /// Reports whether the cursor is still inside the buffer.
+    pub fn hasMore(p: AbbrevReader) bool {
+        return p.bytes().len > p.pos;
+    }
+
+    /// Reads the head of an abbreviation declaration: code, tag and the children flag.
+    /// Returns null when the code read is 0. `pos`/`len` of the result describe the bytes
+    /// consumed for the head only; the attribute specs that follow are read with `readAttr`.
+    pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
+        const start = p.pos;
+        const code = try p.readUleb128(Code);
+        if (code == 0) return null;
+
+        const tag = try p.readUleb128(Tag);
+        const children_flag = try p.readByte();
+        const decl: AbbrevDecl = .{
+            .code = code,
+            .pos = start,
+            .len = p.pos - start,
+            .tag = tag,
+            .has_children = children_flag != 0,
+        };
+        return decl;
+    }
+
+    /// Reads one (attribute, form) pair. Returns null for the terminating (0, 0) pair,
+    /// which is still consumed.
+    pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr {
+        const start = p.pos;
+        const at = try p.readUleb128(At);
+        const form = try p.readUleb128(Form);
+        if (at == 0 and form == 0) return null;
+        return .{
+            .at = at,
+            .form = form,
+            .pos = start,
+            .len = p.pos - start,
+        };
+    }
+
+    /// Returns the byte under the cursor and advances by one, or `error.Eof` when no byte
+    /// is left.
+    pub fn readByte(p: *AbbrevReader) !u8 {
+        const data = p.bytes();
+        if (p.pos + 1 > data.len) return error.Eof;
+        const byte = data[p.pos];
+        p.pos += 1;
+        return byte;
+    }
+
+    /// Decodes an unsigned LEB128 value of type `Type` at the cursor and advances the cursor
+    /// by the number of bytes consumed.
+    pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
+        const remaining = p.bytes()[p.pos..];
+        var stream = std.io.fixedBufferStream(remaining);
+        var counting = std.io.countingReader(stream.reader());
+        const value: Type = try leb.readUleb128(Type, counting.reader());
+        const consumed = math.cast(usize, counting.bytes_read) orelse return error.Overflow;
+        p.pos += consumed;
+        return value;
+    }
+
+    /// Moves the cursor to absolute position `off`; only checks that `off` fits in `usize`.
+    pub fn seekTo(p: *AbbrevReader, off: u64) !void {
+        const target = math.cast(usize, off) orelse return error.Overflow;
+        p.pos = target;
+    }
+};
+
+/// Head of an abbreviation declaration as produced by `AbbrevReader.readDecl`.
+const AbbrevDecl = struct {
+    /// Abbreviation code; never 0, since a 0 code makes `readDecl` return null.
+    code: Code,
+    /// Position in `debug_abbrev` where the declaration starts.
+    pos: usize,
+    /// Number of bytes consumed for code, tag and children flag.
+    len: usize,
+    tag: Tag,
+    /// True when the children byte following the tag is non-zero.
+    has_children: bool,
+};
+
+/// One attribute specification as produced by `AbbrevReader.readAttr`.
+const AbbrevAttr = struct {
+    /// Attribute name value (compared against `AT.*` by callers).
+    at: At,
+    /// Attribute form value (compared against `FORM.*` by callers).
+    form: Form,
+    /// Position in `debug_abbrev` where the pair starts.
+    pos: usize,
+    /// Number of bytes consumed for the pair.
+    len: usize,
+};
+
+/// Unit header fields as decoded by `InfoReader.readCompileUnitHeader`.
+const CompileUnitHeader = struct {
+    /// 32-bit or 64-bit DWARF, derived from the initial length field.
+    format: DwarfFormat,
+    /// Unit length as stored in the header (for 64-bit, the u64 following the 0xffffffff escape).
+    length: u64,
+    /// 4 or 5; other versions are rejected while parsing.
+    version: Version,
+    debug_abbrev_offset: u64,
+    address_size: u8,
+    /// Read from the header for version 5; set to 0 for version 4.
+    unit_type: u8,
+};
+
+/// A position/length pair.
+/// NOTE(review): not referenced by any code visible in this change; presumably describes
+/// the byte range of a debugging information entry - confirm or remove.
+const Die = struct {
+    pos: usize,
+    len: usize,
+};
+
+/// Selects the width of offsets and of `__debug_str_offs` entries:
+/// u32 for `dwarf32`, u64 for `dwarf64`.
+const DwarfFormat = enum {
+    dwarf32,
+    dwarf64,
+};
+
+const dw = std.dwarf;
+const leb = std.leb;
+const log = std.log.scoped(.link);
+const math = std.math;
+const mem = std.mem;
+const std = @import("std");
+const Allocator = mem.Allocator;
+const Dwarf = @This();
+const File = @import("file.zig").File;
+const MachO = @import("../MachO.zig");
+const Object = @import("Object.zig");
+
+pub const At = u64;
+pub const Code = u64;
+pub const Form = u64;
+pub const Tag = u64;
+pub const Version = u16;
+
+pub const AT = dw.AT;
+pub const FORM = dw.FORM;
+pub const TAG = dw.TAG;
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
@@ -443,11 +443,8 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m
     for (slice.items(.header), 0..) |sect, n_sect| {
         if (!isCstringLiteral(sect)) continue;
 
-        const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try allocator.alloc(u8, sect_size);
+        const data = try self.readSectionData(allocator, file, @intCast(n_sect));
         defer allocator.free(data);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
 
         var count: u32 = 0;
         var start: u32 = 0;
@@ -646,13 +643,10 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO
     }
 
     const slice = self.sections.slice();
-    for (slice.items(.header), slice.items(.subsections)) |header, subs| {
+    for (slice.items(.header), slice.items(.subsections), 0..) |header, subs, n_sect| {
         if (isCstringLiteral(header) or isFixedSizeLiteral(header)) {
-            const sect_size = math.cast(usize, header.size) orelse return error.Overflow;
-            const data = try gpa.alloc(u8, sect_size);
+            const data = try self.readSectionData(gpa, file, @intCast(n_sect));
             defer gpa.free(data);
-            const amt = try file.preadAll(data, header.offset + self.offset);
-            if (amt != data.len) return error.InputOutput;
 
             for (subs.items) |sub| {
                 const atom = self.getAtom(sub.atom).?;
@@ -686,12 +680,7 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO
                 buffer.resize(target_size) catch unreachable;
                 const gop = try sections_data.getOrPut(target.n_sect);
                 if (!gop.found_existing) {
-                    const target_sect = slice.items(.header)[target.n_sect];
-                    const target_sect_size = math.cast(usize, target_sect.size) orelse return error.Overflow;
-                    const data = try gpa.alloc(u8, target_sect_size);
-                    const amt = try file.preadAll(data, target_sect.offset + self.offset);
-                    if (amt != data.len) return error.InputOutput;
-                    gop.value_ptr.* = data;
+                    gop.value_ptr.* = try self.readSectionData(gpa, file, @intCast(target.n_sect));
                 }
                 const data = gop.value_ptr.*;
                 const target_off = math.cast(usize, target.off) orelse return error.Overflow;
@@ -1000,7 +989,7 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m
     defer tracy.end();
     const slice = self.sections.slice();
 
-    for (slice.items(.header), slice.items(.relocs)) |sect, *out| {
+    for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| {
         if (sect.nreloc == 0) continue;
         // We skip relocs for __DWARF since even in -r mode, the linker is expected to emit
         // debug symbol stabs in the relocatable. This made me curious why that is. For now,
@@ -1009,8 +998,8 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m
             !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue;
 
         switch (cpu_arch) {
-            .x86_64 => try x86_64.parseRelocs(self, sect, out, file, macho_file),
-            .aarch64 => try aarch64.parseRelocs(self, sect, out, file, macho_file),
+            .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file),
+            .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file),
             else => unreachable,
         }
 
@@ -1146,11 +1135,8 @@ fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: Fil
     };
 
     const header = self.sections.items(.header)[sect_id];
-    const size = math.cast(usize, header.size) orelse return error.Overflow;
-    const data = try allocator.alloc(u8, size);
+    const data = try self.readSectionData(allocator, file, sect_id);
     defer allocator.free(data);
-    const amt = try file.preadAll(data, header.offset + self.offset);
-    if (amt != data.len) return error.InputOutput;
 
     const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
     const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
@@ -1359,151 +1345,106 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void {
     defer tracy.end();
 
     const gpa = macho_file.base.comp.gpa;
+    const file = macho_file.getFileHandle(self.file_handle);
 
-    var debug_info_index: ?usize = null;
-    var debug_abbrev_index: ?usize = null;
-    var debug_str_index: ?usize = null;
+    var dwarf: Dwarf = .{};
+    defer dwarf.deinit(gpa);
 
     for (self.sections.items(.header), 0..) |sect, index| {
+        const n_sect: u8 = @intCast(index);
         if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue;
-        if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index;
-        if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index;
-        if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index;
+        if (mem.eql(u8, sect.sectName(), "__debug_info")) {
+            dwarf.debug_info = try self.readSectionData(gpa, file, n_sect);
+        }
+        if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) {
+            dwarf.debug_abbrev = try self.readSectionData(gpa, file, n_sect);
+        }
+        if (mem.eql(u8, sect.sectName(), "__debug_str")) {
+            dwarf.debug_str = try self.readSectionData(gpa, file, n_sect);
+        }
+        // __debug_str_offs[ets] section is a new addition in DWARFv5 and is generally
+        // required in order to correctly parse strings.
+        if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) {
+            dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect);
+        }
     }
 
-    if (debug_info_index == null or debug_abbrev_index == null) return;
+    if (dwarf.debug_info.len == 0) return;
 
-    const slice = self.sections.slice();
-    const file = macho_file.getFileHandle(self.file_handle);
-    const debug_info = blk: {
-        const sect = slice.items(.header)[debug_info_index.?];
-        const size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try gpa.alloc(u8, size);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
-        break :blk data;
-    };
-    defer gpa.free(debug_info);
-    const debug_abbrev = blk: {
-        const sect = slice.items(.header)[debug_abbrev_index.?];
-        const size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try gpa.alloc(u8, size);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
-        break :blk data;
-    };
-    defer gpa.free(debug_abbrev);
-    const debug_str = if (debug_str_index) |sid| blk: {
-        const sect = slice.items(.header)[sid];
-        const size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try gpa.alloc(u8, size);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
-        break :blk data;
-    } else &[0]u8{};
-    defer gpa.free(debug_str);
-
-    self.compile_unit = self.findCompileUnit(.{
-        .gpa = gpa,
-        .debug_info = debug_info,
-        .debug_abbrev = debug_abbrev,
-        .debug_str = debug_str,
-    }) catch null; // TODO figure out what errors are fatal, and when we silently fail
-}
-
-fn findCompileUnit(self: *Object, args: struct {
-    gpa: Allocator,
-    debug_info: []const u8,
-    debug_abbrev: []const u8,
-    debug_str: []const u8,
-}) !CompileUnit {
-    var cu_wip: struct {
-        comp_dir: ?[:0]const u8 = null,
-        tu_name: ?[:0]const u8 = null,
-    } = .{};
-
-    const gpa = args.gpa;
-    var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str };
-    var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev };
+    // TODO return error once we fix emitting DWARF in self-hosted backend.
+    // https://github.com/ziglang/zig/issues/21719
+    self.compile_unit = self.findCompileUnit(gpa, dwarf) catch null;
+}
+
+fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf) !CompileUnit {
+    var info_reader = Dwarf.InfoReader{ .ctx = ctx };
+    var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx };
 
     const cuh = try info_reader.readCompileUnitHeader();
     try abbrev_reader.seekTo(cuh.debug_abbrev_offset);
 
-    const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof;
-    if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag;
+    const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile;
+    if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag;
 
     try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader);
 
-    while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
-        dwarf.AT.name => {
-            cu_wip.tu_name = try info_reader.readString(attr.form, cuh);
-        },
-        dwarf.AT.comp_dir => {
-            cu_wip.comp_dir = try info_reader.readString(attr.form, cuh);
-        },
-        else => switch (attr.form) {
-            dwarf.FORM.sec_offset,
-            dwarf.FORM.ref_addr,
-            => {
-                _ = try info_reader.readOffset(cuh.format);
-            },
-
-            dwarf.FORM.addr => {
-                _ = try info_reader.readNBytes(cuh.address_size);
-            },
-
-            dwarf.FORM.block1,
-            dwarf.FORM.block2,
-            dwarf.FORM.block4,
-            dwarf.FORM.block,
-            => {
-                _ = try info_reader.readBlock(attr.form);
-            },
-
-            dwarf.FORM.exprloc => {
-                _ = try info_reader.readExprLoc();
-            },
-
-            dwarf.FORM.flag_present => {},
-
-            dwarf.FORM.data1,
-            dwarf.FORM.ref1,
-            dwarf.FORM.flag,
-            dwarf.FORM.data2,
-            dwarf.FORM.ref2,
-            dwarf.FORM.data4,
-            dwarf.FORM.ref4,
-            dwarf.FORM.data8,
-            dwarf.FORM.ref8,
-            dwarf.FORM.ref_sig8,
-            dwarf.FORM.udata,
-            dwarf.FORM.ref_udata,
-            dwarf.FORM.sdata,
-            => {
-                _ = try info_reader.readConstant(attr.form);
-            },
-
-            dwarf.FORM.strp,
-            dwarf.FORM.string,
-            => {
-                _ = try info_reader.readString(attr.form, cuh);
-            },
-
-            else => {
-                // TODO actual errors?
-                log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
-                return error.UnhandledForm;
-            },
-        },
+    const Pos = struct {
+        pos: usize,
+        form: Dwarf.Form,
     };
-
-    if (cu_wip.comp_dir == null) return error.MissingCompDir;
-    if (cu_wip.tu_name == null) return error.MissingTuName;
-
-    return .{
-        .comp_dir = try self.addString(gpa, cu_wip.comp_dir.?),
-        .tu_name = try self.addString(gpa, cu_wip.tu_name.?),
+    var saved: struct {
+        tu_name: ?Pos,
+        comp_dir: ?Pos,
+        str_offsets_base: ?Pos,
+    } = .{
+        .tu_name = null,
+        .comp_dir = null,
+        .str_offsets_base = null,
     };
+    while (try abbrev_reader.readAttr()) |attr| {
+        const pos: Pos = .{ .pos = info_reader.pos, .form = attr.form };
+        switch (attr.at) {
+            Dwarf.AT.name => saved.tu_name = pos,
+            Dwarf.AT.comp_dir => saved.comp_dir = pos,
+            Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos,
+            else => {},
+        }
+        try info_reader.skip(attr.form, cuh);
+    }
+
+    if (saved.comp_dir == null) return error.MissingCompileDir;
+    if (saved.tu_name == null) return error.MissingTuName;
+
+    const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: {
+        try info_reader.seekTo(str_offsets_base.pos);
+        break :str_offsets_base try info_reader.readOffset(cuh.format);
+    } else null;
+
+    var cu: CompileUnit = .{ .comp_dir = .{}, .tu_name = .{} };
+    for (&[_]struct { Pos, *MachO.String }{
+        .{ saved.comp_dir.?, &cu.comp_dir },
+        .{ saved.tu_name.?, &cu.tu_name },
+    }) |tuple| {
+        const pos, const str_offset_ptr = tuple;
+        try info_reader.seekTo(pos.pos);
+        str_offset_ptr.* = switch (pos.form) {
+            Dwarf.FORM.strp,
+            Dwarf.FORM.string,
+            => try self.addString(gpa, try info_reader.readString(pos.form, cuh)),
+            Dwarf.FORM.strx,
+            Dwarf.FORM.strx1,
+            Dwarf.FORM.strx2,
+            Dwarf.FORM.strx3,
+            Dwarf.FORM.strx4,
+            => blk: {
+                const base = str_offsets_base orelse return error.MissingStrOffsetsBase;
+                break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base));
+            },
+            else => return error.InvalidForm,
+        };
+    }
+
+    return cu;
 }
 
 pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void {
@@ -2561,6 +2502,17 @@ pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInf
     return &self.unwind_records.items[index];
 }
 
+/// Reads the full contents of section `n_sect` from `file` into a newly allocated buffer.
+/// The read starts at the section's file offset plus `self.offset`.
+/// Returns `error.Overflow` when the section size does not fit in `usize` and
+/// `error.InputOutput` on a short read; the buffer is released on every error path.
+/// Caller owns the memory.
+pub fn readSectionData(self: Object, allocator: Allocator, file: File.Handle, n_sect: u8) ![]u8 {
+    const header = self.sections.items(.header)[n_sect];
+    const size = math.cast(usize, header.size) orelse return error.Overflow;
+    const data = try allocator.alloc(u8, size);
+    // Must be registered before the read: an errdefer only covers errors returned after it,
+    // so placing it below `preadAll` would leak `data` when the read itself fails.
+    errdefer allocator.free(data);
+    const amt = try file.preadAll(data, header.offset + self.offset);
+    if (amt != data.len) return error.InputOutput;
+    return data;
+}
+
 pub fn format(
     self: *Object,
     comptime unused_fmt_string: []const u8,
@@ -2848,6 +2800,7 @@ const CompactUnwindCtx = struct {
 const x86_64 = struct {
     fn parseRelocs(
         self: *Object,
+        n_sect: u8,
         sect: macho.section_64,
         out: *std.ArrayListUnmanaged(Relocation),
         handle: File.Handle,
@@ -2857,19 +2810,12 @@ const x86_64 = struct {
 
         const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
         defer gpa.free(relocs_buffer);
-        {
-            const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
-            if (amt != relocs_buffer.len) return error.InputOutput;
-        }
+        const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
+        if (amt != relocs_buffer.len) return error.InputOutput;
         const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
 
-        const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const code = try gpa.alloc(u8, sect_size);
+        const code = try self.readSectionData(gpa, handle, n_sect);
         defer gpa.free(code);
-        {
-            const amt = try handle.preadAll(code, sect.offset + self.offset);
-            if (amt != code.len) return error.InputOutput;
-        }
 
         try out.ensureTotalCapacityPrecise(gpa, relocs.len);
 
@@ -3021,6 +2967,7 @@ const x86_64 = struct {
 const aarch64 = struct {
     fn parseRelocs(
         self: *Object,
+        n_sect: u8,
         sect: macho.section_64,
         out: *std.ArrayListUnmanaged(Relocation),
         handle: File.Handle,
@@ -3030,19 +2977,12 @@ const aarch64 = struct {
 
         const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
         defer gpa.free(relocs_buffer);
-        {
-            const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
-            if (amt != relocs_buffer.len) return error.InputOutput;
-        }
+        const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
+        if (amt != relocs_buffer.len) return error.InputOutput;
         const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
 
-        const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const code = try gpa.alloc(u8, sect_size);
+        const code = try self.readSectionData(gpa, handle, n_sect);
         defer gpa.free(code);
-        {
-            const amt = try handle.preadAll(code, sect.offset + self.offset);
-            if (amt != code.len) return error.InputOutput;
-        }
 
         try out.ensureTotalCapacityPrecise(gpa, relocs.len);
 
@@ -3219,7 +3159,6 @@ const aarch64 = struct {
 };
 
 const assert = std.debug.assert;
-const dwarf = @import("dwarf.zig");
 const eh_frame = @import("eh_frame.zig");
 const log = std.log.scoped(.link);
 const macho = std.macho;
@@ -3233,6 +3172,7 @@ const Allocator = mem.Allocator;
 const Archive = @import("Archive.zig");
 const Atom = @import("Atom.zig");
 const Cie = eh_frame.Cie;
+const Dwarf = @import("Dwarf.zig");
 const Fde = eh_frame.Fde;
 const File = @import("file.zig").File;
 const LoadCommandIterator = macho.LoadCommandIterator;
diff --git a/src/link/MachO/dwarf.zig b/src/link/MachO/dwarf.zig
@@ -1,286 +0,0 @@
-pub const InfoReader = struct {
-    bytes: []const u8,
-    strtab: []const u8,
-    pos: usize = 0,
-
-    pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
-        var length: u64 = try p.readInt(u32);
-        const is_64bit = length == 0xffffffff;
-        if (is_64bit) {
-            length = try p.readInt(u64);
-        }
-        const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32;
-        return .{
-            .format = dw_fmt,
-            .length = length,
-            .version = try p.readInt(u16),
-            .debug_abbrev_offset = try p.readOffset(dw_fmt),
-            .address_size = try p.readByte(),
-        };
-    }
-
-    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
-        const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
-        const end_pos = p.pos + switch (cuh.format) {
-            .dwarf32 => @as(usize, 4),
-            .dwarf64 => 12,
-        } + cuh_length;
-        while (p.pos < end_pos) {
-            const di_code = try p.readUleb128(u64);
-            if (di_code == 0) return error.Eof;
-            if (di_code == code) return;
-
-            while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
-                dwarf.FORM.sec_offset,
-                dwarf.FORM.ref_addr,
-                => {
-                    _ = try p.readOffset(cuh.format);
-                },
-
-                dwarf.FORM.addr => {
-                    _ = try p.readNBytes(cuh.address_size);
-                },
-
-                dwarf.FORM.block1,
-                dwarf.FORM.block2,
-                dwarf.FORM.block4,
-                dwarf.FORM.block,
-                => {
-                    _ = try p.readBlock(attr.form);
-                },
-
-                dwarf.FORM.exprloc => {
-                    _ = try p.readExprLoc();
-                },
-
-                dwarf.FORM.flag_present => {},
-
-                dwarf.FORM.data1,
-                dwarf.FORM.ref1,
-                dwarf.FORM.flag,
-                dwarf.FORM.data2,
-                dwarf.FORM.ref2,
-                dwarf.FORM.data4,
-                dwarf.FORM.ref4,
-                dwarf.FORM.data8,
-                dwarf.FORM.ref8,
-                dwarf.FORM.ref_sig8,
-                dwarf.FORM.udata,
-                dwarf.FORM.ref_udata,
-                dwarf.FORM.sdata,
-                => {
-                    _ = try p.readConstant(attr.form);
-                },
-
-                dwarf.FORM.strp,
-                dwarf.FORM.string,
-                => {
-                    _ = try p.readString(attr.form, cuh);
-                },
-
-                else => {
-                    // TODO better errors
-                    log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
-                    return error.UnhandledDwFormValue;
-                },
-            };
-        }
-    }
-
-    pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
-        const len: u64 = switch (form) {
-            dwarf.FORM.block1 => try p.readByte(),
-            dwarf.FORM.block2 => try p.readInt(u16),
-            dwarf.FORM.block4 => try p.readInt(u32),
-            dwarf.FORM.block => try p.readUleb128(u64),
-            else => unreachable,
-        };
-        return p.readNBytes(len);
-    }
-
-    pub fn readExprLoc(p: *InfoReader) ![]const u8 {
-        const len: u64 = try p.readUleb128(u64);
-        return p.readNBytes(len);
-    }
-
-    pub fn readConstant(p: *InfoReader, form: Form) !u64 {
-        return switch (form) {
-            dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(),
-            dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16),
-            dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32),
-            dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64),
-            dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64),
-            dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)),
-            else => return error.UnhandledConstantForm,
-        };
-    }
-
-    pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
-        switch (form) {
-            dwarf.FORM.strp => {
-                const off = try p.readOffset(cuh.format);
-                const off_u = math.cast(usize, off) orelse return error.Overflow;
-                return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0);
-            },
-            dwarf.FORM.string => {
-                const start = p.pos;
-                while (p.pos < p.bytes.len) : (p.pos += 1) {
-                    if (p.bytes[p.pos] == 0) break;
-                }
-                if (p.bytes[p.pos] != 0) return error.Eof;
-                return p.bytes[start..p.pos :0];
-            },
-            else => unreachable,
-        }
-    }
-
-    pub fn readByte(p: *InfoReader) !u8 {
-        if (p.pos + 1 > p.bytes.len) return error.Eof;
-        defer p.pos += 1;
-        return p.bytes[p.pos];
-    }
-
-    pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
-        const num_usize = math.cast(usize, num) orelse return error.Overflow;
-        if (p.pos + num_usize > p.bytes.len) return error.Eof;
-        defer p.pos += num_usize;
-        return p.bytes[p.pos..][0..num_usize];
-    }
-
-    pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
-        if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof;
-        defer p.pos += @sizeOf(Int);
-        return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little);
-    }
-
-    pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
-        return switch (dw_fmt) {
-            .dwarf32 => try p.readInt(u32),
-            .dwarf64 => try p.readInt(u64),
-        };
-    }
-
-    pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
-        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
-        var creader = std.io.countingReader(stream.reader());
-        const value: Type = try leb.readUleb128(Type, creader.reader());
-        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-        return value;
-    }
-
-    pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
-        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
-        var creader = std.io.countingReader(stream.reader());
-        const value: Type = try leb.readIleb128(Type, creader.reader());
-        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-        return value;
-    }
-
-    pub fn seekTo(p: *InfoReader, off: u64) !void {
-        p.pos = math.cast(usize, off) orelse return error.Overflow;
-    }
-};
-
-pub const AbbrevReader = struct {
-    bytes: []const u8,
-    pos: usize = 0,
-
-    pub fn hasMore(p: AbbrevReader) bool {
-        return p.pos < p.bytes.len;
-    }
-
-    pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
-        const pos = p.pos;
-        const code = try p.readUleb128(Code);
-        if (code == 0) return null;
-
-        const tag = try p.readUleb128(Tag);
-        const has_children = (try p.readByte()) > 0;
-        return .{
-            .code = code,
-            .pos = pos,
-            .len = p.pos - pos,
-            .tag = tag,
-            .has_children = has_children,
-        };
-    }
-
-    pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr {
-        const pos = p.pos;
-        const at = try p.readUleb128(At);
-        const form = try p.readUleb128(Form);
-        return if (at == 0 and form == 0) null else .{
-            .at = at,
-            .form = form,
-            .pos = pos,
-            .len = p.pos - pos,
-        };
-    }
-
-    pub fn readByte(p: *AbbrevReader) !u8 {
-        if (p.pos + 1 > p.bytes.len) return error.Eof;
-        defer p.pos += 1;
-        return p.bytes[p.pos];
-    }
-
-    pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
-        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
-        var creader = std.io.countingReader(stream.reader());
-        const value: Type = try leb.readUleb128(Type, creader.reader());
-        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-        return value;
-    }
-
-    pub fn seekTo(p: *AbbrevReader, off: u64) !void {
-        p.pos = math.cast(usize, off) orelse return error.Overflow;
-    }
-};
-
-const AbbrevDecl = struct {
-    code: Code,
-    pos: usize,
-    len: usize,
-    tag: Tag,
-    has_children: bool,
-};
-
-const AbbrevAttr = struct {
-    at: At,
-    form: Form,
-    pos: usize,
-    len: usize,
-};
-
-const CompileUnitHeader = struct {
-    format: DwarfFormat,
-    length: u64,
-    version: u16,
-    debug_abbrev_offset: u64,
-    address_size: u8,
-};
-
-const Die = struct {
-    pos: usize,
-    len: usize,
-};
-
-const DwarfFormat = enum {
-    dwarf32,
-    dwarf64,
-};
-
-const dwarf = std.dwarf;
-const leb = std.leb;
-const log = std.log.scoped(.link);
-const math = std.math;
-const mem = std.mem;
-const std = @import("std");
-
-const At = u64;
-const Code = u64;
-const Form = u64;
-const Tag = u64;
-
-pub const AT = dwarf.AT;
-pub const FORM = dwarf.FORM;
-pub const TAG = dwarf.TAG;

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	CMakeLists.txt	\|	2	+-
A	src/link/MachO/Dwarf.zig	\|	409	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/link/MachO/Object.zig	\|	284	+++++++++++++++++++++++++++++++------------------------------------------------
D	src/link/MachO/dwarf.zig	\|	286	-------------------------------------------------------------------------------