commit 4e09e363cd00b1dd36467ef7958d750ea09f296d (tree)
parent c013f45ad08c2c6d727bf336767e23d988f5f30b
Author: Andrew Kelley <andrew@ziglang.org>
Date: Tue, 3 Dec 2024 02:28:22 -0500
Merge pull request #21720 from kubkon/macho-dwarf-v5
macho: add basic handling of DWARFv5
Diffstat:
4 files changed, 522 insertions(+), 459 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -611,6 +611,7 @@ set(ZIG_STAGE2_SOURCES
src/link/MachO/Atom.zig
src/link/MachO/CodeSignature.zig
src/link/MachO/DebugSymbols.zig
+ src/link/MachO/Dwarf.zig
src/link/MachO/Dylib.zig
src/link/MachO/InternalObject.zig
src/link/MachO/Object.zig
@@ -622,7 +623,6 @@ set(ZIG_STAGE2_SOURCES
src/link/MachO/dyld_info/Rebase.zig
src/link/MachO/dyld_info/Trie.zig
src/link/MachO/dyld_info/bind.zig
- src/link/MachO/dwarf.zig
src/link/MachO/eh_frame.zig
src/link/MachO/fat.zig
src/link/MachO/file.zig
diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig
@@ -0,0 +1,409 @@
+/// Contents of the `__debug_info` section. Defaults to an empty slice; released by `deinit`.
+debug_info: []u8 = &[0]u8{},
+/// Contents of the `__debug_abbrev` section. Defaults to an empty slice; released by `deinit`.
+debug_abbrev: []u8 = &[0]u8{},
+/// Contents of the `__debug_str` section. Defaults to an empty slice; released by `deinit`.
+debug_str: []u8 = &[0]u8{},
+/// Contents of the `__debug_str_offs` section. Defaults to an empty slice; released by `deinit`.
+debug_str_offsets: []u8 = &[0]u8{},
+
+/// Releases the four section buffers using `allocator`.
+/// Every field is passed to `free`, including ones that still hold the empty default slice.
+pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void {
+    const sections = [_][]u8{
+        dwarf.debug_info,
+        dwarf.debug_abbrev,
+        dwarf.debug_str,
+        dwarf.debug_str_offsets,
+    };
+    for (sections) |section| allocator.free(section);
+}
+
+/// Pulls an offset into __debug_str section from a __debug_str_offs section.
+/// This is new in DWARFv5 and requires the producer to specify DW_FORM_strx* (`index` arg)
+/// but also DW_AT_str_offsets_base with DW_FORM_sec_offset (`base` arg) in the opening header
+/// of a "referencing entity" such as DW_TAG_compile_unit.
+///
+/// The entry read is the `index`-th 4-byte (`.dwarf32`) or 8-byte (`.dwarf64`) little-endian
+/// value counted from `base` bytes into `debug_str_offsets`.
+/// Returns `error.Overflow` when the entry's byte range cannot be represented in `usize`
+/// or does not lie entirely within `debug_str_offsets`.
+fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) error{Overflow}!u64 {
+    const base_as_usize = math.cast(usize, base) orelse return error.Overflow;
+    const index_as_usize = math.cast(usize, index) orelse return error.Overflow;
+    const entry_size: usize = switch (dw_fmt) {
+        .dwarf32 => @sizeOf(u32),
+        .dwarf64 => @sizeOf(u64),
+    };
+    // `base` and `index` are decoded from the input, so use checked arithmetic and verify
+    // the range before touching memory instead of dereferencing an unchecked pointer.
+    const start = try math.add(usize, base_as_usize, try math.mul(usize, index_as_usize, entry_size));
+    const end = try math.add(usize, start, entry_size);
+    if (end > debug_str_offsets.len) return error.Overflow;
+    return switch (dw_fmt) {
+        .dwarf32 => mem.readInt(u32, debug_str_offsets[start..][0..@sizeOf(u32)], .little),
+        .dwarf64 => mem.readInt(u64, debug_str_offsets[start..][0..@sizeOf(u64)], .little),
+    };
+}
+
+pub const InfoReader = struct {
+ ctx: Dwarf,
+ pos: usize = 0,
+
+    /// Returns the `debug_info` buffer of this reader's context as a read-only view.
+    fn bytes(p: InfoReader) []const u8 {
+        const info: []const u8 = p.ctx.debug_info;
+        return info;
+    }
+
+ /// Decodes a unit header at the current position and advances past it.
+ /// Only versions 4 and 5 are accepted; any other version yields `error.InvalidVersion`.
+ /// Errors from the underlying `read*` helpers are propagated.
+ pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
+ // The length starts out as a 32-bit value; 0xffffffff is the escape that selects
+ // the 64-bit format, in which case the real length follows as a 64-bit value.
+ var length: u64 = try p.readInt(u32);
+ const is_64bit = length == 0xffffffff;
+ if (is_64bit) {
+ length = try p.readInt(u64);
+ }
+ const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32;
+ const version = try p.readInt(Version);
+ // The remaining fields differ in layout between versions; in each branch the
+ // initializers are written in the order the fields are read from the input.
+ const rest: struct {
+ debug_abbrev_offset: u64,
+ address_size: u8,
+ unit_type: u8,
+ } = switch (version) {
+ 4 => .{
+ .debug_abbrev_offset = try p.readOffset(dw_fmt),
+ .address_size = try p.readByte(),
+ // Nothing is read for this field in a version 4 header; it is reported as 0.
+ .unit_type = 0,
+ },
+ 5 => .{
+ // According to the spec, version 5 introduced .unit_type field in the header, and
+ // it reordered .debug_abbrev_offset with .address_size fields.
+ .unit_type = try p.readByte(),
+ .address_size = try p.readByte(),
+ .debug_abbrev_offset = try p.readOffset(dw_fmt),
+ },
+ else => return error.InvalidVersion,
+ };
+ return .{
+ .format = dw_fmt,
+ .length = length,
+ .version = version,
+ .debug_abbrev_offset = rest.debug_abbrev_offset,
+ .address_size = rest.address_size,
+ .unit_type = rest.unit_type,
+ };
+ }
+
+    /// Scans entries from the current position until one whose abbreviation code equals
+    /// `code` has been read. On success the reader is positioned right after that code.
+    ///
+    /// For every non-matching entry, attribute forms are pulled from `abbrev_reader` at
+    /// its current position and the corresponding values are skipped via `skip`.
+    ///
+    /// Returns `error.UnexpectedEndOfFile` when a zero code is encountered or the scan
+    /// reaches the computed end position, and `error.Overflow` when `cuh.length` does
+    /// not fit in `usize`.
+    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
+        const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
+        const initial_length_size: usize = switch (cuh.format) {
+            .dwarf32 => 4,
+            .dwarf64 => 12,
+        };
+        // `cuh.length` is taken from the input, so a bogus value must not be able to
+        // overflow `usize` here; saturating addition simply caps the limit.
+        // NOTE(review): the limit is measured from the current position rather than from
+        // the start of the unit — confirm that this slack is intended.
+        const end_pos = p.pos +| initial_length_size +| cuh_length;
+        while (p.pos < end_pos) {
+            const di_code = try p.readUleb128(u64);
+            if (di_code == 0) return error.UnexpectedEndOfFile;
+            if (di_code == code) return;
+
+            while (try abbrev_reader.readAttr()) |attr| {
+                try p.skip(attr.form, cuh);
+            }
+        }
+        return error.UnexpectedEndOfFile;
+    }
+
+    /// Advances the reader past one attribute value encoded with `form`.
+    ///
+    /// When skipping attributes, we don't really need to be able to handle them all
+    /// since we only ever care about the DW_TAG_compile_unit.
+    ///
+    /// Only bytes of the info buffer are consumed; the string tables are never consulted,
+    /// so skipping cannot fail (or read out of bounds) because of a bad string reference.
+    /// Returns `error.UnhandledForm` for forms that are not supported, including the
+    /// indexed forms when `cuh.version` is below 5.
+    pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader) !void {
+        switch (form) {
+            // Offset-sized values. For DW_FORM_strp only the offset is stored inline,
+            // which is all that has to be stepped over.
+            dw.FORM.sec_offset,
+            dw.FORM.ref_addr,
+            dw.FORM.strp,
+            => {
+                _ = try p.readOffset(cuh.format);
+            },
+
+            dw.FORM.addr => {
+                _ = try p.readNBytes(cuh.address_size);
+            },
+
+            dw.FORM.block1,
+            dw.FORM.block2,
+            dw.FORM.block4,
+            dw.FORM.block,
+            => {
+                _ = try p.readBlock(form);
+            },
+
+            dw.FORM.exprloc => {
+                _ = try p.readExprLoc();
+            },
+
+            dw.FORM.flag_present => {},
+
+            dw.FORM.data1,
+            dw.FORM.ref1,
+            dw.FORM.flag,
+            dw.FORM.data2,
+            dw.FORM.ref2,
+            dw.FORM.data4,
+            dw.FORM.ref4,
+            dw.FORM.data8,
+            dw.FORM.ref8,
+            dw.FORM.ref_sig8,
+            dw.FORM.udata,
+            dw.FORM.ref_udata,
+            dw.FORM.sdata,
+            => {
+                _ = try p.readConstant(form);
+            },
+
+            dw.FORM.string => {
+                // Inline NUL-terminated string: the terminator belongs to the value, so the
+                // position must end up one past it for the next attribute to line up.
+                const nul = mem.indexOfScalarPos(u8, p.bytes(), p.pos, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                p.pos = nul + 1;
+            },
+
+            else => if (cuh.version >= 5) switch (form) {
+                // Indexed forms store just an index inline; stepping over it is enough.
+                dw.FORM.strx,
+                dw.FORM.strx1,
+                dw.FORM.strx2,
+                dw.FORM.strx3,
+                dw.FORM.strx4,
+                dw.FORM.addrx,
+                dw.FORM.addrx1,
+                dw.FORM.addrx2,
+                dw.FORM.addrx3,
+                dw.FORM.addrx4,
+                => {
+                    _ = try p.readIndex(form);
+                },
+
+                else => return error.UnhandledForm,
+            } else return error.UnhandledForm,
+        }
+    }
+
+    /// Reads a length-prefixed block and returns its payload bytes.
+    /// The width of the length prefix is selected by `form`, which must be one of the
+    /// `block1`, `block2`, `block4` or `block` forms; any other form is `unreachable`.
+    pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
+        switch (form) {
+            dw.FORM.block1 => return p.readNBytes(try p.readByte()),
+            dw.FORM.block2 => return p.readNBytes(try p.readInt(u16)),
+            dw.FORM.block4 => return p.readNBytes(try p.readInt(u32)),
+            dw.FORM.block => return p.readNBytes(try p.readUleb128(u64)),
+            else => unreachable,
+        }
+    }
+
+    /// Reads a ULEB128 byte count followed by that many bytes, and returns those bytes.
+    pub fn readExprLoc(p: *InfoReader) ![]const u8 {
+        const expr_len = try p.readUleb128(u64);
+        return p.readNBytes(expr_len);
+    }
+
+    /// Reads a constant or reference value whose encoding is selected by `form` and
+    /// returns it widened to `u64`. A signed `sdata` value is returned bit-for-bit.
+    /// Returns `error.UnhandledConstantForm` for any form not listed below.
+    pub fn readConstant(p: *InfoReader, form: Form) !u64 {
+        switch (form) {
+            dw.FORM.data1, dw.FORM.ref1, dw.FORM.flag => return try p.readByte(),
+            dw.FORM.data2, dw.FORM.ref2 => return try p.readInt(u16),
+            dw.FORM.data4, dw.FORM.ref4 => return try p.readInt(u32),
+            dw.FORM.data8, dw.FORM.ref8, dw.FORM.ref_sig8 => return try p.readInt(u64),
+            dw.FORM.udata, dw.FORM.ref_udata => return try p.readUleb128(u64),
+            dw.FORM.sdata => {
+                const signed = try p.readIleb128(i64);
+                return @as(u64, @bitCast(signed));
+            },
+            else => return error.UnhandledConstantForm,
+        }
+    }
+
+    /// Reads the inline index of an indexed string or address form (`strx*` / `addrx*`)
+    /// and returns it widened to `u64`.
+    /// The fixed-width variants occupy 1, 2, 3 or 4 bytes; the unsuffixed variants are ULEB128.
+    /// Returns `error.UnhandledIndexForm` for any other form.
+    pub fn readIndex(p: *InfoReader, form: Form) !u64 {
+        return switch (form) {
+            dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(),
+            dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16),
+            // `readInt` sizes its read with `@sizeOf`, which is 4 for `u24`, so the
+            // 3-byte forms are decoded from an explicit 3-byte slice instead.
+            dw.FORM.strx3, dw.FORM.addrx3 => mem.readInt(u24, (try p.readNBytes(3))[0..3], .little),
+            dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32),
+            dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64),
+            else => return error.UnhandledIndexForm,
+        };
+    }
+
+    /// Reads a string attribute and returns it without its NUL terminator.
+    ///
+    /// * `DW_FORM_strp`: an offset of the unit's format size is read and the string is
+    ///   looked up at that offset in `ctx.debug_str`.
+    /// * `DW_FORM_string`: the string is stored inline; the reader ends up positioned
+    ///   just past the terminator.
+    ///
+    /// Any other form is `unreachable`. Returns `error.UnexpectedEndOfFile` when no
+    /// terminator is found within the respective buffer (this includes an offset that
+    /// points outside `ctx.debug_str`).
+    pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
+        switch (form) {
+            dw.FORM.strp => {
+                const off = try p.readOffset(cuh.format);
+                const off_u = math.cast(usize, off) orelse return error.Overflow;
+                const strtab = p.ctx.debug_str;
+                // Bounded search: never scan past the end of the string table.
+                const end = mem.indexOfScalarPos(u8, strtab, off_u, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                return strtab[off_u..end :0];
+            },
+            dw.FORM.string => {
+                const data = p.bytes();
+                const start = p.pos;
+                const end = mem.indexOfScalarPos(u8, data, start, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                // Consume the terminator as well so the next read starts at the next value.
+                p.pos = end + 1;
+                return data[start..end :0];
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Reads an indexed string attribute (`DW_FORM_strx*`) and returns the referenced
+    /// string without its NUL terminator.
+    ///
+    /// The inline index is resolved through `ctx.debug_str_offsets` (starting at `base`)
+    /// to an offset into `ctx.debug_str`. Any other form is `unreachable`.
+    /// Returns `error.UnexpectedEndOfFile` when the resolved offset lies outside
+    /// `ctx.debug_str` or no terminator follows it.
+    pub fn readStringIndexed(p: *InfoReader, form: Form, cuh: CompileUnitHeader, base: u64) ![:0]const u8 {
+        switch (form) {
+            dw.FORM.strx,
+            dw.FORM.strx1,
+            dw.FORM.strx2,
+            dw.FORM.strx3,
+            dw.FORM.strx4,
+            => {
+                const index = try p.readIndex(form);
+                const off = math.cast(
+                    usize,
+                    try getOffset(p.ctx.debug_str_offsets, base, index, cuh.format),
+                ) orelse return error.Overflow;
+                const strtab = p.ctx.debug_str;
+                // Bounded search: never scan past the end of the string table.
+                const end = mem.indexOfScalarPos(u8, strtab, off, 0) orelse
+                    return error.UnexpectedEndOfFile;
+                return strtab[off..end :0];
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Returns the byte at the current position and advances by one.
+    /// Returns `error.UnexpectedEndOfFile` when no byte is left.
+    pub fn readByte(p: *InfoReader) !u8 {
+        const data = p.bytes();
+        if (p.pos + 1 > data.len) return error.UnexpectedEndOfFile;
+        const byte = data[p.pos];
+        p.pos += 1;
+        return byte;
+    }
+
+    /// Returns the next `num` bytes as a view into the underlying buffer and advances past them.
+    /// Returns `error.Overflow` when `num` does not fit in `usize` and
+    /// `error.UnexpectedEndOfFile` when fewer than `num` bytes remain.
+    pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
+        const num_usize = math.cast(usize, num) orelse return error.Overflow;
+        const data = p.bytes();
+        // Compare against the remaining byte count instead of computing `p.pos + num_usize`:
+        // `num` is frequently decoded from the input and the sum could overflow.
+        if (p.pos > data.len or num_usize > data.len - p.pos) return error.UnexpectedEndOfFile;
+        defer p.pos += num_usize;
+        return data[p.pos..][0..num_usize];
+    }
+
+    /// Reads a little-endian integer of type `Int` at the current position and advances
+    /// by `@sizeOf(Int)` bytes.
+    /// Returns `error.UnexpectedEndOfFile` when not enough bytes remain.
+    pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
+        const size = @sizeOf(Int);
+        const end = p.pos + size;
+        if (end > p.bytes().len) return error.UnexpectedEndOfFile;
+        const value = mem.readInt(Int, p.bytes()[p.pos..][0..size], .little);
+        p.pos = end;
+        return value;
+    }
+
+    /// Reads an offset whose width depends on `dw_fmt`: 4 bytes for `.dwarf32`,
+    /// 8 bytes for `.dwarf64`. The result is widened to `u64`.
+    pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
+        switch (dw_fmt) {
+            .dwarf32 => return try p.readInt(u32),
+            .dwarf64 => return try p.readInt(u64),
+        }
+    }
+
+    /// Decodes an unsigned LEB128 value of type `Type` starting at the current position
+    /// and advances by the number of bytes the decoder consumed.
+    pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
+        const remaining = p.bytes()[p.pos..];
+        var fbs = std.io.fixedBufferStream(remaining);
+        var counting = std.io.countingReader(fbs.reader());
+        const decoded: Type = try leb.readUleb128(Type, counting.reader());
+        const consumed = math.cast(usize, counting.bytes_read) orelse return error.Overflow;
+        p.pos += consumed;
+        return decoded;
+    }
+
+    /// Decodes a signed LEB128 value of type `Type` starting at the current position
+    /// and advances by the number of bytes the decoder consumed.
+    pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
+        const remaining = p.bytes()[p.pos..];
+        var fbs = std.io.fixedBufferStream(remaining);
+        var counting = std.io.countingReader(fbs.reader());
+        const decoded: Type = try leb.readIleb128(Type, counting.reader());
+        const consumed = math.cast(usize, counting.bytes_read) orelse return error.Overflow;
+        p.pos += consumed;
+        return decoded;
+    }
+
+    /// Sets the read position to `off`.
+    /// Returns `error.Overflow` when `off` does not fit in `usize`.
+    pub fn seekTo(p: *InfoReader, off: u64) !void {
+        const target = math.cast(usize, off) orelse return error.Overflow;
+        p.pos = target;
+    }
+};
+
+pub const AbbrevReader = struct {
+ ctx: Dwarf,
+ pos: usize = 0,
+
+    /// Returns the `debug_abbrev` buffer of this reader's context as a read-only view.
+    fn bytes(p: AbbrevReader) []const u8 {
+        const abbrev: []const u8 = p.ctx.debug_abbrev;
+        return abbrev;
+    }
+
+    /// Reports whether the current position is still before the end of the buffer.
+    pub fn hasMore(p: AbbrevReader) bool {
+        return p.bytes().len > p.pos;
+    }
+
+    /// Reads the head of an abbreviation declaration: code, tag and the children flag.
+    /// Returns `null` when the code read is 0. The returned `pos`/`len` describe the
+    /// bytes consumed by this call; the attribute list that follows is not read here.
+    pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
+        const start = p.pos;
+
+        const code = try p.readUleb128(Code);
+        if (code == 0) return null;
+
+        const tag = try p.readUleb128(Tag);
+        const children_flag = try p.readByte();
+
+        return .{
+            .code = code,
+            .pos = start,
+            .len = p.pos - start,
+            .tag = tag,
+            .has_children = children_flag > 0,
+        };
+    }
+
+    /// Reads one (attribute, form) pair, both ULEB128-encoded.
+    /// Returns `null` when both values are 0. The returned `pos`/`len` describe the
+    /// bytes consumed by this call.
+    pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr {
+        const start = p.pos;
+        const at = try p.readUleb128(At);
+        const form = try p.readUleb128(Form);
+        if (at == 0 and form == 0) return null;
+        return .{
+            .at = at,
+            .form = form,
+            .pos = start,
+            .len = p.pos - start,
+        };
+    }
+
+    /// Returns the byte at the current position and advances by one.
+    /// Returns `error.Eof` when no byte is left.
+    pub fn readByte(p: *AbbrevReader) !u8 {
+        const data = p.bytes();
+        if (p.pos + 1 > data.len) return error.Eof;
+        const byte = data[p.pos];
+        p.pos += 1;
+        return byte;
+    }
+
+    /// Decodes an unsigned LEB128 value of type `Type` starting at the current position
+    /// and advances by the number of bytes the decoder consumed.
+    pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
+        const remaining = p.bytes()[p.pos..];
+        var fbs = std.io.fixedBufferStream(remaining);
+        var counting = std.io.countingReader(fbs.reader());
+        const decoded: Type = try leb.readUleb128(Type, counting.reader());
+        const consumed = math.cast(usize, counting.bytes_read) orelse return error.Overflow;
+        p.pos += consumed;
+        return decoded;
+    }
+
+    /// Sets the read position to byte offset `off` within the abbreviation data.
+    /// Returns `error.Overflow` when `off` does not fit in `usize` and `error.Eof` when
+    /// it points past the end of the data.
+    pub fn seekTo(p: *AbbrevReader, off: u64) !void {
+        const target = math.cast(usize, off) orelse return error.Overflow;
+        // Reject positions beyond the buffer up front: later reads slice the buffer from
+        // `pos`, which is only valid for `pos <= len`. A position equal to the length is
+        // fine and simply leaves nothing to read (`hasMore` returns false).
+        if (target > p.bytes().len) return error.Eof;
+        p.pos = target;
+    }
+};
+
+/// Head of an abbreviation declaration as produced by `AbbrevReader.readDecl`.
+const AbbrevDecl = struct {
+ /// Abbreviation code; never 0, since `readDecl` returns null for a zero code.
+ code: Code,
+ /// Reader position at which the declaration started.
+ pos: usize,
+ /// Number of bytes consumed for code, tag and children flag.
+ len: usize,
+ /// Tag value read after the code.
+ tag: Tag,
+ /// True when the byte following the tag is non-zero.
+ has_children: bool,
+};
+
+/// One (attribute, form) pair as produced by `AbbrevReader.readAttr`.
+const AbbrevAttr = struct {
+ /// Attribute value, compared against `AT` constants by callers.
+ at: At,
+ /// Form value, compared against `FORM` constants by callers.
+ form: Form,
+ /// Reader position at which the pair started.
+ pos: usize,
+ /// Number of bytes consumed for the pair.
+ len: usize,
+};
+
+/// Unit header fields as decoded by `InfoReader.readCompileUnitHeader`.
+const CompileUnitHeader = struct {
+ /// Offset width in effect for this unit (32-bit or 64-bit).
+ format: DwarfFormat,
+ /// Length value read from the header (the 64-bit one when the escape value was seen).
+ length: u64,
+ /// Version read from the header; the reader only produces 4 or 5.
+ version: Version,
+ /// Offset read from the header; callers seek the abbreviation reader to it.
+ debug_abbrev_offset: u64,
+ /// Size in bytes used when skipping `DW_FORM_addr` values.
+ address_size: u8,
+ /// Unit type byte for version 5 headers; set to 0 for version 4.
+ unit_type: u8,
+};
+
+/// A position/length pair.
+/// NOTE(review): not referenced by any code visible here; presumably the byte range
+/// of a debugging information entry — confirm or remove.
+const Die = struct {
+ pos: usize,
+ len: usize,
+};
+
+/// Selects the width of offsets (see `InfoReader.readOffset`) and of string offset
+/// table entries (see `getOffset`).
+const DwarfFormat = enum {
+ /// 4-byte offsets; used unless the unit length starts with 0xffffffff.
+ dwarf32,
+ /// 8-byte offsets; selected when the unit length starts with 0xffffffff.
+ dwarf64,
+};
+
+const dw = std.dwarf;
+const leb = std.leb;
+const log = std.log.scoped(.link);
+const math = std.math;
+const mem = std.mem;
+const std = @import("std");
+const Allocator = mem.Allocator;
+const Dwarf = @This();
+const File = @import("file.zig").File;
+const MachO = @import("../MachO.zig");
+const Object = @import("Object.zig");
+
+// Integer types used for values decoded from the abbreviation and info data.
+// `At`, `Code`, `Form` and `Tag` are read as ULEB128; `Version` is read as a fixed-width integer.
+pub const At = u64;
+pub const Code = u64;
+pub const Form = u64;
+pub const Tag = u64;
+pub const Version = u16;
+
+// Constant namespaces re-exported from `std.dwarf` for users of this file.
+pub const AT = dw.AT;
+pub const FORM = dw.FORM;
+pub const TAG = dw.TAG;
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
@@ -443,11 +443,8 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m
for (slice.items(.header), 0..) |sect, n_sect| {
if (!isCstringLiteral(sect)) continue;
- const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
- const data = try allocator.alloc(u8, sect_size);
+ const data = try self.readSectionData(allocator, file, @intCast(n_sect));
defer allocator.free(data);
- const amt = try file.preadAll(data, sect.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
var count: u32 = 0;
var start: u32 = 0;
@@ -646,13 +643,10 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO
}
const slice = self.sections.slice();
- for (slice.items(.header), slice.items(.subsections)) |header, subs| {
+ for (slice.items(.header), slice.items(.subsections), 0..) |header, subs, n_sect| {
if (isCstringLiteral(header) or isFixedSizeLiteral(header)) {
- const sect_size = math.cast(usize, header.size) orelse return error.Overflow;
- const data = try gpa.alloc(u8, sect_size);
+ const data = try self.readSectionData(gpa, file, @intCast(n_sect));
defer gpa.free(data);
- const amt = try file.preadAll(data, header.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
for (subs.items) |sub| {
const atom = self.getAtom(sub.atom).?;
@@ -686,12 +680,7 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO
buffer.resize(target_size) catch unreachable;
const gop = try sections_data.getOrPut(target.n_sect);
if (!gop.found_existing) {
- const target_sect = slice.items(.header)[target.n_sect];
- const target_sect_size = math.cast(usize, target_sect.size) orelse return error.Overflow;
- const data = try gpa.alloc(u8, target_sect_size);
- const amt = try file.preadAll(data, target_sect.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
- gop.value_ptr.* = data;
+ gop.value_ptr.* = try self.readSectionData(gpa, file, @intCast(target.n_sect));
}
const data = gop.value_ptr.*;
const target_off = math.cast(usize, target.off) orelse return error.Overflow;
@@ -1000,7 +989,7 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m
defer tracy.end();
const slice = self.sections.slice();
- for (slice.items(.header), slice.items(.relocs)) |sect, *out| {
+ for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| {
if (sect.nreloc == 0) continue;
// We skip relocs for __DWARF since even in -r mode, the linker is expected to emit
// debug symbol stabs in the relocatable. This made me curious why that is. For now,
@@ -1009,8 +998,8 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m
!mem.eql(u8, sect.sectName(), "__compact_unwind")) continue;
switch (cpu_arch) {
- .x86_64 => try x86_64.parseRelocs(self, sect, out, file, macho_file),
- .aarch64 => try aarch64.parseRelocs(self, sect, out, file, macho_file),
+ .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file),
+ .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file),
else => unreachable,
}
@@ -1146,11 +1135,8 @@ fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: Fil
};
const header = self.sections.items(.header)[sect_id];
- const size = math.cast(usize, header.size) orelse return error.Overflow;
- const data = try allocator.alloc(u8, size);
+ const data = try self.readSectionData(allocator, file, sect_id);
defer allocator.free(data);
- const amt = try file.preadAll(data, header.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
@@ -1359,151 +1345,106 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void {
defer tracy.end();
const gpa = macho_file.base.comp.gpa;
+ const file = macho_file.getFileHandle(self.file_handle);
- var debug_info_index: ?usize = null;
- var debug_abbrev_index: ?usize = null;
- var debug_str_index: ?usize = null;
+ var dwarf: Dwarf = .{};
+ defer dwarf.deinit(gpa);
for (self.sections.items(.header), 0..) |sect, index| {
+ const n_sect: u8 = @intCast(index);
if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue;
- if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index;
- if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index;
- if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index;
+ if (mem.eql(u8, sect.sectName(), "__debug_info")) {
+ dwarf.debug_info = try self.readSectionData(gpa, file, n_sect);
+ }
+ if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) {
+ dwarf.debug_abbrev = try self.readSectionData(gpa, file, n_sect);
+ }
+ if (mem.eql(u8, sect.sectName(), "__debug_str")) {
+ dwarf.debug_str = try self.readSectionData(gpa, file, n_sect);
+ }
+ // __debug_str_offs[ets] section is a new addition in DWARFv5 and is generally
+ // required in order to correctly parse strings.
+ if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) {
+ dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect);
+ }
}
- if (debug_info_index == null or debug_abbrev_index == null) return;
+ if (dwarf.debug_info.len == 0) return;
- const slice = self.sections.slice();
- const file = macho_file.getFileHandle(self.file_handle);
- const debug_info = blk: {
- const sect = slice.items(.header)[debug_info_index.?];
- const size = math.cast(usize, sect.size) orelse return error.Overflow;
- const data = try gpa.alloc(u8, size);
- const amt = try file.preadAll(data, sect.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
- break :blk data;
- };
- defer gpa.free(debug_info);
- const debug_abbrev = blk: {
- const sect = slice.items(.header)[debug_abbrev_index.?];
- const size = math.cast(usize, sect.size) orelse return error.Overflow;
- const data = try gpa.alloc(u8, size);
- const amt = try file.preadAll(data, sect.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
- break :blk data;
- };
- defer gpa.free(debug_abbrev);
- const debug_str = if (debug_str_index) |sid| blk: {
- const sect = slice.items(.header)[sid];
- const size = math.cast(usize, sect.size) orelse return error.Overflow;
- const data = try gpa.alloc(u8, size);
- const amt = try file.preadAll(data, sect.offset + self.offset);
- if (amt != data.len) return error.InputOutput;
- break :blk data;
- } else &[0]u8{};
- defer gpa.free(debug_str);
-
- self.compile_unit = self.findCompileUnit(.{
- .gpa = gpa,
- .debug_info = debug_info,
- .debug_abbrev = debug_abbrev,
- .debug_str = debug_str,
- }) catch null; // TODO figure out what errors are fatal, and when we silently fail
-}
-
-fn findCompileUnit(self: *Object, args: struct {
- gpa: Allocator,
- debug_info: []const u8,
- debug_abbrev: []const u8,
- debug_str: []const u8,
-}) !CompileUnit {
- var cu_wip: struct {
- comp_dir: ?[:0]const u8 = null,
- tu_name: ?[:0]const u8 = null,
- } = .{};
-
- const gpa = args.gpa;
- var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str };
- var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev };
+ // TODO return error once we fix emitting DWARF in self-hosted backend.
+ // https://github.com/ziglang/zig/issues/21719
+ self.compile_unit = self.findCompileUnit(gpa, dwarf) catch null;
+}
+
+fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf) !CompileUnit {
+ var info_reader = Dwarf.InfoReader{ .ctx = ctx };
+ var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx };
const cuh = try info_reader.readCompileUnitHeader();
try abbrev_reader.seekTo(cuh.debug_abbrev_offset);
- const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof;
- if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag;
+ const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile;
+ if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag;
try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader);
- while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
- dwarf.AT.name => {
- cu_wip.tu_name = try info_reader.readString(attr.form, cuh);
- },
- dwarf.AT.comp_dir => {
- cu_wip.comp_dir = try info_reader.readString(attr.form, cuh);
- },
- else => switch (attr.form) {
- dwarf.FORM.sec_offset,
- dwarf.FORM.ref_addr,
- => {
- _ = try info_reader.readOffset(cuh.format);
- },
-
- dwarf.FORM.addr => {
- _ = try info_reader.readNBytes(cuh.address_size);
- },
-
- dwarf.FORM.block1,
- dwarf.FORM.block2,
- dwarf.FORM.block4,
- dwarf.FORM.block,
- => {
- _ = try info_reader.readBlock(attr.form);
- },
-
- dwarf.FORM.exprloc => {
- _ = try info_reader.readExprLoc();
- },
-
- dwarf.FORM.flag_present => {},
-
- dwarf.FORM.data1,
- dwarf.FORM.ref1,
- dwarf.FORM.flag,
- dwarf.FORM.data2,
- dwarf.FORM.ref2,
- dwarf.FORM.data4,
- dwarf.FORM.ref4,
- dwarf.FORM.data8,
- dwarf.FORM.ref8,
- dwarf.FORM.ref_sig8,
- dwarf.FORM.udata,
- dwarf.FORM.ref_udata,
- dwarf.FORM.sdata,
- => {
- _ = try info_reader.readConstant(attr.form);
- },
-
- dwarf.FORM.strp,
- dwarf.FORM.string,
- => {
- _ = try info_reader.readString(attr.form, cuh);
- },
-
- else => {
- // TODO actual errors?
- log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
- return error.UnhandledForm;
- },
- },
+ const Pos = struct {
+ pos: usize,
+ form: Dwarf.Form,
};
-
- if (cu_wip.comp_dir == null) return error.MissingCompDir;
- if (cu_wip.tu_name == null) return error.MissingTuName;
-
- return .{
- .comp_dir = try self.addString(gpa, cu_wip.comp_dir.?),
- .tu_name = try self.addString(gpa, cu_wip.tu_name.?),
+ var saved: struct {
+ tu_name: ?Pos,
+ comp_dir: ?Pos,
+ str_offsets_base: ?Pos,
+ } = .{
+ .tu_name = null,
+ .comp_dir = null,
+ .str_offsets_base = null,
};
+ while (try abbrev_reader.readAttr()) |attr| {
+ const pos: Pos = .{ .pos = info_reader.pos, .form = attr.form };
+ switch (attr.at) {
+ Dwarf.AT.name => saved.tu_name = pos,
+ Dwarf.AT.comp_dir => saved.comp_dir = pos,
+ Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos,
+ else => {},
+ }
+ try info_reader.skip(attr.form, cuh);
+ }
+
+ if (saved.comp_dir == null) return error.MissingCompileDir;
+ if (saved.tu_name == null) return error.MissingTuName;
+
+ const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: {
+ try info_reader.seekTo(str_offsets_base.pos);
+ break :str_offsets_base try info_reader.readOffset(cuh.format);
+ } else null;
+
+ var cu: CompileUnit = .{ .comp_dir = .{}, .tu_name = .{} };
+ for (&[_]struct { Pos, *MachO.String }{
+ .{ saved.comp_dir.?, &cu.comp_dir },
+ .{ saved.tu_name.?, &cu.tu_name },
+ }) |tuple| {
+ const pos, const str_offset_ptr = tuple;
+ try info_reader.seekTo(pos.pos);
+ str_offset_ptr.* = switch (pos.form) {
+ Dwarf.FORM.strp,
+ Dwarf.FORM.string,
+ => try self.addString(gpa, try info_reader.readString(pos.form, cuh)),
+ Dwarf.FORM.strx,
+ Dwarf.FORM.strx1,
+ Dwarf.FORM.strx2,
+ Dwarf.FORM.strx3,
+ Dwarf.FORM.strx4,
+ => blk: {
+ const base = str_offsets_base orelse return error.MissingStrOffsetsBase;
+ break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base));
+ },
+ else => return error.InvalidForm,
+ };
+ }
+
+ return cu;
}
pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void {
@@ -2561,6 +2502,17 @@ pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInf
return &self.unwind_records.items[index];
}
+/// Reads the full contents of section `n_sect` from `file` into a newly allocated buffer.
+/// The read starts at the section's file offset plus `self.offset`.
+/// Returns `error.Overflow` when the section size does not fit in `usize` and
+/// `error.InputOutput` when fewer bytes than the section size could be read.
+/// Caller owns the memory.
+pub fn readSectionData(self: Object, allocator: Allocator, file: File.Handle, n_sect: u8) ![]u8 {
+    const header = self.sections.items(.header)[n_sect];
+    const size = math.cast(usize, header.size) orelse return error.Overflow;
+    const data = try allocator.alloc(u8, size);
+    // Must be registered before the first fallible call so the buffer is also
+    // released when the read itself fails, not only on a short read.
+    errdefer allocator.free(data);
+    const amt = try file.preadAll(data, header.offset + self.offset);
+    if (amt != data.len) return error.InputOutput;
+    return data;
+}
+
pub fn format(
self: *Object,
comptime unused_fmt_string: []const u8,
@@ -2848,6 +2800,7 @@ const CompactUnwindCtx = struct {
const x86_64 = struct {
fn parseRelocs(
self: *Object,
+ n_sect: u8,
sect: macho.section_64,
out: *std.ArrayListUnmanaged(Relocation),
handle: File.Handle,
@@ -2857,19 +2810,12 @@ const x86_64 = struct {
const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
defer gpa.free(relocs_buffer);
- {
- const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
- if (amt != relocs_buffer.len) return error.InputOutput;
- }
+ const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
+ if (amt != relocs_buffer.len) return error.InputOutput;
const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
- const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
- const code = try gpa.alloc(u8, sect_size);
+ const code = try self.readSectionData(gpa, handle, n_sect);
defer gpa.free(code);
- {
- const amt = try handle.preadAll(code, sect.offset + self.offset);
- if (amt != code.len) return error.InputOutput;
- }
try out.ensureTotalCapacityPrecise(gpa, relocs.len);
@@ -3021,6 +2967,7 @@ const x86_64 = struct {
const aarch64 = struct {
fn parseRelocs(
self: *Object,
+ n_sect: u8,
sect: macho.section_64,
out: *std.ArrayListUnmanaged(Relocation),
handle: File.Handle,
@@ -3030,19 +2977,12 @@ const aarch64 = struct {
const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
defer gpa.free(relocs_buffer);
- {
- const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
- if (amt != relocs_buffer.len) return error.InputOutput;
- }
+ const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
+ if (amt != relocs_buffer.len) return error.InputOutput;
const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
- const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
- const code = try gpa.alloc(u8, sect_size);
+ const code = try self.readSectionData(gpa, handle, n_sect);
defer gpa.free(code);
- {
- const amt = try handle.preadAll(code, sect.offset + self.offset);
- if (amt != code.len) return error.InputOutput;
- }
try out.ensureTotalCapacityPrecise(gpa, relocs.len);
@@ -3219,7 +3159,6 @@ const aarch64 = struct {
};
const assert = std.debug.assert;
-const dwarf = @import("dwarf.zig");
const eh_frame = @import("eh_frame.zig");
const log = std.log.scoped(.link);
const macho = std.macho;
@@ -3233,6 +3172,7 @@ const Allocator = mem.Allocator;
const Archive = @import("Archive.zig");
const Atom = @import("Atom.zig");
const Cie = eh_frame.Cie;
+const Dwarf = @import("Dwarf.zig");
const Fde = eh_frame.Fde;
const File = @import("file.zig").File;
const LoadCommandIterator = macho.LoadCommandIterator;
diff --git a/src/link/MachO/dwarf.zig b/src/link/MachO/dwarf.zig
@@ -1,286 +0,0 @@
-pub const InfoReader = struct {
- bytes: []const u8,
- strtab: []const u8,
- pos: usize = 0,
-
- pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
- var length: u64 = try p.readInt(u32);
- const is_64bit = length == 0xffffffff;
- if (is_64bit) {
- length = try p.readInt(u64);
- }
- const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32;
- return .{
- .format = dw_fmt,
- .length = length,
- .version = try p.readInt(u16),
- .debug_abbrev_offset = try p.readOffset(dw_fmt),
- .address_size = try p.readByte(),
- };
- }
-
- pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
- const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
- const end_pos = p.pos + switch (cuh.format) {
- .dwarf32 => @as(usize, 4),
- .dwarf64 => 12,
- } + cuh_length;
- while (p.pos < end_pos) {
- const di_code = try p.readUleb128(u64);
- if (di_code == 0) return error.Eof;
- if (di_code == code) return;
-
- while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
- dwarf.FORM.sec_offset,
- dwarf.FORM.ref_addr,
- => {
- _ = try p.readOffset(cuh.format);
- },
-
- dwarf.FORM.addr => {
- _ = try p.readNBytes(cuh.address_size);
- },
-
- dwarf.FORM.block1,
- dwarf.FORM.block2,
- dwarf.FORM.block4,
- dwarf.FORM.block,
- => {
- _ = try p.readBlock(attr.form);
- },
-
- dwarf.FORM.exprloc => {
- _ = try p.readExprLoc();
- },
-
- dwarf.FORM.flag_present => {},
-
- dwarf.FORM.data1,
- dwarf.FORM.ref1,
- dwarf.FORM.flag,
- dwarf.FORM.data2,
- dwarf.FORM.ref2,
- dwarf.FORM.data4,
- dwarf.FORM.ref4,
- dwarf.FORM.data8,
- dwarf.FORM.ref8,
- dwarf.FORM.ref_sig8,
- dwarf.FORM.udata,
- dwarf.FORM.ref_udata,
- dwarf.FORM.sdata,
- => {
- _ = try p.readConstant(attr.form);
- },
-
- dwarf.FORM.strp,
- dwarf.FORM.string,
- => {
- _ = try p.readString(attr.form, cuh);
- },
-
- else => {
- // TODO better errors
- log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
- return error.UnhandledDwFormValue;
- },
- };
- }
- }
-
- pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
- const len: u64 = switch (form) {
- dwarf.FORM.block1 => try p.readByte(),
- dwarf.FORM.block2 => try p.readInt(u16),
- dwarf.FORM.block4 => try p.readInt(u32),
- dwarf.FORM.block => try p.readUleb128(u64),
- else => unreachable,
- };
- return p.readNBytes(len);
- }
-
- pub fn readExprLoc(p: *InfoReader) ![]const u8 {
- const len: u64 = try p.readUleb128(u64);
- return p.readNBytes(len);
- }
-
- pub fn readConstant(p: *InfoReader, form: Form) !u64 {
- return switch (form) {
- dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(),
- dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16),
- dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32),
- dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64),
- dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64),
- dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)),
- else => return error.UnhandledConstantForm,
- };
- }
-
- pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
- switch (form) {
- dwarf.FORM.strp => {
- const off = try p.readOffset(cuh.format);
- const off_u = math.cast(usize, off) orelse return error.Overflow;
- return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0);
- },
- dwarf.FORM.string => {
- const start = p.pos;
- while (p.pos < p.bytes.len) : (p.pos += 1) {
- if (p.bytes[p.pos] == 0) break;
- }
- if (p.bytes[p.pos] != 0) return error.Eof;
- return p.bytes[start..p.pos :0];
- },
- else => unreachable,
- }
- }
-
- pub fn readByte(p: *InfoReader) !u8 {
- if (p.pos + 1 > p.bytes.len) return error.Eof;
- defer p.pos += 1;
- return p.bytes[p.pos];
- }
-
- pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
- const num_usize = math.cast(usize, num) orelse return error.Overflow;
- if (p.pos + num_usize > p.bytes.len) return error.Eof;
- defer p.pos += num_usize;
- return p.bytes[p.pos..][0..num_usize];
- }
-
- pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
- if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof;
- defer p.pos += @sizeOf(Int);
- return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little);
- }
-
- pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
- return switch (dw_fmt) {
- .dwarf32 => try p.readInt(u32),
- .dwarf64 => try p.readInt(u64),
- };
- }
-
- pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
- var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
- var creader = std.io.countingReader(stream.reader());
- const value: Type = try leb.readUleb128(Type, creader.reader());
- p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
- return value;
- }
-
- pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
- var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
- var creader = std.io.countingReader(stream.reader());
- const value: Type = try leb.readIleb128(Type, creader.reader());
- p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
- return value;
- }
-
- pub fn seekTo(p: *InfoReader, off: u64) !void {
- p.pos = math.cast(usize, off) orelse return error.Overflow;
- }
-};
-
-pub const AbbrevReader = struct {
- bytes: []const u8,
- pos: usize = 0,
-
- pub fn hasMore(p: AbbrevReader) bool {
- return p.pos < p.bytes.len;
- }
-
- pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
- const pos = p.pos;
- const code = try p.readUleb128(Code);
- if (code == 0) return null;
-
- const tag = try p.readUleb128(Tag);
- const has_children = (try p.readByte()) > 0;
- return .{
- .code = code,
- .pos = pos,
- .len = p.pos - pos,
- .tag = tag,
- .has_children = has_children,
- };
- }
-
- pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr {
- const pos = p.pos;
- const at = try p.readUleb128(At);
- const form = try p.readUleb128(Form);
- return if (at == 0 and form == 0) null else .{
- .at = at,
- .form = form,
- .pos = pos,
- .len = p.pos - pos,
- };
- }
-
- pub fn readByte(p: *AbbrevReader) !u8 {
- if (p.pos + 1 > p.bytes.len) return error.Eof;
- defer p.pos += 1;
- return p.bytes[p.pos];
- }
-
- pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
- var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
- var creader = std.io.countingReader(stream.reader());
- const value: Type = try leb.readUleb128(Type, creader.reader());
- p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
- return value;
- }
-
- pub fn seekTo(p: *AbbrevReader, off: u64) !void {
- p.pos = math.cast(usize, off) orelse return error.Overflow;
- }
-};
-
-const AbbrevDecl = struct {
- code: Code,
- pos: usize,
- len: usize,
- tag: Tag,
- has_children: bool,
-};
-
-const AbbrevAttr = struct {
- at: At,
- form: Form,
- pos: usize,
- len: usize,
-};
-
-const CompileUnitHeader = struct {
- format: DwarfFormat,
- length: u64,
- version: u16,
- debug_abbrev_offset: u64,
- address_size: u8,
-};
-
-const Die = struct {
- pos: usize,
- len: usize,
-};
-
-const DwarfFormat = enum {
- dwarf32,
- dwarf64,
-};
-
-const dwarf = std.dwarf;
-const leb = std.leb;
-const log = std.log.scoped(.link);
-const math = std.math;
-const mem = std.mem;
-const std = @import("std");
-
-const At = u64;
-const Code = u64;
-const Form = u64;
-const Tag = u64;
-
-pub const AT = dwarf.AT;
-pub const FORM = dwarf.FORM;
-pub const TAG = dwarf.TAG;