diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index ba2c106f05..e8b8e40f37 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -813,7 +813,7 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
                 // Pad out space for code signature
                 const text_cmd = parser.load_commands.items[parser.text_cmd_index.?].Segment.inner;
                 const dataoff = @intCast(u32, mem.alignForward(parser.end_pos.?, @sizeOf(u64)));
-                const datasize = 0x1000;
+                const datasize = 0x400000;
                 const code_sig = macho.linkedit_data_command{
                     .cmd = macho.LC_CODE_SIGNATURE,
                     .cmdsize = @sizeOf(macho.linkedit_data_command),
@@ -1600,7 +1600,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
     return vaddr;
 }
 
-fn makeStaticString(comptime bytes: []const u8) [16]u8 {
+pub fn makeStaticString(comptime bytes: []const u8) [16]u8 {
     var buf = [_]u8{0} ** 16;
     if (bytes.len > buf.len) @compileError("string too long; max 16 bytes");
     mem.copy(u8, buf[0..], bytes);
@@ -1994,3 +1994,10 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
     const T = @TypeOf(a, b);
     return std.math.mul(T, a, b) catch std.math.maxInt(T);
 }
+
+test "" {
+    // TODO surprisingly this causes a linking error:
+    // _linkWithLLD symbol missing for arch
+    // _ = std.testing.refAllDecls(@This());
+    _ = std.testing.refAllDecls(@import("MachO/commands.zig"));
+}
diff --git a/src/link/MachO/Parser.zig b/src/link/MachO/Parser.zig
new file mode 100644
index 0000000000..2f2e74933e
--- /dev/null
+++ b/src/link/MachO/Parser.zig
@@ -0,0 +1,92 @@
+const Parser = @This();
+
+const std = @import("std");
+const fs = std.fs;
+const io = std.io;
+const mem = std.mem;
+const macho = std.macho;
+
+const Allocator = std.mem.Allocator;
+
+const LoadCommand = @import("commands.zig").LoadCommand;
+
+/// Allocator used for `load_commands` and for the payload of every parsed command.
+allocator: *Allocator,
+
+/// Mach-O header
+header: ?macho.mach_header_64 = null,
+
+/// Load commands
+load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
+
+/// Index into `load_commands` of the `__TEXT` segment command, if one was seen.
+text_cmd_index: ?usize = null,
+
+/// Index into `load_commands` of the `__LINKEDIT` segment command, if one was seen.
+linkedit_cmd_index: ?usize = null,
+/// Byte offset of the `__LINKEDIT` segment command, counted from the start of the Mach-O header.
+linkedit_cmd_offset: ?u64 = null,
+
+/// Byte offset just past the last load command, set once `parse` has read them all.
+code_sig_cmd_offset: ?u64 = null,
+
+/// `stroff + strsize` of the most recently parsed `LC_SYMTAB` command.
+end_pos: ?u64 = null,
+
+/// Creates a parser with no header and no load commands.
+pub fn init(allocator: *Allocator) Parser {
+    return .{
+        .allocator = allocator,
+    };
+}
+
+/// Reads the Mach-O header and then `ncmds` load commands from `reader`.
+/// Commands read before a failure stay in `load_commands`; `deinit` releases them.
+pub fn parse(self: *Parser, reader: anytype) !void {
+    self.header = try reader.readStruct(macho.mach_header_64);
+
+    const ncmds = self.header.?.ncmds;
+    try self.load_commands.ensureCapacity(self.allocator, ncmds);
+
+    var off: u64 = @sizeOf(macho.mach_header_64);
+    // Same width as `ncmds`, so the counter cannot overflow before the loop ends.
+    var i: u32 = 0;
+    while (i < ncmds) : (i += 1) {
+        const cmd = try LoadCommand.read(self.allocator, reader);
+        switch (cmd.cmd()) {
+            macho.LC_SEGMENT_64 => {
+                const x = cmd.Segment;
+                if (mem.eql(u8, mem.trimRight(u8, x.inner.segname[0..], &[_]u8{0}), "__LINKEDIT")) {
+                    self.linkedit_cmd_index = i;
+                    self.linkedit_cmd_offset = off;
+                } else if (mem.eql(u8, mem.trimRight(u8, x.inner.segname[0..], &[_]u8{0}), "__TEXT")) {
+                    self.text_cmd_index = i;
+                }
+            },
+            macho.LC_SYMTAB => {
+                const x = cmd.Symtab;
+                self.end_pos = x.stroff + x.strsize;
+            },
+            else => {},
+        }
+        off += cmd.cmdsize();
+        self.load_commands.appendAssumeCapacity(cmd);
+    }
+
+    self.code_sig_cmd_offset = off;
+
+    // TODO parse memory mapped segments
+}
+
+/// Parses `file` through its reader; see `parse`.
+pub fn parseFile(self: *Parser, file: fs.File) !void {
+    return self.parse(file.reader());
+}
+
+/// Frees every parsed load command and the list holding them.
+pub fn deinit(self: *Parser) void {
+    for (self.load_commands.items) |*cmd| {
+        cmd.deinit(self.allocator);
+    }
+    self.load_commands.deinit(self.allocator);
+}
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
new file mode 100644
index 0000000000..7be72d9745
--- /dev/null
+++ b/src/link/MachO/commands.zig
@@ -0,0 +1,418 @@
+const std = @import("std");
+const fs = std.fs;
+const io = std.io;
+const mem = std.mem;
+const macho = std.macho;
+const testing = std.testing;
+
+const Allocator = std.mem.Allocator;
+const makeName = @import("../MachO.zig").makeStaticString;
+
+/// A single Mach-O load command, tagged by how its payload is represented.
+/// Variants carrying a `SegmentCommand` or `GenericCommandWithData` own heap
+/// memory and must be released with `deinit`.
+pub const LoadCommand = union(enum) {
+    Segment: SegmentCommand,
+    DyldInfoOnly: macho.dyld_info_command,
+    Symtab: macho.symtab_command,
+    Dysymtab: macho.dysymtab_command,
+    Dylinker: GenericCommandWithData(macho.dylinker_command),
+    Dylib: GenericCommandWithData(macho.dylib_command),
+    Main: macho.entry_point_command,
+    VersionMin: macho.version_min_command,
+    SourceVersion: macho.source_version_command,
+    LinkeditData: macho.linkedit_data_command,
+    Unknown: GenericCommandWithData(macho.load_command),
+
+    /// Reads one complete load command from `reader` and decodes it by `cmd`.
+    /// Exactly `cmdsize` bytes are consumed when the call succeeds.
+    /// Returns `error.MalformedLoadCommand` if `cmdsize` is smaller than the
+    /// generic `load_command` header that was just read.
+    pub fn read(allocator: *Allocator, reader: anytype) !LoadCommand {
+        const header = try reader.readStruct(macho.load_command);
+        // A smaller `cmdsize` would make the slice below start past the end of `buffer`.
+        if (header.cmdsize < @sizeOf(macho.load_command)) return error.MalformedLoadCommand;
+        var buffer = try allocator.alloc(u8, header.cmdsize);
+        defer allocator.free(buffer);
+        const slice = [1]macho.load_command{header};
+        mem.copy(u8, buffer[0..], mem.sliceAsBytes(slice[0..1]));
+        try reader.readNoEof(buffer[@sizeOf(macho.load_command)..]);
+        // Re-parse from the staged copy so each decoder sees the command from its first byte.
+        var stream = io.fixedBufferStream(buffer[0..]);
+
+        return switch (header.cmd) {
+            macho.LC_SEGMENT_64 => LoadCommand{
+                .Segment = try SegmentCommand.read(allocator, stream.reader()),
+            },
+            macho.LC_DYLD_INFO, macho.LC_DYLD_INFO_ONLY => LoadCommand{
+                .DyldInfoOnly = try stream.reader().readStruct(macho.dyld_info_command),
+            },
+            macho.LC_SYMTAB => LoadCommand{
+                .Symtab = try stream.reader().readStruct(macho.symtab_command),
+            },
+            macho.LC_DYSYMTAB => LoadCommand{
+                .Dysymtab = try stream.reader().readStruct(macho.dysymtab_command),
+            },
+            macho.LC_ID_DYLINKER, macho.LC_LOAD_DYLINKER, macho.LC_DYLD_ENVIRONMENT => LoadCommand{
+                .Dylinker = try GenericCommandWithData(macho.dylinker_command).read(allocator, stream.reader()),
+            },
+            macho.LC_ID_DYLIB, macho.LC_LOAD_WEAK_DYLIB, macho.LC_LOAD_DYLIB, macho.LC_REEXPORT_DYLIB => LoadCommand{
+                .Dylib = try GenericCommandWithData(macho.dylib_command).read(allocator, stream.reader()),
+            },
+            macho.LC_MAIN => LoadCommand{
+                .Main = try stream.reader().readStruct(macho.entry_point_command),
+            },
+            macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => LoadCommand{
+                .VersionMin = try stream.reader().readStruct(macho.version_min_command),
+            },
+            macho.LC_SOURCE_VERSION => LoadCommand{
+                .SourceVersion = try stream.reader().readStruct(macho.source_version_command),
+            },
+            macho.LC_FUNCTION_STARTS, macho.LC_DATA_IN_CODE, macho.LC_CODE_SIGNATURE => LoadCommand{
+                .LinkeditData = try stream.reader().readStruct(macho.linkedit_data_command),
+            },
+            else => LoadCommand{
+                .Unknown = try GenericCommandWithData(macho.load_command).read(allocator, stream.reader()),
+            },
+        };
+    }
+
+    /// Writes the command's bytes to `writer`, including any sections or trailing data.
+    pub fn write(self: LoadCommand, writer: anytype) !void {
+        return switch (self) {
+            .DyldInfoOnly => |x| writeStruct(x, writer),
+            .Symtab => |x| writeStruct(x, writer),
+            .Dysymtab => |x| writeStruct(x, writer),
+            .Main => |x| writeStruct(x, writer),
+            .VersionMin => |x| writeStruct(x, writer),
+            .SourceVersion => |x| writeStruct(x, writer),
+            .LinkeditData => |x| writeStruct(x, writer),
+            .Segment => |x| x.write(writer),
+            .Dylinker => |x| x.write(writer),
+            .Dylib => |x| x.write(writer),
+            .Unknown => |x| x.write(writer),
+        };
+    }
+
+    /// Returns the `cmd` field of the underlying command.
+    pub fn cmd(self: LoadCommand) u32 {
+        return switch (self) {
+            .DyldInfoOnly => |x| x.cmd,
+            .Symtab => |x| x.cmd,
+            .Dysymtab => |x| x.cmd,
+            .Main => |x| x.cmd,
+            .VersionMin => |x| x.cmd,
+            .SourceVersion => |x| x.cmd,
+            .LinkeditData => |x| x.cmd,
+            .Segment => |x| x.inner.cmd,
+            .Dylinker => |x| x.inner.cmd,
+            .Dylib => |x| x.inner.cmd,
+            .Unknown => |x| x.inner.cmd,
+        };
+    }
+
+    /// Returns the `cmdsize` field of the underlying command.
+    pub fn cmdsize(self: LoadCommand) u32 {
+        return switch (self) {
+            .DyldInfoOnly => |x| x.cmdsize,
+            .Symtab => |x| x.cmdsize,
+            .Dysymtab => |x| x.cmdsize,
+            .Main => |x| x.cmdsize,
+            .VersionMin => |x| x.cmdsize,
+            .SourceVersion => |x| x.cmdsize,
+            .LinkeditData => |x| x.cmdsize,
+            .Segment => |x| x.inner.cmdsize,
+            .Dylinker => |x| x.inner.cmdsize,
+            .Dylib => |x| x.inner.cmdsize,
+            .Unknown => |x| x.inner.cmdsize,
+        };
+    }
+
+    /// Releases memory owned by the payload; a no-op for plain struct variants.
+    pub fn deinit(self: *LoadCommand, allocator: *Allocator) void {
+        return switch (self.*) {
+            .Segment => |*x| x.deinit(allocator),
+            .Dylinker => |*x| x.deinit(allocator),
+            .Dylib => |*x| x.deinit(allocator),
+            .Unknown => |*x| x.deinit(allocator),
+            else => {},
+        };
+    }
+
+    /// Writes the in-memory bytes of `command` to `writer`.
+    fn writeStruct(command: anytype, writer: anytype) !void {
+        const slice = [1]@TypeOf(command){command};
+        return writer.writeAll(mem.sliceAsBytes(slice[0..1]));
+    }
+
+    /// Returns true when both commands have the same variant and equal payloads.
+    fn eql(self: LoadCommand, other: LoadCommand) bool {
+        if (@as(@TagType(LoadCommand), self) != @as(@TagType(LoadCommand), other)) return false;
+        return switch (self) {
+            .DyldInfoOnly => |x| eqlStruct(x, other.DyldInfoOnly),
+            .Symtab => |x| eqlStruct(x, other.Symtab),
+            .Dysymtab => |x| eqlStruct(x, other.Dysymtab),
+            .Main => |x| eqlStruct(x, other.Main),
+            .VersionMin => |x| eqlStruct(x, other.VersionMin),
+            .SourceVersion => |x| eqlStruct(x, other.SourceVersion),
+            .LinkeditData => |x| eqlStruct(x, other.LinkeditData),
+            .Segment => |x| x.eql(other.Segment),
+            .Dylinker => |x| x.eql(other.Dylinker),
+            .Dylib => |x| x.eql(other.Dylib),
+            .Unknown => |x| x.eql(other.Unknown),
+        };
+    }
+
+    /// Compares two values byte for byte.
+    fn eqlStruct(lhs: anytype, rhs: anytype) bool {
+        return mem.eql(u8, mem.asBytes(&lhs), mem.asBytes(&rhs));
+    }
+};
+
+/// `LC_SEGMENT_64` command together with the section headers that follow it.
+pub const SegmentCommand = struct {
+    inner: macho.segment_command_64,
+    /// Section headers keyed by section name with trailing NULs trimmed.
+    /// Keys are owned by this struct: they must be allocated with the allocator
+    /// later passed to `deinit`, which frees them.
+    sections: std.StringArrayHashMapUnmanaged(macho.section_64) = .{},
+
+    /// Reads the segment command and its `nsects` section headers from `reader`.
+    /// On failure everything allocated so far is released.
+    pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand {
+        const inner = try reader.readStruct(macho.segment_command_64);
+        var segment = SegmentCommand{
+            .inner = inner,
+        };
+        errdefer segment.deinit(alloc);
+        try segment.sections.ensureCapacity(alloc, inner.nsects);
+
+        var i: usize = 0;
+        while (i < inner.nsects) : (i += 1) {
+            const section = try reader.readStruct(macho.section_64);
+            // `section` lives on the stack and the map does not copy its keys, so the
+            // name has to be duplicated to stay valid after this function returns.
+            const name = try alloc.dupe(u8, mem.trimRight(u8, section.sectname[0..], &[_]u8{0}));
+            segment.sections.putAssumeCapacityNoClobber(name, section);
+        }
+
+        return segment;
+    }
+
+    /// Writes the segment command followed by its section headers as `sections.items()` yields them.
+    pub fn write(self: SegmentCommand, writer: anytype) !void {
+        const cmd = [1]macho.segment_command_64{self.inner};
+        try writer.writeAll(mem.sliceAsBytes(cmd[0..1]));
+
+        for (self.sections.items()) |entry| {
+            const section = [1]macho.section_64{entry.value};
+            try writer.writeAll(mem.sliceAsBytes(section[0..1]));
+        }
+    }
+
+    /// Frees the section names and the section table.
+    pub fn deinit(self: *SegmentCommand, alloc: *Allocator) void {
+        for (self.sections.items()) |entry| {
+            alloc.free(entry.key);
+        }
+        self.sections.deinit(alloc);
+    }
+
+    /// Returns true when both segments have equal headers and equal sections in the same order.
+    fn eql(self: SegmentCommand, other: SegmentCommand) bool {
+        if (!mem.eql(u8, mem.asBytes(&self.inner), mem.asBytes(&other.inner))) return false;
+        const lhs = self.sections.items();
+        const rhs = other.sections.items();
+        // Compare what is actually stored; `nsects` may disagree with the table length.
+        if (lhs.len != rhs.len) return false;
+        for (lhs) |entry, i| {
+            if (!mem.eql(u8, entry.key, rhs[i].key)) return false;
+            if (!mem.eql(u8, mem.asBytes(&entry.value), mem.asBytes(&rhs[i].value))) return false;
+        }
+        return true;
+    }
+};
+
+/// Returns a command type made of the fixed-size header `Cmd` plus the
+/// variable-length bytes that follow it within `cmdsize`.
+pub fn GenericCommandWithData(comptime Cmd: type) type {
+    return struct {
+        inner: Cmd,
+        /// This field remains undefined until `read` is called.
+        data: []u8 = undefined,
+
+        const Self = @This();
+
+        /// Reads `Cmd` and then the remaining `cmdsize - @sizeOf(Cmd)` bytes into `data`.
+        /// Returns `error.MalformedLoadCommand` if `cmdsize` is smaller than `Cmd`.
+        pub fn read(allocator: *Allocator, reader: anytype) !Self {
+            const inner = try reader.readStruct(Cmd);
+            // Guard the subtraction below against unsigned underflow.
+            if (inner.cmdsize < @sizeOf(Cmd)) return error.MalformedLoadCommand;
+            var data = try allocator.alloc(u8, inner.cmdsize - @sizeOf(Cmd));
+            errdefer allocator.free(data);
+            try reader.readNoEof(data[0..]);
+            return Self{
+                .inner = inner,
+                .data = data,
+            };
+        }
+
+        /// Writes `inner` followed by `data`.
+        pub fn write(self: Self, writer: anytype) !void {
+            const cmd = [1]Cmd{self.inner};
+            try writer.writeAll(mem.sliceAsBytes(cmd[0..1]));
+            try writer.writeAll(self.data);
+        }
+
+        /// Frees `data`; `data` must have been set by `read` or allocated with `allocator`.
+        pub fn deinit(self: *Self, allocator: *Allocator) void {
+            allocator.free(self.data);
+        }
+
+        /// Returns true when both headers and both data slices are bytewise equal.
+        pub fn eql(self: Self, other: Self) bool {
+            if (!mem.eql(u8, mem.asBytes(&self.inner), mem.asBytes(&other.inner))) return false;
+            return mem.eql(u8, self.data, other.data);
+        }
+    };
+}
+
+/// Decodes `buffer` with `LoadCommand.read` and expects the result to equal `expected`.
+fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
+    var stream = io.fixedBufferStream(buffer);
+    var given = try LoadCommand.read(allocator, stream.reader());
+    defer given.deinit(allocator);
+    testing.expect(expected.eql(given));
+}
+
+/// Encodes `cmd` into `buffer` and expects the leading bytes to equal `expected`.
+fn testWrite(buffer: []u8, cmd: LoadCommand, expected: []const u8) !void {
+    var stream = io.fixedBufferStream(buffer);
+    try cmd.write(stream.writer());
+    testing.expect(mem.eql(u8, expected, buffer[0..expected.len]));
+}
+
+test "read-write segment command" {
+    var gpa = testing.allocator;
+    const in_buffer = &[_]u8{
+        0x19, 0x00, 0x00, 0x00, // cmd
+        0x98, 0x00, 0x00, 0x00, // cmdsize
+        0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname
+        0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // vmaddr
+        0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // vmsize
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // fileoff
+        0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // filesize
+        0x07, 0x00, 0x00, 0x00, // maxprot
+        0x05, 0x00, 0x00, 0x00, // initprot
+        0x01, 0x00, 0x00, 0x00, // nsects
+        0x00, 0x00, 0x00, 0x00, // flags
+        0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // sectname
+        0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname
+        0x00, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // address
+        0xc0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // size
+        0x00, 0x40, 0x00, 0x00, // offset
+        0x02, 0x00, 0x00, 0x00, // alignment
+        0x00, 0x00, 0x00, 0x00, // reloff
+        0x00, 0x00, 0x00, 0x00, // nreloc
+        0x00, 0x04, 0x00, 0x80, // flags
+        0x00, 0x00, 0x00, 0x00, // reserved1
+        0x00, 0x00, 0x00, 0x00, // reserved2
+        0x00, 0x00, 0x00, 0x00, // reserved3
+    };
+    var cmd = SegmentCommand{
+        .inner = .{
+            .cmd = macho.LC_SEGMENT_64,
+            .cmdsize = 152,
+            .segname = makeName("__TEXT"),
+            .vmaddr = 4294967296,
+            .vmsize = 294912,
+            .fileoff = 0,
+            .filesize = 294912,
+            .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE,
+            .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ,
+            .nsects = 1,
+            .flags = 0,
+        },
+    };
+    defer cmd.deinit(gpa);
+    // `deinit` frees the keys, so the name is heap-allocated rather than a literal.
+    try cmd.sections.ensureCapacity(gpa, 1);
+    cmd.sections.putAssumeCapacityNoClobber(try gpa.dupe(u8, "__text"), .{
+        .sectname = makeName("__text"),
+        .segname = makeName("__TEXT"),
+        .addr = 4294983680,
+        .size = 448,
+        .offset = 16384,
+        .@"align" = 2,
+        .reloff = 0,
+        .nreloc = 0,
+        .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+        .reserved1 = 0,
+        .reserved2 = 0,
+        .reserved3 = 0,
+    });
+    try testRead(gpa, in_buffer[0..], LoadCommand{ .Segment = cmd });
+
+    var out_buffer: [in_buffer.len]u8 = undefined;
+    try testWrite(out_buffer[0..], LoadCommand{ .Segment = cmd }, in_buffer[0..]);
+}
+
+test "read-write generic command with data" {
+    var gpa = testing.allocator;
+    const in_buffer = &[_]u8{
+        0x0c, 0x00, 0x00, 0x00, // cmd
+        0x20, 0x00, 0x00, 0x00, // cmdsize
+        0x18, 0x00, 0x00, 0x00, // name
+        0x02, 0x00, 0x00, 0x00, // timestamp
+        0x00, 0x00, 0x00, 0x00, // current_version
+        0x00, 0x00, 0x00, 0x00, // compatibility_version
+        0x2f, 0x75, 0x73, 0x72, 0x00, 0x00, 0x00, 0x00, // data
+    };
+    var cmd = GenericCommandWithData(macho.dylib_command){
+        .inner = .{
+            .cmd = macho.LC_LOAD_DYLIB,
+            .cmdsize = 32,
+            .dylib = .{
+                .name = 24,
+                .timestamp = 2,
+                .current_version = 0,
+                .compatibility_version = 0,
+            },
+        },
+    };
+    cmd.data = try gpa.alloc(u8, 8);
+    defer gpa.free(cmd.data);
+    cmd.data[0] = 0x2f;
+    cmd.data[1] = 0x75;
+    cmd.data[2] = 0x73;
+    cmd.data[3] = 0x72;
+    cmd.data[4] = 0x0;
+    cmd.data[5] = 0x0;
+    cmd.data[6] = 0x0;
+    cmd.data[7] = 0x0;
+    try testRead(gpa, in_buffer[0..], LoadCommand{ .Dylib = cmd });
+
+    var out_buffer: [in_buffer.len]u8 = undefined;
+    try testWrite(out_buffer[0..], LoadCommand{ .Dylib = cmd }, in_buffer[0..]);
+}
+
+test "read-write C struct command" {
+    var gpa = testing.allocator;
+    const in_buffer = &[_]u8{
+        0x28, 0x00, 0x00, 0x80, // cmd
+        0x18, 0x00, 0x00, 0x00, // cmdsize
+        0x04, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // entryoff
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // stacksize
+    };
+    const cmd = .{
+        .cmd = macho.LC_MAIN,
+        .cmdsize = 24,
+        .entryoff = 16644,
+        .stacksize = 0,
+    };
+    try testRead(gpa, in_buffer[0..], LoadCommand{ .Main = cmd });
+
+    var out_buffer: [in_buffer.len]u8 = undefined;
+    try testWrite(out_buffer[0..], LoadCommand{ .Main = cmd }, in_buffer[0..]);
+}