lld: start unifying load command logic

This commit is contained in:
Jakub Konka
2020-11-30 19:42:08 +01:00
parent 0ef3071db6
commit de66b65677
3 changed files with 460 additions and 2 deletions

View File

@@ -813,7 +813,7 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
// Pad out space for code signature
const text_cmd = parser.load_commands.items[parser.text_cmd_index.?].Segment.inner;
const dataoff = @intCast(u32, mem.alignForward(parser.end_pos.?, @sizeOf(u64)));
const datasize = 0x1000;
const datasize = 0x400000;
const code_sig = macho.linkedit_data_command{
.cmd = macho.LC_CODE_SIGNATURE,
.cmdsize = @sizeOf(macho.linkedit_data_command),
@@ -1600,7 +1600,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
return vaddr;
}
fn makeStaticString(comptime bytes: []const u8) [16]u8 {
pub fn makeStaticString(comptime bytes: []const u8) [16]u8 {
var buf = [_]u8{0} ** 16;
if (bytes.len > buf.len) @compileError("string too long; max 16 bytes");
mem.copy(u8, buf[0..], bytes);
@@ -1994,3 +1994,10 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
const T = @TypeOf(a, b);
return std.math.mul(T, a, b) catch std.math.maxInt(T);
}
test "" {
    // TODO: referencing every declaration of this file unexpectedly fails at
    // link time with "_linkWithLLD symbol missing for arch":
    // _ = std.testing.refAllDecls(@This());
    // Until that is resolved, only pull in the load command module so that
    // its declarations are referenced.
    const commands = @import("MachO/commands.zig");
    _ = std.testing.refAllDecls(commands);
}

80
src/link/MachO/Parser.zig Normal file
View File

@@ -0,0 +1,80 @@
const Parser = @This();
const std = @import("std");
const fs = std.fs;
const io = std.io;
const mem = std.mem;
const macho = std.macho;
const Allocator = std.mem.Allocator;
const LoadCommand = @import("commands.zig").LoadCommand;
/// Allocator used for the load command list and handed to every load command
/// that `parse` reads; `deinit` releases through the same allocator.
allocator: *Allocator,
/// Mach-O header; null until `parse` has read it.
header: ?macho.mach_header_64 = null,
/// Load commands, in the order `parse` read them.
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
/// Index into `load_commands` of the segment named `__TEXT`; null if `parse`
/// has not seen one.
text_cmd_index: ?usize = null,
/// Index into `load_commands` of the segment named `__LINKEDIT`; null if
/// `parse` has not seen one.
linkedit_cmd_index: ?usize = null,
/// Byte offset of the `__LINKEDIT` segment command, counted from the start of
/// the Mach-O header.
linkedit_cmd_offset: ?u64 = null,
/// Byte offset just past the last load command that `parse` read, counted from
/// the start of the Mach-O header.
/// NOTE(review): presumably where a code signature load command is meant to be
/// appended -- confirm against the caller.
code_sig_cmd_offset: ?u64 = null,
/// `stroff + strsize` of the `LC_SYMTAB` command, i.e. the end of the string
/// table; null if no `LC_SYMTAB` was seen.
end_pos: ?u64 = null,
/// Creates an empty parser that allocates through `allocator`.
/// Every other field starts at its declared default; nothing is allocated
/// until `parse` is called.
pub fn init(allocator: *Allocator) Parser {
    const parser = Parser{ .allocator = allocator };
    return parser;
}
/// Reads a 64-bit Mach-O header followed by `ncmds` load commands from
/// `reader`, appending each command to `load_commands`.
///
/// While reading it records:
/// * the index of the `__TEXT` segment command,
/// * the index and header-relative byte offset of the `__LINKEDIT` segment
///   command,
/// * `end_pos`, the end of the string table described by `LC_SYMTAB`,
/// * `code_sig_cmd_offset`, the offset just past the last load command.
///
/// Returns any error produced by the reader, by the allocator or by
/// `LoadCommand.read`. Commands appended before the failure remain in
/// `load_commands` and are released by `deinit`.
pub fn parse(self: *Parser, reader: anytype) !void {
    const header = try reader.readStruct(macho.mach_header_64);
    self.header = header;

    const ncmds = header.ncmds;
    try self.load_commands.ensureCapacity(self.allocator, ncmds);

    // Offsets are counted from the start of the file header.
    var off: u64 = @sizeOf(macho.mach_header_64);

    // `ncmds` is a u32, so the counter has to be at least that wide; a u16
    // counter would overflow for headers that declare more than 65535 commands.
    var i: usize = 0;
    while (i < ncmds) : (i += 1) {
        const cmd = try LoadCommand.read(self.allocator, reader);
        switch (cmd.cmd()) {
            macho.LC_SEGMENT_64 => {
                const x = cmd.Segment;
                // `segname` is a fixed 16-byte field padded with zeroes.
                const segname = mem.trimRight(u8, x.inner.segname[0..], &[_]u8{0});
                if (mem.eql(u8, segname, "__LINKEDIT")) {
                    self.linkedit_cmd_index = i;
                    self.linkedit_cmd_offset = off;
                } else if (mem.eql(u8, segname, "__TEXT")) {
                    self.text_cmd_index = i;
                }
            },
            macho.LC_SYMTAB => {
                const x = cmd.Symtab;
                // Both operands are u32; widen before adding so that a string
                // table ending beyond 4 GiB does not overflow.
                self.end_pos = @as(u64, x.stroff) + x.strsize;
            },
            else => {},
        }
        off += cmd.cmdsize();
        self.load_commands.appendAssumeCapacity(cmd);
    }
    self.code_sig_cmd_offset = off;
    // TODO parse memory mapped segments
}
/// Convenience wrapper around `parse` that reads from `file` through its
/// `reader()`, starting at the file's current position.
pub fn parseFile(self: *Parser, file: fs.File) !void {
    const file_reader = file.reader();
    try self.parse(file_reader);
}
/// Releases every parsed load command and then the list that holds them,
/// using the allocator the parser was created with.
pub fn deinit(self: *Parser) void {
    const allocator = self.allocator;
    var idx: usize = 0;
    while (idx < self.load_commands.items.len) : (idx += 1) {
        self.load_commands.items[idx].deinit(allocator);
    }
    self.load_commands.deinit(allocator);
}

371
src/link/MachO/commands.zig Normal file
View File

@@ -0,0 +1,371 @@
const std = @import("std");
const fs = std.fs;
const io = std.io;
const mem = std.mem;
const macho = std.macho;
const testing = std.testing;
const Allocator = std.mem.Allocator;
const makeName = @import("../MachO.zig").makeStaticString;
/// A single Mach-O load command, decoded into a typed payload chosen by its
/// `cmd` value.
///
/// Variants that wrap a plain `macho.*` struct own no memory. `Segment`,
/// `Dylinker`, `Dylib` and `Unknown` own allocations made by `read` and must be
/// released with `deinit`.
pub const LoadCommand = union(enum) {
    Segment: SegmentCommand,
    DyldInfoOnly: macho.dyld_info_command,
    Symtab: macho.symtab_command,
    Dysymtab: macho.dysymtab_command,
    Dylinker: GenericCommandWithData(macho.dylinker_command),
    Dylib: GenericCommandWithData(macho.dylib_command),
    Main: macho.entry_point_command,
    VersionMin: macho.version_min_command,
    SourceVersion: macho.source_version_command,
    LinkeditData: macho.linkedit_data_command,
    Unknown: GenericCommandWithData(macho.load_command),

    /// Reads one load command from `reader`.
    ///
    /// The generic `load_command` header is read first; the whole command
    /// (`cmdsize` bytes, header included) is then staged in a temporary buffer
    /// and decoded from there according to `cmd`. For the fixed-size struct
    /// variants any bytes beyond the struct are discarded.
    ///
    /// Returns `error.MalformedLoadCommand` when `cmdsize` is smaller than the
    /// generic header, plus any reader, allocator or decoding error.
    pub fn read(allocator: *Allocator, reader: anytype) !LoadCommand {
        const header = try reader.readStruct(macho.load_command);

        // The staging buffer must at least hold the header that was already
        // consumed; without this check a bogus `cmdsize` would make the copy
        // and the slice below go out of bounds.
        if (header.cmdsize < @sizeOf(macho.load_command)) return error.MalformedLoadCommand;

        var buffer = try allocator.alloc(u8, header.cmdsize);
        defer allocator.free(buffer);

        // Re-assemble the complete command: header bytes first, then the rest.
        mem.copy(u8, buffer[0..], mem.asBytes(&header));
        try reader.readNoEof(buffer[@sizeOf(macho.load_command)..]);

        var stream = io.fixedBufferStream(buffer[0..]);
        return switch (header.cmd) {
            macho.LC_SEGMENT_64 => LoadCommand{
                .Segment = try SegmentCommand.read(allocator, stream.reader()),
            },
            macho.LC_DYLD_INFO, macho.LC_DYLD_INFO_ONLY => LoadCommand{
                .DyldInfoOnly = try stream.reader().readStruct(macho.dyld_info_command),
            },
            macho.LC_SYMTAB => LoadCommand{
                .Symtab = try stream.reader().readStruct(macho.symtab_command),
            },
            macho.LC_DYSYMTAB => LoadCommand{
                .Dysymtab = try stream.reader().readStruct(macho.dysymtab_command),
            },
            macho.LC_ID_DYLINKER, macho.LC_LOAD_DYLINKER, macho.LC_DYLD_ENVIRONMENT => LoadCommand{
                .Dylinker = try GenericCommandWithData(macho.dylinker_command).read(allocator, stream.reader()),
            },
            macho.LC_ID_DYLIB, macho.LC_LOAD_WEAK_DYLIB, macho.LC_LOAD_DYLIB, macho.LC_REEXPORT_DYLIB => LoadCommand{
                .Dylib = try GenericCommandWithData(macho.dylib_command).read(allocator, stream.reader()),
            },
            macho.LC_MAIN => LoadCommand{
                .Main = try stream.reader().readStruct(macho.entry_point_command),
            },
            macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => LoadCommand{
                .VersionMin = try stream.reader().readStruct(macho.version_min_command),
            },
            macho.LC_SOURCE_VERSION => LoadCommand{
                .SourceVersion = try stream.reader().readStruct(macho.source_version_command),
            },
            macho.LC_FUNCTION_STARTS, macho.LC_DATA_IN_CODE, macho.LC_CODE_SIGNATURE => LoadCommand{
                .LinkeditData = try stream.reader().readStruct(macho.linkedit_data_command),
            },
            // Anything not modelled explicitly is kept verbatim as header + raw bytes.
            else => LoadCommand{
                .Unknown = try GenericCommandWithData(macho.load_command).read(allocator, stream.reader()),
            },
        };
    }

    /// Serializes the command to `writer` in its in-memory byte layout.
    pub fn write(self: LoadCommand, writer: anytype) !void {
        return switch (self) {
            .DyldInfoOnly => |x| writeStruct(x, writer),
            .Symtab => |x| writeStruct(x, writer),
            .Dysymtab => |x| writeStruct(x, writer),
            .Main => |x| writeStruct(x, writer),
            .VersionMin => |x| writeStruct(x, writer),
            .SourceVersion => |x| writeStruct(x, writer),
            .LinkeditData => |x| writeStruct(x, writer),
            .Segment => |x| x.write(writer),
            .Dylinker => |x| x.write(writer),
            .Dylib => |x| x.write(writer),
            .Unknown => |x| x.write(writer),
        };
    }

    /// Returns the `cmd` field of the underlying command.
    pub fn cmd(self: LoadCommand) u32 {
        return switch (self) {
            .DyldInfoOnly => |x| x.cmd,
            .Symtab => |x| x.cmd,
            .Dysymtab => |x| x.cmd,
            .Main => |x| x.cmd,
            .VersionMin => |x| x.cmd,
            .SourceVersion => |x| x.cmd,
            .LinkeditData => |x| x.cmd,
            .Segment => |x| x.inner.cmd,
            .Dylinker => |x| x.inner.cmd,
            .Dylib => |x| x.inner.cmd,
            .Unknown => |x| x.inner.cmd,
        };
    }

    /// Returns the `cmdsize` field of the underlying command.
    pub fn cmdsize(self: LoadCommand) u32 {
        return switch (self) {
            .DyldInfoOnly => |x| x.cmdsize,
            .Symtab => |x| x.cmdsize,
            .Dysymtab => |x| x.cmdsize,
            .Main => |x| x.cmdsize,
            .VersionMin => |x| x.cmdsize,
            .SourceVersion => |x| x.cmdsize,
            .LinkeditData => |x| x.cmdsize,
            .Segment => |x| x.inner.cmdsize,
            .Dylinker => |x| x.inner.cmdsize,
            .Dylib => |x| x.inner.cmdsize,
            .Unknown => |x| x.inner.cmdsize,
        };
    }

    /// Releases memory owned by the payload; a no-op for the plain struct
    /// variants.
    pub fn deinit(self: *LoadCommand, allocator: *Allocator) void {
        return switch (self.*) {
            .Segment => |*x| x.deinit(allocator),
            .Dylinker => |*x| x.deinit(allocator),
            .Dylib => |*x| x.deinit(allocator),
            .Unknown => |*x| x.deinit(allocator),
            else => {},
        };
    }

    /// Writes the raw bytes of `command` to `writer`.
    fn writeStruct(command: anytype, writer: anytype) !void {
        return writer.writeAll(mem.asBytes(&command));
    }

    /// Returns true when both commands hold the same variant and their
    /// payloads compare equal.
    fn eql(self: LoadCommand, other: LoadCommand) bool {
        if (@as(@TagType(LoadCommand), self) != @as(@TagType(LoadCommand), other)) return false;
        return switch (self) {
            .DyldInfoOnly => |x| eqlStruct(x, other.DyldInfoOnly),
            .Symtab => |x| eqlStruct(x, other.Symtab),
            .Dysymtab => |x| eqlStruct(x, other.Dysymtab),
            .Main => |x| eqlStruct(x, other.Main),
            .VersionMin => |x| eqlStruct(x, other.VersionMin),
            .SourceVersion => |x| eqlStruct(x, other.SourceVersion),
            .LinkeditData => |x| eqlStruct(x, other.LinkeditData),
            .Segment => |x| x.eql(other.Segment),
            .Dylinker => |x| x.eql(other.Dylinker),
            .Dylib => |x| x.eql(other.Dylib),
            .Unknown => |x| x.eql(other.Unknown),
        };
    }

    /// Byte-wise comparison of two values.
    fn eqlStruct(lhs: anytype, rhs: anytype) bool {
        return mem.eql(u8, mem.asBytes(&lhs), mem.asBytes(&rhs));
    }
};
/// An `LC_SEGMENT_64` load command together with the section headers that
/// follow it, keyed by section name (trailing zero padding removed).
pub const SegmentCommand = struct {
    inner: macho.segment_command_64,
    sections: std.StringArrayHashMapUnmanaged(macho.section_64) = .{},

    /// Reads the segment command and its `nsects` section headers from
    /// `reader`.
    ///
    /// The keys of the returned map alias the `sectname` bytes of the values
    /// stored inside the map itself, so they stay valid until `sections` is
    /// modified or `deinit` is called.
    ///
    /// Returns `error.DuplicateSectionName` if two sections of the segment
    /// share a name, plus any reader or allocator error. Nothing is leaked on
    /// failure.
    pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand {
        const inner = try reader.readStruct(macho.segment_command_64);
        var segment = SegmentCommand{
            .inner = inner,
        };
        // A failure while reading a later section must not leak the map.
        errdefer segment.sections.deinit(alloc);
        try segment.sections.ensureCapacity(alloc, inner.nsects);

        var i: usize = 0;
        while (i < inner.nsects) : (i += 1) {
            const section = try reader.readStruct(macho.section_64);
            // This slice points into the loop-local `section`; it is only used
            // for the lookup and must not be kept as the stored key.
            const lookup_name = mem.trimRight(u8, section.sectname[0..], &[_]u8{0});
            const gop = segment.sections.getOrPutAssumeCapacity(lookup_name);
            if (gop.found_existing) return error.DuplicateSectionName;
            gop.entry.value = section;
            // Re-point the key at the copy that lives in the map's own storage.
            // Capacity was reserved above, so that storage does not move while
            // the remaining sections are inserted.
            gop.entry.key = mem.trimRight(u8, gop.entry.value.sectname[0..], &[_]u8{0});
        }
        return segment;
    }

    /// Writes the segment command followed by every section header, in map
    /// insertion order.
    pub fn write(self: SegmentCommand, writer: anytype) !void {
        try writer.writeAll(mem.asBytes(&self.inner));
        for (self.sections.items()) |entry| {
            try writer.writeAll(mem.asBytes(&entry.value));
        }
    }

    /// Frees the section map. Keys are not freed individually.
    pub fn deinit(self: *SegmentCommand, alloc: *Allocator) void {
        self.sections.deinit(alloc);
    }

    /// Returns true when both segment headers are byte-identical and both maps
    /// hold the same sections, under the same names, in the same order.
    fn eql(self: SegmentCommand, other: SegmentCommand) bool {
        if (!mem.eql(u8, mem.asBytes(&self.inner), mem.asBytes(&other.inner))) return false;
        const lhs = self.sections.items();
        const rhs = other.sections.items();
        // Compare what the maps actually contain rather than trusting `nsects`,
        // which may disagree with a hand-built map.
        if (lhs.len != rhs.len) return false;
        for (lhs) |entry, i| {
            if (!mem.eql(u8, entry.key, rhs[i].key)) return false;
            if (!mem.eql(u8, mem.asBytes(&entry.value), mem.asBytes(&rhs[i].value))) return false;
        }
        return true;
    }
};
/// Returns a load command type made of the fixed-size header struct `Cmd`
/// followed by a variable-length tail of raw bytes. `Cmd` must have a
/// `cmdsize` field giving the total size of the command, header included.
pub fn GenericCommandWithData(comptime Cmd: type) type {
    return struct {
        inner: Cmd,
        /// This field remains undefined until `read` is called.
        data: []u8 = undefined,

        const Self = @This();

        /// Reads `Cmd` from `reader`, then the remaining
        /// `cmdsize - @sizeOf(Cmd)` bytes into a freshly allocated `data`
        /// slice owned by the returned value (free it with `deinit`).
        ///
        /// Returns `error.MalformedLoadCommand` when `cmdsize` is smaller than
        /// `Cmd` itself, plus any reader or allocator error.
        pub fn read(allocator: *Allocator, reader: anytype) !Self {
            const inner = try reader.readStruct(Cmd);
            // Guard the subtraction below: an undersized `cmdsize` would
            // otherwise underflow.
            if (inner.cmdsize < @sizeOf(Cmd)) return error.MalformedLoadCommand;
            var data = try allocator.alloc(u8, inner.cmdsize - @sizeOf(Cmd));
            errdefer allocator.free(data);
            try reader.readNoEof(data[0..]);
            return Self{
                .inner = inner,
                .data = data,
            };
        }

        /// Writes the header struct followed by the raw data bytes.
        pub fn write(self: Self, writer: anytype) !void {
            try writer.writeAll(mem.asBytes(&self.inner));
            try writer.writeAll(self.data);
        }

        /// Frees `data`. Must only be called once `data` has been set, either
        /// by `read` or by the owner.
        pub fn deinit(self: *Self, allocator: *Allocator) void {
            allocator.free(self.data);
        }

        /// Returns true when the headers are byte-identical and the data
        /// slices have equal contents.
        pub fn eql(self: Self, other: Self) bool {
            if (!mem.eql(u8, mem.asBytes(&self.inner), mem.asBytes(&other.inner))) return false;
            return mem.eql(u8, self.data, other.data);
        }
    };
}
/// Test helper: decodes one load command from `buffer` and expects it to
/// compare equal to `expected`. The decoded command is released before
/// returning.
fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
    var fbs = io.fixedBufferStream(buffer);
    const source = fbs.reader();
    var actual = try LoadCommand.read(allocator, source);
    defer actual.deinit(allocator);
    const matches = expected.eql(actual);
    testing.expect(matches);
}
/// Test helper: serializes `cmd` into `buffer` and expects the leading
/// `expected.len` bytes of `buffer` to equal `expected`.
fn testWrite(buffer: []u8, cmd: LoadCommand, expected: []const u8) !void {
    var fbs = io.fixedBufferStream(buffer);
    const sink = fbs.writer();
    try cmd.write(sink);
    const produced = buffer[0..expected.len];
    testing.expect(mem.eql(u8, expected, produced));
}
test "read-write segment command" {
    const allocator = testing.allocator;

    // On-disk image of a `__TEXT` segment command carrying one `__text` section.
    const raw = &[_]u8{
        0x19, 0x00, 0x00, 0x00, // cmd
        0x98, 0x00, 0x00, 0x00, // cmdsize
        0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname
        0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // vmaddr
        0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // vmsize
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // fileoff
        0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // filesize
        0x07, 0x00, 0x00, 0x00, // maxprot
        0x05, 0x00, 0x00, 0x00, // initprot
        0x01, 0x00, 0x00, 0x00, // nsects
        0x00, 0x00, 0x00, 0x00, // flags
        0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // sectname
        0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname
        0x00, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // address
        0xc0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // size
        0x00, 0x40, 0x00, 0x00, // offset
        0x02, 0x00, 0x00, 0x00, // alignment
        0x00, 0x00, 0x00, 0x00, // reloff
        0x00, 0x00, 0x00, 0x00, // nreloc
        0x00, 0x04, 0x00, 0x80, // flags
        0x00, 0x00, 0x00, 0x00, // reserved1
        0x00, 0x00, 0x00, 0x00, // reserved2
        0x00, 0x00, 0x00, 0x00, // reserved3
    };

    // The same command, spelled out field by field.
    var segment = SegmentCommand{
        .inner = .{
            .cmd = macho.LC_SEGMENT_64,
            .cmdsize = 152,
            .segname = makeName("__TEXT"),
            .vmaddr = 4294967296,
            .vmsize = 294912,
            .fileoff = 0,
            .filesize = 294912,
            .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE,
            .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ,
            .nsects = 1,
            .flags = 0,
        },
    };
    const text_section = macho.section_64{
        .sectname = makeName("__text"),
        .segname = makeName("__TEXT"),
        .addr = 4294983680,
        .size = 448,
        .offset = 16384,
        .@"align" = 2,
        .reloff = 0,
        .nreloc = 0,
        .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
        .reserved1 = 0,
        .reserved2 = 0,
        .reserved3 = 0,
    };
    try segment.sections.putNoClobber(allocator, "__text", text_section);
    defer segment.deinit(allocator);

    const expected = LoadCommand{ .Segment = segment };

    // Decoding the bytes must yield the command ...
    try testRead(allocator, raw[0..], expected);

    // ... and encoding the command must reproduce the bytes.
    var scratch: [raw.len]u8 = undefined;
    try testWrite(scratch[0..], expected, raw[0..]);
}
test "read-write generic command with data" {
    const allocator = testing.allocator;

    // On-disk image of an `LC_LOAD_DYLIB` command with 8 bytes of trailing data.
    const raw = &[_]u8{
        0x0c, 0x00, 0x00, 0x00, // cmd
        0x20, 0x00, 0x00, 0x00, // cmdsize
        0x18, 0x00, 0x00, 0x00, // name
        0x02, 0x00, 0x00, 0x00, // timestamp
        0x00, 0x00, 0x00, 0x00, // current_version
        0x00, 0x00, 0x00, 0x00, // compatibility_version
        0x2f, 0x75, 0x73, 0x72, 0x00, 0x00, 0x00, 0x00, // data
    };

    var dylib = GenericCommandWithData(macho.dylib_command){
        .inner = .{
            .cmd = macho.LC_LOAD_DYLIB,
            .cmdsize = 32,
            .dylib = .{
                .name = 24,
                .timestamp = 2,
                .current_version = 0,
                .compatibility_version = 0,
            },
        },
    };
    // Trailing payload: "/usr" followed by four zero bytes.
    dylib.data = try allocator.alloc(u8, 8);
    defer allocator.free(dylib.data);
    mem.copy(u8, dylib.data, &[_]u8{ 0x2f, 0x75, 0x73, 0x72, 0x0, 0x0, 0x0, 0x0 });

    const expected = LoadCommand{ .Dylib = dylib };
    try testRead(allocator, raw[0..], expected);

    var scratch: [raw.len]u8 = undefined;
    try testWrite(scratch[0..], expected, raw[0..]);
}
test "read-write C struct command" {
    const allocator = testing.allocator;

    // On-disk image of an `LC_MAIN` command.
    const raw = &[_]u8{
        0x28, 0x00, 0x00, 0x80, // cmd
        0x18, 0x00, 0x00, 0x00, // cmdsize
        0x04, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // entryoff
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // stacksize
    };

    const expected = LoadCommand{
        .Main = .{
            .cmd = macho.LC_MAIN,
            .cmdsize = 24,
            .entryoff = 16644,
            .stacksize = 0,
        },
    };
    try testRead(allocator, raw[0..], expected);

    var scratch: [raw.len]u8 = undefined;
    try testWrite(scratch[0..], expected, raw[0..]);
}