macho: cleanup (lazy) binding info tables

This commit is contained in:
Jakub Konka
2020-12-13 21:26:57 +01:00
parent 5e913c9c2c
commit ae535111a4
3 changed files with 268 additions and 94 deletions

View File

@@ -810,47 +810,50 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) {
std.log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{});
std.log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{});
return error.NoSymbolTable;
return error.NoSymbolTableFound;
}
// Parse symbol and string tables.
try self.parseSymbolTable();
try self.parseStringTable();
std.debug.print("Undef symbols\n", .{});
for (self.undef_symbols.items) |sym| {
const name = self.string_table.items[sym.n_strx..];
const len = blk: {
var end: usize = 0;
while (true) {
if (name[end] == @as(u8, 0)) break;
end += 1;
}
break :blk end;
};
std.debug.print("name={},sym={}\n", .{ name[0..len], sym });
}
// Parse dyld info
var symbols_by_name = std.StringHashMap(u16).init(self.base.allocator);
defer symbols_by_name.deinit();
try symbols_by_name.ensureCapacity(@intCast(u32, self.undef_symbols.items.len));
try self.parseBindingInfoTable();
try self.parseLazyBindingInfoTable();
for (self.undef_symbols.items) |sym, i| {
const name = self.string_table.items[sym.n_strx..];
const len = blk: {
var end: usize = 0;
while (true) {
if (name[end] == @as(u8, 0)) break;
end += 1;
}
break :blk end;
};
symbols_by_name.putAssumeCapacityNoClobber(name[0..len], @intCast(u16, i));
// Update the dylib ordinals.
self.binding_info_table.dylib_ordinal = next_ordinal;
for (self.lazy_binding_info_table.symbols.items) |*symbol| {
symbol.dylib_ordinal = next_ordinal;
}
// Write update dyld info
const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
{
const size = self.binding_info_table.calcSize();
assert(dyld_info.bind_size == size);
var buffer = try self.base.allocator.alloc(u8, size);
defer self.base.allocator.free(buffer);
var stream = std.io.fixedBufferStream(buffer);
try self.binding_info_table.write(stream.writer());
try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
}
{
const size = self.lazy_binding_info_table.calcSize();
assert(dyld_info.lazy_bind_size == size);
var buffer = try self.base.allocator.alloc(u8, size);
defer self.base.allocator.free(buffer);
var stream = std.io.fixedBufferStream(buffer);
try self.lazy_binding_info_table.write(stream.writer());
try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
}
try self.parseBindingInfoTable(symbols_by_name);
try self.parseLazyBindingInfoTable(symbols_by_name);
// Write updated load commands and the header
try self.writeLoadCommands();
try self.writeHeader();
@@ -1952,6 +1955,68 @@ fn writeExportTrie(self: *MachO) !void {
self.cmd_table_dirty = true;
}
/// Serializes `binding_info_table` and writes it into the output file at the next free
/// offset within the `__LINKEDIT` segment, zero-padded up to an 8-byte boundary.
///
/// Side effects: updates `bind_off`/`bind_size` of the dyld info load command, advances
/// `linkedit_segment_next_offset` past the padded table, grows the `__LINKEDIT` segment's
/// `filesize` (and `vmsize` when it would otherwise be smaller) and marks the load
/// command table dirty.
fn writeBindingInfoTable(self: *MachO) !void {
    const size = self.binding_info_table.calcSize();
    const padded_size = mem.alignForward(size, @sizeOf(u64));

    // Allocate the padded size up front and zero it. This way the alignment padding is
    // emitted by the very same write as the table and nothing is ever written at or
    // beyond `bind_off + bind_size`.
    const buffer = try self.base.allocator.alloc(u8, padded_size);
    defer self.base.allocator.free(buffer);
    mem.set(u8, buffer, 0);

    // Restrict the stream to the computed size so a mismatch between `calcSize` and
    // `write` is still reported as an error instead of silently spilling into the padding.
    var stream = std.io.fixedBufferStream(buffer[0..size]);
    try self.binding_info_table.write(stream.writer());

    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
    const bind_size = @intCast(u32, padded_size);
    dyld_info.bind_off = self.linkedit_segment_next_offset.?;
    dyld_info.bind_size = bind_size;

    log.debug("writing binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.bind_off, dyld_info.bind_off + bind_size });

    try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
    self.linkedit_segment_next_offset = dyld_info.bind_off + dyld_info.bind_size;

    // Advance size of __LINKEDIT segment
    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
    linkedit.inner.filesize += dyld_info.bind_size;
    if (linkedit.inner.vmsize < linkedit.inner.filesize) {
        linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size);
    }
    self.cmd_table_dirty = true;
}
/// Serializes `lazy_binding_info_table` and writes it into the output file at the next
/// free offset within the `__LINKEDIT` segment, zero-padded up to an 8-byte boundary.
///
/// Side effects: updates `lazy_bind_off`/`lazy_bind_size` of the dyld info load command,
/// advances `linkedit_segment_next_offset` past the padded table, grows the `__LINKEDIT`
/// segment's `filesize` (and `vmsize` when it would otherwise be smaller) and marks the
/// load command table dirty.
fn writeLazyBindingInfoTable(self: *MachO) !void {
    const size = self.lazy_binding_info_table.calcSize();
    const padded_size = mem.alignForward(size, @sizeOf(u64));

    // Allocate the padded size up front and zero it. This way the alignment padding is
    // emitted by the very same write as the table and nothing is ever written at or
    // beyond `lazy_bind_off + lazy_bind_size`.
    const buffer = try self.base.allocator.alloc(u8, padded_size);
    defer self.base.allocator.free(buffer);
    mem.set(u8, buffer, 0);

    // Restrict the stream to the computed size so a mismatch between `calcSize` and
    // `write` is still reported as an error instead of silently spilling into the padding.
    var stream = std.io.fixedBufferStream(buffer[0..size]);
    try self.lazy_binding_info_table.write(stream.writer());

    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
    const bind_size = @intCast(u32, padded_size);
    dyld_info.lazy_bind_off = self.linkedit_segment_next_offset.?;
    dyld_info.lazy_bind_size = bind_size;

    log.debug("writing lazy binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + bind_size });

    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
    self.linkedit_segment_next_offset = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size;

    // Advance size of __LINKEDIT segment
    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
    linkedit.inner.filesize += dyld_info.lazy_bind_size;
    if (linkedit.inner.vmsize < linkedit.inner.filesize) {
        linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size);
    }
    self.cmd_table_dirty = true;
}
fn writeStringTable(self: *MachO) !void {
const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
const needed_size = self.string_table.items.len;
@@ -2122,7 +2187,7 @@ fn parseStringTable(self: *MachO) !void {
self.string_table.appendSliceAssumeCapacity(buffer);
}
fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void {
fn parseBindingInfoTable(self: *MachO) !void {
const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size);
defer self.base.allocator.free(buffer);
@@ -2130,10 +2195,10 @@ fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16))
assert(nread == buffer.len);
var stream = std.io.fixedBufferStream(buffer);
try self.binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader());
try self.binding_info_table.read(stream.reader(), self.base.allocator);
}
fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void {
fn parseLazyBindingInfoTable(self: *MachO) !void {
const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size);
defer self.base.allocator.free(buffer);
@@ -2141,5 +2206,17 @@ fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u1
assert(nread == buffer.len);
var stream = std.io.fixedBufferStream(buffer);
try self.lazy_binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader());
try self.lazy_binding_info_table.read(stream.reader(), self.base.allocator);
}
/// Calculates number of bytes in LEB128 encoding of value.
///
/// `value` must be a runtime integer (signed or unsigned, at least 8 bits wide).
/// Non-negative values are sized as unsigned LEB128: one byte per started group of
/// 7 significant bits, minimum one byte.
/// Negative values are sized as signed LEB128. Without this case an arithmetic right
/// shift of a negative value converges to -1 rather than 0 and the loop would never end.
pub fn sizeLEB128(value: anytype) usize {
    var res: usize = 0;

    if (value < 0) {
        // Work on the bitwise complement, which is non-negative. The signed encoding is
        // complete once no complemented bits remain AND bit 6 of the group just emitted
        // is clear in the complement, i.e. set in the real byte, so a decoder
        // sign-extends it back to a negative number.
        var rest = ~value;
        while (true) {
            const sign_bit_ok = (rest & 0x40) == 0;
            rest = rest >> 7;
            res += 1;
            if (rest == 0 and sign_bit_ok) break;
        }
        return res;
    }

    var v = value;
    while (true) {
        v = v >> 7;
        res += 1;
        if (v == 0) break;
    }
    return res;
}

View File

@@ -38,6 +38,7 @@ const macho = std.macho;
const testing = std.testing;
const assert = std.debug.assert;
const Allocator = mem.Allocator;
const sizeLEB128 = @import("../MachO.zig").sizeLEB128;
pub const Node = struct {
base: *Trie,
@@ -244,9 +245,9 @@ pub const Node = struct {
fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult {
var node_size: usize = 0;
if (self.terminal_info) |info| {
node_size += sizeULEB128Mem(info.export_flags);
node_size += sizeULEB128Mem(info.vmaddr_offset);
node_size += sizeULEB128Mem(node_size);
node_size += sizeLEB128(info.export_flags);
node_size += sizeLEB128(info.vmaddr_offset);
node_size += sizeLEB128(node_size);
} else {
node_size += 1; // 0x0 for non-terminal nodes
}
@@ -254,7 +255,7 @@ pub const Node = struct {
for (self.edges.items) |edge| {
const next_node_offset = edge.to.trie_offset orelse 0;
node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset);
node_size += edge.label.len + 1 + sizeLEB128(next_node_offset);
}
const trie_offset = self.trie_offset orelse 0;
@@ -264,18 +265,6 @@ pub const Node = struct {
return .{ .node_size = node_size, .updated = updated };
}
/// Returns how many bytes the ULEB128 encoding of `value` occupies.
/// Every encoding takes at least one byte; each further started group of
/// 7 significant bits adds one more.
fn sizeULEB128Mem(value: u64) usize {
    var byte_count: usize = 1;
    var remaining = value >> 7;
    while (remaining != 0) : (remaining >>= 7) {
        byte_count += 1;
    }
    return byte_count;
}
};
/// The root node of the trie.
@@ -380,9 +369,7 @@ pub fn read(self: *Trie, reader: anytype) ReadError!usize {
}
/// Write the trie to a byte stream.
/// Caller owns the memory and needs to free it.
/// Panics if the trie was not finalized using `finalize`
/// before calling this method.
/// Panics if the trie was not finalized using `finalize` before calling this method.
pub fn write(self: Trie, writer: anytype) !usize {
assert(!self.trie_dirty);
var counting_writer = std.io.countingWriter(writer);

View File

@@ -5,16 +5,22 @@ const mem = std.mem;
const assert = std.debug.assert;
const Allocator = mem.Allocator;
const sizeLEB128 = @import("../MachO.zig").sizeLEB128;
/// Table of binding info entries used to tell the dyld which
/// symbols to bind at loading time.
pub const BindingInfoTable = struct {
/// Id of the dynamic library where the specified entries can be found.
dylib_ordinal: i64 = 0,
binding_type: u8 = macho.BIND_TYPE_POINTER,
entries: std.ArrayListUnmanaged(Entry) = .{},
pub const Entry = struct {
/// Id of the symbol in the undef symbol table.
/// Can be null.
symbol: ?u16 = null,
/// Binding type; defaults to pointer type.
binding_type: u8 = macho.BIND_TYPE_POINTER,
symbols: std.ArrayListUnmanaged(Symbol) = .{},
pub const Symbol = struct {
/// Symbol name.
name: ?[]u8 = null,
/// Id of the segment where to bind this symbol to.
segment: u8,
@@ -24,14 +30,17 @@ pub const BindingInfoTable = struct {
};
pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void {
self.entries.deinit(allocator);
for (self.symbols.items) |*symbol| {
if (symbol.name) |name| {
allocator.free(name);
}
}
self.symbols.deinit(allocator);
}
pub fn read(self: *BindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void {
var name = std.ArrayList(u8).init(allocator);
defer name.deinit();
var entry: Entry = .{
/// Parse the binding info table from byte stream.
pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void {
var symbol: Symbol = .{
.segment = 0,
.offset = 0,
};
@@ -48,8 +57,8 @@ pub const BindingInfoTable = struct {
switch (opcode) {
macho.BIND_OPCODE_DO_BIND => {
try self.entries.append(allocator, entry);
entry = .{
try self.symbols.append(allocator, symbol);
symbol = .{
.segment = 0,
.offset = 0,
};
@@ -59,17 +68,17 @@ pub const BindingInfoTable = struct {
break;
},
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
name.shrinkRetainingCapacity(0);
var name = std.ArrayList(u8).init(allocator);
var next = try reader.readByte();
while (next != @as(u8, 0)) {
try name.append(next);
next = try reader.readByte();
}
entry.symbol = symbols_by_name.get(name.items[0..]);
symbol.name = name.toOwnedSlice();
},
macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
entry.segment = imm;
entry.offset = try leb.readILEB128(i64, reader);
symbol.segment = imm;
symbol.offset = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
assert(!dylib_ordinal_set);
@@ -90,15 +99,69 @@ pub const BindingInfoTable = struct {
assert(done);
}
pub fn write(self: BindingInfoTable, writer: anytype) !void {}
/// Serialize the binding info table as a stream of bind opcodes.
/// Emits, in order: the dylib ordinal opcode, the binding type opcode, then for every
/// symbol its optional name, its segment and offset and a DO_BIND opcode; the stream
/// is terminated by a single DONE opcode.
pub fn write(self: BindingInfoTable, writer: anytype) !void {
    const ordinal_bits = @bitCast(u64, self.dylib_ordinal);
    if (self.dylib_ordinal <= 0) {
        // Zero and negative ordinals go out as 4-bit special immediates.
        try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, ordinal_bits));
    } else if (self.dylib_ordinal <= 15) {
        // Small positive ordinals fit into the immediate nibble of the opcode.
        try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, ordinal_bits));
    } else {
        // Anything larger follows the opcode as a ULEB128 operand.
        try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
        try leb.writeULEB128(writer, ordinal_bits);
    }

    try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type));

    for (self.symbols.items) |sym| {
        if (sym.name) |sym_name| {
            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
            try writer.writeAll(sym_name);
            try writer.writeByte(0);
        }

        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, sym.segment));
        try leb.writeILEB128(writer, sym.offset);

        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
    }

    try writer.writeByte(macho.BIND_OPCODE_DONE);
}
/// Calculate size in bytes of this binding info table.
///
/// The size is measured by running `write` against a counting writer that discards its
/// output. This keeps the result in lock-step with the serializer; in particular the
/// symbol offsets, which `write` emits with `leb.writeILEB128`, are counted with their
/// signed encoding length (an unsigned-style count is too small whenever the signed form
/// needs an extra byte, and never terminates for negative offsets).
pub fn calcSize(self: *BindingInfoTable) usize {
    var counting_writer = std.io.countingWriter(std.io.null_writer);
    // The null writer has an empty error set, so serializing into it cannot fail.
    self.write(counting_writer.writer()) catch unreachable;
    return @intCast(usize, counting_writer.bytes_written);
}
};
/// Table of lazy binding info entries used to tell the dyld which
/// symbols to lazily bind at first load of a dylib.
pub const LazyBindingInfoTable = struct {
entries: std.ArrayListUnmanaged(Entry) = .{},
symbols: std.ArrayListUnmanaged(Symbol) = .{},
pub const Entry = struct {
/// Id of the symbol in the undef symbol table.
symbol: u16,
pub const Symbol = struct {
/// Symbol name.
name: ?[]u8 = null,
/// Offset of this symbol wrt to the segment id encoded in `segment`.
offset: i64,
@@ -113,15 +176,17 @@ pub const LazyBindingInfoTable = struct {
};
pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void {
self.entries.deinit(allocator);
for (self.symbols.items) |*symbol| {
if (symbol.name) |name| {
allocator.free(name);
}
}
self.symbols.deinit(allocator);
}
pub fn read(self: *LazyBindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void {
var name = std.ArrayList(u8).init(allocator);
defer name.deinit();
var entry: Entry = .{
.symbol = 0,
/// Parse the binding info table from byte stream.
pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void {
var symbol: Symbol = .{
.offset = 0,
.segment = 0,
.dylib_ordinal = 0,
@@ -138,35 +203,34 @@ pub const LazyBindingInfoTable = struct {
switch (opcode) {
macho.BIND_OPCODE_DO_BIND => {
try self.entries.append(allocator, entry);
try self.symbols.append(allocator, symbol);
},
macho.BIND_OPCODE_DONE => {
done = true;
entry = .{
.symbol = 0,
symbol = .{
.offset = 0,
.segment = 0,
.dylib_ordinal = 0,
};
},
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
name.shrinkRetainingCapacity(0);
var name = std.ArrayList(u8).init(allocator);
var next = try reader.readByte();
while (next != @as(u8, 0)) {
try name.append(next);
next = try reader.readByte();
}
entry.symbol = symbols_by_name.get(name.items[0..]) orelse unreachable;
symbol.name = name.toOwnedSlice();
},
macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
entry.segment = imm;
entry.offset = try leb.readILEB128(i64, reader);
symbol.segment = imm;
symbol.offset = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
entry.dylib_ordinal = imm;
symbol.dylib_ordinal = imm;
},
macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
entry.dylib_ordinal = try leb.readILEB128(i64, reader);
symbol.dylib_ordinal = try leb.readILEB128(i64, reader);
},
else => {
std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
@@ -176,5 +240,51 @@ pub const LazyBindingInfoTable = struct {
assert(done);
}
pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {}
/// Serialize the lazy binding info table as a stream of bind opcodes.
/// Every symbol is emitted as its own sequence: segment and offset, dylib ordinal,
/// optional name, then a DO_BIND opcode immediately followed by a DONE opcode.
pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {
    for (self.symbols.items) |sym| {
        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, sym.segment));
        try leb.writeILEB128(writer, sym.offset);

        const ordinal_bits = @bitCast(u64, sym.dylib_ordinal);
        if (sym.dylib_ordinal <= 0) {
            // Zero and negative ordinals go out as 4-bit special immediates.
            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, ordinal_bits));
        } else if (sym.dylib_ordinal <= 15) {
            // Small positive ordinals fit into the immediate nibble of the opcode.
            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, ordinal_bits));
        } else {
            // Anything larger follows the opcode as a ULEB128 operand.
            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
            try leb.writeULEB128(writer, ordinal_bits);
        }

        if (sym.name) |sym_name| {
            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
            try writer.writeAll(sym_name);
            try writer.writeByte(0);
        }

        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
        try writer.writeByte(macho.BIND_OPCODE_DONE);
    }
}
/// Calculate size in bytes of this lazy binding info table.
///
/// The size is measured by running `write` against a counting writer that discards its
/// output. This keeps the result in lock-step with the serializer; in particular the
/// symbol offsets, which `write` emits with `leb.writeILEB128`, are counted with their
/// signed encoding length (an unsigned-style count is too small whenever the signed form
/// needs an extra byte, and never terminates for negative offsets).
pub fn calcSize(self: *LazyBindingInfoTable) usize {
    var counting_writer = std.io.countingWriter(std.io.null_writer);
    // The null writer has an empty error set, so serializing into it cannot fail.
    self.write(counting_writer.writer()) catch unreachable;
    return @intCast(usize, counting_writer.bytes_written);
}
};