* macho: unblock stage2 on 32bit platforms Unblocks compilation of stage2 on 32bit platforms, and fixes #7630. * Use libstd convention: reads - usize, writes - u64
575 lines
20 KiB
Zig
575 lines
20 KiB
Zig
//! Represents export trie used in MachO executables and dynamic libraries.
|
|
//! The purpose of an export trie is to encode as compactly as possible all
|
|
//! export symbols for the loader `dyld`.
|
|
//! The export trie encodes offset and other information using ULEB128
|
|
//! encoding, and is part of the __LINKEDIT segment.
|
|
//!
|
|
//! Description from loader.h:
|
|
//!
|
|
//! The symbols exported by a dylib are encoded in a trie. This is a compact
|
|
//! representation that factors out common prefixes. It also reduces LINKEDIT pages
|
|
//! in RAM because it encodes all information (name, address, flags) in one small,
|
|
//! contiguous range. The export area is a stream of nodes. The first node sequentially
|
|
//! is the start node for the trie.
|
|
//!
|
|
//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
|
|
//! information for the string so far. If there is no exported symbol, the node starts
|
|
//! with a zero byte. If there is exported info, it follows the length.
|
|
//!
|
|
//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
|
|
//! offset which is location of the content named by the symbol from the mach_header
|
|
//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
|
|
//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
|
|
//! is zero length, then the symbol is re-export from the specified dylib with the same name.
|
|
//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
|
|
//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
|
|
//! The resolver is used by lazy pointers and must be called to get the actual address to use.
|
|
//!
|
|
//! After the optional exported symbol information is a byte of how many edges (0-255) that
|
|
//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
|
|
//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
|
|
const Trie = @This();
|
|
|
|
const std = @import("std");
|
|
const mem = std.mem;
|
|
const leb = std.leb;
|
|
const log = std.log.scoped(.link);
|
|
const macho = std.macho;
|
|
const testing = std.testing;
|
|
const assert = std.debug.assert;
|
|
const Allocator = mem.Allocator;
|
|
|
|
pub const Node = struct {
|
|
base: *Trie,
|
|
|
|
/// Terminal info associated with this node.
|
|
/// If this node is not a terminal node, info is null.
|
|
terminal_info: ?struct {
|
|
/// Export flags associated with this exported symbol.
|
|
export_flags: u64,
|
|
/// VM address offset wrt to the section this symbol is defined against.
|
|
vmaddr_offset: u64,
|
|
} = null,
|
|
|
|
/// Offset of this node in the trie output byte stream.
|
|
trie_offset: ?u64 = null,
|
|
|
|
/// List of all edges originating from this node.
|
|
edges: std.ArrayListUnmanaged(Edge) = .{},
|
|
|
|
node_dirty: bool = true,
|
|
|
|
/// Edge connecting to nodes in the trie.
|
|
pub const Edge = struct {
|
|
from: *Node,
|
|
to: *Node,
|
|
label: []u8,
|
|
|
|
fn deinit(self: *Edge, allocator: *Allocator) void {
|
|
self.to.deinit(allocator);
|
|
allocator.destroy(self.to);
|
|
allocator.free(self.label);
|
|
self.from = undefined;
|
|
self.to = undefined;
|
|
self.label = undefined;
|
|
}
|
|
};
|
|
|
|
fn deinit(self: *Node, allocator: *Allocator) void {
|
|
for (self.edges.items) |*edge| {
|
|
edge.deinit(allocator);
|
|
}
|
|
self.edges.deinit(allocator);
|
|
}
|
|
|
|
/// Inserts a new node starting from `self`.
|
|
fn put(self: *Node, allocator: *Allocator, label: []const u8) !*Node {
|
|
// Check for match with edges from this node.
|
|
for (self.edges.items) |*edge| {
|
|
const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
|
|
if (match == 0) continue;
|
|
if (match == edge.label.len) return edge.to.put(allocator, label[match..]);
|
|
|
|
// Found a match, need to splice up nodes.
|
|
// From: A -> B
|
|
// To: A -> C -> B
|
|
const mid = try allocator.create(Node);
|
|
mid.* = .{ .base = self.base };
|
|
var to_label = try allocator.dupe(u8, edge.label[match..]);
|
|
allocator.free(edge.label);
|
|
const to_node = edge.to;
|
|
edge.to = mid;
|
|
edge.label = try allocator.dupe(u8, label[0..match]);
|
|
self.base.node_count += 1;
|
|
|
|
try mid.edges.append(allocator, .{
|
|
.from = mid,
|
|
.to = to_node,
|
|
.label = to_label,
|
|
});
|
|
|
|
return if (match == label.len) to_node else mid.put(allocator, label[match..]);
|
|
}
|
|
|
|
// Add a new node.
|
|
const node = try allocator.create(Node);
|
|
node.* = .{ .base = self.base };
|
|
self.base.node_count += 1;
|
|
|
|
try self.edges.append(allocator, .{
|
|
.from = self,
|
|
.to = node,
|
|
.label = try allocator.dupe(u8, label),
|
|
});
|
|
|
|
return node;
|
|
}
|
|
|
|
/// Recursively parses the node from the input byte stream.
|
|
fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!usize {
|
|
self.node_dirty = true;
|
|
const trie_offset = try reader.context.getPos();
|
|
self.trie_offset = trie_offset;
|
|
|
|
var nread: usize = 0;
|
|
|
|
const node_size = try leb.readULEB128(u64, reader);
|
|
if (node_size > 0) {
|
|
const export_flags = try leb.readULEB128(u64, reader);
|
|
// TODO Parse special flags.
|
|
assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
|
|
export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
|
|
|
|
const vmaddr_offset = try leb.readULEB128(u64, reader);
|
|
|
|
self.terminal_info = .{
|
|
.export_flags = export_flags,
|
|
.vmaddr_offset = vmaddr_offset,
|
|
};
|
|
}
|
|
|
|
const nedges = try reader.readByte();
|
|
self.base.node_count += nedges;
|
|
|
|
nread += (try reader.context.getPos()) - trie_offset;
|
|
|
|
var i: usize = 0;
|
|
while (i < nedges) : (i += 1) {
|
|
const edge_start_pos = try reader.context.getPos();
|
|
|
|
const label = blk: {
|
|
var label_buf = std.ArrayList(u8).init(allocator);
|
|
while (true) {
|
|
const next = try reader.readByte();
|
|
if (next == @as(u8, 0))
|
|
break;
|
|
try label_buf.append(next);
|
|
}
|
|
break :blk label_buf.toOwnedSlice();
|
|
};
|
|
|
|
const seek_to = try leb.readULEB128(u64, reader);
|
|
const return_pos = try reader.context.getPos();
|
|
|
|
nread += return_pos - edge_start_pos;
|
|
try reader.context.seekTo(seek_to);
|
|
|
|
const node = try allocator.create(Node);
|
|
node.* = .{ .base = self.base };
|
|
|
|
nread += try node.read(allocator, reader);
|
|
try self.edges.append(allocator, .{
|
|
.from = self,
|
|
.to = node,
|
|
.label = label,
|
|
});
|
|
try reader.context.seekTo(return_pos);
|
|
}
|
|
|
|
return nread;
|
|
}
|
|
|
|
/// Writes this node to a byte stream.
|
|
/// The children of this node *are* not written to the byte stream
|
|
/// recursively. To write all nodes to a byte stream in sequence,
|
|
/// iterate over `Trie.ordered_nodes` and call this method on each node.
|
|
/// This is one of the requirements of the MachO.
|
|
/// Panics if `finalize` was not called before calling this method.
|
|
fn write(self: Node, writer: anytype) !void {
|
|
assert(!self.node_dirty);
|
|
if (self.terminal_info) |info| {
|
|
// Terminal node info: encode export flags and vmaddr offset of this symbol.
|
|
var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
|
|
var info_stream = std.io.fixedBufferStream(&info_buf);
|
|
// TODO Implement for special flags.
|
|
assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
|
|
info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
|
|
try leb.writeULEB128(info_stream.writer(), info.export_flags);
|
|
try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset);
|
|
|
|
// Encode the size of the terminal node info.
|
|
var size_buf: [@sizeOf(u64)]u8 = undefined;
|
|
var size_stream = std.io.fixedBufferStream(&size_buf);
|
|
try leb.writeULEB128(size_stream.writer(), info_stream.pos);
|
|
|
|
// Now, write them to the output stream.
|
|
try writer.writeAll(size_buf[0..size_stream.pos]);
|
|
try writer.writeAll(info_buf[0..info_stream.pos]);
|
|
} else {
|
|
// Non-terminal node is delimited by 0 byte.
|
|
try writer.writeByte(0);
|
|
}
|
|
// Write number of edges (max legal number of edges is 256).
|
|
try writer.writeByte(@intCast(u8, self.edges.items.len));
|
|
|
|
for (self.edges.items) |edge| {
|
|
// Write edge label and offset to next node in trie.
|
|
try writer.writeAll(edge.label);
|
|
try writer.writeByte(0);
|
|
try leb.writeULEB128(writer, edge.to.trie_offset.?);
|
|
}
|
|
}
|
|
|
|
const FinalizeResult = struct {
|
|
/// Current size of this node in bytes.
|
|
node_size: u64,
|
|
|
|
/// True if the trie offset of this node in the output byte stream
|
|
/// would need updating; false otherwise.
|
|
updated: bool,
|
|
};
|
|
|
|
/// Updates offset of this node in the output byte stream.
|
|
fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult {
|
|
var stream = std.io.countingWriter(std.io.null_writer);
|
|
var writer = stream.writer();
|
|
|
|
var node_size: u64 = 0;
|
|
if (self.terminal_info) |info| {
|
|
try leb.writeULEB128(writer, info.export_flags);
|
|
try leb.writeULEB128(writer, info.vmaddr_offset);
|
|
try leb.writeULEB128(writer, stream.bytes_written);
|
|
} else {
|
|
node_size += 1; // 0x0 for non-terminal nodes
|
|
}
|
|
node_size += 1; // 1 byte for edge count
|
|
|
|
for (self.edges.items) |edge| {
|
|
const next_node_offset = edge.to.trie_offset orelse 0;
|
|
node_size += edge.label.len + 1;
|
|
try leb.writeULEB128(writer, next_node_offset);
|
|
}
|
|
|
|
const trie_offset = self.trie_offset orelse 0;
|
|
const updated = offset_in_trie != trie_offset;
|
|
self.trie_offset = offset_in_trie;
|
|
self.node_dirty = false;
|
|
node_size += stream.bytes_written;
|
|
|
|
return FinalizeResult{ .node_size = node_size, .updated = updated };
|
|
}
|
|
};
|
|
|
|
/// The root node of the trie.
|
|
root: ?*Node = null,
|
|
|
|
allocator: *Allocator,
|
|
|
|
/// If you want to access nodes ordered in DFS fashion,
|
|
/// you should call `finalize` first since the nodes
|
|
/// in this container are not guaranteed to not be stale
|
|
/// if more insertions took place after the last `finalize`
|
|
/// call.
|
|
ordered_nodes: std.ArrayListUnmanaged(*Node) = .{},
|
|
|
|
/// The size of the trie in bytes.
|
|
/// This value may be outdated if there were additional
|
|
/// insertions performed after `finalize` was called.
|
|
/// Call `finalize` before accessing this value to ensure
|
|
/// it is up-to-date.
|
|
size: u64 = 0,
|
|
|
|
/// Number of nodes currently in the trie.
|
|
node_count: usize = 0,
|
|
|
|
trie_dirty: bool = true,
|
|
|
|
pub fn init(allocator: *Allocator) Trie {
|
|
return .{ .allocator = allocator };
|
|
}
|
|
|
|
/// Export symbol that is to be placed in the trie.
|
|
pub const ExportSymbol = struct {
|
|
/// Name of the symbol.
|
|
name: []const u8,
|
|
|
|
/// Offset of this symbol's virtual memory address from the beginning
|
|
/// of the __TEXT segment.
|
|
vmaddr_offset: u64,
|
|
|
|
/// Export flags of this exported symbol.
|
|
export_flags: u64,
|
|
};
|
|
|
|
/// Insert a symbol into the trie, updating the prefixes in the process.
|
|
/// This operation may change the layout of the trie by splicing edges in
|
|
/// certain circumstances.
|
|
pub fn put(self: *Trie, symbol: ExportSymbol) !void {
|
|
try self.createRoot();
|
|
const node = try self.root.?.put(self.allocator, symbol.name);
|
|
node.terminal_info = .{
|
|
.vmaddr_offset = symbol.vmaddr_offset,
|
|
.export_flags = symbol.export_flags,
|
|
};
|
|
self.trie_dirty = true;
|
|
}
|
|
|
|
/// Finalizes this trie for writing to a byte stream.
|
|
/// This step performs multiple passes through the trie ensuring
|
|
/// there are no gaps after every `Node` is ULEB128 encoded.
|
|
/// Call this method before trying to `write` the trie to a byte stream.
|
|
pub fn finalize(self: *Trie) !void {
|
|
if (!self.trie_dirty) return;
|
|
|
|
self.ordered_nodes.shrinkRetainingCapacity(0);
|
|
try self.ordered_nodes.ensureCapacity(self.allocator, self.node_count);
|
|
|
|
comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) });
|
|
var fifo = Fifo.init();
|
|
try fifo.writeItem(self.root.?);
|
|
|
|
while (fifo.readItem()) |next| {
|
|
for (next.edges.items) |*edge| {
|
|
try fifo.writeItem(edge.to);
|
|
}
|
|
self.ordered_nodes.appendAssumeCapacity(next);
|
|
}
|
|
|
|
var more: bool = true;
|
|
while (more) {
|
|
self.size = 0;
|
|
more = false;
|
|
for (self.ordered_nodes.items) |node| {
|
|
const res = try node.finalize(self.size);
|
|
self.size += res.node_size;
|
|
if (res.updated) more = true;
|
|
}
|
|
}
|
|
|
|
self.trie_dirty = false;
|
|
}
|
|
|
|
const ReadError = error{
|
|
OutOfMemory,
|
|
EndOfStream,
|
|
Overflow,
|
|
};
|
|
|
|
/// Parse the trie from a byte stream.
|
|
pub fn read(self: *Trie, reader: anytype) ReadError!usize {
|
|
try self.createRoot();
|
|
return self.root.?.read(self.allocator, reader);
|
|
}
|
|
|
|
/// Write the trie to a byte stream.
|
|
/// Panics if the trie was not finalized using `finalize` before calling this method.
|
|
pub fn write(self: Trie, writer: anytype) !u64 {
|
|
assert(!self.trie_dirty);
|
|
var counting_writer = std.io.countingWriter(writer);
|
|
for (self.ordered_nodes.items) |node| {
|
|
try node.write(counting_writer.writer());
|
|
}
|
|
return counting_writer.bytes_written;
|
|
}
|
|
|
|
pub fn deinit(self: *Trie) void {
|
|
if (self.root) |root| {
|
|
root.deinit(self.allocator);
|
|
self.allocator.destroy(root);
|
|
}
|
|
self.ordered_nodes.deinit(self.allocator);
|
|
}
|
|
|
|
fn createRoot(self: *Trie) !void {
|
|
if (self.root == null) {
|
|
const root = try self.allocator.create(Node);
|
|
root.* = .{ .base = self };
|
|
self.root = root;
|
|
self.node_count += 1;
|
|
}
|
|
}
|
|
|
|
test "Trie node count" {
|
|
var gpa = testing.allocator;
|
|
var trie = Trie.init(gpa);
|
|
defer trie.deinit();
|
|
|
|
testing.expectEqual(trie.node_count, 0);
|
|
testing.expect(trie.root == null);
|
|
|
|
try trie.put(.{
|
|
.name = "_main",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expectEqual(trie.node_count, 2);
|
|
|
|
// Inserting the same node shouldn't update the trie.
|
|
try trie.put(.{
|
|
.name = "_main",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expectEqual(trie.node_count, 2);
|
|
|
|
try trie.put(.{
|
|
.name = "__mh_execute_header",
|
|
.vmaddr_offset = 0x1000,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expectEqual(trie.node_count, 4);
|
|
|
|
// Inserting the same node shouldn't update the trie.
|
|
try trie.put(.{
|
|
.name = "__mh_execute_header",
|
|
.vmaddr_offset = 0x1000,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expectEqual(trie.node_count, 4);
|
|
try trie.put(.{
|
|
.name = "_main",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expectEqual(trie.node_count, 4);
|
|
}
|
|
|
|
test "Trie basic" {
|
|
var gpa = testing.allocator;
|
|
var trie = Trie.init(gpa);
|
|
defer trie.deinit();
|
|
|
|
// root --- _st ---> node
|
|
try trie.put(.{
|
|
.name = "_st",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expect(trie.root.?.edges.items.len == 1);
|
|
testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st"));
|
|
|
|
{
|
|
// root --- _st ---> node --- art ---> node
|
|
try trie.put(.{
|
|
.name = "_start",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expect(trie.root.?.edges.items.len == 1);
|
|
|
|
const nextEdge = &trie.root.?.edges.items[0];
|
|
testing.expect(mem.eql(u8, nextEdge.label, "_st"));
|
|
testing.expect(nextEdge.to.edges.items.len == 1);
|
|
testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art"));
|
|
}
|
|
{
|
|
// root --- _ ---> node --- st ---> node --- art ---> node
|
|
// |
|
|
// | --- main ---> node
|
|
try trie.put(.{
|
|
.name = "_main",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
testing.expect(trie.root.?.edges.items.len == 1);
|
|
|
|
const nextEdge = &trie.root.?.edges.items[0];
|
|
testing.expect(mem.eql(u8, nextEdge.label, "_"));
|
|
testing.expect(nextEdge.to.edges.items.len == 2);
|
|
testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st"));
|
|
testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main"));
|
|
|
|
const nextNextEdge = &nextEdge.to.edges.items[0];
|
|
testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art"));
|
|
}
|
|
}
|
|
|
|
test "write Trie to a byte stream" {
|
|
var gpa = testing.allocator;
|
|
var trie = Trie.init(gpa);
|
|
defer trie.deinit();
|
|
|
|
try trie.put(.{
|
|
.name = "__mh_execute_header",
|
|
.vmaddr_offset = 0,
|
|
.export_flags = 0,
|
|
});
|
|
try trie.put(.{
|
|
.name = "_main",
|
|
.vmaddr_offset = 0x1000,
|
|
.export_flags = 0,
|
|
});
|
|
|
|
try trie.finalize();
|
|
try trie.finalize(); // Finalizing mulitple times is a nop subsequently unless we add new nodes.
|
|
|
|
const exp_buffer = [_]u8{
|
|
0x0, 0x1, // node root
|
|
0x5f, 0x0, 0x5, // edge '_'
|
|
0x0, 0x2, // non-terminal node
|
|
0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
|
|
0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
|
|
0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
|
|
0x2, 0x0, 0x0, 0x0, // terminal node
|
|
0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
|
|
};
|
|
|
|
var buffer = try gpa.alloc(u8, trie.size);
|
|
defer gpa.free(buffer);
|
|
var stream = std.io.fixedBufferStream(buffer);
|
|
{
|
|
const nwritten = try trie.write(stream.writer());
|
|
testing.expect(nwritten == trie.size);
|
|
testing.expect(mem.eql(u8, buffer, &exp_buffer));
|
|
}
|
|
{
|
|
// Writing finalized trie again should yield the same result.
|
|
try stream.seekTo(0);
|
|
const nwritten = try trie.write(stream.writer());
|
|
testing.expect(nwritten == trie.size);
|
|
testing.expect(mem.eql(u8, buffer, &exp_buffer));
|
|
}
|
|
}
|
|
|
|
test "parse Trie from byte stream" {
|
|
var gpa = testing.allocator;
|
|
|
|
const in_buffer = [_]u8{
|
|
0x0, 0x1, // node root
|
|
0x5f, 0x0, 0x5, // edge '_'
|
|
0x0, 0x2, // non-terminal node
|
|
0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
|
|
0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
|
|
0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
|
|
0x2, 0x0, 0x0, 0x0, // terminal node
|
|
0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
|
|
};
|
|
|
|
var in_stream = std.io.fixedBufferStream(&in_buffer);
|
|
var trie = Trie.init(gpa);
|
|
defer trie.deinit();
|
|
const nread = try trie.read(in_stream.reader());
|
|
|
|
testing.expect(nread == in_buffer.len);
|
|
|
|
try trie.finalize();
|
|
|
|
var out_buffer = try gpa.alloc(u8, trie.size);
|
|
defer gpa.free(out_buffer);
|
|
var out_stream = std.io.fixedBufferStream(out_buffer);
|
|
const nwritten = try trie.write(out_stream.writer());
|
|
|
|
testing.expect(nwritten == trie.size);
|
|
testing.expect(mem.eql(u8, &in_buffer, out_buffer));
|
|
}
|