elf: add garbage collection of sections

This commit is contained in:
Jakub Konka
2023-10-04 15:14:45 +02:00
parent 2ee221328f
commit 9a80ac0429
4 changed files with 199 additions and 25 deletions

View File

@@ -595,6 +595,7 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/Elf/ZigModule.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/eh_frame.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/file.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/gc.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/synthetic_sections.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"

View File

@@ -53,7 +53,7 @@ phdr_load_tls_data_index: ?u16 = null,
/// The index into the program headers of a PT_LOAD program header with TLS zerofill data.
phdr_load_tls_zerofill_index: ?u16 = null,
entry_addr: ?u64 = null,
entry_index: ?Symbol.Index = null,
page_size: u32,
default_sym_version: elf.Elf64_Versym,
@@ -665,7 +665,6 @@ pub fn populateMissingMetadata(self: *Elf) !void {
.alignment = self.page_size,
.flags = elf.PF_X | elf.PF_R | elf.PF_W,
});
self.entry_addr = null;
}
if (self.phdr_got_index == null) {
@@ -1283,9 +1282,29 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
// Any qualifing unresolved symbol will be upgraded to an absolute, weak
// symbol for potential resolution at load-time.
self.resolveSymbols();
try self.addLinkerDefinedSymbols();
self.markEhFrameAtomsDead();
self.markImportsExports();
// Look for entry address in objects if not set by the incremental compiler.
if (self.entry_index == null) {
const entry: ?[]const u8 = entry: {
if (self.base.options.entry) |entry| break :entry entry;
if (!self.isDynLib()) break :entry "_start";
break :entry null;
};
self.entry_index = if (entry) |name| self.globalByName(name) else null;
}
const gc_sections = self.base.options.gc_sections orelse false;
if (gc_sections) {
try gc.gcAtoms(self);
// if (self.base.options.print_gc_sections) {
// try gc.dumpPrunedAtoms(self);
// }
}
try self.addLinkerDefinedSymbols();
self.claimUnresolved();
// Scan and create missing synthetic entries such as GOT indirection.
@@ -1310,19 +1329,6 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
state_log.debug("{}", .{self.dumpState()});
}
// Look for entry address in objects if not set by the incremental compiler.
if (self.entry_addr == null) {
const entry: ?[]const u8 = entry: {
if (self.base.options.entry) |entry| break :entry entry;
if (!self.isDynLib()) break :entry "_start";
break :entry null;
};
self.entry_addr = if (entry) |name| entry_addr: {
const global_index = self.globalByName(name) orelse break :entry_addr null;
break :entry_addr self.symbol(global_index).value;
} else null;
}
// Beyond this point, everything has been allocated a virtual address and we can resolve
// the relocations, and commit objects to file.
if (self.zig_module_index) |index| {
@@ -1538,7 +1544,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
try self.writeAtoms();
try self.writeSyntheticSections();
if (self.entry_addr == null and self.base.options.effectiveOutputMode() == .Exe) {
if (self.entry_index == null and self.base.options.effectiveOutputMode() == .Exe) {
log.debug("flushing. no_entry_point_found = true", .{});
self.error_flags.no_entry_point_found = true;
} else {
@@ -2580,7 +2586,7 @@ fn writeHeader(self: *Elf) !void {
mem.writeInt(u32, hdr_buf[index..][0..4], 1, endian);
index += 4;
const e_entry = if (elf_type == .REL) 0 else self.entry_addr.?;
const e_entry = if (self.entry_index) |entry_index| self.symbol(entry_index).value else 0;
// TODO
const phdr_table_offset = if (self.phdr_table_index) |ind|
@@ -3199,13 +3205,7 @@ pub fn updateDeclExports(
}
const stb_bits: u8 = switch (exp.opts.linkage) {
.Internal => elf.STB_LOCAL,
.Strong => blk: {
const entry_name = self.base.options.entry orelse "_start";
if (mem.eql(u8, exp_name, entry_name)) {
self.entry_addr = decl_sym.value;
}
break :blk elf.STB_GLOBAL;
},
.Strong => elf.STB_GLOBAL,
.Weak => elf.STB_WEAK,
.LinkOnce => {
try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1);
@@ -4693,6 +4693,16 @@ fn calcNumIRelativeRelocs(self: *Elf) usize {
return count;
}
pub fn isCIdentifier(name: []const u8) bool {
if (name.len == 0) return false;
const first_c = name[0];
if (!std.ascii.isAlphabetic(first_c) and first_c != '_') return false;
for (name[1..]) |c| {
if (!std.ascii.isAlphanumeric(c) and c != '_') return false;
}
return true;
}
pub fn atom(self: *Elf, atom_index: Atom.Index) ?*Atom {
if (atom_index == 0) return null;
assert(atom_index < self.atoms.items.len);
@@ -5202,6 +5212,7 @@ const mem = std.mem;
const codegen = @import("../codegen.zig");
const eh_frame = @import("Elf/eh_frame.zig");
const gc = @import("Elf/gc.zig");
const glibc = @import("../glibc.zig");
const link = @import("../link.zig");
const lldMain = @import("../main.zig").lldMain;

161
src/link/Elf/gc.zig Normal file
View File

@@ -0,0 +1,161 @@
pub fn gcAtoms(elf_file: *Elf) !void {
var roots = std.ArrayList(*Atom).init(elf_file.base.allocator);
defer roots.deinit();
try collectRoots(&roots, elf_file);
mark(roots, elf_file);
prune(elf_file);
}
fn collectRoots(roots: *std.ArrayList(*Atom), elf_file: *Elf) !void {
if (elf_file.entry_index) |index| {
const global = elf_file.symbol(index);
try markSymbol(global, roots, elf_file);
}
for (elf_file.objects.items) |index| {
for (elf_file.file(index).?.object.globals()) |global_index| {
const global = elf_file.symbol(global_index);
if (global.file(elf_file)) |file| {
if (file.index() == index and global.flags.@"export")
try markSymbol(global, roots, elf_file);
}
}
}
for (elf_file.objects.items) |index| {
const object = elf_file.file(index).?.object;
for (object.atoms.items) |atom_index| {
const atom = elf_file.atom(atom_index) orelse continue;
if (!atom.flags.alive) continue;
const shdr = atom.inputShdr(elf_file);
const name = atom.name(elf_file);
const is_gc_root = blk: {
if (shdr.sh_flags & elf.SHF_GNU_RETAIN != 0) break :blk true;
if (shdr.sh_type == elf.SHT_NOTE) break :blk true;
if (shdr.sh_type == elf.SHT_PREINIT_ARRAY) break :blk true;
if (shdr.sh_type == elf.SHT_INIT_ARRAY) break :blk true;
if (shdr.sh_type == elf.SHT_FINI_ARRAY) break :blk true;
if (mem.startsWith(u8, name, ".ctors")) break :blk true;
if (mem.startsWith(u8, name, ".dtors")) break :blk true;
if (mem.startsWith(u8, name, ".init")) break :blk true;
if (mem.startsWith(u8, name, ".fini")) break :blk true;
if (Elf.isCIdentifier(name)) break :blk true;
break :blk false;
};
if (is_gc_root and markAtom(atom)) try roots.append(atom);
if (shdr.sh_flags & elf.SHF_ALLOC == 0) atom.flags.visited = true;
}
// Mark every atom referenced by CIE as alive.
for (object.cies.items) |cie| {
for (cie.relocs(elf_file)) |rel| {
const sym = elf_file.symbol(object.symbols.items[rel.r_sym()]);
try markSymbol(sym, roots, elf_file);
}
}
}
}
fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), elf_file: *Elf) !void {
const atom = sym.atom(elf_file) orelse return;
if (markAtom(atom)) try roots.append(atom);
}
fn markAtom(atom: *Atom) bool {
const already_visited = atom.flags.visited;
atom.flags.visited = true;
return atom.flags.alive and !already_visited;
}
fn markLive(atom: *Atom, elf_file: *Elf) void {
if (@import("build_options").enable_logging) track_live_level.incr();
assert(atom.flags.visited);
const object = atom.file(elf_file).?.object;
for (atom.fdes(elf_file)) |fde| {
for (fde.relocs(elf_file)[1..]) |rel| {
const target_sym = elf_file.symbol(object.symbols.items[rel.r_sym()]);
const target_atom = target_sym.atom(elf_file) orelse continue;
target_atom.flags.alive = true;
gc_track_live_log.debug("{}marking live atom({d})", .{ track_live_level, target_atom.atom_index });
if (markAtom(target_atom)) markLive(target_atom, elf_file);
}
}
for (atom.relocs(elf_file)) |rel| {
const target_sym = elf_file.symbol(object.symbols.items[rel.r_sym()]);
const target_atom = target_sym.atom(elf_file) orelse continue;
target_atom.flags.alive = true;
gc_track_live_log.debug("{}marking live atom({d})", .{ track_live_level, target_atom.atom_index });
if (markAtom(target_atom)) markLive(target_atom, elf_file);
}
}
fn mark(roots: std.ArrayList(*Atom), elf_file: *Elf) void {
for (roots.items) |root| {
gc_track_live_log.debug("root atom({d})", .{root.atom_index});
markLive(root, elf_file);
}
}
fn prune(elf_file: *Elf) void {
for (elf_file.objects.items) |index| {
for (elf_file.file(index).?.object.atoms.items) |atom_index| {
const atom = elf_file.atom(atom_index) orelse continue;
if (atom.flags.alive and !atom.flags.visited) {
atom.flags.alive = false;
atom.markFdesDead(elf_file);
}
}
}
}
pub fn dumpPrunedAtoms(elf_file: *Elf) !void {
const stderr = std.io.getStdErr().writer();
for (elf_file.objects.items) |index| {
for (elf_file.file(index).?.object.atoms.items) |atom_index| {
const atom = elf_file.atom(atom_index) orelse continue;
if (!atom.flags.alive)
// TODO should we simply print to stderr?
try stderr.print("link: removing unused section '{s}' in file '{}'\n", .{
atom.name(elf_file),
atom.file(elf_file).?.fmtPath(),
});
}
}
}
const Level = struct {
value: usize = 0,
fn incr(self: *@This()) void {
self.value += 1;
}
pub fn format(
self: *const @This(),
comptime unused_fmt_string: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = unused_fmt_string;
_ = options;
try writer.writeByteNTimes(' ', self.value);
}
};
var track_live_level: Level = .{};
const std = @import("std");
const assert = std.debug.assert;
const elf = std.elf;
const gc_track_live_log = std.log.scoped(.gc_track_live);
const mem = std.mem;
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
const Elf = @import("../Elf.zig");
const Symbol = @import("Symbol.zig");

View File

@@ -60,6 +60,7 @@ fn testGcSections(b: *Build, opts: Options) *Step {
\\}
);
obj.link_function_sections = true;
obj.link_data_sections = true;
obj.is_linking_libc = true;
obj.is_linking_libcpp = true;