Merge pull request #12557 from Luukdegram/wasm-archive

wasm-linker: Improve archive linking
2022-08-22 08:38:41 +02:00
parent 9cf667a21b d4c56804df
commit 6c020cdb76
8 changed files with 139 additions and 63 deletions
--- a/lib/std/build.zig
+++ b/lib/std/build.zig
@@ -1898,11 +1898,10 @@ pub const LibExeObjStep = struct {
    pub fn runEmulatable(exe: *LibExeObjStep) *EmulatableRunStep {
        assert(exe.kind == .exe or exe.kind == .test_exe);

-        const run_step = EmulatableRunStep.create(exe.builder.fmt("run {s}", .{exe.step.name}), exe);
+        const run_step = EmulatableRunStep.create(exe.builder, exe.builder.fmt("run {s}", .{exe.step.name}), exe);
        if (exe.vcpkg_bin_path) |path| {
-            run_step.addPathDir(path);
+            RunStep.addPathDirInternal(&run_step.step, exe.builder, path);
        }
-
        return run_step;
    }

--- a/lib/std/build/RunStep.zig
+++ b/lib/std/build/RunStep.zig
@@ -101,7 +101,7 @@ pub fn addPathDir(self: *RunStep, search_path: []const u8) void {
 }

 /// For internal use only, users of `RunStep` should use `addPathDir` directly.
-fn addPathDirInternal(step: *Step, builder: *Builder, search_path: []const u8) void {
+pub fn addPathDirInternal(step: *Step, builder: *Builder, search_path: []const u8) void {
    const env_map = getEnvMapInternal(step, builder.allocator);

    const key = "PATH";
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -378,7 +378,7 @@ fn parseObjectFile(self: *Wasm, path: []const u8) !bool {
    const file = try fs.cwd().openFile(path, .{});
    errdefer file.close();

-    var object = Object.create(self.base.allocator, file, path) catch |err| switch (err) {
+    var object = Object.create(self.base.allocator, file, path, null) catch |err| switch (err) {
        error.InvalidMagicByte, error.NotObjectFile => return false,
        else => |e| return e,
    };
@@ -595,8 +595,8 @@ fn resolveSymbolsInArchives(self: *Wasm) !void {
            // Parse object and and resolve symbols again before we check remaining
            // undefined symbols.
            const object_file_index = @intCast(u16, self.objects.items.len);
-            const object = try self.objects.addOne(self.base.allocator);
-            object.* = try archive.parseObject(self.base.allocator, offset.items[0]);
+            var object = try archive.parseObject(self.base.allocator, offset.items[0]);
+            try self.objects.append(self.base.allocator, object);
            try self.resolveSymbolsInObject(object_file_index);

            // continue loop for any remaining undefined symbols that still exist
@@ -860,7 +860,8 @@ fn getGlobalType(self: *const Wasm, loc: SymbolLoc) wasm.GlobalType {
        if (is_undefined) {
            return obj.findImport(.global, symbol.index).kind.global;
        }
-        return obj.globals[symbol.index].global_type;
+        const import_global_count = obj.importedCountByKind(.global);
+        return obj.globals[symbol.index - import_global_count].global_type;
    }
    if (is_undefined) {
        return self.imports.get(loc).?.kind.global;
@@ -880,7 +881,9 @@ fn getFunctionSignature(self: *const Wasm, loc: SymbolLoc) wasm.Type {
            const ty_index = obj.findImport(.function, symbol.index).kind.function;
            return obj.func_types[ty_index];
        }
-        return obj.func_types[obj.functions[symbol.index].type_index];
+        const import_function_count = obj.importedCountByKind(.function);
+        const type_index = obj.functions[symbol.index - import_function_count].type_index;
+        return obj.func_types[type_index];
    }
    if (is_undefined) {
        const ty_index = self.imports.get(loc).?.kind.function;
--- a/src/link/Wasm/Archive.zig
+++ b/src/link/Wasm/Archive.zig
@@ -15,6 +15,12 @@ name: []const u8,

 header: ar_hdr = undefined,

+/// A list of long file names, delimited by a LF character (0x0a).
+/// This is stored as a single slice of bytes, as the header-names
+/// point to the character index of a file name, rather than the index
+/// in the list.
+long_file_names: []const u8 = undefined,
+
 /// Parsed table of contents.
 /// Each symbol name points to a list of all definition
 /// sites within the current static archive.
@@ -53,32 +59,33 @@ const ar_hdr = extern struct {
    /// Always contains ARFMAG.
    ar_fmag: [2]u8,

-    const NameOrLength = union(enum) {
-        Name: []const u8,
-        Length: u32,
+    const NameOrIndex = union(enum) {
+        name: []const u8,
+        index: u32,
    };
-    fn nameOrLength(self: ar_hdr) !NameOrLength {
-        const value = getValue(&self.ar_name);
+
+    fn nameOrIndex(archive: ar_hdr) !NameOrIndex {
+        const value = getValue(&archive.ar_name);
        const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive;
        const len = value.len;
        if (slash_index == len - 1) {
            // Name stored directly
-            return NameOrLength{ .Name = value };
+            return NameOrIndex{ .name = value };
        } else {
            // Name follows the header directly and its length is encoded in
            // the name field.
-            const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
-            return NameOrLength{ .Length = length };
+            const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
+            return NameOrIndex{ .index = index };
        }
    }

-    fn date(self: ar_hdr) !u64 {
-        const value = getValue(&self.ar_date);
+    fn date(archive: ar_hdr) !u64 {
+        const value = getValue(&archive.ar_date);
        return std.fmt.parseInt(u64, value, 10);
    }

-    fn size(self: ar_hdr) !u32 {
-        const value = getValue(&self.ar_size);
+    fn size(archive: ar_hdr) !u32 {
+        const value = getValue(&archive.ar_size);
        return std.fmt.parseInt(u32, value, 10);
    }

@@ -87,18 +94,19 @@ const ar_hdr = extern struct {
    }
 };

-pub fn deinit(self: *Archive, allocator: Allocator) void {
-    for (self.toc.keys()) |*key| {
+pub fn deinit(archive: *Archive, allocator: Allocator) void {
+    for (archive.toc.keys()) |*key| {
        allocator.free(key.*);
    }
-    for (self.toc.values()) |*value| {
+    for (archive.toc.values()) |*value| {
        value.deinit(allocator);
    }
-    self.toc.deinit(allocator);
+    archive.toc.deinit(allocator);
+    allocator.free(archive.long_file_names);
 }

-pub fn parse(self: *Archive, allocator: Allocator) !void {
-    const reader = self.file.reader();
+pub fn parse(archive: *Archive, allocator: Allocator) !void {
+    const reader = archive.file.reader();

    const magic = try reader.readBytesNoEof(SARMAG);
    if (!mem.eql(u8, &magic, ARMAG)) {
@@ -106,38 +114,31 @@ pub fn parse(self: *Archive, allocator: Allocator) !void {
        return error.NotArchive;
    }

-    self.header = try reader.readStruct(ar_hdr);
-    if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) {
-        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.ar_fmag });
+    archive.header = try reader.readStruct(ar_hdr);
+    if (!mem.eql(u8, &archive.header.ar_fmag, ARFMAG)) {
+        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, archive.header.ar_fmag });
        return error.NotArchive;
    }

-    try self.parseTableOfContents(allocator, reader);
+    try archive.parseTableOfContents(allocator, reader);
+    try archive.parseNameTable(allocator, reader);
 }

-fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 {
-    const name_or_length = try header.nameOrLength();
-    var name: []u8 = undefined;
-    switch (name_or_length) {
-        .Name => |n| {
-            name = try allocator.dupe(u8, n);
-        },
-        .Length => |len| {
-            var n = try allocator.alloc(u8, len);
-            defer allocator.free(n);
-            try reader.readNoEof(n);
-            const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len;
-            name = try allocator.dupe(u8, n[0..actual_len]);
+fn parseName(archive: *const Archive, header: ar_hdr) ![]const u8 {
+    const name_or_index = try header.nameOrIndex();
+    switch (name_or_index) {
+        .name => |name| return name,
+        .index => |index| {
+            const name = mem.sliceTo(archive.long_file_names[index..], 0x0a);
+            return mem.trimRight(u8, name, "/");
        },
    }
-    return name;
 }

-fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void {
-    log.debug("parsing table of contents for archive file '{s}'", .{self.name});
+fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype) !void {
    // size field can have extra spaces padded in front as well as the end,
    // so we trim those first before parsing the ASCII value.
-    const size_trimmed = std.mem.trim(u8, &self.header.ar_size, " ");
+    const size_trimmed = mem.trim(u8, &archive.header.ar_size, " ");
    const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);

    const num_symbols = try reader.readIntBig(u32);
@@ -157,7 +158,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !

    var i: usize = 0;
    while (i < sym_tab.len) {
-        const string = std.mem.sliceTo(sym_tab[i..], 0);
+        const string = mem.sliceTo(sym_tab[i..], 0);
        if (string.len == 0) {
            i += 1;
            continue;
@@ -165,7 +166,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !
        i += string.len;
        const name = try allocator.dupe(u8, string);
        errdefer allocator.free(name);
-        const gop = try self.toc.getOrPut(allocator, name);
+        const gop = try archive.toc.getOrPut(allocator, name);
        if (gop.found_existing) {
            allocator.free(name);
        } else {
@@ -175,33 +176,49 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !
    }
 }

+fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void {
+    const header: ar_hdr = try reader.readStruct(ar_hdr);
+    if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) {
+        log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag });
+        return error.MalformedArchive;
+    }
+    if (!mem.eql(u8, header.ar_name[0..2], "//")) {
+        log.err("invalid archive. Long name table missing", .{});
+        return error.MalformedArchive;
+    }
+    const table_size = try header.size();
+    const long_file_names = try allocator.alloc(u8, table_size);
+    errdefer allocator.free(long_file_names);
+    try reader.readNoEof(long_file_names);
+    archive.long_file_names = long_file_names;
+}
+
 /// From a given file offset, starts reading for a file header.
 /// When found, parses the object file into an `Object` and returns it.
-pub fn parseObject(self: Archive, allocator: Allocator, file_offset: u32) !Object {
-    try self.file.seekTo(file_offset);
-    const reader = self.file.reader();
+pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Object {
+    try archive.file.seekTo(file_offset);
+    const reader = archive.file.reader();
    const header = try reader.readStruct(ar_hdr);
-    const current_offset = try self.file.getPos();
-    try self.file.seekTo(0);
+    const current_offset = try archive.file.getPos();
+    try archive.file.seekTo(0);

    if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) {
        log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag });
        return error.MalformedArchive;
    }

-    const object_name = try parseName(allocator, header, reader);
-    defer allocator.free(object_name);
-
+    const object_name = try archive.parseName(header);
    const name = name: {
        var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
-        const path = try std.os.realpath(self.name, &buffer);
+        const path = try std.os.realpath(archive.name, &buffer);
        break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name });
    };
    defer allocator.free(name);

-    const object_file = try std.fs.cwd().openFile(self.name, .{});
+    const object_file = try std.fs.cwd().openFile(archive.name, .{});
    errdefer object_file.close();

+    const object_file_size = try header.size();
    try object_file.seekTo(current_offset);
-    return Object.create(allocator, object_file, name);
+    return Object.create(allocator, object_file, name, object_file_size);
 }
--- a/src/link/Wasm/Object.zig
+++ b/src/link/Wasm/Object.zig
@@ -105,14 +105,33 @@ pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadErro

 /// Initializes a new `Object` from a wasm object file.
 /// This also parses and verifies the object file.
-pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8) InitError!Object {
+/// When a max size is given, will only parse up to the given size,
+/// else will read until the end of the file.
+pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_size: ?usize) InitError!Object {
    var object: Object = .{
        .file = file,
        .name = try gpa.dupe(u8, name),
    };

    var is_object_file: bool = false;
-    try object.parse(gpa, file.reader(), &is_object_file);
+    const size = maybe_max_size orelse size: {
+        errdefer gpa.free(object.name);
+        const stat = try file.stat();
+        break :size @intCast(usize, stat.size);
+    };
+
+    const file_contents = try gpa.alloc(u8, size);
+    defer gpa.free(file_contents);
+    var file_reader = file.reader();
+    var read: usize = 0;
+    while (read < size) {
+        const n = try file_reader.read(file_contents[read..]);
+        std.debug.assert(n != 0);
+        read += n;
+    }
+    var fbs = std.io.fixedBufferStream(file_contents);
+
+    try object.parse(gpa, fbs.reader(), &is_object_file);
    errdefer object.deinit(gpa);
    if (!is_object_file) return error.NotObjectFile;

--- a/test/link.zig
+++ b/test/link.zig
@@ -47,6 +47,11 @@ fn addWasmCases(cases: *tests.StandaloneContext) void {
        .build_modes = true,
        .requires_stage2 = true,
    });
+
+    cases.addBuildFile("test/link/wasm/archive/build.zig", .{
+        .build_modes = true,
+        .requires_stage2 = true,
+    });
 }

 fn addMachOCases(cases: *tests.StandaloneContext) void {
--- a/test/link/wasm/archive/build.zig
+++ b/test/link/wasm/archive/build.zig
@@ -0,0 +1,27 @@
+const std = @import("std");
+const Builder = std.build.Builder;
+
+pub fn build(b: *Builder) void {
+    const mode = b.standardReleaseOptions();
+
+    const test_step = b.step("test", "Test");
+    test_step.dependOn(b.getInstallStep());
+
+    // The code in question will pull-in compiler-rt,
+    // and therefore link with its archive file.
+    const lib = b.addSharedLibrary("main", "main.zig", .unversioned);
+    lib.setBuildMode(mode);
+    lib.setTarget(.{ .cpu_arch = .wasm32, .os_tag = .freestanding });
+    lib.use_llvm = false;
+    lib.use_stage1 = false;
+    lib.use_lld = false;
+
+    const check = lib.checkObject(.wasm);
+    check.checkStart("Section import");
+    check.checkNext("entries 1"); // __truncsfhf2 should have been resolved, so only 1 import (compiler-rt's memcpy).
+
+    check.checkStart("Section custom");
+    check.checkNext("name __truncsfhf2"); // Ensure it was imported and resolved
+
+    test_step.dependOn(&check.step);
+}
--- a/test/link/wasm/archive/main.zig
+++ b/test/link/wasm/archive/main.zig
@@ -0,0 +1,6 @@
+export fn foo() void {
+    var a: f16 = 2.2;
+    // this will pull-in compiler-rt
+    var b = @trunc(a);
+    _ = b;
+}