diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index a1cd642fcc..ee8e638080 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1,39 +1,40 @@ const Wasm = @This(); +const build_options = @import("build_options"); + +const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); const std = @import("std"); - -const assert = std.debug.assert; -const build_options = @import("build_options"); -const builtin = @import("builtin"); -const codegen = @import("../codegen.zig"); -const dev = @import("../dev.zig"); -const fs = std.fs; -const leb = std.leb; -const link = @import("../link.zig"); -const lldMain = @import("../main.zig").lldMain; -const log = std.log.scoped(.link); -const gc_log = std.log.scoped(.gc); -const mem = std.mem; -const trace = @import("../tracy.zig").trace; -const wasi_libc = @import("../wasi_libc.zig"); - -const Air = @import("../Air.zig"); const Allocator = std.mem.Allocator; -const Archive = @import("Wasm/Archive.zig"); const Cache = std.Build.Cache; const Path = Cache.Path; +const assert = std.debug.assert; +const fs = std.fs; +const gc_log = std.log.scoped(.gc); +const leb = std.leb; +const log = std.log.scoped(.link); +const mem = std.mem; + +const Air = @import("../Air.zig"); +const Archive = @import("Wasm/Archive.zig"); const CodeGen = @import("../arch/wasm/CodeGen.zig"); const Compilation = @import("../Compilation.zig"); const Dwarf = @import("Dwarf.zig"); const InternPool = @import("../InternPool.zig"); const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; -const Zcu = @import("../Zcu.zig"); const Object = @import("Wasm/Object.zig"); const Symbol = @import("Wasm/Symbol.zig"); const Type = @import("../Type.zig"); const Value = @import("../Value.zig"); +const Zcu = @import("../Zcu.zig"); const ZigObject = @import("Wasm/ZigObject.zig"); +const codegen = @import("../codegen.zig"); +const dev = @import("../dev.zig"); +const link = @import("../link.zig"); +const 
lldMain = @import("../main.zig").lldMain; +const trace = @import("../tracy.zig").trace; +const wasi_libc = @import("../wasi_libc.zig"); base: link.File, /// Null-terminated strings, indexes have type String and string_table provides @@ -141,6 +142,9 @@ function_table: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .empty, /// All archive files that are lazy loaded. /// e.g. when an undefined symbol references a symbol from the archive. +/// None of this data is serialized to disk because it is trivially reloaded +/// from unchanged archive files on the next start of the compiler process, +/// or if those files have changed, the prelink phase needs to be restarted. lazy_archives: std.ArrayListUnmanaged(LazyArchive) = .empty, /// A map of global names to their symbol location @@ -283,12 +287,15 @@ pub const OptionalObjectId = enum(u16) { } }; +/// None of this data is serialized since it can be re-loaded from disk, or if +/// it has been changed, the data must be discarded. const LazyArchive = struct { path: Path, file_contents: []const u8, archive: Archive, fn deinit(la: *LazyArchive, gpa: Allocator) void { + la.archive.deinit(gpa); gpa.free(la.path.sub_path); gpa.free(la.file_contents); la.* = undefined; diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index 1ff36c5af8..c2078fa525 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -2,25 +2,25 @@ /// This is stored as a single slice of bytes, as the header-names /// point to the character index of a file name, rather than the index /// in the list. -long_file_names: []const u8, +/// Points into `file_contents`. +long_file_names: RelativeSlice, /// Parsed table of contents. /// Each symbol name points to a list of all definition /// sites within the current static archive. toc: Toc, +/// Key points into `LazyArchive` `file_contents`. +/// Value is allocated with gpa. 
const Toc = std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32));
-// Archive files start with the ARMAG identifying string. Then follows a
-// `struct Header', and as many bytes of member file data as its `size'
-// member indicates, for each member file.
-/// String that begins an archive file.
-const ARMAG: *const [SARMAG:0]u8 = "!\n";
-/// Size of that string.
-const SARMAG: u4 = 8;
+const ARMAG = std.elf.ARMAG;
+const ARFMAG = std.elf.ARFMAG;
-/// String in fmag at the end of each header.
-const ARFMAG: *const [2:0]u8 = "`\n";
+const RelativeSlice = struct {
+    off: u32,
+    len: u32,
+};
 const Header = extern struct {
     /// Member file name, sometimes / terminated.
@@ -70,31 +70,69 @@ const Header = extern struct {
 pub fn deinit(archive: *Archive, gpa: Allocator) void {
     deinitToc(gpa, &archive.toc);
-    gpa.free(archive.long_file_names);
     archive.* = undefined;
 }
 fn deinitToc(gpa: Allocator, toc: *Toc) void {
-    for (toc.keys()) |key| gpa.free(key);
     for (toc.values()) |*value| value.deinit(gpa);
     toc.deinit(gpa);
 }
 pub fn parse(gpa: Allocator, file_contents: []const u8) !Archive {
-    var fbs = std.io.fixedBufferStream(file_contents);
-    const reader = fbs.reader();
+    var pos: usize = 0; // byte offset from the start of `file_contents`
-    const magic = try reader.readBytesNoEof(SARMAG);
-    if (!mem.eql(u8, &magic, ARMAG)) return error.BadArchiveMagic;
+    if (!mem.startsWith(u8, file_contents, ARMAG)) return error.BadArchiveMagic; // length-safe: inputs shorter than the magic are rejected too
+    pos += ARMAG.len;
-    const header = try reader.readStruct(Header);
+    const header = mem.bytesAsValue(Header, file_contents[pos..][0..@sizeOf(Header)]);
     if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;
+    pos += @sizeOf(Header);
-    var toc = try parseTableOfContents(gpa, header, reader);
+    // The size field can have extra spaces padded in front as well as
+    // the end, so we trim those first before parsing the ASCII value.
+    const size_trimmed = mem.trim(u8, &header.size, " ");
+    const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);
+
+    const num_symbols = mem.readInt(u32, file_contents[pos..][0..4], .big);
+    pos += 4;
+
+    const symbol_positions_size = @sizeOf(u32) * num_symbols;
+    const symbol_positions_be = mem.bytesAsSlice(u32, file_contents[pos..][0..symbol_positions_size]);
+    pos += symbol_positions_size;
+
+    const sym_tab = file_contents[pos..][0 .. sym_tab_size - 4 - symbol_positions_size]; // what remains after the 4-byte count and the offsets table
+    pos += sym_tab.len;
+
+    var toc: Toc = .empty;
     errdefer deinitToc(gpa, &toc);
-    const long_file_names = try parseNameTable(gpa, reader);
-    errdefer gpa.free(long_file_names);
+    var sym_tab_pos: usize = 0;
+    for (0..num_symbols) |i| {
+        const name = mem.sliceTo(sym_tab[sym_tab_pos..], 0);
+        sym_tab_pos += name.len + 1;
+        if (name.len == 0) continue;
+
+        const gop = try toc.getOrPut(gpa, name); // key is a slice of `file_contents`; it is not copied
+        if (!gop.found_existing) gop.value_ptr.* = .empty;
+        try gop.value_ptr.append(gpa, switch (native_endian) {
+            .big => symbol_positions_be[i],
+            .little => @byteSwap(symbol_positions_be[i]),
+        });
+    }
+
+    const long_file_names: RelativeSlice = s: {
+        const sub_header = mem.bytesAsValue(Header, file_contents[pos..][0..@sizeOf(Header)]);
+        pos += @sizeOf(Header);
+
+        if (!mem.eql(u8, &sub_header.fmag, ARFMAG)) return error.BadHeaderDelimiter; // validate the name-table header itself, not the symbol-table header again
+        if (!mem.eql(u8, sub_header.name[0..2], "//")) return error.MissingTableName;
+        const table_size = try sub_header.parsedSize();
+
+        break :s .{
+            .off = @intCast(pos),
+            .len = table_size,
+        };
+    };
     return .{
         .toc = toc,
@@ -102,98 +140,36 @@ pub fn parse(gpa: Allocator, file_contents: []const u8) !Archive {
     };
 }
-fn parseName(archive: *const Archive, header: Header) ![]const u8 {
-    const name_or_index = try header.nameOrIndex();
-    switch (name_or_index) {
-        .name => |name| return name,
-        .index => |index| {
-            const name = mem.sliceTo(archive.long_file_names[index..], 0x0a);
-            return mem.trimRight(u8, name, "/");
-        },
-    }
-}
-
-fn
parseTableOfContents(gpa: Allocator, header: Header, reader: anytype) !Toc { - // size field can have extra spaces padded in front as well as the end, - // so we trim those first before parsing the ASCII value. - const size_trimmed = mem.trim(u8, &header.size, " "); - const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10); - - const num_symbols = try reader.readInt(u32, .big); - const symbol_positions = try gpa.alloc(u32, num_symbols); - defer gpa.free(symbol_positions); - for (symbol_positions) |*index| { - index.* = try reader.readInt(u32, .big); - } - - const sym_tab = try gpa.alloc(u8, sym_tab_size - 4 - (4 * num_symbols)); - defer gpa.free(sym_tab); - - reader.readNoEof(sym_tab) catch return error.IncompleteSymbolTable; - - var toc: Toc = .empty; - errdefer deinitToc(gpa, &toc); - - var i: usize = 0; - var pos: usize = 0; - while (i < num_symbols) : (i += 1) { - const string = mem.sliceTo(sym_tab[pos..], 0); - pos += string.len + 1; - if (string.len == 0) continue; - - const name = try gpa.dupe(u8, string); - errdefer gpa.free(name); - const gop = try toc.getOrPut(gpa, name); - if (gop.found_existing) { - gpa.free(name); - } else { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, symbol_positions[i]); - } - - return toc; -} - -fn parseNameTable(gpa: Allocator, reader: anytype) ![]const u8 { - const header: Header = try reader.readStruct(Header); - if (!mem.eql(u8, &header.fmag, ARFMAG)) { - return error.InvalidHeaderDelimiter; - } - if (!mem.eql(u8, header.name[0..2], "//")) { - return error.MissingTableName; - } - const table_size = try header.parsedSize(); - const long_file_names = try gpa.alloc(u8, table_size); - errdefer gpa.free(long_file_names); - try reader.readNoEof(long_file_names); - - return long_file_names; -} - /// From a given file offset, starts reading for a file header. /// When found, parses the object file into an `Object` and returns it. 
pub fn parseObject(archive: Archive, wasm: *Wasm, file_contents: []const u8, path: Path) !Object {
-    var fbs = std.io.fixedBufferStream(file_contents);
-    const header = try fbs.reader().readStruct(Header);
+    const header = mem.bytesAsValue(Header, file_contents[0..@sizeOf(Header)]); // the member header is read from the very start of `file_contents`
+    if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;
-    if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadArchiveHeaderDelimiter;
+    const name_or_index = try header.nameOrIndex();
+    const object_name = switch (name_or_index) {
+        .name => |name| name,
+        .index => |index| n: { // the name is stored in the long-file-name table, starting at byte `index`
+            const long_file_names = file_contents[archive.long_file_names.off..][0..archive.long_file_names.len]; // NOTE(review): `off` is recorded in `parse` from a position counted from the start of the archive, while the header above is read at `file_contents[0..]` - confirm both use the same base
+            const name = mem.sliceTo(long_file_names[index..], 0x0a); // the entry runs up to the next 0x0a ('\n') byte
+            break :n mem.trimRight(u8, name, "/"); // strip any trailing '/' from the entry
+        },
+    };
-    const object_name = try archive.parseName(header);
     const object_file_size = try header.parsedSize();
     return Object.create(wasm, file_contents[@sizeOf(Header)..][0..object_file_size], path, object_name);
 }
+const Archive = @This();
+
+const builtin = @import("builtin");
+const native_endian = builtin.cpu.arch.endian();
+
 const std = @import("std");
-const assert = std.debug.assert;
-const fs = std.fs;
-const log = std.log.scoped(.archive);
 const mem = std.mem;
+const Allocator = std.mem.Allocator;
 const Path = std.Build.Cache.Path;
-const Allocator = mem.Allocator;
-const Object = @import("Object.zig");
 const Wasm = @import("../Wasm.zig");
-
-const Archive = @This();
+const Object = @import("Object.zig");