link.File.Wasm: unify the string tables

Before, the wasm struct had a string table, the ZigObject had a string
table, and each Object had a string table.

Now there is just the one. This makes for more efficient use of memory
and simplifies logic, particularly with regard to linker state
serialization.

This commit additionally adds significantly more integer type safety.
This commit is contained in:
Andrew Kelley
2024-10-31 21:56:10 -07:00
parent bf9978a57a
commit e501cf51a0
5 changed files with 433 additions and 460 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -173,7 +173,7 @@ fn parseNameTable(gpa: Allocator, reader: anytype) ![]const u8 {
/// From a given file offset, starts reading for a file header.
/// When found, parses the object file into an `Object` and returns it.
pub fn parseObject(archive: Archive, wasm: *const Wasm, file_contents: []const u8, path: Path) !Object {
pub fn parseObject(archive: Archive, wasm: *Wasm, file_contents: []const u8, path: Path) !Object {
var fbs = std.io.fixedBufferStream(file_contents);
const header = try fbs.reader().readStruct(Header);

View File

@@ -63,10 +63,6 @@ comdat_info: []const Wasm.Comdat = &.{},
/// Represents non-synthetic sections that can essentially be mem-cpy'd into place
/// after performing relocations.
relocatable_data: std.AutoHashMapUnmanaged(RelocatableData.Tag, []RelocatableData) = .empty,
/// String table for all strings required by the object file, such as symbol names,
/// import name, module name and export names. Each string will be deduplicated
/// and returns an offset into the table.
string_table: Wasm.StringTable = .{},
/// Amount of functions in the `import` section.
imported_functions_count: u32 = 0,
/// Amount of globals in the `import` section.
@@ -126,7 +122,7 @@ pub const RelocatableData = struct {
/// When a max size is given, will only parse up to the given size,
/// else will read until the end of the file.
pub fn create(
wasm: *const Wasm,
wasm: *Wasm,
file_contents: []const u8,
path: Path,
archive_member_name: ?[]const u8,
@@ -187,7 +183,6 @@ pub fn deinit(object: *Object, gpa: Allocator) void {
}
}
object.relocatable_data.deinit(gpa);
object.string_table.deinit(gpa);
object.* = undefined;
}
@@ -242,9 +237,9 @@ fn checkLegacyIndirectFunctionTable(object: *Object, wasm: *const Wasm) !?Symbol
}
} else unreachable;
if (!std.mem.eql(u8, object.string_table.get(table_import.name), "__indirect_function_table")) {
if (table_import.name != wasm.preloaded_strings.__indirect_function_table) {
return diags.failParse(object.path, "non-indirect function table import '{s}' is missing a corresponding symbol", .{
object.string_table.get(table_import.name),
wasm.stringSlice(table_import.name),
});
}
@@ -264,10 +259,12 @@ const Parser = struct {
reader: std.io.FixedBufferStream([]const u8),
/// Object file we're building
object: *Object,
/// Read-only reference to the WebAssembly linker
wasm: *const Wasm,
/// Mutable so that the string table can be modified.
wasm: *Wasm,
fn parseObject(parser: *Parser, gpa: Allocator) anyerror!void {
const wasm = parser.wasm;
{
var magic_bytes: [4]u8 = undefined;
try parser.reader.reader().readNoEof(&magic_bytes);
@@ -316,7 +313,7 @@ const Parser = struct {
.type = .custom,
.data = debug_content.ptr,
.size = debug_size,
.index = try parser.object.string_table.put(gpa, name),
.index = @intFromEnum(try wasm.internString(name)),
.offset = 0, // debug sections only contain 1 entry, so no need to calculate offset
.section_index = section_index,
});
@@ -375,8 +372,8 @@ const Parser = struct {
};
import.* = .{
.module_name = try parser.object.string_table.put(gpa, module_name),
.name = try parser.object.string_table.put(gpa, name),
.module_name = try wasm.internString(module_name),
.name = try wasm.internString(name),
.kind = kind_value,
};
}
@@ -422,7 +419,7 @@ const Parser = struct {
defer gpa.free(name);
try reader.readNoEof(name);
exp.* = .{
.name = try parser.object.string_table.put(gpa, name),
.name = try wasm.internString(name),
.kind = try readEnum(std.wasm.ExternalKind, reader),
.index = try readLeb(u32, reader),
};
@@ -587,6 +584,7 @@ const Parser = struct {
/// `parser` is used to provide access to other sections that may be needed,
/// such as access to the `import` section to find the name of a symbol.
fn parseSubsection(parser: *Parser, gpa: Allocator, reader: anytype) !void {
const wasm = parser.wasm;
const sub_type = try leb.readUleb128(u8, reader);
log.debug("Found subsection: {s}", .{@tagName(@as(Wasm.SubsectionType, @enumFromInt(sub_type)))});
const payload_len = try leb.readUleb128(u32, reader);
@@ -680,7 +678,7 @@ const Parser = struct {
symbol.* = try parser.parseSymbol(gpa, reader);
log.debug("Found symbol: type({s}) name({s}) flags(0b{b:0>8})", .{
@tagName(symbol.tag),
parser.object.string_table.get(symbol.name),
wasm.stringSlice(symbol.name),
symbol.flags,
});
}
@@ -697,15 +695,18 @@ const Parser = struct {
if (parser.object.relocatable_data.get(.custom)) |custom_sections| {
for (custom_sections) |*data| {
if (!data.represented) {
const name = wasm.castToString(data.index);
try symbols.append(.{
.name = data.index,
.name = name,
.flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
.tag = .section,
.virtual_address = 0,
.index = data.section_index,
});
data.represented = true;
log.debug("Created synthetic custom section symbol for '{s}'", .{parser.object.string_table.get(data.index)});
log.debug("Created synthetic custom section symbol for '{s}'", .{
wasm.stringSlice(name),
});
}
}
}
@@ -719,7 +720,8 @@ const Parser = struct {
/// requires access to `Object` to find the name of a symbol when it's
/// an import and flag `WASM_SYM_EXPLICIT_NAME` is not set.
fn parseSymbol(parser: *Parser, gpa: Allocator, reader: anytype) !Symbol {
const tag = @as(Symbol.Tag, @enumFromInt(try leb.readUleb128(u8, reader)));
const wasm = parser.wasm;
const tag: Symbol.Tag = @enumFromInt(try leb.readUleb128(u8, reader));
const flags = try leb.readUleb128(u32, reader);
var symbol: Symbol = .{
.flags = flags,
@@ -735,7 +737,7 @@ const Parser = struct {
const name = try gpa.alloc(u8, name_len);
defer gpa.free(name);
try reader.readNoEof(name);
symbol.name = try parser.object.string_table.put(gpa, name);
symbol.name = try wasm.internString(name);
// Data symbols only have the following fields if the symbol is defined
if (symbol.isDefined()) {
@@ -750,7 +752,7 @@ const Parser = struct {
const section_data = parser.object.relocatable_data.get(.custom).?;
for (section_data) |*data| {
if (data.section_index == symbol.index) {
symbol.name = data.index;
symbol.name = wasm.castToString(data.index);
data.represented = true;
break;
}
@@ -765,7 +767,7 @@ const Parser = struct {
const name = try gpa.alloc(u8, name_len);
defer gpa.free(name);
try reader.readNoEof(name);
break :name try parser.object.string_table.put(gpa, name);
break :name try wasm.internString(name);
} else parser.object.findImport(symbol).name;
},
}

View File

@@ -8,8 +8,8 @@
/// Can contain any of the flags defined in `Flag`
flags: u32,
/// Symbol name, when the symbol is undefined the name will be taken from the import.
/// Note: This is an index into the string table.
name: u32,
/// Note: This is an index into the wasm string table.
name: wasm.String,
/// Index into the list of objects based on set `tag`
/// NOTE: This will be set to `undefined` when `tag` is `data`
/// and the symbol is undefined.
@@ -207,3 +207,4 @@ pub fn format(symbol: Symbol, comptime fmt: []const u8, options: std.fmt.FormatO
const std = @import("std");
const Symbol = @This();
const wasm = @import("../Wasm.zig");

View File

@@ -23,16 +23,14 @@ globals: std.ArrayListUnmanaged(std.wasm.Global) = .empty,
atom_types: std.AutoHashMapUnmanaged(Atom.Index, u32) = .empty,
/// List of all symbols generated by Zig code.
symbols: std.ArrayListUnmanaged(Symbol) = .empty,
/// Map from symbol name offset to their index into the `symbols` list.
global_syms: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .empty,
/// Map from symbol name to their index into the `symbols` list.
global_syms: std.AutoHashMapUnmanaged(Wasm.String, Symbol.Index) = .empty,
/// List of symbol indexes which are free to be used.
symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .empty,
/// Extra metadata about the linking section, such as alignment of segments and their name.
segment_info: std.ArrayListUnmanaged(Wasm.NamedSegment) = .empty,
/// List of indexes which contain a free slot in the `segment_info` list.
segment_free_list: std.ArrayListUnmanaged(u32) = .empty,
/// File encapsulated string table, used to deduplicate strings within the generated file.
string_table: StringTable = .{},
/// Map for storing anonymous declarations. Each anonymous decl maps to its Atom's index.
uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Atom.Index) = .empty,
/// List of atom indexes of functions that are generated by the backend.
@@ -88,13 +86,9 @@ const NavInfo = struct {
atom: Atom.Index = .null,
exports: std.ArrayListUnmanaged(Symbol.Index) = .empty,
fn @"export"(ni: NavInfo, zig_object: *const ZigObject, name: []const u8) ?Symbol.Index {
fn @"export"(ni: NavInfo, zo: *const ZigObject, name: Wasm.String) ?Symbol.Index {
for (ni.exports.items) |sym_index| {
const sym_name_index = zig_object.symbol(sym_index).name;
const sym_name = zig_object.string_table.getAssumeExists(sym_name_index);
if (std.mem.eql(u8, name, sym_name)) {
return sym_index;
}
if (zo.symbol(sym_index).name == name) return sym_index;
}
return null;
}
@@ -126,14 +120,14 @@ pub fn init(zig_object: *ZigObject, wasm: *Wasm) !void {
fn createStackPointer(zig_object: *ZigObject, wasm: *Wasm) !void {
const gpa = wasm.base.comp.gpa;
const sym_index = try zig_object.getGlobalSymbol(gpa, "__stack_pointer");
const sym_index = try zig_object.getGlobalSymbol(gpa, wasm.preloaded_strings.__stack_pointer);
const sym = zig_object.symbol(sym_index);
sym.index = zig_object.imported_globals_count;
sym.tag = .global;
const is_wasm32 = wasm.base.comp.root_mod.resolved_target.result.cpu.arch == .wasm32;
try zig_object.imports.putNoClobber(gpa, sym_index, .{
.name = sym.name,
.module_name = try zig_object.string_table.insert(gpa, wasm.host_name),
.module_name = wasm.host_name,
.kind = .{ .global = .{ .valtype = if (is_wasm32) .i32 else .i64, .mutable = true } },
});
zig_object.imported_globals_count += 1;
@@ -174,7 +168,7 @@ pub fn deinit(zig_object: *ZigObject, wasm: *Wasm) void {
atom.deinit(gpa);
}
}
if (zig_object.findGlobalSymbol("__zig_errors_len")) |sym_index| {
if (zig_object.global_syms.get(wasm.preloaded_strings.__zig_errors_len)) |sym_index| {
const atom_index = wasm.symbol_atom.get(.{ .file = .zig_object, .index = sym_index }).?;
wasm.getAtomPtr(atom_index).deinit(gpa);
}
@@ -206,7 +200,6 @@ pub fn deinit(zig_object: *ZigObject, wasm: *Wasm) void {
zig_object.segment_info.deinit(gpa);
zig_object.segment_free_list.deinit(gpa);
zig_object.string_table.deinit(gpa);
if (zig_object.dwarf) |*dwarf| {
dwarf.deinit();
}
@@ -219,7 +212,7 @@ pub fn deinit(zig_object: *ZigObject, wasm: *Wasm) void {
pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !Symbol.Index {
try zig_object.symbols.ensureUnusedCapacity(gpa, 1);
const sym: Symbol = .{
.name = std.math.maxInt(u32), // will be set after updateDecl as well as during atom creation for decls
.name = undefined, // will be set after updateDecl as well as during atom creation for decls
.flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
.tag = .undefined, // will be set after updateDecl
.index = std.math.maxInt(u32), // will be set during atom parsing
@@ -345,7 +338,7 @@ fn finishUpdateNav(
const atom_index = nav_info.atom;
const atom = wasm.getAtomPtr(atom_index);
const sym = zig_object.symbol(atom.sym_index);
sym.name = try zig_object.string_table.insert(gpa, nav.fqn.toSlice(ip));
sym.name = try wasm.internString(nav.fqn.toSlice(ip));
try atom.code.appendSlice(gpa, code);
atom.size = @intCast(code.len);
@@ -432,7 +425,7 @@ pub fn getOrCreateAtomForNav(
gop.value_ptr.* = .{ .atom = try wasm.createAtom(sym_index, .zig_object) };
const nav = ip.getNav(nav_index);
const sym = zig_object.symbol(sym_index);
sym.name = try zig_object.string_table.insert(gpa, nav.fqn.toSlice(ip));
sym.name = try wasm.internString(nav.fqn.toSlice(ip));
}
return gop.value_ptr.atom;
}
@@ -500,7 +493,7 @@ fn lowerConst(
const segment_name = try std.mem.concat(gpa, u8, &.{ ".rodata.", name });
errdefer gpa.free(segment_name);
zig_object.symbol(sym_index).* = .{
.name = try zig_object.string_table.insert(gpa, name),
.name = try wasm.internString(name),
.flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
.tag = .data,
.index = try zig_object.createDataSegment(
@@ -551,11 +544,10 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm: *Wasm, pt: Zcu.PerThrea
const slice_ty = Type.slice_const_u8_sentinel_0;
atom.alignment = slice_ty.abiAlignment(pt.zcu);
const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_name_table");
const segment_name = try gpa.dupe(u8, ".rodata.__zig_err_name_table");
const sym = zig_object.symbol(sym_index);
sym.* = .{
.name = sym_name,
.name = wasm.preloaded_strings.__zig_err_name_table,
.tag = .data,
.flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
.index = try zig_object.createDataSegment(gpa, segment_name, atom.alignment),
@@ -583,11 +575,10 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm: *Wasm, tid: Zcu.PerThrea
const names_atom_index = try wasm.createAtom(names_sym_index, .zig_object);
const names_atom = wasm.getAtomPtr(names_atom_index);
names_atom.alignment = .@"1";
const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_names");
const segment_name = try gpa.dupe(u8, ".rodata.__zig_err_names");
const names_symbol = zig_object.symbol(names_sym_index);
names_symbol.* = .{
.name = sym_name,
.name = wasm.preloaded_strings.__zig_err_names,
.tag = .data,
.flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
.index = try zig_object.createDataSegment(gpa, segment_name, names_atom.alignment),
@@ -661,14 +652,14 @@ pub fn addOrUpdateImport(
// For the import name, we use the decl's name, rather than the fully qualified name
// Also mangle the name when the lib name is set and not equal to "c" so imports with the same
// name but different module can be resolved correctly.
const mangle_name = lib_name != null and
!std.mem.eql(u8, lib_name.?, "c");
const full_name = if (mangle_name) full_name: {
break :full_name try std.fmt.allocPrint(gpa, "{s}|{s}", .{ name, lib_name.? });
} else name;
const mangle_name = if (lib_name) |n| !std.mem.eql(u8, n, "c") else false;
const full_name = if (mangle_name)
try std.fmt.allocPrint(gpa, "{s}|{s}", .{ name, lib_name.? })
else
name;
defer if (mangle_name) gpa.free(full_name);
const decl_name_index = try zig_object.string_table.insert(gpa, full_name);
const decl_name_index = try wasm.internString(full_name);
const sym: *Symbol = &zig_object.symbols.items[@intFromEnum(symbol_index)];
sym.setUndefined(true);
sym.setGlobal(true);
@@ -680,13 +671,11 @@ pub fn addOrUpdateImport(
if (type_index) |ty_index| {
const gop = try zig_object.imports.getOrPut(gpa, symbol_index);
const module_name = if (lib_name) |l_name| l_name else wasm.host_name;
if (!gop.found_existing) {
zig_object.imported_functions_count += 1;
}
const module_name = if (lib_name) |n| try wasm.internString(n) else wasm.host_name;
if (!gop.found_existing) zig_object.imported_functions_count += 1;
gop.value_ptr.* = .{
.module_name = try zig_object.string_table.insert(gpa, module_name),
.name = try zig_object.string_table.insert(gpa, name),
.module_name = module_name,
.name = try wasm.internString(name),
.kind = .{ .function = ty_index },
};
sym.tag = .function;
@@ -699,8 +688,7 @@ pub fn addOrUpdateImport(
/// such as an exported or imported symbol.
/// If the symbol does not yet exist, creates a new symbol instead
/// and then returns the index to it.
pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []const u8) !Symbol.Index {
const name_index = try zig_object.string_table.insert(gpa, name);
pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name_index: Wasm.String) !Symbol.Index {
const gop = try zig_object.global_syms.getOrPut(gpa, name_index);
if (gop.found_existing) {
return gop.value_ptr.*;
@@ -840,7 +828,8 @@ pub fn deleteExport(
.uav => @panic("TODO: implement Wasm linker code for exporting a constant value"),
};
const nav_info = zig_object.navs.getPtr(nav_index) orelse return;
if (nav_info.@"export"(zig_object, name.toSlice(&zcu.intern_pool))) |sym_index| {
const name_interned = wasm.getExistingString(name.toSlice(&zcu.intern_pool)).?;
if (nav_info.@"export"(zig_object, name_interned)) |sym_index| {
const sym = zig_object.symbol(sym_index);
nav_info.deleteExport(sym_index);
std.debug.assert(zig_object.global_syms.remove(sym.name));
@@ -886,14 +875,13 @@ pub fn updateExports(
continue;
}
const export_string = exp.opts.name.toSlice(ip);
const sym_index = if (nav_info.@"export"(zig_object, export_string)) |idx| idx else index: {
const export_name = try wasm.internString(exp.opts.name.toSlice(ip));
const sym_index = if (nav_info.@"export"(zig_object, export_name)) |idx| idx else index: {
const sym_index = try zig_object.allocateSymbol(gpa);
try nav_info.appendExport(gpa, sym_index);
break :index sym_index;
};
const export_name = try zig_object.string_table.insert(gpa, export_string);
const sym = zig_object.symbol(sym_index);
sym.setGlobal(true);
sym.setUndefined(false);
@@ -922,7 +910,7 @@ pub fn updateExports(
if (exp.opts.visibility == .hidden) {
sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
}
log.debug(" with name '{s}' - {}", .{ export_string, sym });
log.debug(" with name '{s}' - {}", .{ wasm.stringSlice(export_name), sym });
try zig_object.global_syms.put(gpa, export_name, sym_index);
try wasm.symbol_atom.put(gpa, .{ .file = .zig_object, .index = sym_index }, atom_index);
}
@@ -1014,7 +1002,7 @@ pub fn putOrGetFuncType(zig_object: *ZigObject, gpa: std.mem.Allocator, func_typ
/// This will only be generated if the symbol exists.
fn setupErrorsLen(zig_object: *ZigObject, wasm: *Wasm) !void {
const gpa = wasm.base.comp.gpa;
const sym_index = zig_object.findGlobalSymbol("__zig_errors_len") orelse return;
const sym_index = zig_object.global_syms.get(wasm.preloaded_strings.__zig_errors_len) orelse return;
const errors_len = 1 + wasm.base.comp.zcu.?.intern_pool.global_error_set.getNamesFromMainThread().len;
// overwrite existing atom if it already exists (maybe the error set has increased)
@@ -1045,11 +1033,6 @@ fn setupErrorsLen(zig_object: *ZigObject, wasm: *Wasm) !void {
try atom.code.writer(gpa).writeInt(u16, @intCast(errors_len), .little);
}
fn findGlobalSymbol(zig_object: *ZigObject, name: []const u8) ?Symbol.Index {
const offset = zig_object.string_table.getOffset(name) orelse return null;
return zig_object.global_syms.get(offset);
}
/// Initializes symbols and atoms for the debug sections
/// Initialization is only done when compiling Zig code.
/// When Zig is invoked as a linker instead, the atoms
@@ -1082,7 +1065,7 @@ pub fn createDebugSectionForIndex(zig_object: *ZigObject, wasm: *Wasm, index: *?
const atom = wasm.getAtomPtr(atom_index);
zig_object.symbols.items[sym_index] = .{
.tag = .section,
.name = try zig_object.string_table.put(gpa, name),
.name = try wasm.internString(name),
.index = 0,
.flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
};
@@ -1197,7 +1180,7 @@ pub fn createFunction(
const sym_index = try zig_object.allocateSymbol(gpa);
const sym = zig_object.symbol(sym_index);
sym.tag = .function;
sym.name = try zig_object.string_table.insert(gpa, symbol_name);
sym.name = try wasm.internString(symbol_name);
const type_index = try zig_object.putOrGetFuncType(gpa, func_ty);
sym.index = try zig_object.appendFunction(gpa, .{ .type_index = type_index });
@@ -1244,7 +1227,6 @@ const Dwarf = @import("../Dwarf.zig");
const InternPool = @import("../../InternPool.zig");
const Liveness = @import("../../Liveness.zig");
const Zcu = @import("../../Zcu.zig");
const StringTable = @import("../StringTable.zig");
const Symbol = @import("Symbol.zig");
const Type = @import("../../Type.zig");
const Value = @import("../../Value.zig");