Merge pull request #9171 from ziglang/zld-tapi

zld: add TAPI parser for linkers, ship libSystem.B.tbd with Zig, and parse tbd stubs on macOS
This commit is contained in:
Jakub Konka
2021-06-24 18:41:33 +02:00
committed by GitHub
14 changed files with 6543 additions and 155 deletions

View File

@@ -579,6 +579,7 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Stub.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig"
@@ -588,6 +589,11 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/aarch64.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/x86_64.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig"
"${CMAKE_SOURCE_DIR}/src/link/C/zig.h"
"${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin"
"${CMAKE_SOURCE_DIR}/src/liveness.zig"

3690
lib/libc/darwin/libSystem.B.tbd vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -760,10 +760,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
}
// Assume ld64 default: -search_paths_first
// Look in each directory for a dylib (tbd), and then for archive
// Look in each directory for a dylib, then for a tbd stub, and then for an archive
// TODO implement alternative: -search_dylibs_first
// TODO text-based API, or .tbd files.
const exts = &[_][]const u8{ "dylib", "a" };
const exts = &[_][]const u8{ "dylib", "tbd", "a" };
for (search_lib_names.items) |l_name| {
var found = false;
@@ -849,6 +848,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
try zld.link(positionals.items, full_out_path, .{
.libs = libs.items,
.rpaths = rpaths.items,
.libc_stub_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{
"libc", "darwin", "libSystem.B.tbd",
}),
});
break :outer;

View File

@@ -235,7 +235,10 @@ pub fn parseObject(self: Archive, offset: u32) !*Object {
}
pub fn isArchive(file: fs.File) !bool {
const magic = try file.reader().readBytesNoEof(Archive.SARMAG);
const magic = file.reader().readBytesNoEof(Archive.SARMAG) catch |err| switch (err) {
error.EndOfStream => return false,
else => |e| return e,
};
try file.seekTo(0);
return mem.eql(u8, &magic, Archive.ARMAG);
}

View File

@@ -1,6 +1,7 @@
const Dylib = @This();
const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.dylib);
const macho = std.macho;
@@ -27,7 +28,10 @@ id_cmd_index: ?u16 = null,
id: ?Id = null,
symbols: std.StringArrayHashMapUnmanaged(*Symbol) = .{},
/// Parsed symbol table represented as hash map of symbols'
/// names. We can and should defer creating *Symbols until
/// a symbol is referenced by an object file.
symbols: std.StringArrayHashMapUnmanaged(void) = .{},
pub const Id = struct {
name: []const u8,
@@ -50,9 +54,8 @@ pub fn deinit(self: *Dylib) void {
}
self.load_commands.deinit(self.allocator);
for (self.symbols.values()) |value| {
value.deinit(self.allocator);
self.allocator.destroy(value);
for (self.symbols.keys()) |key| {
self.allocator.free(key);
}
self.symbols.deinit(self.allocator);
@@ -169,23 +172,33 @@ pub fn parseSymbols(self: *Dylib) !void {
if (!(Symbol.isSect(sym) and Symbol.isExt(sym))) continue;
const name = try self.allocator.dupe(u8, sym_name);
const proxy = try self.allocator.create(Symbol.Proxy);
errdefer self.allocator.destroy(proxy);
proxy.* = .{
.base = .{
.@"type" = .proxy,
.name = name,
},
.dylib = self,
};
try self.symbols.putNoClobber(self.allocator, name, &proxy.base);
try self.symbols.putNoClobber(self.allocator, name, {});
}
}
pub fn isDylib(file: fs.File) !bool {
const header = try file.reader().readStruct(macho.mach_header_64);
const header = file.reader().readStruct(macho.mach_header_64) catch |err| switch (err) {
error.EndOfStream => return false,
else => |e| return e,
};
try file.seekTo(0);
return header.filetype == macho.MH_DYLIB;
}
/// Creates a proxy symbol for `sym_name` if this dylib exports it.
///
/// Returns null when `sym_name` is not present in `self.symbols`. Otherwise
/// returns the `base` of a newly allocated `Symbol.Proxy` that points back at
/// this dylib. The proxy and its copy of the name are both allocated with
/// `self.allocator`.
///
/// Fails only when an allocation fails; nothing is leaked in that case.
pub fn createProxy(self: *Dylib, sym_name: []const u8) !?*Symbol {
    if (!self.symbols.contains(sym_name)) return null;

    const name = try self.allocator.dupe(u8, sym_name);
    // `create` below can still fail; without this the duplicated name would leak.
    errdefer self.allocator.free(name);

    const proxy = try self.allocator.create(Symbol.Proxy);
    proxy.* = .{
        .base = .{
            .@"type" = .proxy,
            .name = name,
        },
        .file = .{ .dylib = self },
    };

    return &proxy.base;
}

View File

@@ -535,7 +535,10 @@ pub fn parseDataInCode(self: *Object) !void {
}
pub fn isObject(file: fs.File) !bool {
const header = try file.reader().readStruct(macho.mach_header_64);
const header = file.reader().readStruct(macho.mach_header_64) catch |err| switch (err) {
error.EndOfStream => return false,
else => |e| return e,
};
try file.seekTo(0);
return header.filetype == macho.MH_OBJECT;
}

130
src/link/MachO/Stub.zig Normal file
View File

@@ -0,0 +1,130 @@
const Stub = @This();
const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.stub);
const macho = std.macho;
const mem = std.mem;
const Allocator = mem.Allocator;
const Symbol = @import("Symbol.zig");
pub const LibStub = @import("../tapi.zig").LibStub;
/// Allocator used by `parse`, `createProxy` and `deinit` for everything
/// this stub allocates or frees.
allocator: *Allocator,
/// Target CPU architecture. `parse` unwraps it to pick the TAPI target
/// string, so it has to be set before `parse` is called.
arch: ?std.Target.Cpu.Arch = null,
/// Contents of the tbd file this stub was created from. Released in `deinit`.
lib_stub: ?LibStub = null,
/// Name of the stub. Freed with `allocator` in `deinit`, so it must have
/// been allocated with it.
name: ?[]const u8 = null,
/// Dylib ordinal of this library; null until one has been assigned.
ordinal: ?u16 = null,
/// Identity of the library described by the stub; filled in by `parse`.
id: ?Id = null,
/// Parsed symbol table represented as hash map of symbols'
/// names. We can and should defer creating *Symbols until
/// a symbol is referenced by an object file.
symbols: std.StringArrayHashMapUnmanaged(void) = .{},
/// Identity of the library a stub stands for. The four fields are the
/// values handed to `createLoadDylibCommand` when a load command is
/// emitted for the library.
pub const Id = struct {
/// Owned by the `Id`; released in `deinit`.
name: []const u8,
timestamp: u32,
current_version: u32,
compatibility_version: u32,
/// Frees `name`. `allocator` must be the allocator `name` was allocated with.
pub fn deinit(id: *Id, allocator: *Allocator) void {
allocator.free(id.name);
}
};
/// Returns a `Stub` that uses `allocator`, with every other field left at
/// its default value.
pub fn init(allocator: *Allocator) Stub {
    return Stub{ .allocator = allocator };
}
/// Releases everything the stub holds: the symbol table, the parsed tbd
/// contents, the name and the library id. Unset optional fields are skipped.
pub fn deinit(self: *Stub) void {
    const gpa = self.allocator;

    self.symbols.deinit(gpa);
    if (self.lib_stub) |*parsed| parsed.deinit();
    if (self.name) |owned_name| gpa.free(owned_name);
    if (self.id) |*dylib_id| dylib_id.deinit(gpa);
}
/// Populates `id` and `symbols` from the already loaded `lib_stub`.
///
/// The id is taken from the first document in the stub. Symbols are
/// collected from the `exports` and `reexports` sections of every document
/// that lists the current target; a section entry is only used when it lists
/// the target as well.
///
/// `arch` and `name` must be set before calling.
///
/// Errors:
/// - `error.EmptyStubFile` when no stub is loaded or it has no documents.
/// - `error.UnsupportedCpuArchitecture` when `arch` is neither `aarch64`
///   nor `x86_64`.
/// - `error.OutOfMemory` when an allocation fails.
pub fn parse(self: *Stub) !void {
    const lib_stub = self.lib_stub orelse return error.EmptyStubFile;
    if (lib_stub.inner.len == 0) return error.EmptyStubFile;

    log.debug("parsing shared library from stub '{s}'", .{self.name.?});

    // Resolve the target up front so an unsupported architecture is reported
    // as an error before anything has been allocated.
    const target_string: []const u8 = switch (self.arch.?) {
        .aarch64 => "arm64-macos",
        .x86_64 => "x86_64-macos",
        else => return error.UnsupportedCpuArchitecture,
    };

    const umbrella_lib = lib_stub.inner[0];
    self.id = .{
        .name = try self.allocator.dupe(u8, umbrella_lib.install_name),
        // TODO parse from the stub
        .timestamp = 2,
        .current_version = 0,
        .compatibility_version = 0,
    };

    // Symbol names are stored as-is, not copied: the keys are the very
    // slices found in `lib_stub`.
    for (lib_stub.inner) |stub| {
        if (!hasTarget(stub.targets, target_string)) continue;

        if (stub.exports) |exports| {
            for (exports) |exp| {
                if (!hasTarget(exp.targets, target_string)) continue;

                for (exp.symbols) |sym_name| {
                    // The table is a set, so inserting a name that is
                    // already present changes nothing.
                    try self.symbols.put(self.allocator, sym_name, {});
                }
            }
        }

        if (stub.reexports) |reexports| {
            for (reexports) |reexp| {
                if (!hasTarget(reexp.targets, target_string)) continue;

                for (reexp.symbols) |sym_name| {
                    try self.symbols.put(self.allocator, sym_name, {});
                }
            }
        }
    }
}
/// Reports whether `target` occurs in `targets`, compared byte for byte.
fn hasTarget(targets: []const []const u8, target: []const u8) bool {
    return for (targets) |candidate| {
        if (mem.eql(u8, candidate, target)) break true;
    } else false;
}
/// Creates a proxy symbol for `sym_name` if this stub lists it.
///
/// Returns null when `sym_name` is not present in `self.symbols`. Otherwise
/// returns the `base` of a newly allocated `Symbol.Proxy` that points back at
/// this stub. The proxy and its copy of the name are both allocated with
/// `self.allocator`.
///
/// Fails only when an allocation fails; nothing is leaked in that case.
pub fn createProxy(self: *Stub, sym_name: []const u8) !?*Symbol {
    if (!self.symbols.contains(sym_name)) return null;

    const name = try self.allocator.dupe(u8, sym_name);
    // `create` below can still fail; without this the duplicated name would leak.
    errdefer self.allocator.free(name);

    const proxy = try self.allocator.create(Symbol.Proxy);
    proxy.* = .{
        .base = .{
            .@"type" = .proxy,
            .name = name,
        },
        .file = .{ .stub = self },
    };

    return &proxy.base;
}

View File

@@ -7,6 +7,7 @@ const mem = std.mem;
const Allocator = mem.Allocator;
const Dylib = @import("Dylib.zig");
const Object = @import("Object.zig");
const Stub = @import("Stub.zig");
pub const Type = enum {
regular,
@@ -84,10 +85,22 @@ pub const Regular = struct {
pub const Proxy = struct {
base: Symbol,
/// Dylib where to locate this symbol.
dylib: ?*Dylib = null,
/// Dylib or stub where to locate this symbol.
/// null means self-reference.
file: ?union(enum) {
dylib: *Dylib,
stub: *Stub,
} = null,
pub const base_type: Symbol.Type = .proxy;
/// Returns the ordinal of the dylib or stub this proxy refers to, or 0 when
/// the proxy has no file (a self-reference). The file's ordinal must have
/// been assigned already.
pub fn dylibOrdinal(proxy: *Proxy) u16 {
    if (proxy.file) |source| {
        switch (source) {
            .dylib => |dylib| return dylib.ordinal.?,
            .stub => |stub| return stub.ordinal.?,
        }
    }
    return 0;
}
};
pub const Unresolved = struct {

View File

@@ -17,6 +17,7 @@ const Archive = @import("Archive.zig");
const CodeSignature = @import("CodeSignature.zig");
const Dylib = @import("Dylib.zig");
const Object = @import("Object.zig");
const Stub = @import("Stub.zig");
const Symbol = @import("Symbol.zig");
const Trie = @import("Trie.zig");
@@ -37,6 +38,10 @@ stack_size: u64 = 0,
objects: std.ArrayListUnmanaged(*Object) = .{},
archives: std.ArrayListUnmanaged(*Archive) = .{},
dylibs: std.ArrayListUnmanaged(*Dylib) = .{},
lib_stubs: std.ArrayListUnmanaged(*Stub) = .{},
libsystem_stub_index: ?u16 = null,
next_dylib_ordinal: u16 = 1,
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
@@ -49,7 +54,6 @@ dyld_info_cmd_index: ?u16 = null,
symtab_cmd_index: ?u16 = null,
dysymtab_cmd_index: ?u16 = null,
dylinker_cmd_index: ?u16 = null,
libsystem_cmd_index: ?u16 = null,
data_in_code_cmd_index: ?u16 = null,
function_starts_cmd_index: ?u16 = null,
main_cmd_index: ?u16 = null,
@@ -153,9 +157,20 @@ pub fn deinit(self: *Zld) void {
}
self.dylibs.deinit(self.allocator);
for (self.lib_stubs.items) |stub| {
stub.deinit();
self.allocator.destroy(stub);
}
self.lib_stubs.deinit(self.allocator);
for (self.imports.values()) |proxy| {
proxy.deinit(self.allocator);
self.allocator.destroy(proxy);
}
self.imports.deinit(self.allocator);
self.tentatives.deinit(self.allocator);
self.globals.deinit(self.allocator);
self.imports.deinit(self.allocator);
self.unresolved.deinit(self.allocator);
self.strtab.deinit(self.allocator);
@@ -181,6 +196,7 @@ pub fn closeFiles(self: Zld) void {
/// Options for `link` beyond the positional input files and the output path.
const LinkArgs = struct {
/// Libraries to link against; `link` hands these to `parseLibs`.
libs: []const []const u8,
/// Runtime search paths; `link` hands these to `addRpaths`.
rpaths: []const []const u8,
/// Path to the libSystem tbd stub; `link` hands this to `parseLibSystem`.
libc_stub_path: []const u8,
};
pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8, args: LinkArgs) !void {
@@ -222,6 +238,7 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8, args: L
try self.addRpaths(args.rpaths);
try self.parseInputFiles(files);
try self.parseLibs(args.libs);
try self.parseLibSystem(args.libc_stub_path);
try self.resolveSymbols();
try self.resolveStubsAndGotEntries();
try self.updateMetadata();
@@ -241,14 +258,18 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
object,
archive,
dylib,
stub,
},
origin: union {
file: fs.File,
stub: Stub.LibStub,
},
file: fs.File,
name: []const u8,
};
var classified = std.ArrayList(Input).init(self.allocator);
defer classified.deinit();
// First, classify input files: object, archive or dylib.
// First, classify input files: object, archive, dylib or stub (tbd).
for (files) |file_name| {
const file = try fs.cwd().openFile(file_name, .{});
const full_path = full_path: {
@@ -261,7 +282,7 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
if (!(try Object.isObject(file))) break :try_object;
try classified.append(.{
.kind = .object,
.file = file,
.origin = .{ .file = file },
.name = full_path,
});
continue;
@@ -271,7 +292,7 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
if (!(try Archive.isArchive(file))) break :try_archive;
try classified.append(.{
.kind = .archive,
.file = file,
.origin = .{ .file = file },
.name = full_path,
});
continue;
@@ -281,12 +302,25 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
if (!(try Dylib.isDylib(file))) break :try_dylib;
try classified.append(.{
.kind = .dylib,
.file = file,
.origin = .{ .file = file },
.name = full_path,
});
continue;
}
try_stub: {
var lib_stub = Stub.LibStub.loadFromFile(self.allocator, file) catch {
break :try_stub;
};
try classified.append(.{
.kind = .stub,
.origin = .{ .stub = lib_stub },
.name = full_path,
});
file.close();
continue;
}
file.close();
log.warn("unknown filetype for positional input file: '{s}'", .{file_name});
}
@@ -301,7 +335,8 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
object.* = Object.init(self.allocator);
object.arch = self.arch.?;
object.name = input.name;
object.file = input.file;
object.file = input.origin.file;
try object.parse();
try self.objects.append(self.allocator, object);
},
@@ -312,7 +347,8 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
archive.* = Archive.init(self.allocator);
archive.arch = self.arch.?;
archive.name = input.name;
archive.file = input.file;
archive.file = input.origin.file;
try archive.parse();
try self.archives.append(self.allocator, archive);
},
@@ -323,27 +359,22 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
dylib.* = Dylib.init(self.allocator);
dylib.arch = self.arch.?;
dylib.name = input.name;
dylib.file = input.file;
dylib.file = input.origin.file;
const ordinal = @intCast(u16, self.dylibs.items.len);
dylib.ordinal = ordinal + 2; // TODO +2 since 1 is reserved for libSystem
// TODO Defer parsing of the dylibs until they are actually needed
try dylib.parse();
try self.dylibs.append(self.allocator, dylib);
},
.stub => {
const stub = try self.allocator.create(Stub);
errdefer self.allocator.destroy(stub);
// Add LC_LOAD_DYLIB command
const dylib_id = dylib.id orelse unreachable;
var dylib_cmd = try createLoadDylibCommand(
self.allocator,
dylib_id.name,
dylib_id.timestamp,
dylib_id.current_version,
dylib_id.compatibility_version,
);
errdefer dylib_cmd.deinit(self.allocator);
stub.* = Stub.init(self.allocator);
stub.arch = self.arch.?;
stub.name = input.name;
stub.lib_stub = input.origin.stub;
try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd });
try stub.parse();
try self.lib_stubs.append(self.allocator, stub);
},
}
}
@@ -362,42 +393,79 @@ fn parseLibs(self: *Zld, libs: []const []const u8) !void {
dylib.name = try self.allocator.dupe(u8, lib);
dylib.file = file;
const ordinal = @intCast(u16, self.dylibs.items.len);
dylib.ordinal = ordinal + 2; // TODO +2 since 1 is reserved for libSystem
// TODO Defer parsing of the dylibs until they are actually needed
try dylib.parse();
try self.dylibs.append(self.allocator, dylib);
// Add LC_LOAD_DYLIB command
const dylib_id = dylib.id orelse unreachable;
var dylib_cmd = try createLoadDylibCommand(
self.allocator,
dylib_id.name,
dylib_id.timestamp,
dylib_id.current_version,
dylib_id.compatibility_version,
);
errdefer dylib_cmd.deinit(self.allocator);
try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd });
} else if (try Archive.isArchive(file)) {
const archive = try self.allocator.create(Archive);
errdefer self.allocator.destroy(archive);
archive.* = Archive.init(self.allocator);
archive.arch = self.arch.?;
archive.name = try self.allocator.dupe(u8, lib);
archive.file = file;
try archive.parse();
try self.archives.append(self.allocator, archive);
} else {
file.close();
log.warn("unknown filetype for a library: '{s}'", .{lib});
// Try tbd stub file next.
if (Stub.LibStub.loadFromFile(self.allocator, file)) |lib_stub| {
const stub = try self.allocator.create(Stub);
errdefer self.allocator.destroy(stub);
stub.* = Stub.init(self.allocator);
stub.arch = self.arch.?;
stub.name = try self.allocator.dupe(u8, lib);
stub.lib_stub = lib_stub;
try stub.parse();
try self.lib_stubs.append(self.allocator, stub);
} else |_| {
// TODO this entire logic has to be cleaned up.
try file.seekTo(0);
if (try Archive.isArchive(file)) {
const archive = try self.allocator.create(Archive);
errdefer self.allocator.destroy(archive);
archive.* = Archive.init(self.allocator);
archive.arch = self.arch.?;
archive.name = try self.allocator.dupe(u8, lib);
archive.file = file;
try archive.parse();
try self.archives.append(self.allocator, archive);
} else {
file.close();
log.warn("unknown filetype for a library: '{s}'", .{lib});
}
}
}
}
}
/// Loads the libSystem tbd stub found at `libc_stub_path`, adds it to
/// `lib_stubs` and emits its LC_LOAD_DYLIB load command.
///
/// On success the stub receives the next free dylib ordinal and its position
/// in `lib_stubs` is recorded in `libsystem_stub_index`.
///
/// On failure everything allocated here is released again and the linker
/// state (`lib_stubs`, `load_commands`, `libsystem_stub_index`,
/// `next_dylib_ordinal`) is left as it was on entry.
fn parseLibSystem(self: *Zld, libc_stub_path: []const u8) !void {
    const file = try fs.cwd().openFile(libc_stub_path, .{});
    defer file.close();

    const stub = blk: {
        var lib_stub = try Stub.LibStub.loadFromFile(self.allocator, file);
        // Only covers the window before the stub takes over `lib_stub`;
        // afterwards `stub.deinit()` is responsible for it.
        errdefer lib_stub.deinit();

        const new_stub = try self.allocator.create(Stub);
        new_stub.* = Stub.init(self.allocator);
        new_stub.lib_stub = lib_stub;
        break :blk new_stub;
    };
    errdefer {
        stub.deinit();
        self.allocator.destroy(stub);
    }

    stub.arch = self.arch.?;
    stub.name = try self.allocator.dupe(u8, libc_stub_path);
    try stub.parse();

    // Build the LC_LOAD_DYLIB load command before touching any list.
    const dylib_id = stub.id orelse unreachable;
    var dylib_cmd = try createLoadDylibCommand(
        self.allocator,
        dylib_id.name,
        dylib_id.timestamp,
        dylib_id.current_version,
        dylib_id.compatibility_version,
    );
    errdefer dylib_cmd.deinit(self.allocator);

    const stub_index = @intCast(u16, self.lib_stubs.items.len);
    try self.lib_stubs.append(self.allocator, stub);
    // If the load command cannot be stored, take the stub back out so the
    // cleanup above does not leave a dangling pointer in `lib_stubs`.
    errdefer _ = self.lib_stubs.pop();

    try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd });

    // Nothing can fail from here on; commit the bookkeeping.
    stub.ordinal = self.next_dylib_ordinal;
    self.libsystem_stub_index = stub_index;
    self.next_dylib_ordinal += 1;
}
fn mapAndUpdateSections(
self: *Zld,
object: *Object,
@@ -1814,9 +1882,8 @@ fn resolveSymbols(self: *Zld) !void {
next_sym += 1;
}
}
// Third pass, resolve symbols in dynamic libraries.
// TODO Implement libSystem as a hard-coded library, or ship with
// a libSystem.B.tbd definition file?
var unresolved = std.ArrayList(*Symbol).init(self.allocator);
defer unresolved.deinit();
@@ -1824,61 +1891,109 @@ fn resolveSymbols(self: *Zld) !void {
for (self.unresolved.values()) |value| {
unresolved.appendAssumeCapacity(value);
}
self.unresolved.clearAndFree(self.allocator);
self.unresolved.clearRetainingCapacity();
var has_undefined = false;
while (unresolved.popOrNull()) |undef| {
var found = false;
for (self.dylibs.items) |dylib| {
const proxy = dylib.symbols.get(undef.name) orelse continue;
try self.imports.putNoClobber(self.allocator, proxy.name, proxy);
undef.alias = proxy;
found = true;
}
var referenced = std.AutoHashMap(union(enum) {
dylib: *Dylib,
stub: *Stub,
}, void).init(self.allocator);
defer referenced.deinit();
if (!found) {
// TODO we currently hardcode all unresolved symbols to libSystem
const proxy = try self.allocator.create(Symbol.Proxy);
errdefer self.allocator.destroy(proxy);
loop: while (unresolved.popOrNull()) |undef| {
const proxy = self.imports.get(undef.name) orelse outer: {
const proxy = inner: {
for (self.dylibs.items) |dylib| {
const proxy = (try dylib.createProxy(undef.name)) orelse continue;
try referenced.put(.{ .dylib = dylib }, {});
break :inner proxy;
}
for (self.lib_stubs.items) |stub, i| {
const proxy = (try stub.createProxy(undef.name)) orelse continue;
if (self.libsystem_stub_index.? != @intCast(u16, i)) {
// LibSystem gets its load command separately.
try referenced.put(.{ .stub = stub }, {});
}
break :inner proxy;
}
if (mem.eql(u8, undef.name, "___dso_handle")) {
// TODO this is just a temp patch until I work out what to actually
// do with ___dso_handle and __mh_execute_header symbols which are
// synthetically created by the linker on macOS.
const name = try self.allocator.dupe(u8, undef.name);
const proxy = try self.allocator.create(Symbol.Proxy);
errdefer self.allocator.destroy(proxy);
proxy.* = .{
.base = .{
.@"type" = .proxy,
.name = name,
},
.file = null,
};
break :inner &proxy.base;
}
proxy.* = .{
.base = .{
.@"type" = .proxy,
.name = try self.allocator.dupe(u8, undef.name),
},
.dylib = null, // TODO null means libSystem
self.unresolved.putAssumeCapacityNoClobber(undef.name, undef);
continue :loop;
};
try self.imports.putNoClobber(self.allocator, proxy.base.name, &proxy.base);
undef.alias = &proxy.base;
// log.err("undefined reference to symbol '{s}'", .{undef.name});
// log.err(" | referenced in {s}", .{
// undef.cast(Symbol.Unresolved).?.file.name.?,
// });
// has_undefined = true;
}
try self.imports.putNoClobber(self.allocator, proxy.name, proxy);
break :outer proxy;
};
undef.alias = proxy;
}
if (has_undefined) return error.UndefinedSymbolReference;
// Add LC_LOAD_DYLIB load command for each referenced dylib/stub.
var it = referenced.iterator();
while (it.next()) |key| {
var dylib_cmd = blk: {
switch (key.key_ptr.*) {
.dylib => |dylib| {
dylib.ordinal = self.next_dylib_ordinal;
const dylib_id = dylib.id orelse unreachable;
break :blk try createLoadDylibCommand(
self.allocator,
dylib_id.name,
dylib_id.timestamp,
dylib_id.current_version,
dylib_id.compatibility_version,
);
},
.stub => |stub| {
stub.ordinal = self.next_dylib_ordinal;
const dylib_id = stub.id orelse unreachable;
break :blk try createLoadDylibCommand(
self.allocator,
dylib_id.name,
dylib_id.timestamp,
dylib_id.current_version,
dylib_id.compatibility_version,
);
},
}
};
errdefer dylib_cmd.deinit(self.allocator);
try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd });
self.next_dylib_ordinal += 1;
}
if (self.unresolved.count() > 0) {
for (self.unresolved.values()) |undef| {
log.err("undefined reference to symbol '{s}'", .{undef.name});
log.err(" | referenced in {s}", .{
undef.cast(Symbol.Unresolved).?.file.name.?,
});
}
return error.UndefinedSymbolReference;
}
// Finally put dyld_stub_binder as an Import
const dyld_stub_binder = try self.allocator.create(Symbol.Proxy);
errdefer self.allocator.destroy(dyld_stub_binder);
dyld_stub_binder.* = .{
.base = .{
.@"type" = .proxy,
.name = try self.allocator.dupe(u8, "dyld_stub_binder"),
},
.dylib = null, // TODO null means libSystem
const libsystem_stub = self.lib_stubs.items[self.libsystem_stub_index.?];
const proxy = (try libsystem_stub.createProxy("dyld_stub_binder")) orelse {
log.err("undefined reference to symbol 'dyld_stub_binder'", .{});
return error.UndefinedSymbolReference;
};
try self.imports.putNoClobber(
self.allocator,
dyld_stub_binder.base.name,
&dyld_stub_binder.base,
);
try self.imports.putNoClobber(self.allocator, proxy.name, proxy);
}
fn resolveStubsAndGotEntries(self: *Zld) !void {
@@ -2437,15 +2552,6 @@ fn populateMetadata(self: *Zld) !void {
try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd });
}
if (self.libsystem_cmd_index == null) {
self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
var dylib_cmd = try createLoadDylibCommand(self.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0);
errdefer dylib_cmd.deinit(self.allocator);
try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd });
}
if (self.main_cmd_index == null) {
self.main_cmd_index = @intCast(u16, self.load_commands.items.len);
try self.load_commands.append(self.allocator, .{
@@ -2746,14 +2852,10 @@ fn writeBindInfoTable(self: *Zld) !void {
for (self.got_entries.items) |sym| {
if (sym.cast(Symbol.Proxy)) |proxy| {
const dylib_ordinal = ordinal: {
const dylib = proxy.dylib orelse break :ordinal 1; // TODO embedded libSystem
break :ordinal dylib.ordinal.?;
};
try pointers.append(.{
.offset = base_offset + proxy.base.got_index.? * @sizeOf(u64),
.segment_id = segment_id,
.dylib_ordinal = dylib_ordinal,
.dylib_ordinal = proxy.dylibOrdinal(),
.name = proxy.base.name,
});
}
@@ -2768,15 +2870,11 @@ fn writeBindInfoTable(self: *Zld) !void {
const sym = self.imports.get("__tlv_bootstrap") orelse unreachable;
const proxy = sym.cast(Symbol.Proxy) orelse unreachable;
const dylib_ordinal = ordinal: {
const dylib = proxy.dylib orelse break :ordinal 1; // TODO embedded libSystem
break :ordinal dylib.ordinal.?;
};
try pointers.append(.{
.offset = base_offset,
.segment_id = segment_id,
.dylib_ordinal = dylib_ordinal,
.dylib_ordinal = proxy.dylibOrdinal(),
.name = proxy.base.name,
});
}
@@ -2813,15 +2911,10 @@ fn writeLazyBindInfoTable(self: *Zld) !void {
for (self.stubs.items) |sym| {
const proxy = sym.cast(Symbol.Proxy) orelse unreachable;
const dylib_ordinal = ordinal: {
const dylib = proxy.dylib orelse break :ordinal 1; // TODO embedded libSystem
break :ordinal dylib.ordinal.?;
};
pointers.appendAssumeCapacity(.{
.offset = base_offset + sym.stubs_index.? * @sizeOf(u64),
.segment_id = segment_id,
.dylib_ordinal = dylib_ordinal,
.dylib_ordinal = proxy.dylibOrdinal(),
.name = sym.name,
});
}
@@ -3128,15 +3221,12 @@ fn writeSymbolTable(self: *Zld) !void {
defer undefs.deinit();
for (self.imports.values()) |sym| {
const ordinal = ordinal: {
const dylib = sym.cast(Symbol.Proxy).?.dylib orelse break :ordinal 1; // TODO handle libSystem
break :ordinal dylib.ordinal.?;
};
const proxy = sym.cast(Symbol.Proxy) orelse unreachable;
try undefs.append(.{
.n_strx = try self.makeString(sym.name),
.n_type = macho.N_UNDF | macho.N_EXT,
.n_sect = 0,
.n_desc = (ordinal * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY,
.n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY,
.n_value = 0,
});
}

79
src/link/tapi.zig Normal file
View File

@@ -0,0 +1,79 @@
const std = @import("std");
const fs = std.fs;
const mem = std.mem;
const log = std.log.scoped(.tapi);
const Allocator = mem.Allocator;
const Yaml = @import("tapi/yaml.zig").Yaml;
/// A tbd (text-based stub) file loaded into memory.
pub const LibStub = struct {
    /// Underlying memory for stub's contents.
    yaml: Yaml,

    /// Typed contents of the tbd file.
    inner: []Tbd,

    /// One document of a tbd file.
    const Tbd = struct {
        tbd_version: u3,
        targets: []const []const u8,
        uuids: []const struct {
            target: []const u8,
            value: []const u8,
        },
        install_name: []const u8,
        current_version: ?union(enum) {
            string: []const u8,
            float: f64,
            int: u64,
        },
        reexported_libraries: ?[]const struct {
            targets: []const []const u8,
            libraries: []const []const u8,
        },
        parent_umbrella: ?[]const struct {
            targets: []const []const u8,
            umbrella: []const u8,
        },
        exports: ?[]const struct {
            targets: []const []const u8,
            symbols: []const []const u8,
        },
        reexports: ?[]const struct {
            targets: []const []const u8,
            symbols: []const []const u8,
        },
        allowable_clients: ?[]const struct {
            targets: []const []const u8,
            clients: []const []const u8,
        },
        objc_classes: ?[]const []const u8,
    };

    /// Reads `file` to its end and parses the contents as a tbd stub.
    ///
    /// The contents are first parsed as a list of `Tbd`. If that fails with
    /// `error.TypeMismatch`, they are parsed as a single `Tbd` instead and
    /// wrapped in a one-element slice.
    ///
    /// The returned value must be released with `deinit`. On failure nothing
    /// is left allocated.
    pub fn loadFromFile(allocator: *Allocator, file: fs.File) !LibStub {
        const source = try file.readToEndAlloc(allocator, std.math.maxInt(u32));
        defer allocator.free(source);

        var lib_stub = LibStub{
            .yaml = try Yaml.load(allocator, source),
            .inner = undefined,
        };
        // The document is loaded from here on; release it again if the typed
        // parse below fails, since the caller never gets a value to `deinit`.
        errdefer lib_stub.yaml.deinit();

        lib_stub.inner = lib_stub.yaml.parse([]Tbd) catch |err| blk: {
            switch (err) {
                error.TypeMismatch => {
                    // TODO clean this up.
                    var out = try lib_stub.yaml.arena.allocator.alloc(Tbd, 1);
                    out[0] = try lib_stub.yaml.parse(Tbd);
                    break :blk out;
                },
                else => |e| return e,
            }
        };

        return lib_stub;
    }

    /// Releases the stub by deinitializing the underlying `yaml`.
    pub fn deinit(self: *LibStub) void {
        self.yaml.deinit();
    }
};

439
src/link/tapi/Tokenizer.zig Normal file
View File

@@ -0,0 +1,439 @@
const Tokenizer = @This();
const std = @import("std");
const log = std.log.scoped(.tapi);
const testing = std.testing;
buffer: []const u8,
index: usize = 0,
/// A single token produced by `Tokenizer.next`. The token's text is
/// `buffer[start..end]` of the tokenizer that produced it.
pub const Token = struct {
/// What kind of token this is.
id: Id,
/// Index of the token's first byte in the tokenizer's buffer.
start: usize,
/// Index one past the token's last byte in the tokenizer's buffer.
end: usize,
/// Count of spaces/tabs.
/// Only active for .Space and .Tab tokens.
count: ?usize = null,
pub const Id = enum {
Eof, // end of the buffer
NewLine, // \n or \r\n
DocStart, // ---
DocEnd, // ...
SeqItemInd, // -
MapValueInd, // :
FlowMapStart, // {
FlowMapEnd, // }
FlowSeqStart, // [
FlowSeqEnd, // ]
Comma, // ,
Space, // run of spaces
Tab, // run of tabs
Comment, // #
Alias, // *
Anchor, // &
Tag, // !
SingleQuote, // '
DoubleQuote, // "
Literal, // anything else
};
};
pub const TokenIndex = usize;
/// A cursor over a slice of already produced tokens.
pub const TokenIterator = struct {
    buffer: []const Token,
    pos: TokenIndex = 0,

    /// Returns the token under the cursor and moves the cursor one token
    /// forward. Does not check for the end of the buffer; use `peek` for that.
    pub fn next(self: *TokenIterator) Token {
        defer self.pos += 1;
        return self.buffer[self.pos];
    }

    /// Returns the token under the cursor without moving it, or null when
    /// the cursor is at or past the end of the buffer.
    pub fn peek(self: TokenIterator) ?Token {
        return if (self.pos < self.buffer.len) self.buffer[self.pos] else null;
    }

    /// Moves the cursor back to the first token.
    pub fn reset(self: *TokenIterator) void {
        self.seekTo(0);
    }

    /// Moves the cursor to `pos`.
    pub fn seekTo(self: *TokenIterator, pos: TokenIndex) void {
        self.pos = pos;
    }

    /// Moves the cursor by `offset` tokens; moving before the first token
    /// leaves the cursor on the first token.
    pub fn seekBy(self: *TokenIterator, offset: isize) void {
        const target = @bitCast(isize, self.pos) + offset;
        self.pos = if (target < 0) 0 else @intCast(usize, target);
    }
};
/// Scans the buffer from `self.index` and returns the next token, leaving
/// `self.index` just past it.
///
/// A run of spaces becomes one `.Space` token and a run of tabs becomes one
/// `.Tab` token; in both cases `count` holds the length of the run. Three
/// consecutive `-` or `.` become `.DocStart` / `.DocEnd`, and `-` followed
/// by a space becomes `.SeqItemInd` (the space is part of the token).
/// Anything that is not an indicator is a `.Literal`, which ends before the
/// next `\r`, `\n`, space, quote, `,`, `:`, `]` or `}`.
///
/// Returns `.Eof` when the buffer is exhausted. Note that a token still in
/// progress when the buffer ends is also reported as `.Eof`, unless it is a
/// literal.
pub fn next(self: *Tokenizer) Token {
    var result = Token{
        .id = .Eof,
        .start = self.index,
        .end = undefined,
    };

    var state: union(enum) {
        Start,
        NewLine,
        Space: usize,
        Tab: usize,
        Hyphen: usize,
        Dot: usize,
        Literal,
    } = .Start;

    while (self.index < self.buffer.len) : (self.index += 1) {
        const c = self.buffer[self.index];
        switch (state) {
            .Start => switch (c) {
                ' ' => {
                    state = .{ .Space = 1 };
                },
                '\t' => {
                    state = .{ .Tab = 1 };
                },
                '\n' => {
                    result.id = .NewLine;
                    self.index += 1;
                    break;
                },
                '\r' => {
                    state = .NewLine;
                },
                '-' => {
                    state = .{ .Hyphen = 1 };
                },
                '.' => {
                    state = .{ .Dot = 1 };
                },
                ',' => {
                    result.id = .Comma;
                    self.index += 1;
                    break;
                },
                '#' => {
                    result.id = .Comment;
                    self.index += 1;
                    break;
                },
                '*' => {
                    result.id = .Alias;
                    self.index += 1;
                    break;
                },
                '&' => {
                    result.id = .Anchor;
                    self.index += 1;
                    break;
                },
                '!' => {
                    result.id = .Tag;
                    self.index += 1;
                    break;
                },
                '\'' => {
                    result.id = .SingleQuote;
                    self.index += 1;
                    break;
                },
                '"' => {
                    result.id = .DoubleQuote;
                    self.index += 1;
                    break;
                },
                '[' => {
                    result.id = .FlowSeqStart;
                    self.index += 1;
                    break;
                },
                ']' => {
                    result.id = .FlowSeqEnd;
                    self.index += 1;
                    break;
                },
                ':' => {
                    result.id = .MapValueInd;
                    self.index += 1;
                    break;
                },
                '{' => {
                    result.id = .FlowMapStart;
                    self.index += 1;
                    break;
                },
                '}' => {
                    result.id = .FlowMapEnd;
                    self.index += 1;
                    break;
                },
                else => {
                    state = .Literal;
                },
            },
            .Space => |*count| switch (c) {
                ' ' => {
                    count.* += 1;
                },
                else => {
                    // Do not consume `c`; it starts the next token.
                    result.id = .Space;
                    result.count = count.*;
                    break;
                },
            },
            .Tab => |*count| switch (c) {
                // A run of tabs is extended by tabs, not by spaces.
                '\t' => {
                    count.* += 1;
                },
                else => {
                    // Do not consume `c`; it starts the next token.
                    result.id = .Tab;
                    result.count = count.*;
                    break;
                },
            },
            .NewLine => switch (c) {
                '\n' => {
                    result.id = .NewLine;
                    self.index += 1;
                    break;
                },
                else => {}, // TODO this should be an error condition
            },
            .Hyphen => |*count| switch (c) {
                ' ' => {
                    result.id = .SeqItemInd;
                    self.index += 1;
                    break;
                },
                '-' => {
                    count.* += 1;

                    if (count.* == 3) {
                        result.id = .DocStart;
                        self.index += 1;
                        break;
                    }
                },
                else => {
                    state = .Literal;
                },
            },
            .Dot => |*count| switch (c) {
                '.' => {
                    count.* += 1;

                    if (count.* == 3) {
                        result.id = .DocEnd;
                        self.index += 1;
                        break;
                    }
                },
                else => {
                    state = .Literal;
                },
            },
            .Literal => switch (c) {
                '\r', '\n', ' ', '\'', '"', ',', ':', ']', '}' => {
                    // Do not consume `c`; it starts the next token.
                    result.id = .Literal;
                    break;
                },
                else => {
                    result.id = .Literal;
                },
            },
        }
    }

    // A literal whose first character is also the last character of the
    // buffer never reaches the `.Literal` arm above.
    if (state == .Literal and result.id == .Eof) {
        result.id = .Literal;
    }

    result.end = self.index;

    log.debug("{any}", .{result});
    log.debug("    | {s}", .{self.buffer[result.start..result.end]});

    return result;
}
/// Tokenizes `source` and checks that the ids of the first `expected.len`
/// tokens equal `expected`, in order. Stops at the first mismatch.
fn testExpected(source: []const u8, expected: []const Token.Id) !void {
    var tokenizer = Tokenizer{ .buffer = source };

    var i: usize = 0;
    while (i < expected.len) : (i += 1) {
        try testing.expectEqual(expected[i], tokenizer.next().id);
    }
}
// An empty buffer yields `.Eof` straight away.
test "empty doc" {
try testExpected("", &[_]Token.Id{.Eof});
}
// `---` and `...` on their own lines are the document start and end markers.
test "empty doc with explicit markers" {
try testExpected(
\\---
\\...
, &[_]Token.Id{
.DocStart, .NewLine, .DocEnd, .Eof,
});
}
// Block sequence: each `- ` is one `.SeqItemInd` (the space belongs to the
// indicator, so no `.Space` follows it), and each item is a `.Literal`.
test "sequence of values" {
try testExpected(
\\- 0
\\- 1
\\- 2
, &[_]Token.Id{
.SeqItemInd,
.Literal,
.NewLine,
.SeqItemInd,
.Literal,
.NewLine,
.SeqItemInd,
.Literal,
.Eof,
});
}
// Flow sequences nested in a block sequence: brackets and commas are their
// own tokens and the whitespace between them shows up as `.Space`.
test "sequence of sequences" {
try testExpected(
\\- [ val1, val2]
\\- [val3, val4 ]
, &[_]Token.Id{
.SeqItemInd,
.FlowSeqStart,
.Space,
.Literal,
.Comma,
.Space,
.Literal,
.FlowSeqEnd,
.NewLine,
.SeqItemInd,
.FlowSeqStart,
.Literal,
.Comma,
.Space,
.Literal,
.Space,
.FlowSeqEnd,
.Eof,
});
}
// Block mapping: `:` ends the key literal and is reported as `.MapValueInd`.
test "mappings" {
try testExpected(
\\key1: value1
\\key2: value2
, &[_]Token.Id{
.Literal,
.MapValueInd,
.Space,
.Literal,
.NewLine,
.Literal,
.MapValueInd,
.Space,
.Literal,
.Eof,
});
}
// A flow sequence used as a mapping value and continued on a second line.
// NOTE(review): the expected tokens have a `.Space` between `.Comma` and
// `.NewLine`, i.e. the first source line is expected to end in whitespace —
// confirm the string literal below still carries it.
test "inline mapped sequence of values" {
try testExpected(
\\key : [ val1,
\\ val2 ]
, &[_]Token.Id{
.Literal,
.Space,
.MapValueInd,
.Space,
.FlowSeqStart,
.Space,
.Literal,
.Comma,
.Space,
.NewLine,
.Space,
.Literal,
.Space,
.FlowSeqEnd,
.Eof,
});
}
// Tokenizes the beginning of a real tbd document: tag, scalar mappings, a
// flow sequence, a nested block sequence and a single-quoted string.
// NOTE(review): "tdb" in the test name looks like a typo for "tbd".
test "part of tdb" {
try testExpected(
\\--- !tapi-tbd
\\tbd-version: 4
\\targets: [ x86_64-macos ]
\\
\\uuids:
\\ - target: x86_64-macos
\\ value: F86CC732-D5E4-30B5-AA7D-167DF5EC2708
\\
\\install-name: '/usr/lib/libSystem.B.dylib'
\\...
, &[_]Token.Id{
.DocStart,
.Space,
.Tag,
.Literal,
.NewLine,
.Literal,
.MapValueInd,
.Space,
.Literal,
.NewLine,
.Literal,
.MapValueInd,
.Space,
.FlowSeqStart,
.Space,
.Literal,
.Space,
.FlowSeqEnd,
.NewLine,
.NewLine,
.Literal,
.MapValueInd,
.NewLine,
.Space,
.SeqItemInd,
.Literal,
.MapValueInd,
.Space,
.Literal,
.NewLine,
.Space,
.Literal,
.MapValueInd,
.Space,
.Literal,
.NewLine,
.NewLine,
.Literal,
.MapValueInd,
.Space,
.SingleQuote,
.Literal,
.SingleQuote,
.NewLine,
.DocEnd,
.Eof,
});
}

713
src/link/tapi/parse.zig Normal file
View File

@@ -0,0 +1,713 @@
const std = @import("std");
const assert = std.debug.assert;
const log = std.log.scoped(.tapi);
const mem = std.mem;
const testing = std.testing;
const Allocator = mem.Allocator;
const Tokenizer = @import("Tokenizer.zig");
const Token = Tokenizer.Token;
const TokenIndex = Tokenizer.TokenIndex;
const TokenIterator = Tokenizer.TokenIterator;
/// Errors the parser in this file can return. The set is merged with
/// `Allocator.Error` because building the tree allocates nodes and lists.
pub const ParseError = error{
// Returned by `Parser.openScope` when a new indentation is smaller than the
// indentation of the enclosing scope.
MalformedYaml,
// NOTE(review): nothing in the parser shown in this file returns this error;
// confirm whether it is still needed.
NestedDocuments,
// Returned by `Parser.doc` when a tag token appears inside a document body.
UnexpectedTag,
// Returned by `Parser.openScope` / `Parser.closeScope` when no token is left
// to inspect.
UnexpectedEof,
// Returned by `Parser.expectToken` when the next significant token has a
// different id, and by `Parser.doc` / `Parser.leaf_value` for tokens that are
// not allowed at that position.
UnexpectedToken,
// Returned by the map and list parsers for value tokens they do not support.
Unhandled,
} || Allocator.Error;
/// Type-erased handle to a node of the parsed YAML tree.
///
/// Every concrete node type (`Doc`, `Map`, `List`, `Value`) embeds a `Node`
/// as its `base` field. `tag` records which concrete type surrounds a given
/// `Node`, and `@fieldParentPtr` is used to get back to it.
pub const Node = struct {
    /// Identifies the concrete node type that embeds this `Node`.
    tag: Tag,
    /// Tree whose `tokens` and `source` the token indices stored in the
    /// concrete node refer to.
    tree: *const Tree,

    pub const Tag = enum {
        doc,
        map,
        list,
        value,
    };

    /// Returns the concrete node of type `T` that embeds `self`, or null when
    /// `self.tag` differs from `T.base_tag`.
    pub fn cast(self: *const Node, comptime T: type) ?*const T {
        if (self.tag != T.base_tag) {
            return null;
        }
        return @fieldParentPtr(T, "base", self);
    }

    /// Releases everything owned by the concrete node that embeds `self`
    /// (its children and child lists). The memory of the node itself is NOT
    /// freed; use `destroy` for that.
    pub fn deinit(self: *Node, allocator: *Allocator) void {
        switch (self.tag) {
            .doc => @fieldParentPtr(Node.Doc, "base", self).deinit(allocator),
            .map => @fieldParentPtr(Node.Map, "base", self).deinit(allocator),
            .list => @fieldParentPtr(Node.List, "base", self).deinit(allocator),
            .value => @fieldParentPtr(Node.Value, "base", self).deinit(allocator),
        }
    }

    /// Releases everything owned by the concrete node that embeds `self` and
    /// then frees the concrete node itself.
    ///
    /// The node must have been allocated with `allocator.create` as its
    /// concrete type. Freeing has to go through that concrete pointer:
    /// `allocator.destroy` derives the size to free from the pointee type, so
    /// destroying through a `*Node` would free with `@sizeOf(Node)` rather
    /// than the size that was allocated.
    pub fn destroy(self: *Node, allocator: *Allocator) void {
        switch (self.tag) {
            .doc => destroyAs(Node.Doc, self, allocator),
            .map => destroyAs(Node.Map, self, allocator),
            .list => destroyAs(Node.List, self, allocator),
            .value => destroyAs(Node.Value, self, allocator),
        }
    }

    /// Recovers the `T` embedding `base_ptr`, deinitializes it and frees it.
    fn destroyAs(comptime T: type, base_ptr: *Node, allocator: *Allocator) void {
        const parent = @fieldParentPtr(T, "base", base_ptr);
        parent.deinit(allocator);
        allocator.destroy(parent);
    }

    /// Formats the concrete node that embeds `self` by forwarding to its own
    /// `format` function.
    pub fn format(
        self: *const Node,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        return switch (self.tag) {
            .doc => @fieldParentPtr(Node.Doc, "base", self).format(fmt, options, writer),
            .map => @fieldParentPtr(Node.Map, "base", self).format(fmt, options, writer),
            .list => @fieldParentPtr(Node.List, "base", self).format(fmt, options, writer),
            .value => @fieldParentPtr(Node.Value, "base", self).format(fmt, options, writer),
        };
    }

    /// A single YAML document.
    pub const Doc = struct {
        base: Node = Node{ .tag = Tag.doc, .tree = undefined },
        /// Index of the first token of the document.
        start: ?TokenIndex = null,
        /// Index of the last token of the document.
        end: ?TokenIndex = null,
        /// Index of the literal token that follows the tag on the document
        /// start line, if there is one.
        directive: ?TokenIndex = null,
        /// Top-level value of the document; owned by the document.
        value: ?*Node = null,

        pub const base_tag: Node.Tag = .doc;

        /// Frees the top-level value, if any. Does not free `self`.
        pub fn deinit(self: *Doc, allocator: *Allocator) void {
            if (self.value) |node| {
                node.destroy(allocator);
            }
        }

        /// Writes the value; when a directive is present the output is
        /// wrapped as `{ .directive = <text>, <value> }`.
        pub fn format(
            self: *const Doc,
            comptime fmt: []const u8,
            options: std.fmt.FormatOptions,
            writer: anytype,
        ) !void {
            _ = fmt;
            _ = options;
            if (self.directive) |id| {
                try std.fmt.format(writer, "{{ ", .{});
                const directive = self.base.tree.tokens[id];
                try std.fmt.format(writer, ".directive = {s}, ", .{
                    self.base.tree.source[directive.start..directive.end],
                });
            }
            if (self.value) |node| {
                try std.fmt.format(writer, "{}", .{node});
            }
            if (self.directive != null) {
                try std.fmt.format(writer, " }}", .{});
            }
        }
    };

    /// A mapping from keys (stored as token indices) to child nodes.
    pub const Map = struct {
        base: Node = Node{ .tag = Tag.map, .tree = undefined },
        /// Index of the first token of the map.
        start: ?TokenIndex = null,
        /// Index of the last token of the map.
        end: ?TokenIndex = null,
        /// Entries in source order; the map owns every `Entry.value`.
        values: std.ArrayListUnmanaged(Entry) = .{},

        pub const base_tag: Node.Tag = .map;

        pub const Entry = struct {
            /// Index of the key token.
            key: TokenIndex,
            value: *Node,
        };

        /// Frees every entry value and the entry list. Does not free `self`.
        pub fn deinit(self: *Map, allocator: *Allocator) void {
            for (self.values.items) |entry| {
                entry.value.destroy(allocator);
            }
            self.values.deinit(allocator);
        }

        /// Writes the map as `{ key => value, ...  }`.
        pub fn format(
            self: *const Map,
            comptime fmt: []const u8,
            options: std.fmt.FormatOptions,
            writer: anytype,
        ) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "{{ ", .{});
            for (self.values.items) |entry| {
                const key = self.base.tree.tokens[entry.key];
                try std.fmt.format(writer, "{s} => {}, ", .{
                    self.base.tree.source[key.start..key.end],
                    entry.value,
                });
            }
            return std.fmt.format(writer, " }}", .{});
        }
    };

    /// A sequence of child nodes (block or bracketed).
    pub const List = struct {
        base: Node = Node{ .tag = Tag.list, .tree = undefined },
        /// Index of the first token of the list.
        start: ?TokenIndex = null,
        /// Index of the last token of the list.
        end: ?TokenIndex = null,
        /// Elements in source order; the list owns every element.
        values: std.ArrayListUnmanaged(*Node) = .{},

        pub const base_tag: Node.Tag = .list;

        /// Frees every element and the element list. Does not free `self`.
        pub fn deinit(self: *List, allocator: *Allocator) void {
            for (self.values.items) |node| {
                node.destroy(allocator);
            }
            self.values.deinit(allocator);
        }

        /// Writes the list as `[ value, ...  ]`.
        pub fn format(
            self: *const List,
            comptime fmt: []const u8,
            options: std.fmt.FormatOptions,
            writer: anytype,
        ) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "[ ", .{});
            for (self.values.items) |node| {
                try std.fmt.format(writer, "{}, ", .{node});
            }
            return std.fmt.format(writer, " ]", .{});
        }
    };

    /// A leaf (scalar) value spanning the tokens `start` through `end`.
    pub const Value = struct {
        base: Node = Node{ .tag = Tag.value, .tree = undefined },
        /// Index of the first token of the value.
        start: ?TokenIndex = null,
        /// Index of the last token of the value.
        end: ?TokenIndex = null,

        pub const base_tag: Node.Tag = .value;

        /// A value owns nothing besides itself, so there is nothing to free.
        pub fn deinit(self: *Value, allocator: *Allocator) void {
            _ = self;
            _ = allocator;
        }

        /// Writes the source text from the start of the first token to the
        /// end of the last token. Both `start` and `end` must be set.
        pub fn format(
            self: *const Value,
            comptime fmt: []const u8,
            options: std.fmt.FormatOptions,
            writer: anytype,
        ) !void {
            _ = fmt;
            _ = options;
            const start = self.base.tree.tokens[self.start.?];
            const end = self.base.tree.tokens[self.end.?];
            return std.fmt.format(writer, "{s}", .{
                self.base.tree.source[start.start..end.end],
            });
        }
    };
};
/// Result of parsing a YAML source buffer: the token list plus one `Node.Doc`
/// per document found in it.
///
/// The tree owns `tokens` and every node reachable from `docs`. `source` is
/// only borrowed: `parse` stores the slice it is given without copying, so the
/// buffer must stay valid for as long as the tree (or anything formatted from
/// it) is used.
pub const Tree = struct {
    allocator: *Allocator,
    source: []const u8,
    tokens: []Token,
    docs: std.ArrayListUnmanaged(*Node) = .{},

    /// Creates an empty tree. `source` and `tokens` start out as empty slices
    /// so that `deinit` is safe even if `parse` is never called or fails
    /// before the tokens are stored.
    pub fn init(allocator: *Allocator) Tree {
        return .{
            .allocator = allocator,
            .source = &[_]u8{},
            .tokens = &[_]Token{},
        };
    }

    /// Frees the tokens, every document and the document list.
    pub fn deinit(self: *Tree) void {
        self.allocator.free(self.tokens);
        for (self.docs.items) |doc_base| {
            // `parse` only ever stores the `base` of a `Node.Doc` here. Free
            // through the concrete pointer so that the allocator is handed
            // the same type (and therefore size) that `create` allocated.
            assert(doc_base.tag == .doc);
            const doc_node = @fieldParentPtr(Node.Doc, "base", doc_base);
            doc_node.deinit(self.allocator);
            self.allocator.destroy(doc_node);
        }
        self.docs.deinit(self.allocator);
    }

    /// Tokenizes `source` and parses every document in it, appending the
    /// results to `docs`.
    ///
    /// On error the tree keeps whatever was parsed so far; `deinit` releases
    /// it.
    pub fn parse(self: *Tree, source: []const u8) !void {
        var tokenizer = Tokenizer{ .buffer = source };
        var tokens = std.ArrayList(Token).init(self.allocator);
        errdefer tokens.deinit();

        // Collect all tokens up to and including Eof.
        while (true) {
            const token = tokenizer.next();
            try tokens.append(token);
            if (token.id == .Eof) break;
        }

        self.source = source;
        self.tokens = tokens.toOwnedSlice();

        var it = TokenIterator{ .buffer = self.tokens };
        var parser = Parser{
            .allocator = self.allocator,
            .tree = self,
            .token_it = &it,
        };
        defer parser.deinit();

        // Root scope: top-level content has no indentation.
        try parser.scopes.append(self.allocator, .{
            .indent = 0,
        });

        while (true) {
            if (parser.token_it.peek() == null) return;

            const pos = parser.token_it.pos;
            const token = parser.token_it.next();

            log.debug("Next token: {}, {}", .{ pos, token });

            switch (token.id) {
                .Space, .Comment, .NewLine => {},
                .Eof => break,
                else => {
                    const doc = try parser.doc(pos);
                    // Do not leak the freshly parsed document when growing
                    // the list fails.
                    errdefer {
                        doc.deinit(self.allocator);
                        self.allocator.destroy(doc);
                    }
                    try self.docs.append(self.allocator, &doc.base);
                },
            }
        }
    }
};
const Parser = struct {
allocator: *Allocator,
tree: *Tree,
token_it: *TokenIterator,
scopes: std.ArrayListUnmanaged(Scope) = .{},
const Scope = struct {
indent: usize,
};
/// Frees the scope stack. Nodes created by the parser are not touched here.
fn deinit(self: *Parser) void {
    const gpa = self.allocator;
    self.scopes.deinit(gpa);
}
/// Parses one document whose first token is at index `start`.
///
/// An explicit document begins with a document-start token, optionally
/// followed by a tag and its literal (stored as `directive`), and must be
/// closed by a document-end token. An implicit document ends at the next
/// document-start token or at Eof, which is left unconsumed.
///
/// The returned node is owned by the caller. If parsing fails, everything
/// parsed so far (including the document value) is released before the error
/// is returned.
fn doc(self: *Parser, start: TokenIndex) ParseError!*Node.Doc {
    const node = try self.allocator.create(Node.Doc);
    node.* = .{
        .start = start,
    };
    node.base.tree = self.tree;
    // Release the value subtree as well, not just the `Doc` itself.
    errdefer {
        node.deinit(self.allocator);
        self.allocator.destroy(node);
    }

    self.token_it.seekTo(start);

    log.debug("Doc start: {}, {}", .{ start, self.tree.tokens[start] });

    const explicit_doc: bool = if (self.eatToken(.DocStart)) |_| explicit_doc: {
        if (self.eatToken(.Tag)) |_| {
            node.directive = try self.expectToken(.Literal);
        }
        _ = try self.expectToken(.NewLine);
        break :explicit_doc true;
    } else false;

    while (true) {
        const pos = self.token_it.pos;
        const token = self.token_it.next();

        log.debug("Next token: {}, {}", .{ pos, token });

        const value: *Node = value: {
            switch (token.id) {
                .Tag => {
                    return error.UnexpectedTag;
                },
                .Literal, .SingleQuote, .DoubleQuote => {
                    _ = try self.expectToken(.MapValueInd);
                    const map_node = try self.map(pos);
                    break :value &map_node.base;
                },
                .SeqItemInd => {
                    const list_node = try self.list(pos);
                    break :value &list_node.base;
                },
                .FlowSeqStart => {
                    const list_node = try self.list_bracketed(pos);
                    break :value &list_node.base;
                },
                .DocEnd => {
                    if (explicit_doc) break;
                    return error.UnexpectedToken;
                },
                .DocStart, .Eof => {
                    // Belongs to whoever called us; leave it unconsumed.
                    self.token_it.seekBy(-1);
                    break;
                },
                else => {
                    return error.UnexpectedToken;
                },
            }
        };

        // A later top-level value replaces an earlier one. Release the
        // replaced subtree (a no-op when there is none) so it is not leaked.
        node.deinit(self.allocator);
        node.value = value;
    }

    node.end = self.token_it.pos - 1;

    log.debug("Doc end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] });

    return node;
}
/// Parses a block mapping whose first key token is at index `start`.
///
/// Entries are read as `key: value` until a token that is not a literal key
/// is met (it is left unconsumed) or until the indentation after a newline
/// closes the current scope. A value that starts on the following line is a
/// nested map or block list; a value on the same line is a leaf or a
/// bracketed list.
///
/// The returned node is owned by the caller. If parsing fails, all entries
/// parsed so far are released before the error is returned.
fn map(self: *Parser, start: TokenIndex) ParseError!*Node.Map {
    const node = try self.allocator.create(Node.Map);
    node.* = .{
        .start = start,
    };
    node.base.tree = self.tree;
    // Release already parsed entries too, not just the `Map` itself.
    errdefer {
        node.deinit(self.allocator);
        self.allocator.destroy(node);
    }

    self.token_it.seekTo(start);

    log.debug("Map start: {}, {}", .{ start, self.tree.tokens[start] });
    log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]});

    while (true) {
        // Parse key.
        const key_pos = self.token_it.pos;
        const key = self.token_it.next();
        switch (key.id) {
            .Literal => {},
            else => {
                self.token_it.seekBy(-1);
                break;
            },
        }

        log.debug("Map key: {}, '{s}'", .{ key, self.tree.source[key.start..key.end] });

        // Separator
        _ = try self.expectToken(.MapValueInd);
        self.eatCommentsAndSpace();

        // Reserve the slot before parsing the value so that storing the
        // value below cannot fail and leak it.
        try node.values.ensureCapacity(self.allocator, node.values.items.len + 1);

        // Parse value.
        const value: *Node = value: {
            if (self.eatToken(.NewLine)) |_| {
                // Explicit, complex value such as list or map.
                try self.openScope();
                const value_pos = self.token_it.pos;
                const value_tok = self.token_it.next();
                switch (value_tok.id) {
                    .Literal, .SingleQuote, .DoubleQuote => {
                        // Assume nested map.
                        const map_node = try self.map(value_pos);
                        break :value &map_node.base;
                    },
                    .SeqItemInd => {
                        // Assume list of values.
                        const list_node = try self.list(value_pos);
                        break :value &list_node.base;
                    },
                    else => {
                        log.err("{}", .{key});
                        return error.Unhandled;
                    },
                }
            } else {
                const value_pos = self.token_it.pos;
                const value_tok = self.token_it.next();
                switch (value_tok.id) {
                    .Literal, .SingleQuote, .DoubleQuote => {
                        // Assume leaf value.
                        const leaf_node = try self.leaf_value(value_pos);
                        break :value &leaf_node.base;
                    },
                    .FlowSeqStart => {
                        const list_node = try self.list_bracketed(value_pos);
                        break :value &list_node.base;
                    },
                    else => {
                        log.err("{}", .{key});
                        return error.Unhandled;
                    },
                }
            }
        };
        log.debug("Map value: {}", .{value});

        node.values.appendAssumeCapacity(.{
            .key = key_pos,
            .value = value,
        });

        if (self.eatToken(.NewLine)) |_| {
            // The indentation of the next line decides whether this map ends.
            if (try self.closeScope()) {
                break;
            }
        }
    }

    node.end = self.token_it.pos - 1;

    log.debug("Map end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] });

    return node;
}
/// Parses a block list (`- item` lines) whose first item indicator is at
/// index `start`.
///
/// Items are read until the next significant token is not an item indicator;
/// at that point `closeScope` is consulted once and the list ends. An item is
/// a nested map (literal followed by a map value indicator), a leaf value or
/// a bracketed list.
///
/// The returned node is owned by the caller. If parsing fails, all items
/// parsed so far are released before the error is returned.
fn list(self: *Parser, start: TokenIndex) ParseError!*Node.List {
    const node = try self.allocator.create(Node.List);
    node.* = .{
        .start = start,
    };
    node.base.tree = self.tree;
    // Release already parsed items too, not just the `List` itself.
    errdefer {
        node.deinit(self.allocator);
        self.allocator.destroy(node);
    }

    self.token_it.seekTo(start);

    log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] });
    log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]});

    while (true) {
        _ = self.eatToken(.SeqItemInd) orelse {
            _ = try self.closeScope();
            break;
        };
        self.eatCommentsAndSpace();

        // Reserve the slot before parsing the item so that storing the item
        // below cannot fail and leak it.
        try node.values.ensureCapacity(self.allocator, node.values.items.len + 1);

        const pos = self.token_it.pos;
        const token = self.token_it.next();
        const value: *Node = value: {
            switch (token.id) {
                .Literal, .SingleQuote, .DoubleQuote => {
                    if (self.eatToken(.MapValueInd)) |_| {
                        if (self.eatToken(.NewLine)) |_| {
                            try self.openScope();
                        }
                        // nested map
                        const map_node = try self.map(pos);
                        break :value &map_node.base;
                    } else {
                        // standalone (leaf) value
                        const leaf_node = try self.leaf_value(pos);
                        break :value &leaf_node.base;
                    }
                },
                .FlowSeqStart => {
                    const list_node = try self.list_bracketed(pos);
                    break :value &list_node.base;
                },
                else => {
                    log.err("{}", .{token});
                    return error.Unhandled;
                },
            }
        };
        node.values.appendAssumeCapacity(value);

        _ = self.eatToken(.NewLine);
    }

    node.end = self.token_it.pos - 1;

    log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] });

    return node;
}
/// Parses a bracketed (flow) list whose opening bracket is at index `start`.
///
/// Elements are leaf values or nested bracketed lists, optionally separated
/// by commas and spread over several lines; parsing ends at the matching
/// closing bracket.
///
/// The returned node is owned by the caller. If parsing fails, all elements
/// parsed so far are released before the error is returned.
fn list_bracketed(self: *Parser, start: TokenIndex) ParseError!*Node.List {
    const node = try self.allocator.create(Node.List);
    node.* = .{
        .start = start,
    };
    node.base.tree = self.tree;
    // Release already parsed elements too, not just the `List` itself.
    errdefer {
        node.deinit(self.allocator);
        self.allocator.destroy(node);
    }

    self.token_it.seekTo(start);

    log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] });
    log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]});

    _ = try self.expectToken(.FlowSeqStart);

    while (true) {
        _ = self.eatToken(.NewLine);
        self.eatCommentsAndSpace();

        const pos = self.token_it.pos;
        const token = self.token_it.next();

        log.debug("Next token: {}, {}", .{ pos, token });

        const value: *Node = value: {
            switch (token.id) {
                .FlowSeqStart => {
                    // Reserve the slot first so that storing the element
                    // below cannot fail and leak it.
                    try node.values.ensureCapacity(self.allocator, node.values.items.len + 1);
                    const list_node = try self.list_bracketed(pos);
                    break :value &list_node.base;
                },
                .FlowSeqEnd => {
                    break;
                },
                .Literal, .SingleQuote, .DoubleQuote => {
                    try node.values.ensureCapacity(self.allocator, node.values.items.len + 1);
                    const leaf_node = try self.leaf_value(pos);
                    break :value &leaf_node.base;
                },
                else => {
                    log.err("{}", .{token});
                    return error.Unhandled;
                },
            }
        };

        // The separator may follow any kind of element, including a nested
        // list (`[[a], [b]]`).
        _ = self.eatToken(.Comma);
        // TODO newline
        node.values.appendAssumeCapacity(value);
    }

    node.end = self.token_it.pos - 1;

    log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] });

    return node;
}
/// Parses a leaf (scalar) value whose first token is at index `start`.
///
/// * Quoted value: `start` / `end` are set to the tokens just inside the
///   quotes. A newline before the closing quote is `error.UnexpectedToken`;
///   reaching Eof before the closing quote is `error.UnexpectedEof`.
/// * Unquoted value: consecutive literals separated by spaces form one value;
///   trailing spaces/comments are not part of it, and the token that ends the
///   value is left unconsumed.
///
/// The returned node is owned by the caller.
fn leaf_value(self: *Parser, start: TokenIndex) ParseError!*Node.Value {
    const node = try self.allocator.create(Node.Value);
    errdefer self.allocator.destroy(node);
    node.* = .{
        .start = start,
    };
    node.base.tree = self.tree;

    self.token_it.seekTo(start);

    log.debug("Leaf start: {}, {}", .{ node.start.?, self.tree.tokens[node.start.?] });

    parse: {
        // Single quotes are tried first, then double quotes.
        const quote_ids = [_]Token.Id{ .SingleQuote, .DoubleQuote };
        for (quote_ids) |quote_id| {
            _ = self.eatToken(quote_id) orelse continue;

            // The value starts right after the opening quote.
            node.start = node.start.? + 1;
            while (true) {
                const tok = self.token_it.next();
                if (tok.id == quote_id) {
                    // `pos` is now one past the closing quote, so the value
                    // ends two tokens back.
                    node.end = self.token_it.pos - 2;
                    break :parse;
                }
                switch (tok.id) {
                    .NewLine => return error.UnexpectedToken,
                    // Unterminated quote: Eof is the last token, so there is
                    // nothing left that could close the string.
                    .Eof => return error.UnexpectedEof,
                    else => {},
                }
            }
        }

        // TODO handle multiline strings in new block scope
        while (true) {
            const tok = self.token_it.next();
            switch (tok.id) {
                .Literal => {},
                .Space => {
                    // Index of the token before this space; it is the end of
                    // the value if no further literal follows.
                    const trailing = self.token_it.pos - 2;
                    self.eatCommentsAndSpace();
                    if (self.token_it.peek()) |peek| {
                        if (peek.id != .Literal) {
                            node.end = trailing;
                            break;
                        }
                    }
                },
                else => {
                    self.token_it.seekBy(-1);
                    node.end = self.token_it.pos - 1;
                    break;
                },
            }
        }
    }

    log.debug("Leaf end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] });

    return node;
}
/// Pushes a new scope when the upcoming token is indentation (space or tab).
///
/// Does nothing when the upcoming token is anything else. The indentation
/// token is consumed and its `count` becomes the indent of the new scope.
/// Fails with `error.UnexpectedEof` when no token is left and with
/// `error.MalformedYaml` when the indentation is smaller than that of the
/// enclosing scope.
fn openScope(self: *Parser) !void {
    const upcoming = self.token_it.peek() orelse return error.UnexpectedEof;
    switch (upcoming.id) {
        .Space, .Tab => {},
        // No need to open scope.
        else => return,
    }

    const indent = self.token_it.next().count.?;
    const enclosing = self.scopes.items[self.scopes.items.len - 1];
    if (indent < enclosing.indent) return error.MalformedYaml;

    log.debug("Opening scope...", .{});

    try self.scopes.append(self.allocator, .{
        .indent = indent,
    });
}
/// Pops the innermost scope when the upcoming line is indented less than it.
///
/// An upcoming indentation token (space or tab) is consumed and its `count`
/// is the indentation; any other token counts as indentation 0 and is left in
/// place. Returns true when a scope was popped. Fails with
/// `error.UnexpectedEof` when no token is left.
fn closeScope(self: *Parser) !bool {
    const upcoming = self.token_it.peek() orelse return error.UnexpectedEof;
    const indent = switch (upcoming.id) {
        .Space, .Tab => self.token_it.next().count.?,
        else => 0,
    };

    const innermost = self.scopes.items[self.scopes.items.len - 1];
    if (indent >= innermost.indent) return false;

    log.debug("Closing scope...", .{});
    _ = self.scopes.pop();
    return true;
}
/// Skips over any run of comment and space tokens, stopping in front of the
/// first token of another kind (or when no token is left).
fn eatCommentsAndSpace(self: *Parser) void {
    while (self.token_it.peek() != null) {
        switch (self.token_it.next().id) {
            .Comment, .Space => continue,
            else => {
                // Not ours: step back so the caller sees it.
                self.token_it.seekBy(-1);
                return;
            },
        }
    }
}
/// Consumes the next significant token if its id is `id` and returns its
/// index; returns null otherwise.
///
/// Comment and space tokens in front of it are skipped and stay consumed even
/// when null is returned; a significant token with a different id is left
/// unconsumed.
fn eatToken(self: *Parser, id: Token.Id) ?TokenIndex {
    while (true) {
        const candidate_pos = self.token_it.pos;
        if (self.token_it.peek() == null) return null;

        const candidate = self.token_it.next();
        switch (candidate.id) {
            .Comment, .Space => {},
            else => {
                if (candidate.id == id) return candidate_pos;
                self.token_it.seekTo(candidate_pos);
                return null;
            },
        }
    }
}
/// Like `eatToken`, but a missing token is `error.UnexpectedToken` instead of
/// null.
fn expectToken(self: *Parser, id: Token.Id) ParseError!TokenIndex {
    if (self.eatToken(id)) |token_pos| return token_pos;
    return error.UnexpectedToken;
}
};
// References parse/test.zig from a test block so that the parser tests kept
// in that file are pulled in together with this file's tests.
test {
_ = @import("parse/test.zig");
}

View File

@@ -0,0 +1,556 @@
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
usingnamespace @import("../parse.zig");
// An explicit document (`---` ... `...`) with a tag and a two-entry map.
test "explicit doc" {
    const source =
        \\--- !tapi-tbd
        \\tbd-version: 4
        \\abc-version: 5
        \\...
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    // The document ends at the token right before Eof.
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);

    const directive = tree.tokens[doc.directive.?];
    try testing.expectEqual(directive.id, .Literal);
    try testing.expectEqualStrings("tapi-tbd", tree.source[directive.start..directive.end]);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .map);

    const map = doc.value.?.cast(Node.Map).?;
    try testing.expectEqual(map.start.?, 5);
    try testing.expectEqual(map.end.?, 14);
    try testing.expectEqual(map.values.items.len, 2);

    {
        const entry = map.values.items[0];

        const key = tree.tokens[entry.key];
        try testing.expectEqual(key.id, .Literal);
        try testing.expectEqualStrings("tbd-version", tree.source[key.start..key.end]);

        const value = entry.value.cast(Node.Value).?;
        const value_tok = tree.tokens[value.start.?];
        try testing.expectEqual(value_tok.id, .Literal);
        try testing.expectEqualStrings("4", tree.source[value_tok.start..value_tok.end]);
    }

    {
        const entry = map.values.items[1];

        const key = tree.tokens[entry.key];
        try testing.expectEqual(key.id, .Literal);
        try testing.expectEqualStrings("abc-version", tree.source[key.start..key.end]);

        const value = entry.value.cast(Node.Value).?;
        const value_tok = tree.tokens[value.start.?];
        try testing.expectEqual(value_tok.id, .Literal);
        try testing.expectEqualStrings("5", tree.source[value_tok.start..value_tok.end]);
    }
}
// Unquoted, single-quoted and double-quoted leaf values in one map. Only the
// first (unquoted, multi-word) value is inspected in detail.
test "leaf in quotes" {
    const source =
        \\key1: no quotes
        \\key2: 'single quoted'
        \\key3: "double quoted"
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);
    try testing.expect(doc.directive == null);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .map);

    const map = doc.value.?.cast(Node.Map).?;
    try testing.expectEqual(map.start.?, 0);
    try testing.expectEqual(map.end.?, tree.tokens.len - 2);
    try testing.expectEqual(map.values.items.len, 3);

    {
        const entry = map.values.items[0];

        const key = tree.tokens[entry.key];
        try testing.expectEqual(key.id, .Literal);
        try testing.expectEqualStrings("key1", tree.source[key.start..key.end]);

        // The value spans two literals separated by a space.
        const value = entry.value.cast(Node.Value).?;
        const start = tree.tokens[value.start.?];
        const end = tree.tokens[value.end.?];
        try testing.expectEqual(start.id, .Literal);
        try testing.expectEqual(end.id, .Literal);
        try testing.expectEqualStrings("no quotes", tree.source[start.start..end.end]);
    }
}
// A map whose first value is an indented nested map, followed by a sibling
// key back at the outer indentation.
test "nested maps" {
    const source =
        \\key1:
        \\ key1_1 : value1_1
        \\ key1_2 : value1_2
        \\key2 : value2
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);
    try testing.expect(doc.directive == null);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .map);

    const map = doc.value.?.cast(Node.Map).?;
    try testing.expectEqual(map.start.?, 0);
    try testing.expectEqual(map.end.?, tree.tokens.len - 2);
    try testing.expectEqual(map.values.items.len, 2);

    {
        const entry = map.values.items[0];

        const key = tree.tokens[entry.key];
        try testing.expectEqual(key.id, .Literal);
        try testing.expectEqualStrings("key1", tree.source[key.start..key.end]);

        const nested_map = entry.value.cast(Node.Map).?;
        try testing.expectEqual(nested_map.start.?, 4);
        try testing.expectEqual(nested_map.end.?, 16);
        try testing.expectEqual(nested_map.values.items.len, 2);

        {
            const nested_entry = nested_map.values.items[0];

            const nested_key = tree.tokens[nested_entry.key];
            try testing.expectEqual(nested_key.id, .Literal);
            try testing.expectEqualStrings(
                "key1_1",
                tree.source[nested_key.start..nested_key.end],
            );

            const nested_value = nested_entry.value.cast(Node.Value).?;
            const nested_value_tok = tree.tokens[nested_value.start.?];
            try testing.expectEqual(nested_value_tok.id, .Literal);
            try testing.expectEqualStrings(
                "value1_1",
                tree.source[nested_value_tok.start..nested_value_tok.end],
            );
        }

        {
            const nested_entry = nested_map.values.items[1];

            const nested_key = tree.tokens[nested_entry.key];
            try testing.expectEqual(nested_key.id, .Literal);
            try testing.expectEqualStrings(
                "key1_2",
                tree.source[nested_key.start..nested_key.end],
            );

            const nested_value = nested_entry.value.cast(Node.Value).?;
            const nested_value_tok = tree.tokens[nested_value.start.?];
            try testing.expectEqual(nested_value_tok.id, .Literal);
            try testing.expectEqualStrings(
                "value1_2",
                tree.source[nested_value_tok.start..nested_value_tok.end],
            );
        }
    }

    {
        const entry = map.values.items[1];

        const key = tree.tokens[entry.key];
        try testing.expectEqual(key.id, .Literal);
        try testing.expectEqualStrings("key2", tree.source[key.start..key.end]);

        const value = entry.value.cast(Node.Value).?;
        const value_tok = tree.tokens[value.start.?];
        try testing.expectEqual(value_tok.id, .Literal);
        try testing.expectEqualStrings(
            "value2",
            tree.source[value_tok.start..value_tok.end],
        );
    }
}
// A map with a single key whose value is an indented block list of scalars.
test "map of list of values" {
    const source =
        \\ints:
        \\ - 0
        \\ - 1
        \\ - 2
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .map);

    const map = doc.value.?.cast(Node.Map).?;
    try testing.expectEqual(map.start.?, 0);
    try testing.expectEqual(map.end.?, tree.tokens.len - 2);
    try testing.expectEqual(map.values.items.len, 1);

    const entry = map.values.items[0];
    const key = tree.tokens[entry.key];
    try testing.expectEqual(key.id, .Literal);
    try testing.expectEqualStrings("ints", tree.source[key.start..key.end]);

    const value = entry.value.cast(Node.List).?;
    try testing.expectEqual(value.start.?, 4);
    try testing.expectEqual(value.end.?, tree.tokens.len - 2);
    try testing.expectEqual(value.values.items.len, 3);

    {
        const elem = value.values.items[0].cast(Node.Value).?;
        const leaf = tree.tokens[elem.start.?];
        try testing.expectEqual(leaf.id, .Literal);
        try testing.expectEqualStrings("0", tree.source[leaf.start..leaf.end]);
    }

    {
        const elem = value.values.items[1].cast(Node.Value).?;
        const leaf = tree.tokens[elem.start.?];
        try testing.expectEqual(leaf.id, .Literal);
        try testing.expectEqualStrings("1", tree.source[leaf.start..leaf.end]);
    }

    {
        const elem = value.values.items[2].cast(Node.Value).?;
        const leaf = tree.tokens[elem.start.?];
        try testing.expectEqual(leaf.id, .Literal);
        try testing.expectEqualStrings("2", tree.source[leaf.start..leaf.end]);
    }
}
// A map with a single key whose value is a block list (not indented) in which
// every item is a one-entry map.
test "map of list of maps" {
    const source =
        \\key1:
        \\- key2 : value2
        \\- key3 : value3
        \\- key4 : value4
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .map);

    const map = doc.value.?.cast(Node.Map).?;
    try testing.expectEqual(map.start.?, 0);
    try testing.expectEqual(map.end.?, tree.tokens.len - 2);
    try testing.expectEqual(map.values.items.len, 1);

    const entry = map.values.items[0];
    const key = tree.tokens[entry.key];
    try testing.expectEqual(key.id, .Literal);
    try testing.expectEqualStrings("key1", tree.source[key.start..key.end]);

    const value = entry.value.cast(Node.List).?;
    try testing.expectEqual(value.start.?, 3);
    try testing.expectEqual(value.end.?, tree.tokens.len - 2);
    try testing.expectEqual(value.values.items.len, 3);

    {
        const elem = value.values.items[0].cast(Node.Map).?;
        const nested = elem.values.items[0];
        const nested_key = tree.tokens[nested.key];
        try testing.expectEqual(nested_key.id, .Literal);
        try testing.expectEqualStrings("key2", tree.source[nested_key.start..nested_key.end]);

        const nested_v = nested.value.cast(Node.Value).?;
        const leaf = tree.tokens[nested_v.start.?];
        try testing.expectEqual(leaf.id, .Literal);
        try testing.expectEqualStrings("value2", tree.source[leaf.start..leaf.end]);
    }

    {
        const elem = value.values.items[1].cast(Node.Map).?;
        const nested = elem.values.items[0];
        const nested_key = tree.tokens[nested.key];
        try testing.expectEqual(nested_key.id, .Literal);
        try testing.expectEqualStrings("key3", tree.source[nested_key.start..nested_key.end]);

        const nested_v = nested.value.cast(Node.Value).?;
        const leaf = tree.tokens[nested_v.start.?];
        try testing.expectEqual(leaf.id, .Literal);
        try testing.expectEqualStrings("value3", tree.source[leaf.start..leaf.end]);
    }

    {
        const elem = value.values.items[2].cast(Node.Map).?;
        const nested = elem.values.items[0];
        const nested_key = tree.tokens[nested.key];
        try testing.expectEqual(nested_key.id, .Literal);
        try testing.expectEqualStrings("key4", tree.source[nested_key.start..nested_key.end]);

        const nested_v = nested.value.cast(Node.Value).?;
        const leaf = tree.tokens[nested_v.start.?];
        try testing.expectEqual(leaf.id, .Literal);
        try testing.expectEqualStrings("value4", tree.source[leaf.start..leaf.end]);
    }
}
// A block list whose items are bracketed lists. Multi-word cells ("Mark
// McGwire") are checked over their whole token span, single-token cells via
// their first token.
test "list of lists" {
    const source =
        \\- [name , hr, avg ]
        \\- [Mark McGwire , 65, 0.278]
        \\- [Sammy Sosa , 63, 0.288]
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .list);

    const list = doc.value.?.cast(Node.List).?;
    try testing.expectEqual(list.start.?, 0);
    try testing.expectEqual(list.end.?, tree.tokens.len - 2);
    try testing.expectEqual(list.values.items.len, 3);

    {
        try testing.expectEqual(list.values.items[0].tag, .list);
        const nested = list.values.items[0].cast(Node.List).?;
        try testing.expectEqual(nested.values.items.len, 3);

        {
            try testing.expectEqual(nested.values.items[0].tag, .value);
            const value = nested.values.items[0].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("name", tree.source[leaf.start..leaf.end]);
        }

        {
            try testing.expectEqual(nested.values.items[1].tag, .value);
            const value = nested.values.items[1].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("hr", tree.source[leaf.start..leaf.end]);
        }

        {
            try testing.expectEqual(nested.values.items[2].tag, .value);
            const value = nested.values.items[2].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("avg", tree.source[leaf.start..leaf.end]);
        }
    }

    {
        try testing.expectEqual(list.values.items[1].tag, .list);
        const nested = list.values.items[1].cast(Node.List).?;
        try testing.expectEqual(nested.values.items.len, 3);

        {
            try testing.expectEqual(nested.values.items[0].tag, .value);
            const value = nested.values.items[0].cast(Node.Value).?;
            const start = tree.tokens[value.start.?];
            const end = tree.tokens[value.end.?];
            try testing.expectEqualStrings("Mark McGwire", tree.source[start.start..end.end]);
        }

        {
            try testing.expectEqual(nested.values.items[1].tag, .value);
            const value = nested.values.items[1].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("65", tree.source[leaf.start..leaf.end]);
        }

        {
            try testing.expectEqual(nested.values.items[2].tag, .value);
            const value = nested.values.items[2].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("0.278", tree.source[leaf.start..leaf.end]);
        }
    }

    {
        try testing.expectEqual(list.values.items[2].tag, .list);
        const nested = list.values.items[2].cast(Node.List).?;
        try testing.expectEqual(nested.values.items.len, 3);

        {
            try testing.expectEqual(nested.values.items[0].tag, .value);
            const value = nested.values.items[0].cast(Node.Value).?;
            const start = tree.tokens[value.start.?];
            const end = tree.tokens[value.end.?];
            try testing.expectEqualStrings("Sammy Sosa", tree.source[start.start..end.end]);
        }

        {
            try testing.expectEqual(nested.values.items[1].tag, .value);
            const value = nested.values.items[1].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("63", tree.source[leaf.start..leaf.end]);
        }

        {
            try testing.expectEqual(nested.values.items[2].tag, .value);
            const value = nested.values.items[2].cast(Node.Value).?;
            const leaf = tree.tokens[value.start.?];
            try testing.expectEqualStrings("0.288", tree.source[leaf.start..leaf.end]);
        }
    }
}
// A bracketed list as the top-level value of the document.
test "inline list" {
    const source =
        \\[name , hr, avg ]
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .list);

    const list = doc.value.?.cast(Node.List).?;
    try testing.expectEqual(list.start.?, 0);
    try testing.expectEqual(list.end.?, tree.tokens.len - 2);
    try testing.expectEqual(list.values.items.len, 3);

    {
        try testing.expectEqual(list.values.items[0].tag, .value);
        const value = list.values.items[0].cast(Node.Value).?;
        const leaf = tree.tokens[value.start.?];
        try testing.expectEqualStrings("name", tree.source[leaf.start..leaf.end]);
    }

    {
        try testing.expectEqual(list.values.items[1].tag, .value);
        const value = list.values.items[1].cast(Node.Value).?;
        const leaf = tree.tokens[value.start.?];
        try testing.expectEqualStrings("hr", tree.source[leaf.start..leaf.end]);
    }

    {
        try testing.expectEqual(list.values.items[2].tag, .value);
        const value = list.values.items[2].cast(Node.Value).?;
        const leaf = tree.tokens[value.start.?];
        try testing.expectEqualStrings("avg", tree.source[leaf.start..leaf.end]);
    }
}
// A bracketed list used as a mapping value and spread over several lines.
test "inline list as mapping value" {
    const source =
        \\key : [
        \\ name ,
        \\ hr, avg ]
    ;

    var tree = Tree.init(testing.allocator);
    defer tree.deinit();
    try tree.parse(source);

    try testing.expectEqual(tree.docs.items.len, 1);

    const doc = tree.docs.items[0].cast(Node.Doc).?;
    try testing.expectEqual(doc.start.?, 0);
    try testing.expectEqual(doc.end.?, tree.tokens.len - 2);

    try testing.expect(doc.value != null);
    try testing.expectEqual(doc.value.?.tag, .map);

    const map = doc.value.?.cast(Node.Map).?;
    try testing.expectEqual(map.start.?, 0);
    try testing.expectEqual(map.end.?, tree.tokens.len - 2);
    try testing.expectEqual(map.values.items.len, 1);

    const entry = map.values.items[0];
    const key = tree.tokens[entry.key];
    try testing.expectEqual(key.id, .Literal);
    try testing.expectEqualStrings("key", tree.source[key.start..key.end]);

    const list = entry.value.cast(Node.List).?;
    try testing.expectEqual(list.start.?, 4);
    try testing.expectEqual(list.end.?, tree.tokens.len - 2);
    try testing.expectEqual(list.values.items.len, 3);

    {
        try testing.expectEqual(list.values.items[0].tag, .value);
        const value = list.values.items[0].cast(Node.Value).?;
        const leaf = tree.tokens[value.start.?];
        try testing.expectEqualStrings("name", tree.source[leaf.start..leaf.end]);
    }

    {
        try testing.expectEqual(list.values.items[1].tag, .value);
        const value = list.values.items[1].cast(Node.Value).?;
        const leaf = tree.tokens[value.start.?];
        try testing.expectEqualStrings("hr", tree.source[leaf.start..leaf.end]);
    }

    {
        try testing.expectEqual(list.values.items[2].tag, .value);
        const value = list.values.items[2].cast(Node.Value).?;
        const leaf = tree.tokens[value.start.?];
        try testing.expectEqualStrings("avg", tree.source[leaf.start..leaf.end]);
    }
}

651
src/link/tapi/yaml.zig Normal file
View File

@@ -0,0 +1,651 @@
const std = @import("std");
const assert = std.debug.assert;
const math = std.math;
const mem = std.mem;
const testing = std.testing;
const log = std.log.scoped(.tapi);
const Allocator = mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
pub const Tokenizer = @import("Tokenizer.zig");
pub const parse = @import("parse.zig");
const Node = parse.Node;
const Tree = parse.Tree;
const ParseError = parse.ParseError;
/// Errors that can surface while converting a parse tree into `Value`s
/// (see `Value.fromNode`): an unrecognised node kind, allocation failure,
/// anything in the parser's `ParseError` set, and the integer-parsing
/// errors propagated when a scalar is forced to a numeric type.
pub const YamlError = error{
UnexpectedNodeType,
OutOfMemory,
} || ParseError || std.fmt.ParseIntError;
/// Tag type of `Value`; also used as the scalar type hint that
/// `Value.fromNode` passes down when converting list elements.
pub const ValueType = enum {
// Document with no content.
empty,
// Scalar that parsed as a base-10 integer.
int,
// Scalar that parsed as a floating point number.
float,
// Any other scalar, kept as text.
string,
// Sequence of values.
list,
// String-keyed mapping of values.
map,
};
/// Payload of `Value.list`: a slice of values.
pub const List = []Value;
/// Payload of `Value.map`: a string-keyed array hash map of values.
pub const Map = std.StringArrayHashMap(Value);
/// A dynamically typed YAML value, built from a parse tree by `fromNode`.
pub const Value = union(ValueType) {
    /// Document with no content.
    empty,
    int: i64,
    float: f64,
    string: []const u8,
    list: List,
    map: Map,

    /// Returns the integer payload, or `error.TypeMismatch` if this is not an `.int`.
    pub fn asInt(self: Value) !i64 {
        if (self != .int) return error.TypeMismatch;
        return self.int;
    }

    /// Returns the float payload, or `error.TypeMismatch` if this is not a `.float`.
    pub fn asFloat(self: Value) !f64 {
        if (self != .float) return error.TypeMismatch;
        return self.float;
    }

    /// Returns the string payload, or `error.TypeMismatch` if this is not a `.string`.
    pub fn asString(self: Value) ![]const u8 {
        if (self != .string) return error.TypeMismatch;
        return self.string;
    }

    /// Returns the list payload, or `error.TypeMismatch` if this is not a `.list`.
    pub fn asList(self: Value) !List {
        if (self != .list) return error.TypeMismatch;
        return self.list;
    }

    /// Returns the map payload, or `error.TypeMismatch` if this is not a `.map`.
    pub fn asMap(self: Value) !Map {
        if (self != .map) return error.TypeMismatch;
        return self.map;
    }

    /// Layout state threaded through recursive `stringify` calls.
    const StringifyArgs = struct {
        /// Number of spaces written before a block-style line.
        indentation: usize = 0,
        /// When set, a map does not indent its first key (used right after a "- " list marker).
        should_inline_first_key: bool = false,
    };

    pub const StringifyError = std.os.WriteError;

    /// Writes this value to `writer` in a YAML-like layout.
    ///
    /// Scalars are printed bare. A list whose first element is a list or map
    /// is written one element per line with a "- " marker; any other list is
    /// written inline as "[ a, b ]". A map is written one "key: value" pair
    /// per line. Empty values, empty lists and empty maps produce no output.
    ///
    /// Errors from `writer` must be coercible to `StringifyError`.
    pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) StringifyError!void {
        switch (self) {
            .empty => return,
            .int => |int| return writer.print("{}", .{int}),
            .float => |float| return writer.print("{d}", .{float}),
            .string => |string| return writer.print("{s}", .{string}),
            .list => |list| {
                const len = list.len;
                if (len == 0) return;

                // The first element alone decides between block and inline layout.
                const first = list[0];
                if (first.is_compound()) {
                    for (list) |elem, i| {
                        try writer.writeByteNTimes(' ', args.indentation);
                        try writer.writeAll("- ");
                        try elem.stringify(writer, .{
                            .indentation = args.indentation + 2,
                            .should_inline_first_key = true,
                        });
                        if (i < len - 1) {
                            try writer.writeByte('\n');
                        }
                    }
                    return;
                }

                try writer.writeAll("[ ");
                for (list) |elem, i| {
                    try elem.stringify(writer, args);
                    if (i < len - 1) {
                        try writer.writeAll(", ");
                    }
                }
                try writer.writeAll(" ]");
            },
            .map => |map| {
                const keys = map.keys();
                const len = keys.len;
                if (len == 0) return;

                for (keys) |key, i| {
                    // Directly after a "- " marker the first key continues the marker's line.
                    if (!args.should_inline_first_key or i != 0) {
                        try writer.writeByteNTimes(' ', args.indentation);
                    }
                    try writer.print("{s}: ", .{key});

                    const value = map.get(key) orelse unreachable;
                    // Scalars and non-empty lists of scalars stay on the key's line;
                    // everything else starts on the next line, indented further.
                    const should_inline = blk: {
                        if (!value.is_compound()) break :blk true;
                        if (value == .list and value.list.len > 0 and !value.list[0].is_compound()) break :blk true;
                        break :blk false;
                    };

                    if (should_inline) {
                        try value.stringify(writer, args);
                    } else {
                        try writer.writeByte('\n');
                        try value.stringify(writer, .{
                            .indentation = args.indentation + 4,
                        });
                    }

                    if (i < len - 1) {
                        try writer.writeByte('\n');
                    }
                }
            },
        }
    }

    /// Returns true for container values (`.list` and `.map`).
    fn is_compound(self: Value) bool {
        return switch (self) {
            .list, .map => true,
            else => false,
        };
    }

    /// Converts `node` (and everything below it) into a `Value`.
    ///
    /// Every key, string and container in the result is allocated from `arena`.
    /// `type_hint` is only consulted for scalar nodes; when set, the scalar is
    /// parsed as exactly that type and a parse failure is returned as an error.
    /// Without a hint a scalar becomes an int if it parses as a base-10 integer,
    /// else a float if it parses as one, else a string.
    ///
    /// For lists, the first element determines the hint applied to all
    /// elements when that first element is a scalar, so a list such as
    /// `[ 1, abc ]` fails with the integer parse error rather than mixing types.
    fn fromNode(arena: *Allocator, tree: *const Tree, node: *const Node, type_hint: ?ValueType) YamlError!Value {
        if (node.cast(Node.Doc)) |doc| {
            const inner = doc.value orelse {
                // empty doc
                return Value{ .empty = {} };
            };
            return Value.fromNode(arena, tree, inner, null);
        } else if (node.cast(Node.Map)) |map| {
            var out_map = std.StringArrayHashMap(Value).init(arena);
            try out_map.ensureUnusedCapacity(map.values.items.len);

            for (map.values.items) |entry| {
                const key_tok = tree.tokens[entry.key];
                const key = try arena.dupe(u8, tree.source[key_tok.start..key_tok.end]);
                const value = try Value.fromNode(arena, tree, entry.value, null);
                // The input may repeat a key. The NoClobber variant asserts the
                // key is absent, which would turn such input into a crash, so
                // overwrite instead: the last occurrence wins.
                out_map.putAssumeCapacity(key, value);
            }

            return Value{ .map = out_map };
        } else if (node.cast(Node.List)) |list| {
            var out_list = std.ArrayList(Value).init(arena);
            try out_list.ensureUnusedCapacity(list.values.items.len);

            if (list.values.items.len > 0) {
                // Probe the first element: int, then float, then string.
                // A non-scalar first element yields no hint at all.
                const hint = if (list.values.items[0].cast(Node.Value)) |value| hint: {
                    const start = tree.tokens[value.start.?];
                    const end = tree.tokens[value.end.?];
                    const raw = tree.source[start.start..end.end];
                    _ = std.fmt.parseInt(i64, raw, 10) catch {
                        _ = std.fmt.parseFloat(f64, raw) catch {
                            break :hint ValueType.string;
                        };
                        break :hint ValueType.float;
                    };
                    break :hint ValueType.int;
                } else null;

                for (list.values.items) |elem| {
                    const value = try Value.fromNode(arena, tree, elem, hint);
                    out_list.appendAssumeCapacity(value);
                }
            }

            return Value{ .list = out_list.toOwnedSlice() };
        } else if (node.cast(Node.Value)) |value| {
            const start = tree.tokens[value.start.?];
            const end = tree.tokens[value.end.?];
            const raw = tree.source[start.start..end.end];

            if (type_hint) |hint| {
                return switch (hint) {
                    .int => Value{ .int = try std.fmt.parseInt(i64, raw, 10) },
                    .float => Value{ .float = try std.fmt.parseFloat(f64, raw) },
                    .string => Value{ .string = try arena.dupe(u8, raw) },
                    // The list branch above only ever produces int/float/string hints.
                    else => unreachable,
                };
            }

            try_int: {
                // TODO infer base for int
                const int = std.fmt.parseInt(i64, raw, 10) catch break :try_int;
                return Value{ .int = int };
            }
            try_float: {
                const float = std.fmt.parseFloat(f64, raw) catch break :try_float;
                return Value{ .float = float };
            }
            return Value{ .string = try arena.dupe(u8, raw) };
        } else {
            log.err("Unexpected node type: {}", .{node.tag});
            return error.UnexpectedNodeType;
        }
    }
};
/// A loaded YAML source: the parse tree plus one `Value` per document.
/// Everything is allocated from `arena` and released together by `deinit`.
pub const Yaml = struct {
    arena: ArenaAllocator,
    tree: ?Tree = null,
    docs: std.ArrayList(Value),

    /// Frees the arena and with it the tree, the documents and every result
    /// previously returned by `parse`.
    pub fn deinit(self: *Yaml) void {
        self.arena.deinit();
    }

    /// Writes every document to `writer`, back to back.
    pub fn stringify(self: Yaml, writer: anytype) !void {
        // TODO: emit the "--- !directive" / "..." document markers once
        // documents carry their directive.
        for (self.docs.items) |doc| {
            try doc.stringify(writer, .{});
        }
    }

    /// Parses `source` and converts each document into a `Value`.
    ///
    /// On success the caller owns the result and must call `deinit`. On
    /// failure everything allocated so far is released before returning.
    pub fn load(allocator: *Allocator, source: []const u8) !Yaml {
        var arena = ArenaAllocator.init(allocator);
        // Without this, a parse or conversion error would leak the arena.
        errdefer arena.deinit();

        // NOTE(review): `tree` and `docs` are initialised with a pointer to
        // this function's local `arena.allocator`, while the arena itself is
        // copied into the returned struct. It looks like they must not
        // allocate or free through their stored allocator after `load`
        // returns - confirm.
        var tree = Tree.init(&arena.allocator);
        try tree.parse(source);

        var docs = std.ArrayList(Value).init(&arena.allocator);
        try docs.ensureUnusedCapacity(tree.docs.items.len);

        for (tree.docs.items) |node| {
            const value = try Value.fromNode(&arena.allocator, &tree, node, null);
            docs.appendAssumeCapacity(value);
        }

        return Yaml{
            .arena = arena,
            .tree = tree,
            .docs = docs,
        };
    }

    pub const Error = error{
        Unimplemented,
        TypeMismatch,
        StructFieldMissing,
        ArraySizeMismatch,
        UntaggedUnion,
        UnionTagMissing,
        Overflow,
        OutOfMemory,
    };

    /// Decodes the loaded documents into `T`.
    ///
    /// - No documents: only `void` is accepted.
    /// - One document: `T` is decoded from that document.
    /// - Several documents: `T` must be an array whose length equals the
    ///   number of documents (`error.ArraySizeMismatch` otherwise) or a
    ///   slice; each element is decoded from one document. Unions are
    ///   `error.Unimplemented`, anything else is `error.TypeMismatch`.
    ///
    /// Slices in the result are allocated from the arena and stay valid
    /// until `deinit`.
    pub fn parse(self: *Yaml, comptime T: type) Error!T {
        if (self.docs.items.len == 0) {
            if (@typeInfo(T) == .Void) return {};
            return error.TypeMismatch;
        }

        if (self.docs.items.len == 1) {
            return self.parseValue(T, self.docs.items[0]);
        }

        switch (@typeInfo(T)) {
            .Array => |info| {
                // More documents than slots would index past the array;
                // fewer would leave trailing elements undefined.
                if (info.len != self.docs.items.len) return error.ArraySizeMismatch;

                var parsed: T = undefined;
                for (self.docs.items) |doc, i| {
                    parsed[i] = try self.parseValue(info.child, doc);
                }
                return parsed;
            },
            .Pointer => |info| {
                switch (info.size) {
                    .Slice => {
                        var parsed = try self.arena.allocator.alloc(info.child, self.docs.items.len);
                        for (self.docs.items) |doc, i| {
                            parsed[i] = try self.parseValue(info.child, doc);
                        }
                        return parsed;
                    },
                    else => return error.TypeMismatch,
                }
            },
            .Union => return error.Unimplemented,
            else => return error.TypeMismatch,
        }
    }

    /// Decodes a single `value` into `T`, dispatching on the kind of `T`.
    fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T {
        return switch (@typeInfo(T)) {
            .Int => math.cast(T, try value.asInt()),
            .Float => math.lossyCast(T, try value.asFloat()),
            .Struct => self.parseStruct(T, try value.asMap()),
            .Union => self.parseUnion(T, value),
            .Array => self.parseArray(T, try value.asList()),
            .Pointer => {
                // A slice target is fed either from a list or from a string.
                if (value.asList()) |list| {
                    return self.parsePointer(T, .{ .list = list });
                } else |_| {
                    return self.parsePointer(T, .{ .string = try value.asString() });
                }
            },
            .Void => error.TypeMismatch,
            // Reached for optional element types such as `[]?T`; a value that
            // is present decodes as its child type.
            .Optional => self.parseOptional(T, value),
            else => error.Unimplemented,
        };
    }

    /// Tries each field of the tagged union `T` in declaration order and
    /// returns the first one `value` decodes into. Decode errors other than
    /// `error.TypeMismatch` are returned immediately.
    fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T {
        const union_info = @typeInfo(T).Union;

        if (union_info.tag_type) |_| {
            inline for (union_info.fields) |field| {
                if (self.parseValue(field.field_type, value)) |u_value| {
                    return @unionInit(T, field.name, u_value);
                } else |err| {
                    if (@as(@TypeOf(err) || error{TypeMismatch}, err) != error.TypeMismatch) return err;
                }
            }
        } else return error.UntaggedUnion;

        return error.UnionTagMissing;
    }

    /// Decodes into the optional type `T`: `null` when `value` is absent,
    /// otherwise the decoded child type.
    fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T {
        const unwrapped = value orelse return null;
        const opt_info = @typeInfo(T).Optional;
        return @as(T, try self.parseValue(opt_info.child, unwrapped));
    }

    /// Decodes `map` into struct `T` field by field. A field is looked up
    /// under its own name first and then with every "_" replaced by "-".
    /// Missing optional fields become `null`; any other missing field is
    /// `error.StructFieldMissing`.
    fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T {
        const struct_info = @typeInfo(T).Struct;
        var parsed: T = undefined;

        inline for (struct_info.fields) |field| {
            const value: ?Value = map.get(field.name) orelse blk: {
                const field_name = try mem.replaceOwned(u8, &self.arena.allocator, field.name, "_", "-");
                break :blk map.get(field_name);
            };

            if (@typeInfo(field.field_type) == .Optional) {
                @field(parsed, field.name) = try self.parseOptional(field.field_type, value);
                continue;
            }

            const unwrapped = value orelse {
                log.err("missing struct field: {s}: {s}", .{ field.name, @typeName(field.field_type) });
                return error.StructFieldMissing;
            };
            @field(parsed, field.name) = try self.parseValue(field.field_type, unwrapped);
        }

        return parsed;
    }

    /// Decodes `value` into the pointer type `T`. Only slices are supported:
    /// a slice of 8-bit integers takes the string payload as is, any other
    /// slice is allocated from the arena and filled from the list payload.
    fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T {
        const ptr_info = @typeInfo(T).Pointer;
        const arena = &self.arena.allocator;

        switch (ptr_info.size) {
            .Slice => {
                const child_info = @typeInfo(ptr_info.child);
                if (child_info == .Int and child_info.Int.bits == 8) {
                    return value.asString();
                }

                // `value` may hold a string here (scalar given where a list
                // was expected); report that instead of reading `.list`
                // from the wrong union tag.
                const list = try value.asList();
                var parsed = try arena.alloc(ptr_info.child, list.len);
                for (list) |elem, i| {
                    parsed[i] = try self.parseValue(ptr_info.child, elem);
                }
                return parsed;
            },
            else => return error.Unimplemented,
        }
    }

    /// Decodes `list` into the fixed-size array `T`; the lengths must match
    /// exactly or `error.ArraySizeMismatch` is returned.
    fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T {
        const array_info = @typeInfo(T).Array;
        if (array_info.len != list.len) return error.ArraySizeMismatch;

        var parsed: T = undefined;
        for (list) |elem, i| {
            parsed[i] = try self.parseValue(array_info.child, elem);
        }

        return parsed;
    }
};
// Reference every declaration in this file so that their tests are
// analysed and run as well.
test {
testing.refAllDecls(@This());
}
test "simple list" {
    const source =
        \\- a
        \\- b
        \\- c
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 1), yaml.docs.items.len);

    const list = yaml.docs.items[0].list;
    try testing.expectEqual(@as(usize, 3), list.len);

    try testing.expectEqualStrings("a", list[0].string);
    try testing.expectEqualStrings("b", list[1].string);
    try testing.expectEqualStrings("c", list[2].string);
}
test "simple list typed as array of strings" {
    const source =
        \\- a
        \\- b
        \\- c
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 1), yaml.docs.items.len);

    const arr = try yaml.parse([3][]const u8);
    try testing.expectEqual(@as(usize, 3), arr.len);
    try testing.expectEqualStrings("a", arr[0]);
    try testing.expectEqualStrings("b", arr[1]);
    try testing.expectEqualStrings("c", arr[2]);
}
test "simple list typed as array of ints" {
    const source =
        \\- 0
        \\- 1
        \\- 2
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 1), yaml.docs.items.len);

    const arr = try yaml.parse([3]u8);
    try testing.expectEqual(@as(usize, 3), arr.len);
    try testing.expectEqual(@as(u8, 0), arr[0]);
    try testing.expectEqual(@as(u8, 1), arr[1]);
    try testing.expectEqual(@as(u8, 2), arr[2]);
}
test "list of mixed sign integer" {
    const source =
        \\- 0
        \\- -1
        \\- 2
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 1), yaml.docs.items.len);

    const arr = try yaml.parse([3]i8);
    try testing.expectEqual(@as(usize, 3), arr.len);
    try testing.expectEqual(@as(i8, 0), arr[0]);
    try testing.expectEqual(@as(i8, -1), arr[1]);
    try testing.expectEqual(@as(i8, 2), arr[2]);
}
test "simple map untyped" {
    const source =
        \\a: 0
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 1), yaml.docs.items.len);

    const map = yaml.docs.items[0].map;
    try testing.expect(map.contains("a"));
    try testing.expectEqual(@as(i64, 0), map.get("a").?.int);
}
test "simple map typed" {
    const source =
        \\a: 0
        \\b: hello there
        \\c: 'wait, what?'
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    const simple = try yaml.parse(struct { a: usize, b: []const u8, c: []const u8 });
    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 0), simple.a);
    try testing.expectEqualStrings("hello there", simple.b);
    try testing.expectEqualStrings("wait, what?", simple.c);
}
test "typed nested structs" {
    const source =
        \\a:
        \\ b: hello there
        \\ c: 'wait, what?'
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    const simple = try yaml.parse(struct {
        a: struct {
            b: []const u8,
            c: []const u8,
        },
    });
    // expectEqualStrings prints both strings on mismatch, unlike expect(mem.eql(...)).
    try testing.expectEqualStrings("hello there", simple.a.b);
    try testing.expectEqualStrings("wait, what?", simple.a.c);
}
test "multidoc typed as a slice of structs" {
    const source =
        \\---
        \\a: 0
        \\---
        \\a: 1
        \\...
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Two documents decode into a fixed-size array of matching length...
    {
        const result = try yaml.parse([2]struct { a: usize });
        try testing.expectEqual(@as(usize, 2), result.len);
        try testing.expectEqual(@as(usize, 0), result[0].a);
        try testing.expectEqual(@as(usize, 1), result[1].a);
    }

    // ...and equally into a slice.
    {
        const result = try yaml.parse([]struct { a: usize });
        try testing.expectEqual(@as(usize, 2), result.len);
        try testing.expectEqual(@as(usize, 0), result[0].a);
        try testing.expectEqual(@as(usize, 1), result[1].a);
    }
}
test "multidoc typed as a struct is an error" {
    const source =
        \\---
        \\a: 0
        \\---
        \\b: 1
        \\...
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Several documents never fit a single struct, whichever fields it declares.
    const OnlyA = struct { a: usize };
    const OnlyB = struct { b: usize };
    const Both = struct { a: usize, b: usize };

    try testing.expectError(error.TypeMismatch, yaml.parse(OnlyA));
    try testing.expectError(error.TypeMismatch, yaml.parse(OnlyB));
    try testing.expectError(error.TypeMismatch, yaml.parse(Both));
}
test "multidoc typed as a slice of structs with optionals" {
    const source =
        \\---
        \\a: 0
        \\c: 1.0
        \\---
        \\a: 1
        \\b: different field
        \\...
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    const result = try yaml.parse([]struct { a: usize, b: ?[]const u8, c: ?f16 });
    // Expected value goes first so failure messages read correctly.
    try testing.expectEqual(@as(usize, 2), result.len);

    // First document: `b` is absent, `c` is present.
    try testing.expectEqual(@as(usize, 0), result[0].a);
    try testing.expect(result[0].b == null);
    try testing.expect(result[0].c != null);
    try testing.expectEqual(@as(f16, 1.0), result[0].c.?);

    // Second document: `b` is present, `c` is absent.
    try testing.expectEqual(@as(usize, 1), result[1].a);
    try testing.expect(result[1].b != null);
    try testing.expectEqualStrings("different field", result[1].b.?);
    try testing.expect(result[1].c == null);
}
test "empty yaml can be represented as void" {
    // An empty source yields no documents, which only `void` can represent.
    var yaml = try Yaml.load(testing.allocator, "");
    defer yaml.deinit();

    const parsed = try yaml.parse(void);
    try testing.expect(void == @TypeOf(parsed));
}
test "nonempty yaml cannot be represented as void" {
    const source =
        \\a: b
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // A document with content must not decode into `void`.
    const outcome = yaml.parse(void);
    try testing.expectError(error.TypeMismatch, outcome);
}
test "typed array size mismatch" {
    const source =
        \\- 0
        \\- 0
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // The list holds two elements, so both a shorter and a longer array are rejected.
    const too_short = yaml.parse([1]usize);
    try testing.expectError(error.ArraySizeMismatch, too_short);

    const too_long = yaml.parse([5]usize);
    try testing.expectError(error.ArraySizeMismatch, too_long);
}