zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 8f2f12f94080ede762df4a5018249160eedc7f09 (tree)
parent 077994abb66be93f38142ee55243fee8cdc3a5da
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Wed, 27 Sep 2023 18:20:00 -0700

Compilation: introduce saveState()

This commit introduces `--debug-incremental` so that we can start
playing around with incremental compilation while it is still being
developed, and before it is enabled by default.

Currently it saves InternPool data, and has TODO comments for the
remaining things. Deserialization is not implemented yet; it will
require some post-processing, such as building a string map out of the
null-terminated string table bytes.

The saved compiler state is stored in a file called <root-name>.zcs
alongside <root-name>.o, <root-name>.pdb, <root-name>.exe, etc. In case
of using the zig build system, these files are all in a zig-cache
directory.

For the self-hosted compiler, here is one data point on the performance
penalty of saving this data:

```
Benchmark 1 (3 runs): zig build-exe ...
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          51.1s  ±  354ms    50.7s  … 51.4s           0 ( 0%)        0%
  peak_rss           3.91GB ±  354KB    3.91GB … 3.91GB          0 ( 0%)        0%
  cpu_cycles          212G  ± 3.17G      210G  …  216G           0 ( 0%)        0%
  instructions        274G  ± 57.5M      274G  …  275G           0 ( 0%)        0%
  cache_references   13.1G  ± 97.6M     13.0G  … 13.2G           0 ( 0%)        0%
  cache_misses       1.12G  ± 24.6M     1.10G  … 1.15G           0 ( 0%)        0%
  branch_misses      1.53G  ± 1.46M     1.53G  … 1.53G           0 ( 0%)        0%
Benchmark 2 (3 runs): zig build-exe ... --debug-incremental
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          51.8s  ±  271ms    51.5s  … 52.1s           0 ( 0%)          +  1.3% ±  1.4%
  peak_rss           3.91GB ±  317KB    3.91GB … 3.91GB          0 ( 0%)          -  0.0% ±  0.0%
  cpu_cycles          213G  ±  398M      212G  …  213G           0 ( 0%)          +  0.3% ±  2.4%
  instructions        275G  ± 79.1M      275G  …  275G           0 ( 0%)          +  0.1% ±  0.1%
  cache_references   13.1G  ± 26.9M     13.0G  … 13.1G           0 ( 0%)          -  0.1% ±  1.2%
  cache_misses       1.12G  ± 5.66M     1.11G  … 1.12G           0 ( 0%)          -  0.6% ±  3.6%
  branch_misses      1.53G  ± 1.75M     1.53G  … 1.54G           0 ( 0%)          +  0.2% ±  0.2%
```

At the end of each compilation with `--debug-incremental`, we end up
with a 43 MiB `zig.zcs` file that contains all of the InternPool data
serialized.

Of course, it will necessarily be more expensive to save the state than
to not save the state. However, this data point shows just how cheap the
save state operation is, with all of the groundwork laid for using a
serialization-friendly in-memory data layout.

Diffstat:
M src/Compilation.zig | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/main.zig | 19 ++++++++++++++++++-
2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/src/Compilation.zig b/src/Compilation.zig @@ -2702,6 +2702,72 @@ pub fn makeBinFileWritable(self: *Compilation) !void { return self.bin_file.makeWritable(); } +const Header = extern struct { + intern_pool: extern struct { + items_len: u32, + extra_len: u32, + limbs_len: u32, + string_bytes_len: u32, + }, +}; + +/// Note that all state that is included in the cache hash namespace is *not* +/// saved, such as the target and most CLI flags. A cache hit will only occur +/// when subsequent compiler invocations use the same set of flags. +pub fn saveState(comp: *Compilation) !void { + var bufs_list: [6]std.os.iovec_const = undefined; + var bufs_len: usize = 0; + + const emit = comp.bin_file.options.emit orelse return; + + if (comp.bin_file.options.module) |mod| { + const ip = &mod.intern_pool; + const header: Header = .{ + .intern_pool = .{ + .items_len = @intCast(ip.items.len), + .extra_len = @intCast(ip.extra.items.len), + .limbs_len = @intCast(ip.limbs.items.len), + .string_bytes_len = @intCast(ip.string_bytes.items.len), + }, + }; + addBuf(&bufs_list, &bufs_len, mem.asBytes(&header)); + addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.limbs.items)); + addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.extra.items)); + addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.items.items(.data))); + addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.items.items(.tag))); + addBuf(&bufs_list, &bufs_len, ip.string_bytes.items); + + // TODO: compilation errors + // TODO: files + // TODO: namespaces + // TODO: decls + // TODO: linker state + } + var basename_buf: [255]u8 = undefined; + const basename = std.fmt.bufPrint(&basename_buf, "{s}.zcs", .{ + comp.bin_file.options.root_name, + }) catch o: { + basename_buf[basename_buf.len - 4 ..].* = ".zcs".*; + break :o &basename_buf; + }; + + // Using an atomic file prevents a crash or power failure from corrupting + // the previous incremental compilation state. 
+ var af = try emit.directory.handle.atomicFile(basename, .{}); + defer af.deinit(); + try af.file.pwritevAll(bufs_list[0..bufs_len], 0); + try af.finish(); +} + +fn addBuf(bufs_list: []std.os.iovec_const, bufs_len: *usize, buf: []const u8) void { + const i = bufs_len.*; + bufs_len.* = i + 1; + bufs_list[i] = .{ + .iov_base = buf.ptr, + .iov_len = buf.len, + }; +} + /// This function is temporally single-threaded. pub fn totalErrorCount(self: *Compilation) u32 { var total: usize = self.failed_c_objects.count() + diff --git a/src/main.zig b/src/main.zig @@ -594,6 +594,7 @@ const usage_build_generic = \\ --debug-log [scope] Enable printing debug/info log messages for scope \\ --debug-compile-errors Crash with helpful diagnostics at the first compile error \\ --debug-link-snapshot Enable dumping of the linker's state in JSON format + \\ --debug-incremental Enable experimental feature: incremental compilation \\ ; @@ -904,6 +905,7 @@ fn buildOutputType( var minor_subsystem_version: ?u32 = null; var wasi_exec_model: ?std.builtin.WasiExecModel = null; var enable_link_snapshots: bool = false; + var debug_incremental: bool = false; var install_name: ?[]const u8 = null; var hash_style: link.HashStyle = .both; var entitlements: ?[]const u8 = null; @@ -1272,6 +1274,8 @@ fn buildOutputType( } else { enable_link_snapshots = true; } + } else if (mem.eql(u8, arg, "--debug-incremental")) { + debug_incremental = true; } else if (mem.eql(u8, arg, "--entitlements")) { entitlements = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "-fcompiler-rt")) { @@ -3591,11 +3595,16 @@ fn buildOutputType( } updateModule(comp) catch |err| switch (err) { - error.SemanticAnalyzeFail => if (listen == .none) process.exit(1), + error.SemanticAnalyzeFail => { + assert(listen == .none); + saveState(comp, debug_incremental); + process.exit(1); + }, else => |e| return e, }; if (build_options.only_c) return cleanExit(); try comp.makeBinFileExecutable(); + saveState(comp, debug_incremental); if 
(test_exec_args.items.len == 0 and object_format == .c) default_exec_args: { // Default to using `zig run` to execute the produced .c code from `zig test`. @@ -3658,6 +3667,14 @@ fn buildOutputType( return cleanExit(); } +fn saveState(comp: *Compilation, debug_incremental: bool) void { + if (debug_incremental) { + comp.saveState() catch |err| { + warn("unable to save incremental compilation state: {s}", .{@errorName(err)}); + }; + } +} + fn serve( comp: *Compilation, in: fs.File,