Merge remote-tracking branch 'origin/master' into llvm12 - zig - fork of https://codeberg.org/ziglang/zig

commit b85ef2300fa72f5f4c73b8eb9e14f0218ada592d (tree)
parent 75080e351af8be45722bca50c1d5fcd503304d77
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Sun, 28 Mar 2021 21:42:56 -0700

Merge remote-tracking branch 'origin/master' into llvm12

Diffstat:
M CMakeLists.txt  | 29 ++++++++++++++++++++++-------
A ci/azure/macos_arm64_script  | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M ci/azure/pipelines.yml  | 15 ++++++++++++++-
M ci/azure/windows_msvc_install  | 2 +-
M doc/docgen.zig  | 23 ++++++++++++++++++-----
M doc/langref.html.in  | 6 +++---
M lib/std/array_hash_map.zig  | 26 +++++++++++++-------------
M lib/std/base64.zig  | 646 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M lib/std/bit_set.zig  | 28 +++++++++++++++++++++-------
M lib/std/build.zig  | 23 +++++++----------------
M lib/std/c.zig  | 6 +++---
M lib/std/c/builtins.zig  | 8 +++++++-
M lib/std/crypto.zig  | 20 +++++++++++++++++---
M lib/std/crypto/25519/curve25519.zig  | 13 +++++++------
M lib/std/crypto/25519/ed25519.zig  | 23 ++++++++++++-----------
M lib/std/crypto/25519/edwards25519.zig  | 21 +++++++++++----------
M lib/std/crypto/25519/field.zig  | 5 +++--
M lib/std/crypto/25519/ristretto255.zig  | 9 +++++----
M lib/std/crypto/25519/scalar.zig  | 3 ++-
M lib/std/crypto/25519/x25519.zig  | 11 ++++++-----
M lib/std/crypto/aegis.zig  | 5 +++--
M lib/std/crypto/aes_gcm.zig  | 3 ++-
M lib/std/crypto/aes_ocb.zig  | 3 ++-
M lib/std/crypto/bcrypt.zig  | 22 ++++++++--------------
M lib/std/crypto/benchmark.zig  | 1 +
M lib/std/crypto/chacha20.zig  | 1170 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
A lib/std/crypto/error.zig  | 34 ++++++++++++++++++++++++++++++++++
M lib/std/crypto/gimli.zig  | 5 +++--
M lib/std/crypto/isap.zig  | 3 ++-
M lib/std/crypto/pbkdf2.zig  | 150 +++++++++++++++++++++++++++++++++++++------------------------------------------
M lib/std/crypto/salsa20.zig  | 15 ++++++++-------
M lib/std/debug.zig  | 18 ------------------
A lib/std/enums.zig  | 1281 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/std/fmt.zig  | 9 +++++++--
M lib/std/fs.zig  | 10 +++++-----
M lib/std/fs/path.zig  | 12 +++++++++++-
M lib/std/hash/auto_hash.zig  | 2 +-
M lib/std/macho.zig  | 40 ++++++++++++++++++++++++++++++++++++++++
M lib/std/mem.zig  | 19 +++++++++++++++++++
M lib/std/meta.zig  | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M lib/std/meta/trait.zig  | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/std/os.zig  | 5 +++--
M lib/std/os/linux/io_uring.zig  | 2 +-
M lib/std/os/linux/mips.zig  | 37 +++++++++++++++++++++++++++++++++++++
M lib/std/os/uefi/tables/boot_services.zig  | 3 ++-
M lib/std/os/windows/user32.zig  | 2 +-
M lib/std/special/build_runner.zig  | 8 ++++----
M lib/std/std.zig  | 4 ++++
M lib/std/testing.zig  | 76 +++++++++++++++++++++++++++++++++++++++-------------------------------------
M lib/std/zig/parser_test.zig  | 319 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/std/zig/render.zig  | 259 ++++++++++++++++++++++++++++++-------------------------------------------------
M src/BuiltinFn.zig  | 2 +-
M src/Compilation.zig  | 23 +++++++++++++++++------
M src/clang.zig  | 5 +++++
M src/clang_options_data.zig  | 9 ++++++++-
M src/codegen.zig  | 195 +++++++++++++++++++++++++------------------------------------------------------
M src/codegen/aarch64.zig  | 5 ++++-
M src/codegen/llvm.zig  | 22 +++++++++++-----------
M src/codegen/llvm/bindings.zig  | 33 +++++++++++++++++++++++----------
M src/codegen/wasm.zig  | 44 +++++++++++++++++++++++++++++++++++++++++---
M src/config.zig.in  | 2 +-
M src/introspect.zig  | 8 ++++++++
M src/link/MachO.zig  | 1091 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
A src/link/MachO/Archive.zig  | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/link/MachO/DebugSymbols.zig  | 8 ++++----
A src/link/MachO/Object.zig  | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/MachO/Zld.zig  | 3294 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/MachO/bind.zig  | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/link/MachO/imports.zig  | 152 -------------------------------------------------------------------------------
M src/main.zig  | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M src/stage1/all_types.hpp  | 29 +++++++++++++++++++++--------
M src/stage1/analyze.cpp  | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
M src/stage1/ir.cpp  | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
M src/stage1/ir_print.cpp  | 59 +++++++++++++++++++++++++++++++++++++++++++++++++----------
M src/translate_c.zig  | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M src/translate_c/ast.zig  | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/zig_clang.cpp  | 5 +++++
M src/zig_clang.h  | 2 ++
M test/cli.zig  | 11 +++++++++++
M test/run_translated_c.zig  | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/stage1/behavior/vector.zig  | 12 ++++++++----
M test/stage2/cbe.zig  | 2 +-
M test/stage2/wasm.zig  | 35 +++++++++++++++++++++++++++++++++++
M test/standalone.zig  | 5 ++++-
M test/standalone/mix_o_files/base64.zig  | 6 +++---
M test/translate_c.zig  | 202 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M tools/update_clang_options.zig  | 4 ++++

87 files changed, 9017 insertions(+), 2483 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -89,6 +89,7 @@ set(ZIG_TARGET_MCPU "baseline" CACHE STRING "-mcpu parameter to output binaries 
 set(ZIG_EXECUTABLE "" CACHE STRING "(when cross compiling) path to already-built zig binary")
 set(ZIG_SINGLE_THREADED off CACHE BOOL "limit the zig compiler to use only 1 thread")
 set(ZIG_OMIT_STAGE2 off CACHE BOOL "omit the stage2 backend from stage1")
+set(ZIG_ENABLE_LOGGING off CACHE BOOL "enable logging")
 
 if("${ZIG_TARGET_TRIPLE}" STREQUAL "native")
     set(ZIG_USE_LLVM_CONFIG ON CACHE BOOL "use llvm-config to find LLVM libraries")
@@ -564,7 +565,14 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/Coff.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Elf.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
     "${CMAKE_SOURCE_DIR}/src/link/C/zig.h"
     "${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin"
@@ -600,6 +608,12 @@ else()
   set(ZIG_OMIT_STAGE2_BOOL "false")
 endif()
 
+if(ZIG_ENABLE_LOGGING)
+  set(ZIG_ENABLE_LOGGING_BOOL "true")
+else()
+  set(ZIG_ENABLE_LOGGING_BOOL "false")
+endif()
+
 configure_file (
     "${CMAKE_SOURCE_DIR}/src/stage1/config.h.in"
     "${ZIG_CONFIG_H_OUT}"
@@ -728,12 +742,14 @@ if(MSVC OR MINGW)
     target_link_libraries(zigstage1 LINK_PUBLIC version)
 endif()
 
-add_executable(zig0 ${ZIG0_SOURCES})
-set_target_properties(zig0 PROPERTIES
-    COMPILE_FLAGS ${EXE_CFLAGS}
-    LINK_FLAGS ${EXE_LDFLAGS}
-)
-target_link_libraries(zig0 zigstage1)
+if("${ZIG_EXECUTABLE}" STREQUAL "")
+  add_executable(zig0 ${ZIG0_SOURCES})
+  set_target_properties(zig0 PROPERTIES
+      COMPILE_FLAGS ${EXE_CFLAGS}
+      LINK_FLAGS ${EXE_LDFLAGS}
+  )
+  target_link_libraries(zig0 zigstage1)
+endif()
 
 if(MSVC)
     set(ZIG1_OBJECT "${CMAKE_BINARY_DIR}/zig1.obj")
@@ -782,7 +798,6 @@ if("${ZIG_EXECUTABLE}" STREQUAL "")
 else()
   add_custom_command(
       OUTPUT "${ZIG1_OBJECT}"
-      BYPRODUCTS "${ZIG1_OBJECT}"
       COMMAND "${ZIG_EXECUTABLE}" "build-obj" ${BUILD_ZIG1_ARGS}
       DEPENDS ${ZIG_STAGE2_SOURCES}
       COMMENT STATUS "Building self-hosted component ${ZIG1_OBJECT}"
diff --git a/ci/azure/macos_arm64_script b/ci/azure/macos_arm64_script
@@ -0,0 +1,132 @@
+#!/bin/sh
+# Azure CI: build a host zig, use it to cross-compile zig for aarch64-macos, then publish the tarball.
+set -x
+set -e
+
+brew install s3cmd ninja gnu-tar
+
+ZIGDIR="$(pwd)"
+ARCH="aarch64"
+# {product}-{os}{sdk_version}-{arch}-{llvm_version}-{cmake_build_type}
+CACHE_HOST_BASENAME="llvm-macos10.15-x86_64-11.0.1-release"
+CACHE_ARM64_BASENAME="llvm-macos11.0-arm64-11.0.1-release"
+PREFIX_HOST="$HOME/$CACHE_HOST_BASENAME"
+PREFIX_ARM64="$HOME/$CACHE_ARM64_BASENAME"
+JOBS="-j2"
+
+rm -rf "$PREFIX_HOST" "$PREFIX_ARM64"
+cd "$HOME"
+wget -nv "https://ziglang.org/deps/$CACHE_HOST_BASENAME.tar.xz"
+wget -nv "https://ziglang.org/deps/$CACHE_ARM64_BASENAME.tar.xz"
+
+gtar xf "$CACHE_HOST_BASENAME.tar.xz"
+gtar xf "$CACHE_ARM64_BASENAME.tar.xz"
+
+cd "$ZIGDIR"
+
+# Make the `zig version` number consistent.
+# This will affect the cmake command below.
+git config core.abbrev 9
+git fetch --unshallow || true
+git fetch --tags
+
+# Select xcode: latest version found on vmImage macOS-10.15. Exported so cmake/ninja and the compilers see it.
+export DEVELOPER_DIR=/Applications/Xcode_12.4.app
+
+export ZIG_LOCAL_CACHE_DIR="$ZIGDIR/zig-cache"
+export ZIG_GLOBAL_CACHE_DIR="$ZIGDIR/zig-cache"
+
+# Build zig for host and use `Debug` type to make builds a little faster.
+
+cd "$ZIGDIR"
+mkdir build.host
+cd build.host
+cmake -G "Ninja" .. \
+  -DCMAKE_INSTALL_PREFIX="$(pwd)/release" \
+  -DCMAKE_PREFIX_PATH="$PREFIX_HOST" \
+  -DCMAKE_BUILD_TYPE="Debug" \
+  -DZIG_STATIC="OFF"
+
+# Build but do not install.
+ninja $JOBS
+
+ZIG_EXE="$ZIGDIR/build.host/zig"
+
+# Build zig for arm64 target.
+# - use `Release` type for published tarballs
+# - ad-hoc codesign with linker
+# - note: apple quarantine of downloads (eg. via safari) still apply
+
+cd "$ZIGDIR"
+mkdir build.arm64
+cd build.arm64
+cmake -G "Ninja" .. \
+  -DCMAKE_INSTALL_PREFIX="$(pwd)/release" \
+  -DCMAKE_PREFIX_PATH="$PREFIX_ARM64" \
+  -DCMAKE_BUILD_TYPE="Release" \
+  -DCMAKE_CROSSCOMPILING="True" \
+  -DCMAKE_SYSTEM_NAME="Darwin" \
+  -DCMAKE_C_FLAGS="-arch arm64" \
+  -DCMAKE_CXX_FLAGS="-arch arm64" \
+  -DCMAKE_EXE_LINKER_FLAGS="-lz -Xlinker -adhoc_codesign" \
+  -DZIG_USE_LLVM_CONFIG="OFF" \
+  -DZIG_EXECUTABLE="$ZIG_EXE" \
+  -DZIG_TARGET_TRIPLE="${ARCH}-macos" \
+  -DZIG_STATIC="OFF"
+
+ninja $JOBS install
+
+# Disable test because binary is foreign arch.
+#release/bin/zig build test
+
+if [ "${BUILD_REASON}" != "PullRequest" ]; then
+  mv ../LICENSE release/
+
+  # We do not run test suite but still need langref; the host zig generates it.
+  mkdir -p release/docs
+  "$ZIG_EXE" run ../doc/docgen.zig -- "$ZIG_EXE" ../doc/langref.html.in release/docs/langref.html
+
+  # Produce the experimental std lib documentation.
+  mkdir -p release/docs/std
+  "$ZIG_EXE" test ../lib/std/std.zig \
+    --override-lib-dir ../lib \
+    -femit-docs=release/docs/std \
+    -fno-emit-bin
+
+  # Remove the unnecessary bin dir in $prefix/bin/zig
+  mv release/bin/zig release/
+  rmdir release/bin
+
+  # Remove the unnecessary zig dir in $prefix/lib/zig/std/std.zig
+  mv release/lib/zig release/lib2
+  rmdir release/lib
+  mv release/lib2 release/lib
+
+  VERSION=$("$ZIG_EXE" version)
+  DIRNAME="zig-macos-$ARCH-$VERSION"
+  TARBALL="$DIRNAME.tar.xz"
+  gtar cJf "$TARBALL" release/ --owner=root --sort=name --transform="s,^release,${DIRNAME},"
+  ln "$TARBALL" "$BUILD_ARTIFACTSTAGINGDIRECTORY/."
+
+  mv "$DOWNLOADSECUREFILE_SECUREFILEPATH" "$HOME/.s3cfg"
+  s3cmd put -P --add-header="cache-control: public, max-age=31536000, immutable" "$TARBALL" s3://ziglang.org/builds/
+
+  SHASUM=$(shasum -a 256 "$TARBALL" | cut '-d ' -f1)
+  BYTESIZE=$(wc -c < "$TARBALL")
+
+  JSONFILE="macos-$GITBRANCH.json"
+  touch "$JSONFILE"
+  echo "{\"tarball\": \"$TARBALL\"," >>"$JSONFILE"
+  echo "\"shasum\": \"$SHASUM\"," >>"$JSONFILE"
+  echo "\"size\": \"$BYTESIZE\"}" >>"$JSONFILE"
+
+  s3cmd put -P --add-header="Cache-Control: max-age=0, must-revalidate" "$JSONFILE" "s3://ziglang.org/builds/$JSONFILE"
+  s3cmd put -P "$JSONFILE" "s3://ziglang.org/builds/$ARCH-macos-$VERSION.json"
+
+  # `set -x` causes these variables to be mangled.
+  # See https://developercommunity.visualstudio.com/content/problem/375679/pipeline-variable-incorrectly-inserts-single-quote.html
+  set +x
+  echo "##vso[task.setvariable variable=tarball;isOutput=true]$TARBALL"
+  echo "##vso[task.setvariable variable=shasum;isOutput=true]$SHASUM"
+  echo "##vso[task.setvariable variable=bytesize;isOutput=true]$BYTESIZE"
+fi
diff --git a/ci/azure/pipelines.yml b/ci/azure/pipelines.yml
@@ -12,6 +12,19 @@ jobs:
   - script: ci/azure/macos_script
     name: main
     displayName: 'Build and test'
+- job: BuildMacOS_arm64  # runs ci/azure/macos_arm64_script, which cross-compiles zig for aarch64-macos
+  pool:
+    vmImage: 'macOS-10.15'
+
+  timeoutInMinutes: 60
+
+  steps:
+  - task: DownloadSecureFile@1  # the script moves the downloaded file to ~/.s3cfg for s3cmd uploads
+    inputs:
+      secureFile: s3cfg
+  - script: ci/azure/macos_arm64_script
+    name: main
+    displayName: 'Build and cross-compile'
 - job: BuildLinux
   pool:
     vmImage: 'ubuntu-18.04'
@@ -31,7 +44,7 @@ jobs:
   timeoutInMinutes: 360
   steps:
   - powershell: |
-      (New-Object Net.WebClient).DownloadFile("https://github.com/msys2/msys2-installer/releases/download/2021-01-05/msys2-base-x86_64-20210105.sfx.exe", "sfx.exe")
+      (New-Object Net.WebClient).DownloadFile("https://github.com/msys2/msys2-installer/releases/download/2021-02-28/msys2-base-x86_64-20210228.sfx.exe", "sfx.exe")
       .\sfx.exe -y -o\
       del sfx.exe
     displayName: Download/Extract/Install MSYS2
diff --git a/ci/azure/windows_msvc_install b/ci/azure/windows_msvc_install
@@ -3,7 +3,7 @@
 set -x
 set -e
 
-pacman -Su --needed --noconfirm
+pacman -Suy --needed --noconfirm
 pacman -S --needed --noconfirm wget p7zip python3-pip tar xz
 
 pip install s3cmd
diff --git a/doc/docgen.zig b/doc/docgen.zig
@@ -4,6 +4,7 @@ const io = std.io;
 const fs = std.fs;
 const process = std.process;
 const ChildProcess = std.ChildProcess;
+const Progress = std.Progress;
 const print = std.debug.print;
 const mem = std.mem;
 const testing = std.testing;
@@ -234,7 +235,7 @@ fn parseError(tokenizer: *Tokenizer, token: Token, comptime fmt: []const u8, arg
             }
         }
         {
-            const caret_count = token.end - token.start;
+            const caret_count = std.math.min(token.end, loc.line_end) - token.start;
             var i: usize = 0;
             while (i < caret_count) : (i += 1) {
                 print("~", .{});
@@ -1012,6 +1013,9 @@ fn tokenizeAndPrint(docgen_tokenizer: *Tokenizer, out: anytype, source_token: To
 
 fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: anytype, zig_exe: []const u8, do_code_tests: bool) !void {
     var code_progress_index: usize = 0;
+    var progress = Progress{};
+    const root_node = try progress.start("Generating docgen examples", toc.nodes.len);
+    defer root_node.end();
 
     var env_map = try process.getEnvMap(allocator);
     try env_map.set("ZIG_DEBUG_COLOR", "1");
@@ -1058,8 +1062,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                 try tokenizeAndPrint(tokenizer, out, content_tok);
             },
             .Code => |code| {
-                code_progress_index += 1;
-                print("docgen example code {}/{}...", .{ code_progress_index, tokenizer.code_node_count });
+                root_node.completeOne();
 
                 const raw_source = tokenizer.buffer[code.source_token.start..code.source_token.end];
                 const trimmed_raw_source = mem.trim(u8, raw_source, " \n");
@@ -1071,7 +1074,6 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                 try out.writeAll("</pre>");
 
                 if (!do_code_tests) {
-                    print("SKIP\n", .{});
                     continue;
                 }
 
@@ -1133,12 +1135,14 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                             switch (result.term) {
                                 .Exited => |exit_code| {
                                     if (exit_code == 0) {
+                                        progress.log("", .{});
                                         print("{s}\nThe following command incorrectly succeeded:\n", .{result.stderr});
                                         dumpArgs(build_args.items);
                                         return parseError(tokenizer, code.source_token, "example incorrectly compiled", .{});
                                     }
                                 },
                                 else => {
+                                    progress.log("", .{});
                                     print("{s}\nThe following command crashed:\n", .{result.stderr});
                                     dumpArgs(build_args.items);
                                     return parseError(tokenizer, code.source_token, "example compile crashed", .{});
@@ -1187,6 +1191,7 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                             switch (result.term) {
                                 .Exited => |exit_code| {
                                     if (exit_code == 0) {
+                                        progress.log("", .{});
                                         print("{s}\nThe following command incorrectly succeeded:\n", .{result.stderr});
                                         dumpArgs(run_args);
                                         return parseError(tokenizer, code.source_token, "example incorrectly compiled", .{});
@@ -1266,18 +1271,21 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                         switch (result.term) {
                             .Exited => |exit_code| {
                                 if (exit_code == 0) {
+                                    progress.log("", .{});
                                     print("{s}\nThe following command incorrectly succeeded:\n", .{result.stderr});
                                     dumpArgs(test_args.items);
                                     return parseError(tokenizer, code.source_token, "example incorrectly compiled", .{});
                                 }
                             },
                             else => {
+                                progress.log("", .{});
                                 print("{s}\nThe following command crashed:\n", .{result.stderr});
                                 dumpArgs(test_args.items);
                                 return parseError(tokenizer, code.source_token, "example compile crashed", .{});
                             },
                         }
                         if (mem.indexOf(u8, result.stderr, error_match) == null) {
+                            progress.log("", .{});
                             print("{s}\nExpected to find '{s}' in stderr\n", .{ result.stderr, error_match });
                             return parseError(tokenizer, code.source_token, "example did not have expected compile error", .{});
                         }
@@ -1321,18 +1329,21 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                         switch (result.term) {
                             .Exited => |exit_code| {
                                 if (exit_code == 0) {
+                                    progress.log("", .{});
                                     print("{s}\nThe following command incorrectly succeeded:\n", .{result.stderr});
                                     dumpArgs(test_args.items);
                                     return parseError(tokenizer, code.source_token, "example test incorrectly succeeded", .{});
                                 }
                             },
                             else => {
+                                progress.log("", .{});
                                 print("{s}\nThe following command crashed:\n", .{result.stderr});
                                 dumpArgs(test_args.items);
                                 return parseError(tokenizer, code.source_token, "example compile crashed", .{});
                             },
                         }
                         if (mem.indexOf(u8, result.stderr, error_match) == null) {
+                            progress.log("", .{});
                             print("{s}\nExpected to find '{s}' in stderr\n", .{ result.stderr, error_match });
                             return parseError(tokenizer, code.source_token, "example did not have expected runtime safety error message", .{});
                         }
@@ -1400,18 +1411,21 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                             switch (result.term) {
                                 .Exited => |exit_code| {
                                     if (exit_code == 0) {
+                                        progress.log("", .{});
                                         print("{s}\nThe following command incorrectly succeeded:\n", .{result.stderr});
                                         dumpArgs(build_args.items);
                                         return parseError(tokenizer, code.source_token, "example build incorrectly succeeded", .{});
                                     }
                                 },
                                 else => {
+                                    progress.log("", .{});
                                     print("{s}\nThe following command crashed:\n", .{result.stderr});
                                     dumpArgs(build_args.items);
                                     return parseError(tokenizer, code.source_token, "example compile crashed", .{});
                                 },
                             }
                             if (mem.indexOf(u8, result.stderr, error_match) == null) {
+                                progress.log("", .{});
                                 print("{s}\nExpected to find '{s}' in stderr\n", .{ result.stderr, error_match });
                                 return parseError(tokenizer, code.source_token, "example did not have expected compile error message", .{});
                             }
@@ -1461,7 +1475,6 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: any
                         try out.print("\n{s}{s}</code></pre>\n", .{ escaped_stderr, escaped_stdout });
                     },
                 }
-                print("OK\n", .{});
             },
         }
     }
diff --git a/doc/langref.html.in b/doc/langref.html.in
@@ -9952,9 +9952,9 @@ export fn decode_base_64(
 ) usize {
     const src = source_ptr[0..source_len];
     const dest = dest_ptr[0..dest_len];
-    const base64_decoder = base64.standard_decoder_unsafe;
-    const decoded_size = base64_decoder.calcSize(src);
-    base64_decoder.decode(dest[0..decoded_size], src);
+    const base64_decoder = base64.standard.Decoder;
+    const decoded_size = base64_decoder.calcSizeForSlice(src) catch unreachable;
+    base64_decoder.decode(dest[0..decoded_size], src) catch unreachable;
     return decoded_size;
 }
       {#code_end#}
diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig
@@ -687,8 +687,9 @@ pub fn ArrayHashMapUnmanaged(
 
         /// Removes the last inserted `Entry` in the hash map and returns it.
         pub fn pop(self: *Self) Entry {
-            const top = self.entries.pop();
+            const top = self.entries.items[self.entries.items.len - 1];
             _ = self.removeWithHash(top.key, top.hash, .index_only);
+            self.entries.items.len -= 1;
             return top;
         }
 
@@ -1258,19 +1259,18 @@ test "pop" {
     var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator);
     defer map.deinit();
 
-    testing.expect((try map.fetchPut(1, 11)) == null);
-    testing.expect((try map.fetchPut(2, 22)) == null);
-    testing.expect((try map.fetchPut(3, 33)) == null);
-    testing.expect((try map.fetchPut(4, 44)) == null);
+    // Insert just enough entries so that the map expands. Afterwards,
+    // pop all entries out of the map.
 
-    const pop1 = map.pop();
-    testing.expect(pop1.key == 4 and pop1.value == 44);
-    const pop2 = map.pop();
-    testing.expect(pop2.key == 3 and pop2.value == 33);
-    const pop3 = map.pop();
-    testing.expect(pop3.key == 2 and pop3.value == 22);
-    const pop4 = map.pop();
-    testing.expect(pop4.key == 1 and pop4.value == 11);
+    var i: i32 = 0;
+    while (i < 9) : (i += 1) {
+        testing.expect((try map.fetchPut(i, i)) == null);
+    }
+
+    while (i > 0) : (i -= 1) {
+        const pop = map.pop();
+        testing.expect(pop.key == i - 1 and pop.value == i - 1);
+    }
 }
 
 test "reIndex" {
diff --git a/lib/std/base64.zig b/lib/std/base64.zig
@@ -8,454 +8,452 @@ const assert = std.debug.assert;
 const testing = std.testing;
 const mem = std.mem;
 
-pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-pub const standard_pad_char = '=';
-pub const standard_encoder = Base64Encoder.init(standard_alphabet_chars, standard_pad_char);
+pub const Error = error{
+    InvalidCharacter,
+    InvalidPadding,
+    NoSpaceLeft,
+};
+
+/// Base64 codecs
+pub const Codecs = struct {
+    alphabet_chars: [64]u8,
+    pad_char: ?u8,
+    decoderWithIgnore: fn (ignore: []const u8) Base64DecoderWithIgnore,
+    Encoder: Base64Encoder,
+    Decoder: Base64Decoder,
+};
+
+pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".*;
+fn standardBase64DecoderWithIgnore(ignore: []const u8) Base64DecoderWithIgnore {
+    return Base64DecoderWithIgnore.init(standard_alphabet_chars, '=', ignore);
+}
+
+/// Standard Base64 codecs, with padding
+pub const standard = Codecs{
+    .alphabet_chars = standard_alphabet_chars,
+    .pad_char = '=',
+    .decoderWithIgnore = standardBase64DecoderWithIgnore,
+    .Encoder = Base64Encoder.init(standard_alphabet_chars, '='),
+    .Decoder = Base64Decoder.init(standard_alphabet_chars, '='),
+};
+
+/// Standard Base64 codecs, without padding
+pub const standard_no_pad = Codecs{
+    .alphabet_chars = standard_alphabet_chars,
+    .pad_char = null,
+    .decoderWithIgnore = standardBase64DecoderWithIgnore,
+    .Encoder = Base64Encoder.init(standard_alphabet_chars, null),
+    .Decoder = Base64Decoder.init(standard_alphabet_chars, null),
+};
+
+pub const url_safe_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*;
+fn urlSafeBase64DecoderWithIgnore(ignore: []const u8) Base64DecoderWithIgnore {
+    return Base64DecoderWithIgnore.init(url_safe_alphabet_chars, null, ignore);
+}
+
+/// URL-safe Base64 codecs, with padding
+pub const url_safe = Codecs{
+    .alphabet_chars = url_safe_alphabet_chars,
+    .pad_char = '=',
+    .decoderWithIgnore = urlSafeBase64DecoderWithIgnore,
+    .Encoder = Base64Encoder.init(url_safe_alphabet_chars, '='),
+    .Decoder = Base64Decoder.init(url_safe_alphabet_chars, '='),
+};
+
+/// URL-safe Base64 codecs, without padding
+pub const url_safe_no_pad = Codecs{
+    .alphabet_chars = url_safe_alphabet_chars,
+    .pad_char = null,
+    .decoderWithIgnore = urlSafeBase64DecoderWithIgnore,
+    .Encoder = Base64Encoder.init(url_safe_alphabet_chars, null),
+    .Decoder = Base64Decoder.init(url_safe_alphabet_chars, null),
+};
+
+// Backwards compatibility
+
+/// Deprecated - Use `standard.pad_char`
+pub const standard_pad_char = standard.pad_char;
+/// Deprecated - Use `standard.Encoder`
+pub const standard_encoder = standard.Encoder;
+/// Deprecated - Use `standard.Decoder`
+pub const standard_decoder = standard.Decoder;
 
 pub const Base64Encoder = struct {
-    alphabet_chars: []const u8,
-    pad_char: u8,
+    alphabet_chars: [64]u8,
+    pad_char: ?u8,
 
-    /// a bunch of assertions, then simply pass the data right through.
-    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Encoder {
+    /// A bunch of assertions, then simply pass the data right through.
+    pub fn init(alphabet_chars: [64]u8, pad_char: ?u8) Base64Encoder {
         assert(alphabet_chars.len == 64);
         var char_in_alphabet = [_]bool{false} ** 256;
         for (alphabet_chars) |c| {
             assert(!char_in_alphabet[c]);
-            assert(c != pad_char);
+            assert(pad_char == null or c != pad_char.?);
             char_in_alphabet[c] = true;
         }
-
         return Base64Encoder{
             .alphabet_chars = alphabet_chars,
             .pad_char = pad_char,
         };
     }
 
-    /// ceil(source_len * 4/3)
-    pub fn calcSize(source_len: usize) usize {
-        return @divTrunc(source_len + 2, 3) * 4;
+    /// Compute the encoded length
+    pub fn calcSize(encoder: *const Base64Encoder, source_len: usize) usize {
+        if (encoder.pad_char != null) {
+            return @divTrunc(source_len + 2, 3) * 4;
+        } else {
+            const leftover = source_len % 3;
+            return @divTrunc(source_len, 3) * 4 + @divTrunc(leftover * 4 + 2, 3);
+        }
     }
 
-    /// dest.len must be what you get from ::calcSize.
+    /// dest.len must at least be what you get from ::calcSize.
     pub fn encode(encoder: *const Base64Encoder, dest: []u8, source: []const u8) []const u8 {
-        assert(dest.len >= Base64Encoder.calcSize(source.len));
-
-        var i: usize = 0;
-        var out_index: usize = 0;
-        while (i + 2 < source.len) : (i += 3) {
-            dest[out_index] = encoder.alphabet_chars[(source[i] >> 2) & 0x3f];
-            out_index += 1;
-
-            dest[out_index] = encoder.alphabet_chars[((source[i] & 0x3) << 4) | ((source[i + 1] & 0xf0) >> 4)];
-            out_index += 1;
-
-            dest[out_index] = encoder.alphabet_chars[((source[i + 1] & 0xf) << 2) | ((source[i + 2] & 0xc0) >> 6)];
-            out_index += 1;
-
-            dest[out_index] = encoder.alphabet_chars[source[i + 2] & 0x3f];
-            out_index += 1;
+        const out_len = encoder.calcSize(source.len);
+        assert(dest.len >= out_len);
+
+        const nibbles = source.len / 3;
+        const leftover = source.len - 3 * nibbles;
+
+        var acc: u12 = 0;
+        var acc_len: u4 = 0;
+        var out_idx: usize = 0;
+        for (source) |v| {
+            acc = (acc << 8) + v;
+            acc_len += 8;
+            while (acc_len >= 6) {
+                acc_len -= 6;
+                dest[out_idx] = encoder.alphabet_chars[@truncate(u6, (acc >> acc_len))];
+                out_idx += 1;
+            }
         }
-
-        if (i < source.len) {
-            dest[out_index] = encoder.alphabet_chars[(source[i] >> 2) & 0x3f];
-            out_index += 1;
-
-            if (i + 1 == source.len) {
-                dest[out_index] = encoder.alphabet_chars[(source[i] & 0x3) << 4];
-                out_index += 1;
-
-                dest[out_index] = encoder.pad_char;
-                out_index += 1;
-            } else {
-                dest[out_index] = encoder.alphabet_chars[((source[i] & 0x3) << 4) | ((source[i + 1] & 0xf0) >> 4)];
-                out_index += 1;
-
-                dest[out_index] = encoder.alphabet_chars[(source[i + 1] & 0xf) << 2];
-                out_index += 1;
+        if (acc_len > 0) {
+            dest[out_idx] = encoder.alphabet_chars[@truncate(u6, (acc << 6 - acc_len))];
+            out_idx += 1;
+        }
+        if (encoder.pad_char) |pad_char| {
+            for (dest[out_idx..]) |*pad| {
+                pad.* = pad_char;
             }
-
-            dest[out_index] = encoder.pad_char;
-            out_index += 1;
         }
-        return dest[0..out_index];
+        return dest[0..out_len];
     }
 };
 
-pub const standard_decoder = Base64Decoder.init(standard_alphabet_chars, standard_pad_char);
-
 pub const Base64Decoder = struct {
+    const invalid_char: u8 = 0xff;
+
     /// e.g. 'A' => 0.
-    /// undefined for any value not in the 64 alphabet chars.
+    /// `invalid_char` for any value not in the 64 alphabet chars.
     char_to_index: [256]u8,
+    pad_char: ?u8,
 
-    /// true only for the 64 chars in the alphabet, not the pad char.
-    char_in_alphabet: [256]bool,
-    pad_char: u8,
-
-    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Decoder {
-        assert(alphabet_chars.len == 64);
-
+    /// Build the reverse lookup table for `alphabet_chars`; every byte outside the alphabet maps to `invalid_char`.
+    /// Asserts that no alphabet character repeats and that none equals the pad character (when there is one).
+    pub fn init(alphabet_chars: [64]u8, pad_char: ?u8) Base64Decoder {
         var result = Base64Decoder{
-            .char_to_index = undefined,
-            .char_in_alphabet = [_]bool{false} ** 256,
+            .char_to_index = [_]u8{invalid_char} ** 256,
             .pad_char = pad_char,
         };
 
+        // Scratch table used only by the duplicate check below; it is not stored in the decoder.
+        var char_in_alphabet = [_]bool{false} ** 256;
         for (alphabet_chars) |c, i| {
-            assert(!result.char_in_alphabet[c]);
-            assert(c != pad_char);
+            assert(!char_in_alphabet[c]);
+            assert(pad_char == null or c != pad_char.?);
 
             result.char_to_index[c] = @intCast(u8, i);
-            result.char_in_alphabet[c] = true;
+            char_in_alphabet[c] = true;
         }
+        return result;
+    }
 
+    /// Return the maximum possible decoded size for a given input length - The actual length may be less if the input includes padding.
+    /// `InvalidPadding` is returned if the input length is not valid.
+    pub fn calcSizeUpperBound(decoder: *const Base64Decoder, source_len: usize) Error!usize {
+        var result = source_len / 4 * 3;
+        const leftover = source_len % 4;
+        if (decoder.pad_char != null) {
+            // Padded input always comes in whole 4-character groups.
+            if (leftover != 0) return error.InvalidPadding;
+        } else {
+            // A single trailing character carries only 6 bits, which cannot form a byte.
+            if (leftover == 1) return error.InvalidPadding;
+            result += leftover * 3 / 4;
+        }
         return result;
     }
 
-    /// If the encoded buffer is detected to be invalid, returns error.InvalidPadding.
-    pub fn calcSize(decoder: *const Base64Decoder, source: []const u8) !usize {
-        if (source.len % 4 != 0) return error.InvalidPadding;
-        return calcDecodedSizeExactUnsafe(source, decoder.pad_char);
+    /// Compute the exact number of bytes that `source` decodes to.
+    /// Fails with `InvalidPadding` when `source.len` is not a valid encoded length.
+    pub fn calcSizeForSlice(decoder: *const Base64Decoder, source: []const u8) Error!usize {
+        const n = source.len;
+        var size = try decoder.calcSizeUpperBound(n);
+        const pad_char = decoder.pad_char orelse return size;
+        // Each of the last two characters that is padding stands for one missing byte.
+        if (n >= 1 and source[n - 1] == pad_char) size -= 1;
+        if (n >= 2 and source[n - 2] == pad_char) size -= 1;
+        return size;
     }
 
     /// dest.len must be what you get from ::calcSize.
     /// invalid characters result in error.InvalidCharacter.
     /// invalid padding results in error.InvalidPadding.
-    pub fn decode(decoder: *const Base64Decoder, dest: []u8, source: []const u8) !void {
-        assert(dest.len == (decoder.calcSize(source) catch unreachable));
-        assert(source.len % 4 == 0);
-
-        var src_cursor: usize = 0;
-        var dest_cursor: usize = 0;
-
-        while (src_cursor < source.len) : (src_cursor += 4) {
-            if (!decoder.char_in_alphabet[source[src_cursor + 0]]) return error.InvalidCharacter;
-            if (!decoder.char_in_alphabet[source[src_cursor + 1]]) return error.InvalidCharacter;
-            if (src_cursor < source.len - 4 or source[src_cursor + 3] != decoder.pad_char) {
-                // common case
-                if (!decoder.char_in_alphabet[source[src_cursor + 2]]) return error.InvalidCharacter;
-                if (!decoder.char_in_alphabet[source[src_cursor + 3]]) return error.InvalidCharacter;
-                dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4;
-                dest[dest_cursor + 1] = decoder.char_to_index[source[src_cursor + 1]] << 4 | decoder.char_to_index[source[src_cursor + 2]] >> 2;
-                dest[dest_cursor + 2] = decoder.char_to_index[source[src_cursor + 2]] << 6 | decoder.char_to_index[source[src_cursor + 3]];
-                dest_cursor += 3;
-            } else if (source[src_cursor + 2] != decoder.pad_char) {
-                // one pad char
-                if (!decoder.char_in_alphabet[source[src_cursor + 2]]) return error.InvalidCharacter;
-                dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4;
-                dest[dest_cursor + 1] = decoder.char_to_index[source[src_cursor + 1]] << 4 | decoder.char_to_index[source[src_cursor + 2]] >> 2;
-                if (decoder.char_to_index[source[src_cursor + 2]] << 6 != 0) return error.InvalidPadding;
-                dest_cursor += 2;
-            } else {
-                // two pad chars
-                dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4;
-                if (decoder.char_to_index[source[src_cursor + 1]] << 4 != 0) return error.InvalidPadding;
-                dest_cursor += 1;
+    pub fn decode(decoder: *const Base64Decoder, dest: []u8, source: []const u8) Error!void {
+        if (decoder.pad_char != null and source.len % 4 != 0) return error.InvalidPadding;
+        var acc: u12 = 0;
+        var acc_len: u4 = 0;
+        var dest_idx: usize = 0;
+        var leftover_idx: ?usize = null;
+        for (source) |c, src_idx| {
+            const d = decoder.char_to_index[c];
+            if (d == invalid_char) {
+                if (decoder.pad_char == null or c != decoder.pad_char.?) return error.InvalidCharacter;
+                leftover_idx = src_idx;
+                break;
+            }
+            acc = (acc << 6) + d;
+            acc_len += 6;
+            if (acc_len >= 8) {
+                acc_len -= 8;
+                dest[dest_idx] = @truncate(u8, acc >> acc_len);
+                dest_idx += 1;
             }
         }
-
-        assert(src_cursor == source.len);
-        assert(dest_cursor == dest.len);
+        if (acc_len > 4 or (acc & (@as(u12, 1) << acc_len) - 1) != 0) {
+            return error.InvalidPadding;
+        }
+        if (leftover_idx == null) return;
+        var leftover = source[leftover_idx.?..];
+        if (decoder.pad_char) |pad_char| {
+            const padding_len = acc_len / 2;
+            var padding_chars: usize = 0;
+            var i: usize = 0;
+            for (leftover) |c| {
+                if (c != pad_char) {
+                    return if (c == Base64Decoder.invalid_char) error.InvalidCharacter else error.InvalidPadding;
+                }
+                padding_chars += 1;
+            }
+            if (padding_chars != padding_len) return error.InvalidPadding;
+        }
     }
 };
 
 pub const Base64DecoderWithIgnore = struct {
     decoder: Base64Decoder,
     char_is_ignored: [256]bool,
-    pub fn init(alphabet_chars: []const u8, pad_char: u8, ignore_chars: []const u8) Base64DecoderWithIgnore {
+
+    /// Build a decoder whose `char_is_ignored` table marks every byte listed in `ignore_chars`.
+    /// Asserts that each ignored byte is outside the alphabet, differs from the pad character and is listed only once.
+    pub fn init(alphabet_chars: [64]u8, pad_char: ?u8, ignore_chars: []const u8) Base64DecoderWithIgnore {
         var result = Base64DecoderWithIgnore{
             .decoder = Base64Decoder.init(alphabet_chars, pad_char),
             .char_is_ignored = [_]bool{false} ** 256,
         };
-
         for (ignore_chars) |c| {
-            assert(!result.decoder.char_in_alphabet[c]);
+            assert(result.decoder.char_to_index[c] == Base64Decoder.invalid_char);
             assert(!result.char_is_ignored[c]);
             assert(result.decoder.pad_char != c);
             result.char_is_ignored[c] = true;
         }
-
         return result;
     }
 
-    /// If no characters end up being ignored or padding, this will be the exact decoded size.
-    pub fn calcSizeUpperBound(encoded_len: usize) usize {
-        return @divTrunc(encoded_len, 4) * 3;
+    /// Return the maximum possible decoded size for a given input length - The actual length may be less if the input includes padding or ignored characters.
+    /// The length is not validated here: this implementation has no error path even though the return type allows one.
+    pub fn calcSizeUpperBound(decoder_with_ignore: *const Base64DecoderWithIgnore, source_len: usize) Error!usize {
+        var result = source_len / 4 * 3;
+        if (decoder_with_ignore.decoder.pad_char == null) {
+            const leftover = source_len % 4;
+            result += leftover * 3 / 4;
+        }
+        return result;
     }
 
     /// Invalid characters that are not ignored result in error.InvalidCharacter.
     /// Invalid padding results in error.InvalidPadding.
-    /// Decoding more data than can fit in dest results in error.OutputTooSmall. See also ::calcSizeUpperBound.
+    /// Decoding more data than can fit in dest results in error.NoSpaceLeft. See also ::calcSizeUpperBound.
     /// Returns the number of bytes written to dest.
-    pub fn decode(decoder_with_ignore: *const Base64DecoderWithIgnore, dest: []u8, source: []const u8) !usize {
+    pub fn decode(decoder_with_ignore: *const Base64DecoderWithIgnore, dest: []u8, source: []const u8) Error!usize {
         const decoder = &decoder_with_ignore.decoder;
-
-        var src_cursor: usize = 0;
-        var dest_cursor: usize = 0;
-
-        while (true) {
-            // get the next 4 chars, if available
-            var next_4_chars: [4]u8 = undefined;
-            var available_chars: usize = 0;
-            var pad_char_count: usize = 0;
-            while (available_chars < 4 and src_cursor < source.len) {
-                var c = source[src_cursor];
-                src_cursor += 1;
-
-                if (decoder.char_in_alphabet[c]) {
-                    // normal char
-                    next_4_chars[available_chars] = c;
-                    available_chars += 1;
-                } else if (decoder_with_ignore.char_is_ignored[c]) {
-                    // we're told to skip this one
-                    continue;
-                } else if (c == decoder.pad_char) {
-                    // the padding has begun. count the pad chars.
-                    pad_char_count += 1;
-                    while (src_cursor < source.len) {
-                        c = source[src_cursor];
-                        src_cursor += 1;
-                        if (c == decoder.pad_char) {
-                            pad_char_count += 1;
-                            if (pad_char_count > 2) return error.InvalidCharacter;
-                        } else if (decoder_with_ignore.char_is_ignored[c]) {
-                            // we can even ignore chars during the padding
-                            continue;
-                        } else return error.InvalidCharacter;
-                    }
-                    break;
-                } else return error.InvalidCharacter;
+        // `acc` collects 6 bits per accepted character; a byte is emitted as soon as 8 bits are available.
+        var acc: u12 = 0;
+        var acc_len: u4 = 0;
+        var dest_idx: usize = 0;
+        var leftover_idx: ?usize = null;
+        for (source) |c, src_idx| {
+            if (decoder_with_ignore.char_is_ignored[c]) continue;
+            const d = decoder.char_to_index[c];
+            if (d == Base64Decoder.invalid_char) {
+                // The first pad character ends the data; anything else outside the alphabet is an error.
+                if (decoder.pad_char == null or c != decoder.pad_char.?) return error.InvalidCharacter;
+                leftover_idx = src_idx;
+                break;
             }
-
-            switch (available_chars) {
-                4 => {
-                    // common case
-                    if (dest_cursor + 3 > dest.len) return error.OutputTooSmall;
-                    assert(pad_char_count == 0);
-                    dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
-                    dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2;
-                    dest[dest_cursor + 2] = decoder.char_to_index[next_4_chars[2]] << 6 | decoder.char_to_index[next_4_chars[3]];
-                    dest_cursor += 3;
-                    continue;
-                },
-                3 => {
-                    if (dest_cursor + 2 > dest.len) return error.OutputTooSmall;
-                    if (pad_char_count != 1) return error.InvalidPadding;
-                    dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
-                    dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2;
-                    if (decoder.char_to_index[next_4_chars[2]] << 6 != 0) return error.InvalidPadding;
-                    dest_cursor += 2;
-                    break;
-                },
-                2 => {
-                    if (dest_cursor + 1 > dest.len) return error.OutputTooSmall;
-                    if (pad_char_count != 2) return error.InvalidPadding;
-                    dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
-                    if (decoder.char_to_index[next_4_chars[1]] << 4 != 0) return error.InvalidPadding;
-                    dest_cursor += 1;
-                    break;
-                },
-                1 => {
-                    return error.InvalidPadding;
-                },
-                0 => {
-                    if (pad_char_count != 0) return error.InvalidPadding;
-                    break;
-                },
-                else => unreachable,
+            acc = (acc << 6) + d;
+            acc_len += 6;
+            if (acc_len >= 8) {
+                if (dest_idx == dest.len) return error.NoSpaceLeft;
+                acc_len -= 8;
+                dest[dest_idx] = @truncate(u8, acc >> acc_len);
+                dest_idx += 1;
             }
         }
-
-        assert(src_cursor == source.len);
-
-        return dest_cursor;
-    }
-};
-
-pub const standard_decoder_unsafe = Base64DecoderUnsafe.init(standard_alphabet_chars, standard_pad_char);
-
-pub const Base64DecoderUnsafe = struct {
-    /// e.g. 'A' => 0.
-    /// undefined for any value not in the 64 alphabet chars.
-    char_to_index: [256]u8,
-    pad_char: u8,
-
-    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64DecoderUnsafe {
-        assert(alphabet_chars.len == 64);
-        var result = Base64DecoderUnsafe{
-            .char_to_index = undefined,
-            .pad_char = pad_char,
-        };
-        for (alphabet_chars) |c, i| {
-            assert(c != pad_char);
-            result.char_to_index[c] = @intCast(u8, i);
+        // Only 0, 2 or 4 filler bits may remain, and they must all be zero.
+        if (acc_len > 4 or (acc & (@as(u12, 1) << acc_len) - 1) != 0) {
+            return error.InvalidPadding;
         }
-        return result;
-    }
-
-    /// The source buffer must be valid.
-    pub fn calcSize(decoder: *const Base64DecoderUnsafe, source: []const u8) usize {
-        return calcDecodedSizeExactUnsafe(source, decoder.pad_char);
-    }
-
-    /// dest.len must be what you get from ::calcDecodedSizeExactUnsafe.
-    /// invalid characters or padding will result in undefined values.
-    pub fn decode(decoder: *const Base64DecoderUnsafe, dest: []u8, source: []const u8) void {
-        assert(dest.len == decoder.calcSize(source));
-
-        var src_index: usize = 0;
-        var dest_index: usize = 0;
-        var in_buf_len: usize = source.len;
-
-        while (in_buf_len > 0 and source[in_buf_len - 1] == decoder.pad_char) {
-            in_buf_len -= 1;
+        // 2 leftover bits require one pad character, 4 leftover bits require two.
+        const padding_len = acc_len / 2;
+        if (leftover_idx == null) {
+            if (decoder.pad_char != null and padding_len != 0) return error.InvalidPadding;
+            return dest_idx;
         }
-
-        while (in_buf_len > 4) {
-            dest[dest_index] = decoder.char_to_index[source[src_index + 0]] << 2 | decoder.char_to_index[source[src_index + 1]] >> 4;
-            dest_index += 1;
-
-            dest[dest_index] = decoder.char_to_index[source[src_index + 1]] << 4 | decoder.char_to_index[source[src_index + 2]] >> 2;
-            dest_index += 1;
-
-            dest[dest_index] = decoder.char_to_index[source[src_index + 2]] << 6 | decoder.char_to_index[source[src_index + 3]];
-            dest_index += 1;
-
-            src_index += 4;
-            in_buf_len -= 4;
-        }
-
-        if (in_buf_len > 1) {
-            dest[dest_index] = decoder.char_to_index[source[src_index + 0]] << 2 | decoder.char_to_index[source[src_index + 1]] >> 4;
-            dest_index += 1;
-        }
-        if (in_buf_len > 2) {
-            dest[dest_index] = decoder.char_to_index[source[src_index + 1]] << 4 | decoder.char_to_index[source[src_index + 2]] >> 2;
-            dest_index += 1;
-        }
-        if (in_buf_len > 3) {
-            dest[dest_index] = decoder.char_to_index[source[src_index + 2]] << 6 | decoder.char_to_index[source[src_index + 3]];
-            dest_index += 1;
+        const leftover = source[leftover_idx.?..];
+        if (decoder.pad_char) |pad_char| {
+            var padding_chars: usize = 0;
+            for (leftover) |c| {
+                if (decoder_with_ignore.char_is_ignored[c]) continue;
+                if (c != pad_char) {
+                    // Classify via the lookup table: a byte outside the alphabet is an invalid character,
+                    // an alphabet byte following padding is bad padding.
+                    return if (decoder.char_to_index[c] == Base64Decoder.invalid_char) error.InvalidCharacter else error.InvalidPadding;
+                }
+                padding_chars += 1;
+            }
+            if (padding_chars != padding_len) return error.InvalidPadding;
         }
+        return dest_idx;
     }
 };
 
-fn calcDecodedSizeExactUnsafe(source: []const u8, pad_char: u8) usize {
-    if (source.len == 0) return 0;
-    var result = @divExact(source.len, 4) * 3;
-    if (source[source.len - 1] == pad_char) {
-        result -= 1;
-        if (source[source.len - 2] == pad_char) {
-            result -= 1;
-        }
-    }
-    return result;
-}
-
 test "base64" {
     @setEvalBranchQuota(8000);
     testBase64() catch unreachable;
-    comptime (testBase64() catch unreachable);
+    comptime testAllApis(standard, "comptime", "Y29tcHRpbWU=") catch unreachable;
+}
+
+test "base64 url_safe_no_pad" {
+    @setEvalBranchQuota(8000);
+    testBase64UrlSafeNoPad() catch unreachable;
+    comptime testAllApis(url_safe_no_pad, "comptime", "Y29tcHRpbWU") catch unreachable;
 }
 
 fn testBase64() !void {
-    try testAllApis("", "");
-    try testAllApis("f", "Zg==");
-    try testAllApis("fo", "Zm8=");
-    try testAllApis("foo", "Zm9v");
-    try testAllApis("foob", "Zm9vYg==");
-    try testAllApis("fooba", "Zm9vYmE=");
-    try testAllApis("foobar", "Zm9vYmFy");
-
-    try testDecodeIgnoreSpace("", " ");
-    try testDecodeIgnoreSpace("f", "Z g= =");
-    try testDecodeIgnoreSpace("fo", "    Zm8=");
-    try testDecodeIgnoreSpace("foo", "Zm9v    ");
-    try testDecodeIgnoreSpace("foob", "Zm9vYg = = ");
-    try testDecodeIgnoreSpace("fooba", "Zm9v YmE=");
-    try testDecodeIgnoreSpace("foobar", " Z m 9 v Y m F y ");
+    const codecs = standard;
+
+    try testAllApis(codecs, "", "");
+    try testAllApis(codecs, "f", "Zg==");
+    try testAllApis(codecs, "fo", "Zm8=");
+    try testAllApis(codecs, "foo", "Zm9v");
+    try testAllApis(codecs, "foob", "Zm9vYg==");
+    try testAllApis(codecs, "fooba", "Zm9vYmE=");
+    try testAllApis(codecs, "foobar", "Zm9vYmFy");
+
+    try testDecodeIgnoreSpace(codecs, "", " ");
+    try testDecodeIgnoreSpace(codecs, "f", "Z g= =");
+    try testDecodeIgnoreSpace(codecs, "fo", "    Zm8=");
+    try testDecodeIgnoreSpace(codecs, "foo", "Zm9v    ");
+    try testDecodeIgnoreSpace(codecs, "foob", "Zm9vYg = = ");
+    try testDecodeIgnoreSpace(codecs, "fooba", "Zm9v YmE=");
+    try testDecodeIgnoreSpace(codecs, "foobar", " Z m 9 v Y m F y ");
+
+    // test getting some api errors
+    try testError(codecs, "A", error.InvalidPadding);
+    try testError(codecs, "AA", error.InvalidPadding);
+    try testError(codecs, "AAA", error.InvalidPadding);
+    try testError(codecs, "A..A", error.InvalidCharacter);
+    try testError(codecs, "AA=A", error.InvalidPadding);
+    try testError(codecs, "AA/=", error.InvalidPadding);
+    try testError(codecs, "A/==", error.InvalidPadding);
+    try testError(codecs, "A===", error.InvalidPadding);
+    try testError(codecs, "====", error.InvalidPadding);
+
+    try testNoSpaceLeftError(codecs, "AA==");
+    try testNoSpaceLeftError(codecs, "AAA=");
+    try testNoSpaceLeftError(codecs, "AAAA");
+    try testNoSpaceLeftError(codecs, "AAAAAA==");
+}
+
+fn testBase64UrlSafeNoPad() !void {
+    const codecs = url_safe_no_pad;
+
+    try testAllApis(codecs, "", "");
+    try testAllApis(codecs, "f", "Zg");
+    try testAllApis(codecs, "fo", "Zm8");
+    try testAllApis(codecs, "foo", "Zm9v");
+    try testAllApis(codecs, "foob", "Zm9vYg");
+    try testAllApis(codecs, "fooba", "Zm9vYmE");
+    try testAllApis(codecs, "foobar", "Zm9vYmFy");
+
+    try testDecodeIgnoreSpace(codecs, "", " ");
+    try testDecodeIgnoreSpace(codecs, "f", "Z g ");
+    try testDecodeIgnoreSpace(codecs, "fo", "    Zm8");
+    try testDecodeIgnoreSpace(codecs, "foo", "Zm9v    ");
+    try testDecodeIgnoreSpace(codecs, "foob", "Zm9vYg   ");
+    try testDecodeIgnoreSpace(codecs, "fooba", "Zm9v YmE");
+    try testDecodeIgnoreSpace(codecs, "foobar", " Z m 9 v Y m F y ");
 
     // test getting some api errors
-    try testError("A", error.InvalidPadding);
-    try testError("AA", error.InvalidPadding);
-    try testError("AAA", error.InvalidPadding);
-    try testError("A..A", error.InvalidCharacter);
-    try testError("AA=A", error.InvalidCharacter);
-    try testError("AA/=", error.InvalidPadding);
-    try testError("A/==", error.InvalidPadding);
-    try testError("A===", error.InvalidCharacter);
-    try testError("====", error.InvalidCharacter);
-
-    try testOutputTooSmallError("AA==");
-    try testOutputTooSmallError("AAA=");
-    try testOutputTooSmallError("AAAA");
-    try testOutputTooSmallError("AAAAAA==");
+    try testError(codecs, "A", error.InvalidPadding);
+    try testError(codecs, "AAA=", error.InvalidCharacter);
+    try testError(codecs, "A..A", error.InvalidCharacter);
+    try testError(codecs, "AA=A", error.InvalidCharacter);
+    try testError(codecs, "AA/=", error.InvalidCharacter);
+    try testError(codecs, "A/==", error.InvalidCharacter);
+    try testError(codecs, "A===", error.InvalidCharacter);
+    try testError(codecs, "====", error.InvalidCharacter);
+
+    try testNoSpaceLeftError(codecs, "AA");
+    try testNoSpaceLeftError(codecs, "AAA");
+    try testNoSpaceLeftError(codecs, "AAAA");
+    try testNoSpaceLeftError(codecs, "AAAAAA");
 }
 
-fn testAllApis(expected_decoded: []const u8, expected_encoded: []const u8) !void {
+// Round-trips one vector through the encoder, the strict decoder and a
+// decoder-with-ignore configured with an empty ignore list.
+fn testAllApis(codecs: Codecs, expected_decoded: []const u8, expected_encoded: []const u8) !void {
     // Base64Encoder
     {
         var buffer: [0x100]u8 = undefined;
-        const encoded = standard_encoder.encode(&buffer, expected_decoded);
+        const encoded = codecs.Encoder.encode(&buffer, expected_decoded);
         testing.expectEqualSlices(u8, expected_encoded, encoded);
     }
 
     // Base64Decoder
     {
         var buffer: [0x100]u8 = undefined;
-        var decoded = buffer[0..try standard_decoder.calcSize(expected_encoded)];
-        try standard_decoder.decode(decoded, expected_encoded);
+        var decoded = buffer[0..try codecs.Decoder.calcSizeForSlice(expected_encoded)];
+        try codecs.Decoder.decode(decoded, expected_encoded);
         testing.expectEqualSlices(u8, expected_decoded, decoded);
     }
 
     // Base64DecoderWithIgnore
     {
-        const standard_decoder_ignore_nothing = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, "");
+        const decoder_ignore_nothing = codecs.decoderWithIgnore("");
         var buffer: [0x100]u8 = undefined;
-        var decoded = buffer[0..Base64DecoderWithIgnore.calcSizeUpperBound(expected_encoded.len)];
-        var written = try standard_decoder_ignore_nothing.decode(decoded, expected_encoded);
+        var decoded = buffer[0..try decoder_ignore_nothing.calcSizeUpperBound(expected_encoded.len)];
+        var written = try decoder_ignore_nothing.decode(decoded, expected_encoded);
         testing.expect(written <= decoded.len);
         testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]);
     }
-
-    // Base64DecoderUnsafe
-    {
-        var buffer: [0x100]u8 = undefined;
-        var decoded = buffer[0..standard_decoder_unsafe.calcSize(expected_encoded)];
-        standard_decoder_unsafe.decode(decoded, expected_encoded);
-        testing.expectEqualSlices(u8, expected_decoded, decoded);
-    }
 }
 
-fn testDecodeIgnoreSpace(expected_decoded: []const u8, encoded: []const u8) !void {
-    const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
+// Decodes `encoded` with a decoder built via `decoderWithIgnore(" ")` and
+// compares the bytes written against `expected_decoded`.
+fn testDecodeIgnoreSpace(codecs: Codecs, expected_decoded: []const u8, encoded: []const u8) !void {
+    const decoder_ignore_space = codecs.decoderWithIgnore(" ");
     var buffer: [0x100]u8 = undefined;
-    var decoded = buffer[0..Base64DecoderWithIgnore.calcSizeUpperBound(encoded.len)];
-    var written = try standard_decoder_ignore_space.decode(decoded, encoded);
+    var decoded = buffer[0..try decoder_ignore_space.calcSizeUpperBound(encoded.len)];
+    var written = try decoder_ignore_space.decode(decoded, encoded);
     testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]);
 }
 
-fn testError(encoded: []const u8, expected_err: anyerror) !void {
-    const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
+// Checks that `encoded` is rejected with `expected_err` by the strict decoder
+// (either in the size calculation or in decode) and by a decoder built via
+// `decoderWithIgnore(" ")`. Success, or any other error, fails the test.
+fn testError(codecs: Codecs, encoded: []const u8, expected_err: anyerror) !void {
+    const decoder_ignore_space = codecs.decoderWithIgnore(" ");
     var buffer: [0x100]u8 = undefined;
-    if (standard_decoder.calcSize(encoded)) |decoded_size| {
+    if (codecs.Decoder.calcSizeForSlice(encoded)) |decoded_size| {
         var decoded = buffer[0..decoded_size];
-        if (standard_decoder.decode(decoded, encoded)) |_| {
+        if (codecs.Decoder.decode(decoded, encoded)) |_| {
             return error.ExpectedError;
         } else |err| if (err != expected_err) return err;
     } else |err| if (err != expected_err) return err;
 
-    if (standard_decoder_ignore_space.decode(buffer[0..], encoded)) |_| {
+    if (decoder_ignore_space.decode(buffer[0..], encoded)) |_| {
         return error.ExpectedError;
     } else |err| if (err != expected_err) return err;
 }
 
-fn testOutputTooSmallError(encoded: []const u8) !void {
-    const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
+// Decodes `encoded` into a buffer one byte shorter than its exact decoded size
+// and expects error.NoSpaceLeft; success or any other error fails the test.
+fn testNoSpaceLeftError(codecs: Codecs, encoded: []const u8) !void {
+    const decoder_ignore_space = codecs.decoderWithIgnore(" ");
     var buffer: [0x100]u8 = undefined;
-    var decoded = buffer[0 .. calcDecodedSizeExactUnsafe(encoded, standard_pad_char) - 1];
-    if (standard_decoder_ignore_space.decode(decoded, encoded)) |_| {
+    var decoded = buffer[0 .. (try codecs.Decoder.calcSizeForSlice(encoded)) - 1];
+    if (decoder_ignore_space.decode(decoded, encoded)) |_| {
         return error.ExpectedError;
-    } else |err| if (err != error.OutputTooSmall) return err;
+    } else |err| if (err != error.NoSpaceLeft) return err;
 }
diff --git a/lib/std/bit_set.zig b/lib/std/bit_set.zig
@@ -176,7 +176,7 @@ pub fn IntegerBitSet(comptime size: u16) type {
         /// The default options (.{}) will iterate indices of set bits in
         /// ascending order.  Modifications to the underlying bit set may
         /// or may not be observed by the iterator.
-        pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options.direction) {
+        pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options) {
             return .{
                 .bits_remain = switch (options.kind) {
                     .set => self.mask,
@@ -185,7 +185,11 @@ pub fn IntegerBitSet(comptime size: u16) type {
             };
         }
 
-        fn Iterator(comptime direction: IteratorOptions.Direction) type {
+        pub fn Iterator(comptime options: IteratorOptions) type {
+            return SingleWordIterator(options.direction);
+        }
+
+        fn SingleWordIterator(comptime direction: IteratorOptions.Direction) type {
             return struct {
                 const IterSelf = @This();
                 // all bits which have not yet been iterated over
@@ -425,8 +429,12 @@ pub fn ArrayBitSet(comptime MaskIntType: type, comptime size: usize) type {
         /// The default options (.{}) will iterate indices of set bits in
         /// ascending order.  Modifications to the underlying bit set may
         /// or may not be observed by the iterator.
-        pub fn iterator(self: *const Self, comptime options: IteratorOptions) BitSetIterator(MaskInt, options) {
-            return BitSetIterator(MaskInt, options).init(&self.masks, last_item_mask);
+        pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options) {
+            return Iterator(options).init(&self.masks, last_item_mask);
+        }
+
+        pub fn Iterator(comptime options: IteratorOptions) type {
+            return BitSetIterator(MaskInt, options);
         }
 
         fn maskBit(index: usize) MaskInt {
@@ -700,11 +708,15 @@ pub const DynamicBitSetUnmanaged = struct {
     /// ascending order.  Modifications to the underlying bit set may
     /// or may not be observed by the iterator.  Resizing the underlying
     /// bit set invalidates the iterator.
-    pub fn iterator(self: *const Self, comptime options: IteratorOptions) BitSetIterator(MaskInt, options) {
+    pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options) {
         const num_masks = numMasks(self.bit_length);
         const padding_bits = num_masks * @bitSizeOf(MaskInt) - self.bit_length;
         const last_item_mask = (~@as(MaskInt, 0)) >> @intCast(ShiftInt, padding_bits);
-        return BitSetIterator(MaskInt, options).init(self.masks[0..num_masks], last_item_mask);
+        return Iterator(options).init(self.masks[0..num_masks], last_item_mask);
+    }
+
+    pub fn Iterator(comptime options: IteratorOptions) type {
+        return BitSetIterator(MaskInt, options);
     }
 
     fn maskBit(index: usize) MaskInt {
@@ -858,9 +870,11 @@ pub const DynamicBitSet = struct {
     /// ascending order.  Modifications to the underlying bit set may
     /// or may not be observed by the iterator.  Resizing the underlying
     /// bit set invalidates the iterator.
-    pub fn iterator(self: *Self, comptime options: IteratorOptions) BitSetIterator(MaskInt, options) {
+    pub fn iterator(self: *Self, comptime options: IteratorOptions) Iterator(options) {
         return self.unmanaged.iterator(options);
     }
+
+    pub const Iterator = DynamicBitSetUnmanaged.Iterator;
 };
 
 /// Options for configuring an iterator over a bit set
diff --git a/lib/std/build.zig b/lib/std/build.zig
@@ -51,7 +51,7 @@ pub const Builder = struct {
     default_step: *Step,
     env_map: *BufMap,
     top_level_steps: ArrayList(*TopLevelStep),
-    install_prefix: ?[]const u8,
+    install_prefix: []const u8,
     dest_dir: ?[]const u8,
     lib_dir: []const u8,
     exe_dir: []const u8,
@@ -156,7 +156,7 @@ pub const Builder = struct {
             .default_step = undefined,
             .env_map = env_map,
             .search_prefixes = ArrayList([]const u8).init(allocator),
-            .install_prefix = null,
+            .install_prefix = undefined,
             .lib_dir = undefined,
             .exe_dir = undefined,
             .h_dir = undefined,
@@ -190,22 +190,13 @@ pub const Builder = struct {
     }
 
     /// This function is intended to be called by std/special/build_runner.zig, not a build.zig file.
-    pub fn setInstallPrefix(self: *Builder, optional_prefix: ?[]const u8) void {
-        self.install_prefix = optional_prefix;
-    }
-
-    /// This function is intended to be called by std/special/build_runner.zig, not a build.zig file.
-    pub fn resolveInstallPrefix(self: *Builder) void {
+    pub fn resolveInstallPrefix(self: *Builder, install_prefix: ?[]const u8) void {
         if (self.dest_dir) |dest_dir| {
-            const install_prefix = self.install_prefix orelse "/usr";
-            self.install_path = fs.path.join(self.allocator, &[_][]const u8{ dest_dir, install_prefix }) catch unreachable;
+            self.install_prefix = install_prefix orelse "/usr";
+            self.install_path = fs.path.join(self.allocator, &[_][]const u8{ dest_dir, self.install_prefix }) catch unreachable;
         } else {
-            const install_prefix = self.install_prefix orelse blk: {
-                const p = self.cache_root;
-                self.install_prefix = p;
-                break :blk p;
-            };
-            self.install_path = install_prefix;
+            self.install_prefix = install_prefix orelse self.cache_root;
+            self.install_path = self.install_prefix;
         }
         self.lib_dir = fs.path.join(self.allocator, &[_][]const u8{ self.install_path, "lib" }) catch unreachable;
         self.exe_dir = fs.path.join(self.allocator, &[_][]const u8{ self.install_path, "bin" }) catch unreachable;
diff --git a/lib/std/c.zig b/lib/std/c.zig
@@ -295,9 +295,9 @@ pub extern "c" fn kevent(
 ) c_int;
 
 pub extern "c" fn getaddrinfo(
-    noalias node: [*:0]const u8,
-    noalias service: [*:0]const u8,
-    noalias hints: *const addrinfo,
+    noalias node: ?[*:0]const u8,
+    noalias service: ?[*:0]const u8,
+    noalias hints: ?*const addrinfo,
     noalias res: **addrinfo,
 ) EAI;
 
diff --git a/lib/std/c/builtins.zig b/lib/std/c/builtins.zig
@@ -140,7 +140,7 @@ pub fn __builtin_object_size(ptr: ?*const c_void, ty: c_int) callconv(.Inline) u
     // If it is not possible to determine which objects ptr points to at compile time,
     // __builtin_object_size should return (size_t) -1 for type 0 or 1 and (size_t) 0
     // for type 2 or 3.
-    if (ty == 0 or ty == 1) return @bitCast(usize, -@as(c_long, 1));
+    if (ty == 0 or ty == 1) return @bitCast(usize, -@as(isize, 1));
     if (ty == 2 or ty == 3) return 0;
     unreachable;
 }
@@ -188,3 +188,9 @@ pub fn __builtin_memcpy(
 pub fn __builtin_expect(expr: c_long, c: c_long) callconv(.Inline) c_long {
     return expr;
 }
+
+// __builtin_alloca_with_align is not currently implemented.
+// It is used in a run-translated-c test and a test-translate-c test to ensure that non-implemented
+// builtins are correctly demoted. If you implement __builtin_alloca_with_align, please update the
+// run-translated-c test and the test-translate-c test to use a different non-implemented builtin.
+// pub fn __builtin_alloca_with_align(size: usize, alignment: usize) callconv(.Inline) *c_void {}
diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig
@@ -24,8 +24,12 @@ pub const aead = struct {
     pub const Gimli = @import("crypto/gimli.zig").Aead;
 
     pub const chacha_poly = struct {
-        pub const ChaCha20Poly1305 = @import("crypto/chacha20.zig").Chacha20Poly1305;
-        pub const XChaCha20Poly1305 = @import("crypto/chacha20.zig").XChacha20Poly1305;
+        pub const ChaCha20Poly1305 = @import("crypto/chacha20.zig").ChaCha20Poly1305;
+        pub const ChaCha12Poly1305 = @import("crypto/chacha20.zig").ChaCha12Poly1305;
+        pub const ChaCha8Poly1305 = @import("crypto/chacha20.zig").ChaCha8Poly1305;
+        pub const XChaCha20Poly1305 = @import("crypto/chacha20.zig").XChaCha20Poly1305;
+        pub const XChaCha12Poly1305 = @import("crypto/chacha20.zig").XChaCha12Poly1305;
+        pub const XChaCha8Poly1305 = @import("crypto/chacha20.zig").XChaCha8Poly1305;
     };
 
     pub const isap = @import("crypto/isap.zig");
@@ -119,8 +123,14 @@ pub const sign = struct {
 pub const stream = struct {
     pub const chacha = struct {
         pub const ChaCha20IETF = @import("crypto/chacha20.zig").ChaCha20IETF;
+        pub const ChaCha12IETF = @import("crypto/chacha20.zig").ChaCha12IETF;
+        pub const ChaCha8IETF = @import("crypto/chacha20.zig").ChaCha8IETF;
         pub const ChaCha20With64BitNonce = @import("crypto/chacha20.zig").ChaCha20With64BitNonce;
+        pub const ChaCha12With64BitNonce = @import("crypto/chacha20.zig").ChaCha12With64BitNonce;
+        pub const ChaCha8With64BitNonce = @import("crypto/chacha20.zig").ChaCha8With64BitNonce;
         pub const XChaCha20IETF = @import("crypto/chacha20.zig").XChaCha20IETF;
+        pub const XChaCha12IETF = @import("crypto/chacha20.zig").XChaCha12IETF;
+        pub const XChaCha8IETF = @import("crypto/chacha20.zig").XChaCha8IETF;
     };
 
     pub const salsa = struct {
@@ -144,6 +154,8 @@ pub const random = &@import("crypto/tlcsprng.zig").interface;
 
 const std = @import("std.zig");
 
+pub const Error = @import("crypto/error.zig").Error;
+
 test "crypto" {
     const please_windows_dont_oom = std.Target.current.os.tag == .windows;
     if (please_windows_dont_oom) return error.SkipZigTest;
@@ -151,7 +163,9 @@ test "crypto" {
     inline for (std.meta.declarations(@This())) |decl| {
         switch (decl.data) {
             .Type => |t| {
-                std.testing.refAllDecls(t);
+                if (@typeInfo(t) != .ErrorSet) {
+                    std.testing.refAllDecls(t);
+                }
             },
             .Var => |v| {
                 _ = v;
diff --git a/lib/std/crypto/25519/curve25519.zig b/lib/std/crypto/25519/curve25519.zig
@@ -4,6 +4,7 @@
 // The MIT license requires this copyright notice to be included in all copies
 // and substantial portions of the software.
 const std = @import("std");
+const Error = std.crypto.Error;
 
 /// Group operations over Curve25519.
 pub const Curve25519 = struct {
@@ -28,12 +29,12 @@ pub const Curve25519 = struct {
     pub const basePoint = Curve25519{ .x = Fe.curve25519BasePoint };
 
     /// Check that the encoding of a Curve25519 point is canonical.
-    pub fn rejectNonCanonical(s: [32]u8) !void {
+    pub fn rejectNonCanonical(s: [32]u8) Error!void {
         return Fe.rejectNonCanonical(s, false);
     }
 
     /// Reject the neutral element.
-    pub fn rejectIdentity(p: Curve25519) !void {
+    pub fn rejectIdentity(p: Curve25519) Error!void {
         if (p.x.isZero()) {
             return error.IdentityElement;
         }
@@ -44,7 +45,7 @@ pub const Curve25519 = struct {
         return p.dbl().dbl().dbl();
     }
 
-    fn ladder(p: Curve25519, s: [32]u8, comptime bits: usize) !Curve25519 {
+    fn ladder(p: Curve25519, s: [32]u8, comptime bits: usize) Error!Curve25519 {
         var x1 = p.x;
         var x2 = Fe.one;
         var z2 = Fe.zero;
@@ -85,7 +86,7 @@ pub const Curve25519 = struct {
     /// way to use Curve25519 for a DH operation.
     /// Return error.IdentityElement if the resulting point is
     /// the identity element.
-    pub fn clampedMul(p: Curve25519, s: [32]u8) !Curve25519 {
+    pub fn clampedMul(p: Curve25519, s: [32]u8) Error!Curve25519 {
         var t: [32]u8 = s;
         scalar.clamp(&t);
         return try ladder(p, t, 255);
@@ -95,14 +96,14 @@ pub const Curve25519 = struct {
     /// Return error.IdentityElement if the resulting point is
     /// the identity element or error.WeakPublicKey if the public
     /// key is a low-order point.
-    pub fn mul(p: Curve25519, s: [32]u8) !Curve25519 {
+    pub fn mul(p: Curve25519, s: [32]u8) Error!Curve25519 {
         const cofactor = [_]u8{8} ++ [_]u8{0} ** 31;
         _ = ladder(p, cofactor, 4) catch |_| return error.WeakPublicKey;
         return try ladder(p, s, 256);
     }
 
     /// Compute the Curve25519 equivalent to an Edwards25519 point.
-    pub fn fromEdwards25519(p: std.crypto.ecc.Edwards25519) !Curve25519 {
+    pub fn fromEdwards25519(p: std.crypto.ecc.Edwards25519) Error!Curve25519 {
         try p.clearCofactor().rejectIdentity();
         const one = std.crypto.ecc.Edwards25519.Fe.one;
         const x = one.add(p.y).mul(one.sub(p.y).invert()); // xMont=(1+yEd)/(1-yEd)
diff --git a/lib/std/crypto/25519/ed25519.zig b/lib/std/crypto/25519/ed25519.zig
@@ -8,7 +8,8 @@ const crypto = std.crypto;
 const debug = std.debug;
 const fmt = std.fmt;
 const mem = std.mem;
-const Sha512 = std.crypto.hash.sha2.Sha512;
+const Sha512 = crypto.hash.sha2.Sha512;
+const Error = crypto.Error;
 
 /// Ed25519 (EdDSA) signatures.
 pub const Ed25519 = struct {
@@ -40,7 +41,7 @@ pub const Ed25519 = struct {
         ///
         /// For this reason, an EdDSA secret key is commonly called a seed,
         /// from which the actual secret is derived.
-        pub fn create(seed: ?[seed_length]u8) !KeyPair {
+        pub fn create(seed: ?[seed_length]u8) Error!KeyPair {
             const ss = seed orelse ss: {
                 var random_seed: [seed_length]u8 = undefined;
                 crypto.random.bytes(&random_seed);
@@ -71,7 +72,7 @@ pub const Ed25519 = struct {
     /// Sign a message using a key pair, and optional random noise.
     /// Having noise creates non-standard, non-deterministic signatures,
     /// but has been proven to increase resilience against fault attacks.
-    pub fn sign(msg: []const u8, key_pair: KeyPair, noise: ?[noise_length]u8) ![signature_length]u8 {
+    pub fn sign(msg: []const u8, key_pair: KeyPair, noise: ?[noise_length]u8) Error![signature_length]u8 {
         const seed = key_pair.secret_key[0..seed_length];
         const public_key = key_pair.secret_key[seed_length..];
         if (!mem.eql(u8, public_key, &key_pair.public_key)) {
@@ -111,8 +112,8 @@ pub const Ed25519 = struct {
     }
 
     /// Verify an Ed25519 signature given a message and a public key.
-    /// Returns error.InvalidSignature is the signature verification failed.
-    pub fn verify(sig: [signature_length]u8, msg: []const u8, public_key: [public_length]u8) !void {
+    /// Returns error.SignatureVerificationFailed if the signature verification failed.
+    pub fn verify(sig: [signature_length]u8, msg: []const u8, public_key: [public_length]u8) Error!void {
         const r = sig[0..32];
         const s = sig[32..64];
         try Curve.scalar.rejectNonCanonical(s.*);
@@ -133,7 +134,7 @@ pub const Ed25519 = struct {
         const ah = try a.neg().mulPublic(hram);
         const sb_ah = (try Curve.basePoint.mulPublic(s.*)).add(ah);
         if (expected_r.sub(sb_ah).clearCofactor().rejectIdentity()) |_| {
-            return error.InvalidSignature;
+            return error.SignatureVerificationFailed;
         } else |_| {}
     }
 
@@ -145,7 +146,7 @@ pub const Ed25519 = struct {
     };
 
     /// Verify several signatures in a single operation, much faster than verifying signatures one-by-one
-    pub fn verifyBatch(comptime count: usize, signature_batch: [count]BatchElement) !void {
+    pub fn verifyBatch(comptime count: usize, signature_batch: [count]BatchElement) Error!void {
         var r_batch: [count][32]u8 = undefined;
         var s_batch: [count][32]u8 = undefined;
         var a_batch: [count]Curve = undefined;
@@ -200,7 +201,7 @@ pub const Ed25519 = struct {
 
         const zsb = try Curve.basePoint.mulPublic(zs_sum);
         if (zr.add(zah).sub(zsb).rejectIdentity()) |_| {
-            return error.InvalidSignature;
+            return error.SignatureVerificationFailed;
         } else |_| {}
     }
 };
@@ -223,7 +224,7 @@ test "ed25519 signature" {
     var buf: [128]u8 = undefined;
     std.testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&sig)}), "10A442B4A80CC4225B154F43BEF28D2472CA80221951262EB8E0DF9091575E2687CC486E77263C3418C757522D54F84B0359236ABBBD4ACD20DC297FDCA66808");
     try Ed25519.verify(sig, "test", key_pair.public_key);
-    std.testing.expectError(error.InvalidSignature, Ed25519.verify(sig, "TEST", key_pair.public_key));
+    std.testing.expectError(error.SignatureVerificationFailed, Ed25519.verify(sig, "TEST", key_pair.public_key));
 }
 
 test "ed25519 batch verification" {
@@ -251,7 +252,7 @@ test "ed25519 batch verification" {
         try Ed25519.verifyBatch(2, signature_batch);
 
         signature_batch[1].sig = sig1;
-        std.testing.expectError(error.InvalidSignature, Ed25519.verifyBatch(signature_batch.len, signature_batch));
+        std.testing.expectError(error.SignatureVerificationFailed, Ed25519.verifyBatch(signature_batch.len, signature_batch));
     }
 }
 
@@ -316,7 +317,7 @@ test "ed25519 test vectors" {
             .msg_hex = "9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41",
             .public_key_hex = "f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
             .sig_hex = "ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff03be9678ac102edcd92b0210bb34d7428d12ffc5df5f37e359941266a4e35f0f",
-            .expected = error.InvalidSignature, // 8 - non-canonical R
+            .expected = error.SignatureVerificationFailed, // 8 - non-canonical R
         },
         Vec{
             .msg_hex = "9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41",
diff --git a/lib/std/crypto/25519/edwards25519.zig b/lib/std/crypto/25519/edwards25519.zig
@@ -7,6 +7,7 @@ const std = @import("std");
 const debug = std.debug;
 const fmt = std.fmt;
 const mem = std.mem;
+const Error = std.crypto.Error;
 
 /// Group operations over Edwards25519.
 pub const Edwards25519 = struct {
@@ -25,7 +26,7 @@ pub const Edwards25519 = struct {
     is_base: bool = false,
 
     /// Decode an Edwards25519 point from its compressed (Y+sign) coordinates.
-    pub fn fromBytes(s: [encoded_length]u8) !Edwards25519 {
+    pub fn fromBytes(s: [encoded_length]u8) Error!Edwards25519 {
         const z = Fe.one;
         const y = Fe.fromBytes(s);
         var u = y.sq();
@@ -55,7 +56,7 @@ pub const Edwards25519 = struct {
     }
 
     /// Check that the encoding of a point is canonical.
-    pub fn rejectNonCanonical(s: [32]u8) !void {
+    pub fn rejectNonCanonical(s: [32]u8) Error!void {
         return Fe.rejectNonCanonical(s, true);
     }
 
@@ -80,7 +81,7 @@ pub const Edwards25519 = struct {
     const identityElement = Edwards25519{ .x = Fe.zero, .y = Fe.one, .z = Fe.one, .t = Fe.zero };
 
     /// Reject the neutral element.
-    pub fn rejectIdentity(p: Edwards25519) !void {
+    pub fn rejectIdentity(p: Edwards25519) Error!void {
         if (p.x.isZero()) {
             return error.IdentityElement;
         }
@@ -176,7 +177,7 @@ pub const Edwards25519 = struct {
     // Based on real-world benchmarks, we only use this for multi-scalar multiplication.
     // NAF could be useful to half the size of precomputation tables, but we intentionally
     // avoid these to keep the standard library lightweight.
-    fn pcMul(pc: [9]Edwards25519, s: [32]u8, comptime vartime: bool) !Edwards25519 {
+    fn pcMul(pc: [9]Edwards25519, s: [32]u8, comptime vartime: bool) Error!Edwards25519 {
         std.debug.assert(vartime);
         const e = nonAdjacentForm(s);
         var q = Edwards25519.identityElement;
@@ -196,7 +197,7 @@ pub const Edwards25519 = struct {
     }
 
     // Scalar multiplication with a 4-bit window and the first 15 multiples.
-    fn pcMul16(pc: [16]Edwards25519, s: [32]u8, comptime vartime: bool) !Edwards25519 {
+    fn pcMul16(pc: [16]Edwards25519, s: [32]u8, comptime vartime: bool) Error!Edwards25519 {
         var q = Edwards25519.identityElement;
         var pos: usize = 252;
         while (true) : (pos -= 4) {
@@ -234,7 +235,7 @@ pub const Edwards25519 = struct {
     /// Multiply an Edwards25519 point by a scalar without clamping it.
     /// Return error.WeakPublicKey if the resulting point is
     /// the identity element.
-    pub fn mul(p: Edwards25519, s: [32]u8) !Edwards25519 {
+    pub fn mul(p: Edwards25519, s: [32]u8) Error!Edwards25519 {
         const pc = if (p.is_base) basePointPc else pc: {
             const xpc = precompute(p, 15);
             xpc[4].rejectIdentity() catch |_| return error.WeakPublicKey;
@@ -245,7 +246,7 @@ pub const Edwards25519 = struct {
 
     /// Multiply an Edwards25519 point by a *PUBLIC* scalar *IN VARIABLE TIME*
     /// This can be used for signature verification.
-    pub fn mulPublic(p: Edwards25519, s: [32]u8) !Edwards25519 {
+    pub fn mulPublic(p: Edwards25519, s: [32]u8) Error!Edwards25519 {
         if (p.is_base) {
             return pcMul16(basePointPc, s, true);
         } else {
@@ -257,7 +258,7 @@ pub const Edwards25519 = struct {
 
     /// Multiscalar multiplication *IN VARIABLE TIME* for public data
     /// Computes ps0*ss0 + ps1*ss1 + ps2*ss2... faster than doing many of these operations individually
-    pub fn mulMulti(comptime count: usize, ps: [count]Edwards25519, ss: [count][32]u8) !Edwards25519 {
+    pub fn mulMulti(comptime count: usize, ps: [count]Edwards25519, ss: [count][32]u8) Error!Edwards25519 {
         var pcs: [count][9]Edwards25519 = undefined;
         for (ps) |p, i| {
             if (p.is_base) {
@@ -296,14 +297,14 @@ pub const Edwards25519 = struct {
     /// This is strongly recommended for DH operations.
     /// Return error.WeakPublicKey if the resulting point is
     /// the identity element.
-    pub fn clampedMul(p: Edwards25519, s: [32]u8) !Edwards25519 {
+    pub fn clampedMul(p: Edwards25519, s: [32]u8) Error!Edwards25519 {
         var t: [32]u8 = s;
         scalar.clamp(&t);
         return mul(p, t);
     }
 
     // montgomery -- recover y = sqrt(x^3 + A*x^2 + x)
-    fn xmontToYmont(x: Fe) !Fe {
+    fn xmontToYmont(x: Fe) Error!Fe {
         var x2 = x.sq();
         const x3 = x.mul(x2);
         x2 = x2.mul32(Fe.edwards25519a_32);
diff --git a/lib/std/crypto/25519/field.zig b/lib/std/crypto/25519/field.zig
@@ -6,6 +6,7 @@
 const std = @import("std");
 const readIntLittle = std.mem.readIntLittle;
 const writeIntLittle = std.mem.writeIntLittle;
+const Error = std.crypto.Error;
 
 pub const Fe = struct {
     limbs: [5]u64,
@@ -112,7 +113,7 @@ pub const Fe = struct {
     }
 
     /// Reject non-canonical encodings of an element, possibly ignoring the top bit
-    pub fn rejectNonCanonical(s: [32]u8, comptime ignore_extra_bit: bool) !void {
+    pub fn rejectNonCanonical(s: [32]u8, comptime ignore_extra_bit: bool) Error!void {
         var c: u16 = (s[31] & 0x7f) ^ 0x7f;
         comptime var i = 30;
         inline while (i > 0) : (i -= 1) {
@@ -412,7 +413,7 @@ pub const Fe = struct {
     }
 
     /// Compute the square root of `x2`, returning `error.NotSquare` if `x2` was not a square
-    pub fn sqrt(x2: Fe) !Fe {
+    pub fn sqrt(x2: Fe) Error!Fe {
         var x2_copy = x2;
         const x = x2.uncheckedSqrt();
         const check = x.sq().sub(x2_copy);
diff --git a/lib/std/crypto/25519/ristretto255.zig b/lib/std/crypto/25519/ristretto255.zig
@@ -5,6 +5,7 @@
 // and substantial portions of the software.
 const std = @import("std");
 const fmt = std.fmt;
+const Error = std.crypto.Error;
 
 /// Group operations over Edwards25519.
 pub const Ristretto255 = struct {
@@ -34,7 +35,7 @@ pub const Ristretto255 = struct {
         return .{ .ratio_is_square = @boolToInt(has_m_root) | @boolToInt(has_p_root), .root = x.abs() };
     }
 
-    fn rejectNonCanonical(s: [encoded_length]u8) !void {
+    fn rejectNonCanonical(s: [encoded_length]u8) Error!void {
         if ((s[0] & 1) != 0) {
             return error.NonCanonical;
         }
@@ -42,7 +43,7 @@ pub const Ristretto255 = struct {
     }
 
     /// Reject the neutral element.
-    pub fn rejectIdentity(p: Ristretto255) callconv(.Inline) !void {
+    pub fn rejectIdentity(p: Ristretto255) callconv(.Inline) Error!void {
         return p.p.rejectIdentity();
     }
 
@@ -50,7 +51,7 @@ pub const Ristretto255 = struct {
     pub const basePoint = Ristretto255{ .p = Curve.basePoint };
 
     /// Decode a Ristretto255 representative.
-    pub fn fromBytes(s: [encoded_length]u8) !Ristretto255 {
+    pub fn fromBytes(s: [encoded_length]u8) Error!Ristretto255 {
         try rejectNonCanonical(s);
         const s_ = Fe.fromBytes(s);
         const ss = s_.sq(); // s^2
@@ -153,7 +154,7 @@ pub const Ristretto255 = struct {
     /// Multiply a Ristretto255 element with a scalar.
     /// Return error.WeakPublicKey if the resulting element is
     /// the identity element.
-    pub fn mul(p: Ristretto255, s: [encoded_length]u8) callconv(.Inline) !Ristretto255 {
+    pub fn mul(p: Ristretto255, s: [encoded_length]u8) callconv(.Inline) Error!Ristretto255 {
         return Ristretto255{ .p = try p.p.mul(s) };
     }
 
diff --git a/lib/std/crypto/25519/scalar.zig b/lib/std/crypto/25519/scalar.zig
@@ -5,6 +5,7 @@
 // and substantial portions of the software.
 const std = @import("std");
 const mem = std.mem;
+const Error = std.crypto.Error;
 
 /// 2^252 + 27742317777372353535851937790883648493
 pub const field_size = [32]u8{
@@ -18,7 +19,7 @@ pub const CompressedScalar = [32]u8;
 pub const zero = [_]u8{0} ** 32;
 
 /// Reject a scalar whose encoding is not canonical.
-pub fn rejectNonCanonical(s: [32]u8) !void {
+pub fn rejectNonCanonical(s: [32]u8) Error!void {
     var c: u8 = 0;
     var n: u8 = 1;
     var i: usize = 31;
diff --git a/lib/std/crypto/25519/x25519.zig b/lib/std/crypto/25519/x25519.zig
@@ -9,6 +9,7 @@ const mem = std.mem;
 const fmt = std.fmt;
 
 const Sha512 = crypto.hash.sha2.Sha512;
+const Error = crypto.Error;
 
 /// X25519 DH function.
 pub const X25519 = struct {
@@ -31,7 +32,7 @@ pub const X25519 = struct {
         secret_key: [secret_length]u8,
 
         /// Create a new key pair using an optional seed.
-        pub fn create(seed: ?[seed_length]u8) !KeyPair {
+        pub fn create(seed: ?[seed_length]u8) Error!KeyPair {
             const sk = seed orelse sk: {
                 var random_seed: [seed_length]u8 = undefined;
                 crypto.random.bytes(&random_seed);
@@ -44,7 +45,7 @@ pub const X25519 = struct {
         }
 
         /// Create a key pair from an Ed25519 key pair
-        pub fn fromEd25519(ed25519_key_pair: crypto.sign.Ed25519.KeyPair) !KeyPair {
+        pub fn fromEd25519(ed25519_key_pair: crypto.sign.Ed25519.KeyPair) Error!KeyPair {
             const seed = ed25519_key_pair.secret_key[0..32];
             var az: [Sha512.digest_length]u8 = undefined;
             Sha512.hash(seed, &az, .{});
@@ -59,13 +60,13 @@ pub const X25519 = struct {
     };
 
     /// Compute the public key for a given private key.
-    pub fn recoverPublicKey(secret_key: [secret_length]u8) ![public_length]u8 {
+    pub fn recoverPublicKey(secret_key: [secret_length]u8) Error![public_length]u8 {
         const q = try Curve.basePoint.clampedMul(secret_key);
         return q.toBytes();
     }
 
     /// Compute the X25519 equivalent to an Ed25519 public eky.
-    pub fn publicKeyFromEd25519(ed25519_public_key: [crypto.sign.Ed25519.public_length]u8) ![public_length]u8 {
+    pub fn publicKeyFromEd25519(ed25519_public_key: [crypto.sign.Ed25519.public_length]u8) Error![public_length]u8 {
         const pk_ed = try crypto.ecc.Edwards25519.fromBytes(ed25519_public_key);
         const pk = try Curve.fromEdwards25519(pk_ed);
         return pk.toBytes();
@@ -74,7 +75,7 @@ pub const X25519 = struct {
     /// Compute the scalar product of a public key and a secret scalar.
     /// Note that the output should not be used as a shared secret without
     /// hashing it first.
-    pub fn scalarmult(secret_key: [secret_length]u8, public_key: [public_length]u8) ![shared_length]u8 {
+    pub fn scalarmult(secret_key: [secret_length]u8, public_key: [public_length]u8) Error![shared_length]u8 {
         const q = try Curve.fromBytes(public_key).clampedMul(secret_key);
         return q.toBytes();
     }
diff --git a/lib/std/crypto/aegis.zig b/lib/std/crypto/aegis.zig
@@ -8,6 +8,7 @@ const std = @import("std");
 const mem = std.mem;
 const assert = std.debug.assert;
 const AesBlock = std.crypto.core.aes.Block;
+const Error = std.crypto.Error;
 
 const State128L = struct {
     blocks: [8]AesBlock,
@@ -136,7 +137,7 @@ pub const Aegis128L = struct {
     /// ad: Associated Data
     /// npub: public nonce
     /// k: private key
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) !void {
+    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) Error!void {
         assert(c.len == m.len);
         var state = State128L.init(key, npub);
         var src: [32]u8 align(16) = undefined;
@@ -298,7 +299,7 @@ pub const Aegis256 = struct {
     /// ad: Associated Data
     /// npub: public nonce
     /// k: private key
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) !void {
+    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) Error!void {
         assert(c.len == m.len);
         var state = State256.init(key, npub);
         var src: [16]u8 align(16) = undefined;
diff --git a/lib/std/crypto/aes_gcm.zig b/lib/std/crypto/aes_gcm.zig
@@ -12,6 +12,7 @@ const debug = std.debug;
 const Ghash = std.crypto.onetimeauth.Ghash;
 const mem = std.mem;
 const modes = crypto.core.modes;
+const Error = crypto.Error;
 
 pub const Aes128Gcm = AesGcm(crypto.core.aes.Aes128);
 pub const Aes256Gcm = AesGcm(crypto.core.aes.Aes256);
@@ -59,7 +60,7 @@ fn AesGcm(comptime Aes: anytype) type {
             }
         }
 
-        pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) !void {
+        pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) Error!void {
             assert(c.len == m.len);
 
             const aes = Aes.initEnc(key);
diff --git a/lib/std/crypto/aes_ocb.zig b/lib/std/crypto/aes_ocb.zig
@@ -10,6 +10,7 @@ const aes = crypto.core.aes;
 const assert = std.debug.assert;
 const math = std.math;
 const mem = std.mem;
+const Error = crypto.Error;
 
 pub const Aes128Ocb = AesOcb(aes.Aes128);
 pub const Aes256Ocb = AesOcb(aes.Aes256);
@@ -178,7 +179,7 @@ fn AesOcb(comptime Aes: anytype) type {
         /// ad: Associated Data
         /// npub: public nonce
         /// k: secret key
-        pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) !void {
+        pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) Error!void {
             assert(c.len == m.len);
 
             const aes_enc_ctx = Aes.initEnc(key);
diff --git a/lib/std/crypto/bcrypt.zig b/lib/std/crypto/bcrypt.zig
@@ -11,7 +11,8 @@ const math = std.math;
 const mem = std.mem;
 const debug = std.debug;
 const testing = std.testing;
-const utils = std.crypto.utils;
+const utils = crypto.utils;
+const Error = crypto.Error;
 
 const salt_length: usize = 16;
 const salt_str_length: usize = 22;
@@ -21,13 +22,6 @@ const ct_length: usize = 24;
 /// Length (in bytes) of a password hash
 pub const hash_length: usize = 60;
 
-pub const BcryptError = error{
-    /// The hashed password cannot be decoded.
-    InvalidEncoding,
-    /// The hash is not valid for the given password.
-    InvalidPassword,
-};
-
 const State = struct {
     sboxes: [4][256]u32 = [4][256]u32{
         .{ 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, 0x10fa3d98, 
0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a },
@@ -185,7 +179,7 @@ const Codec = struct {
         debug.assert(j == b64.len);
     }
 
-    fn decode(bin: []u8, b64: []const u8) BcryptError!void {
+    fn decode(bin: []u8, b64: []const u8) Error!void {
         var i: usize = 0;
         var j: usize = 0;
         while (j < bin.len) {
@@ -210,7 +204,7 @@ const Codec = struct {
     }
 };
 
-fn strHashInternal(password: []const u8, rounds_log: u6, salt: [salt_length]u8) BcryptError![hash_length]u8 {
+fn strHashInternal(password: []const u8, rounds_log: u6, salt: [salt_length]u8) Error![hash_length]u8 {
     var state = State{};
     var password_buf: [73]u8 = undefined;
     const trimmed_len = math.min(password.len, password_buf.len - 1);
@@ -258,14 +252,14 @@ fn strHashInternal(password: []const u8, rounds_log: u6, salt: [salt_length]u8) 
 /// IMPORTANT: by design, bcrypt silently truncates passwords to 72 bytes.
 /// If this is an issue for your application, hash the password first using a function such as SHA-512,
 /// and then use the resulting hash as the password parameter for bcrypt.
-pub fn strHash(password: []const u8, rounds_log: u6) ![hash_length]u8 {
+pub fn strHash(password: []const u8, rounds_log: u6) Error![hash_length]u8 {
     var salt: [salt_length]u8 = undefined;
     crypto.random.bytes(&salt);
     return strHashInternal(password, rounds_log, salt);
 }
 
 /// Verify that a previously computed hash is valid for a given password.
-pub fn strVerify(h: [hash_length]u8, password: []const u8) BcryptError!void {
+pub fn strVerify(h: [hash_length]u8, password: []const u8) Error!void {
     if (!mem.eql(u8, "$2", h[0..2])) return error.InvalidEncoding;
     if (h[3] != '$' or h[6] != '$') return error.InvalidEncoding;
     const rounds_log_str = h[4..][0..2];
@@ -275,7 +269,7 @@ pub fn strVerify(h: [hash_length]u8, password: []const u8) BcryptError!void {
     const rounds_log = fmt.parseInt(u6, rounds_log_str[0..], 10) catch return error.InvalidEncoding;
     const wanted_s = try strHashInternal(password, rounds_log, salt);
     if (!mem.eql(u8, wanted_s[0..], h[0..])) {
-        return error.InvalidPassword;
+        return error.PasswordVerificationFailed;
     }
 }
 
@@ -292,7 +286,7 @@ test "bcrypt codec" {
 test "bcrypt" {
     const s = try strHash("password", 5);
     try strVerify(s, "password");
-    testing.expectError(error.InvalidPassword, strVerify(s, "invalid password"));
+    testing.expectError(error.PasswordVerificationFailed, strVerify(s, "invalid password"));
 
     const long_s = try strHash("password" ** 100, 5);
     try strVerify(long_s, "password" ** 100);
diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig
@@ -202,6 +202,7 @@ pub fn benchmarkBatchSignatureVerification(comptime Signature: anytype, comptime
 const aeads = [_]Crypto{
     Crypto{ .ty = crypto.aead.chacha_poly.ChaCha20Poly1305, .name = "chacha20Poly1305" },
     Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" },
+    Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" },
     Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" },
     Crypto{ .ty = crypto.aead.Gimli, .name = "gimli-aead" },
     Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" },
diff --git a/lib/std/crypto/chacha20.zig b/lib/std/crypto/chacha20.zig
@@ -13,287 +13,359 @@ const testing = std.testing;
 const maxInt = math.maxInt;
 const Vector = std.meta.Vector;
 const Poly1305 = std.crypto.onetimeauth.Poly1305;
+const Error = std.crypto.Error;
+
+/// IETF-variant of the ChaCha20 stream cipher, as designed for TLS.
+pub const ChaCha20IETF = ChaChaIETF(20);
+
+/// IETF-variant of the ChaCha20 stream cipher, reduced to 12 rounds.
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const ChaCha12IETF = ChaChaIETF(12);
+
+/// IETF-variant of the ChaCha20 stream cipher, reduced to 8 rounds.
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const ChaCha8IETF = ChaChaIETF(8);
+
+/// Original ChaCha20 stream cipher.
+pub const ChaCha20With64BitNonce = ChaChaWith64BitNonce(20);
+
+/// Original ChaCha20 stream cipher, reduced to 12 rounds.
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const ChaCha12With64BitNonce = ChaChaWith64BitNonce(12);
+
+/// Original ChaCha20 stream cipher, reduced to 8 rounds.
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const ChaCha8With64BitNonce = ChaChaWith64BitNonce(8);
+
+/// XChaCha20 (nonce-extended version of the IETF ChaCha20 variant) stream cipher
+pub const XChaCha20IETF = XChaChaIETF(20);
+
+/// XChaCha20 (nonce-extended version of the IETF ChaCha20 variant) stream cipher, reduced to 12 rounds
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const XChaCha12IETF = XChaChaIETF(12);
+
+/// XChaCha20 (nonce-extended version of the IETF ChaCha20 variant) stream cipher, reduced to 8 rounds
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const XChaCha8IETF = XChaChaIETF(8);
+
+/// ChaCha20-Poly1305 authenticated cipher, as designed for TLS
+pub const ChaCha20Poly1305 = ChaChaPoly1305(20);
+
+/// ChaCha20-Poly1305 authenticated cipher, reduced to 12 rounds
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const ChaCha12Poly1305 = ChaChaPoly1305(12);
+
+/// ChaCha20-Poly1305 authenticated cipher, reduced to 8 rounds
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const ChaCha8Poly1305 = ChaChaPoly1305(8);
+
+/// XChaCha20-Poly1305 authenticated cipher
+pub const XChaCha20Poly1305 = XChaChaPoly1305(20);
+
+/// XChaCha20-Poly1305 authenticated cipher, reduced to 12 rounds
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const XChaCha12Poly1305 = XChaChaPoly1305(12);
+
+/// XChaCha20-Poly1305 authenticated cipher, reduced to 8 rounds
+/// Reduced-rounds versions are faster than the full-round version, but have a lower security margin.
+/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const XChaCha8Poly1305 = XChaChaPoly1305(8);
 
 // Vectorized implementation of the core function
-const ChaCha20VecImpl = struct {
-    const Lane = Vector(4, u32);
-    const BlockVec = [4]Lane;
-
-    fn initContext(key: [8]u32, d: [4]u32) BlockVec {
-        const c = "expand 32-byte k";
-        const constant_le = comptime Lane{
-            mem.readIntLittle(u32, c[0..4]),
-            mem.readIntLittle(u32, c[4..8]),
-            mem.readIntLittle(u32, c[8..12]),
-            mem.readIntLittle(u32, c[12..16]),
-        };
-        return BlockVec{
-            constant_le,
-            Lane{ key[0], key[1], key[2], key[3] },
-            Lane{ key[4], key[5], key[6], key[7] },
-            Lane{ d[0], d[1], d[2], d[3] },
-        };
-    }
+fn ChaChaVecImpl(comptime rounds_nb: usize) type {
+    return struct {
+        const Lane = Vector(4, u32);
+        const BlockVec = [4]Lane;
+
+        fn initContext(key: [8]u32, d: [4]u32) BlockVec {
+            const c = "expand 32-byte k";
+            const constant_le = comptime Lane{
+                mem.readIntLittle(u32, c[0..4]),
+                mem.readIntLittle(u32, c[4..8]),
+                mem.readIntLittle(u32, c[8..12]),
+                mem.readIntLittle(u32, c[12..16]),
+            };
+            return BlockVec{
+                constant_le,
+                Lane{ key[0], key[1], key[2], key[3] },
+                Lane{ key[4], key[5], key[6], key[7] },
+                Lane{ d[0], d[1], d[2], d[3] },
+            };
+        }
 
-    fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void {
-        x.* = input;
-
-        var r: usize = 0;
-        while (r < 20) : (r += 2) {
-            x[0] +%= x[1];
-            x[3] ^= x[0];
-            x[3] = math.rotl(Lane, x[3], 16);
-
-            x[2] +%= x[3];
-            x[1] ^= x[2];
-            x[1] = math.rotl(Lane, x[1], 12);
-
-            x[0] +%= x[1];
-            x[3] ^= x[0];
-            x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 3, 0, 1, 2 });
-            x[3] = math.rotl(Lane, x[3], 8);
-
-            x[2] +%= x[3];
-            x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 });
-            x[1] ^= x[2];
-            x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 1, 2, 3, 0 });
-            x[1] = math.rotl(Lane, x[1], 7);
-
-            x[0] +%= x[1];
-            x[3] ^= x[0];
-            x[3] = math.rotl(Lane, x[3], 16);
-
-            x[2] +%= x[3];
-            x[1] ^= x[2];
-            x[1] = math.rotl(Lane, x[1], 12);
-
-            x[0] +%= x[1];
-            x[3] ^= x[0];
-            x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 1, 2, 3, 0 });
-            x[3] = math.rotl(Lane, x[3], 8);
-
-            x[2] +%= x[3];
-            x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 });
-            x[1] ^= x[2];
-            x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 3, 0, 1, 2 });
-            x[1] = math.rotl(Lane, x[1], 7);
+        fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void {
+            x.* = input;
+
+            var r: usize = 0;
+            while (r < rounds_nb) : (r += 2) {
+                x[0] +%= x[1];
+                x[3] ^= x[0];
+                x[3] = math.rotl(Lane, x[3], 16);
+
+                x[2] +%= x[3];
+                x[1] ^= x[2];
+                x[1] = math.rotl(Lane, x[1], 12);
+
+                x[0] +%= x[1];
+                x[3] ^= x[0];
+                x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 3, 0, 1, 2 });
+                x[3] = math.rotl(Lane, x[3], 8);
+
+                x[2] +%= x[3];
+                x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 });
+                x[1] ^= x[2];
+                x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 1, 2, 3, 0 });
+                x[1] = math.rotl(Lane, x[1], 7);
+
+                x[0] +%= x[1];
+                x[3] ^= x[0];
+                x[3] = math.rotl(Lane, x[3], 16);
+
+                x[2] +%= x[3];
+                x[1] ^= x[2];
+                x[1] = math.rotl(Lane, x[1], 12);
+
+                x[0] +%= x[1];
+                x[3] ^= x[0];
+                x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 1, 2, 3, 0 });
+                x[3] = math.rotl(Lane, x[3], 8);
+
+                x[2] +%= x[3];
+                x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 });
+                x[1] ^= x[2];
+                x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 3, 0, 1, 2 });
+                x[1] = math.rotl(Lane, x[1], 7);
+            }
         }
-    }
 
-    fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void {
-        var i: usize = 0;
-        while (i < 4) : (i += 1) {
-            mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i][0]);
-            mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i][1]);
-            mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i][2]);
-            mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i][3]);
+        fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void {
+            var i: usize = 0;
+            while (i < 4) : (i += 1) {
+                mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i][0]);
+                mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i][1]);
+                mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i][2]);
+                mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i][3]);
+            }
         }
-    }
 
-    fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void {
-        x[0] +%= ctx[0];
-        x[1] +%= ctx[1];
-        x[2] +%= ctx[2];
-        x[3] +%= ctx[3];
-    }
+        fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void {
+            x[0] +%= ctx[0];
+            x[1] +%= ctx[1];
+            x[2] +%= ctx[2];
+            x[3] +%= ctx[3];
+        }
 
-    fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void {
-        var ctx = initContext(key, counter);
-        var x: BlockVec = undefined;
-        var buf: [64]u8 = undefined;
-        var i: usize = 0;
-        while (i + 64 <= in.len) : (i += 64) {
-            chacha20Core(x[0..], ctx);
-            contextFeedback(&x, ctx);
-            hashToBytes(buf[0..], x);
-
-            var xout = out[i..];
-            const xin = in[i..];
-            var j: usize = 0;
-            while (j < 64) : (j += 1) {
-                xout[j] = xin[j];
-            }
-            j = 0;
-            while (j < 64) : (j += 1) {
-                xout[j] ^= buf[j];
+        fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void {
+            var ctx = initContext(key, counter);
+            var x: BlockVec = undefined;
+            var buf: [64]u8 = undefined;
+            var i: usize = 0;
+            while (i + 64 <= in.len) : (i += 64) {
+                chacha20Core(x[0..], ctx);
+                contextFeedback(&x, ctx);
+                hashToBytes(buf[0..], x);
+
+                var xout = out[i..];
+                const xin = in[i..];
+                var j: usize = 0;
+                while (j < 64) : (j += 1) {
+                    xout[j] = xin[j];
+                }
+                j = 0;
+                while (j < 64) : (j += 1) {
+                    xout[j] ^= buf[j];
+                }
+                ctx[3][0] += 1;
             }
-            ctx[3][0] += 1;
-        }
-        if (i < in.len) {
-            chacha20Core(x[0..], ctx);
-            contextFeedback(&x, ctx);
-            hashToBytes(buf[0..], x);
-
-            var xout = out[i..];
-            const xin = in[i..];
-            var j: usize = 0;
-            while (j < in.len % 64) : (j += 1) {
-                xout[j] = xin[j] ^ buf[j];
+            if (i < in.len) {
+                chacha20Core(x[0..], ctx);
+                contextFeedback(&x, ctx);
+                hashToBytes(buf[0..], x);
+
+                var xout = out[i..];
+                const xin = in[i..];
+                var j: usize = 0;
+                while (j < in.len % 64) : (j += 1) {
+                    xout[j] = xin[j] ^ buf[j];
+                }
             }
         }
-    }
 
-    fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 {
-        var c: [4]u32 = undefined;
-        for (c) |_, i| {
-            c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]);
+        fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 {
+            var c: [4]u32 = undefined;
+            for (c) |_, i| {
+                c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]);
+            }
+            const ctx = initContext(keyToWords(key), c);
+            var x: BlockVec = undefined;
+            chacha20Core(x[0..], ctx);
+            var out: [32]u8 = undefined;
+            mem.writeIntLittle(u32, out[0..4], x[0][0]);
+            mem.writeIntLittle(u32, out[4..8], x[0][1]);
+            mem.writeIntLittle(u32, out[8..12], x[0][2]);
+            mem.writeIntLittle(u32, out[12..16], x[0][3]);
+            mem.writeIntLittle(u32, out[16..20], x[3][0]);
+            mem.writeIntLittle(u32, out[20..24], x[3][1]);
+            mem.writeIntLittle(u32, out[24..28], x[3][2]);
+            mem.writeIntLittle(u32, out[28..32], x[3][3]);
+            return out;
         }
-        const ctx = initContext(keyToWords(key), c);
-        var x: BlockVec = undefined;
-        chacha20Core(x[0..], ctx);
-        var out: [32]u8 = undefined;
-        mem.writeIntLittle(u32, out[0..4], x[0][0]);
-        mem.writeIntLittle(u32, out[4..8], x[0][1]);
-        mem.writeIntLittle(u32, out[8..12], x[0][2]);
-        mem.writeIntLittle(u32, out[12..16], x[0][3]);
-        mem.writeIntLittle(u32, out[16..20], x[3][0]);
-        mem.writeIntLittle(u32, out[20..24], x[3][1]);
-        mem.writeIntLittle(u32, out[24..28], x[3][2]);
-        mem.writeIntLittle(u32, out[28..32], x[3][3]);
-        return out;
-    }
-};
+    };
+}
 
 // Non-vectorized implementation of the core function
-const ChaCha20NonVecImpl = struct {
-    const BlockVec = [16]u32;
-
-    fn initContext(key: [8]u32, d: [4]u32) BlockVec {
-        const c = "expand 32-byte k";
-        const constant_le = comptime [4]u32{
-            mem.readIntLittle(u32, c[0..4]),
-            mem.readIntLittle(u32, c[4..8]),
-            mem.readIntLittle(u32, c[8..12]),
-            mem.readIntLittle(u32, c[12..16]),
-        };
-        return BlockVec{
-            constant_le[0], constant_le[1], constant_le[2], constant_le[3],
-            key[0],         key[1],         key[2],         key[3],
-            key[4],         key[5],         key[6],         key[7],
-            d[0],           d[1],           d[2],           d[3],
-        };
-    }
-
-    const QuarterRound = struct {
-        a: usize,
-        b: usize,
-        c: usize,
-        d: usize,
-    };
+fn ChaChaNonVecImpl(comptime rounds_nb: usize) type {
+    return struct {
+        const BlockVec = [16]u32;
+
+        fn initContext(key: [8]u32, d: [4]u32) BlockVec {
+            const c = "expand 32-byte k";
+            const constant_le = comptime [4]u32{
+                mem.readIntLittle(u32, c[0..4]),
+                mem.readIntLittle(u32, c[4..8]),
+                mem.readIntLittle(u32, c[8..12]),
+                mem.readIntLittle(u32, c[12..16]),
+            };
+            return BlockVec{
+                constant_le[0], constant_le[1], constant_le[2], constant_le[3],
+                key[0],         key[1],         key[2],         key[3],
+                key[4],         key[5],         key[6],         key[7],
+                d[0],           d[1],           d[2],           d[3],
+            };
+        }
 
-    fn Rp(a: usize, b: usize, c: usize, d: usize) QuarterRound {
-        return QuarterRound{
-            .a = a,
-            .b = b,
-            .c = c,
-            .d = d,
+        const QuarterRound = struct {
+            a: usize,
+            b: usize,
+            c: usize,
+            d: usize,
         };
-    }
 
-    fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void {
-        x.* = input;
-
-        const rounds = comptime [_]QuarterRound{
-            Rp(0, 4, 8, 12),
-            Rp(1, 5, 9, 13),
-            Rp(2, 6, 10, 14),
-            Rp(3, 7, 11, 15),
-            Rp(0, 5, 10, 15),
-            Rp(1, 6, 11, 12),
-            Rp(2, 7, 8, 13),
-            Rp(3, 4, 9, 14),
-        };
+        fn Rp(a: usize, b: usize, c: usize, d: usize) QuarterRound {
+            return QuarterRound{
+                .a = a,
+                .b = b,
+                .c = c,
+                .d = d,
+            };
+        }
 
-        comptime var j: usize = 0;
-        inline while (j < 20) : (j += 2) {
-            inline for (rounds) |r| {
-                x[r.a] +%= x[r.b];
-                x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 16));
-                x[r.c] +%= x[r.d];
-                x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 12));
-                x[r.a] +%= x[r.b];
-                x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 8));
-                x[r.c] +%= x[r.d];
-                x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 7));
+        fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void {
+            x.* = input;
+
+            const rounds = comptime [_]QuarterRound{
+                Rp(0, 4, 8, 12),
+                Rp(1, 5, 9, 13),
+                Rp(2, 6, 10, 14),
+                Rp(3, 7, 11, 15),
+                Rp(0, 5, 10, 15),
+                Rp(1, 6, 11, 12),
+                Rp(2, 7, 8, 13),
+                Rp(3, 4, 9, 14),
+            };
+
+            comptime var j: usize = 0;
+            inline while (j < rounds_nb) : (j += 2) {
+                inline for (rounds) |r| {
+                    x[r.a] +%= x[r.b];
+                    x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 16));
+                    x[r.c] +%= x[r.d];
+                    x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 12));
+                    x[r.a] +%= x[r.b];
+                    x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 8));
+                    x[r.c] +%= x[r.d];
+                    x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 7));
+                }
             }
         }
-    }
 
-    fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void {
-        var i: usize = 0;
-        while (i < 4) : (i += 1) {
-            mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i * 4 + 0]);
-            mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i * 4 + 1]);
-            mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i * 4 + 2]);
-            mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i * 4 + 3]);
+        fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void {
+            var i: usize = 0;
+            while (i < 4) : (i += 1) {
+                mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i * 4 + 0]);
+                mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i * 4 + 1]);
+                mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i * 4 + 2]);
+                mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i * 4 + 3]);
+            }
         }
-    }
 
-    fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void {
-        var i: usize = 0;
-        while (i < 16) : (i += 1) {
-            x[i] +%= ctx[i];
+        fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void {
+            var i: usize = 0;
+            while (i < 16) : (i += 1) {
+                x[i] +%= ctx[i];
+            }
         }
-    }
 
-    fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void {
-        var ctx = initContext(key, counter);
-        var x: BlockVec = undefined;
-        var buf: [64]u8 = undefined;
-        var i: usize = 0;
-        while (i + 64 <= in.len) : (i += 64) {
-            chacha20Core(x[0..], ctx);
-            contextFeedback(&x, ctx);
-            hashToBytes(buf[0..], x);
-
-            var xout = out[i..];
-            const xin = in[i..];
-            var j: usize = 0;
-            while (j < 64) : (j += 1) {
-                xout[j] = xin[j];
-            }
-            j = 0;
-            while (j < 64) : (j += 1) {
-                xout[j] ^= buf[j];
+        fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void {
+            var ctx = initContext(key, counter);
+            var x: BlockVec = undefined;
+            var buf: [64]u8 = undefined;
+            var i: usize = 0;
+            while (i + 64 <= in.len) : (i += 64) {
+                chacha20Core(x[0..], ctx);
+                contextFeedback(&x, ctx);
+                hashToBytes(buf[0..], x);
+
+                var xout = out[i..];
+                const xin = in[i..];
+                var j: usize = 0;
+                while (j < 64) : (j += 1) {
+                    xout[j] = xin[j];
+                }
+                j = 0;
+                while (j < 64) : (j += 1) {
+                    xout[j] ^= buf[j];
+                }
+                ctx[12] += 1;
             }
-            ctx[12] += 1;
-        }
-        if (i < in.len) {
-            chacha20Core(x[0..], ctx);
-            contextFeedback(&x, ctx);
-            hashToBytes(buf[0..], x);
-
-            var xout = out[i..];
-            const xin = in[i..];
-            var j: usize = 0;
-            while (j < in.len % 64) : (j += 1) {
-                xout[j] = xin[j] ^ buf[j];
+            if (i < in.len) {
+                chacha20Core(x[0..], ctx);
+                contextFeedback(&x, ctx);
+                hashToBytes(buf[0..], x);
+
+                var xout = out[i..];
+                const xin = in[i..];
+                var j: usize = 0;
+                while (j < in.len % 64) : (j += 1) {
+                    xout[j] = xin[j] ^ buf[j];
+                }
             }
         }
-    }
 
-    fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 {
-        var c: [4]u32 = undefined;
-        for (c) |_, i| {
-            c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]);
+        fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 {
+            var c: [4]u32 = undefined;
+            for (c) |_, i| {
+                c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]);
+            }
+            const ctx = initContext(keyToWords(key), c);
+            var x: BlockVec = undefined;
+            chacha20Core(x[0..], ctx);
+            var out: [32]u8 = undefined;
+            mem.writeIntLittle(u32, out[0..4], x[0]);
+            mem.writeIntLittle(u32, out[4..8], x[1]);
+            mem.writeIntLittle(u32, out[8..12], x[2]);
+            mem.writeIntLittle(u32, out[12..16], x[3]);
+            mem.writeIntLittle(u32, out[16..20], x[12]);
+            mem.writeIntLittle(u32, out[20..24], x[13]);
+            mem.writeIntLittle(u32, out[24..28], x[14]);
+            mem.writeIntLittle(u32, out[28..32], x[15]);
+            return out;
         }
-        const ctx = initContext(keyToWords(key), c);
-        var x: BlockVec = undefined;
-        chacha20Core(x[0..], ctx);
-        var out: [32]u8 = undefined;
-        mem.writeIntLittle(u32, out[0..4], x[0]);
-        mem.writeIntLittle(u32, out[4..8], x[1]);
-        mem.writeIntLittle(u32, out[8..12], x[2]);
-        mem.writeIntLittle(u32, out[12..16], x[3]);
-        mem.writeIntLittle(u32, out[16..20], x[12]);
-        mem.writeIntLittle(u32, out[20..24], x[13]);
-        mem.writeIntLittle(u32, out[24..28], x[14]);
-        mem.writeIntLittle(u32, out[28..32], x[15]);
-        return out;
-    }
-};
+    };
+}
 
-const ChaCha20Impl = if (std.Target.current.cpu.arch == .x86_64) ChaCha20VecImpl else ChaCha20NonVecImpl;
+fn ChaChaImpl(comptime rounds_nb: usize) type {
+    return if (std.Target.current.cpu.arch == .x86_64) ChaChaVecImpl(rounds_nb) else ChaChaNonVecImpl(rounds_nb);
+}
 
 fn keyToWords(key: [32]u8) [8]u32 {
     var k: [8]u32 = undefined;
@@ -304,68 +376,239 @@ fn keyToWords(key: [32]u8) [8]u32 {
     return k;
 }
 
-/// ChaCha20 avoids the possibility of timing attacks, as there are no branches
-/// on secret key data.
-///
-/// in and out should be the same length.
-/// counter should generally be 0 or 1
-///
-/// ChaCha20 is self-reversing. To decrypt just run the cipher with the same
-/// counter, nonce, and key.
-pub const ChaCha20IETF = struct {
-    pub fn xor(out: []u8, in: []const u8, counter: u32, key: [32]u8, nonce: [12]u8) void {
-        assert(in.len == out.len);
-        assert((in.len >> 6) + counter <= maxInt(u32));
-
-        var c: [4]u32 = undefined;
-        c[0] = counter;
-        c[1] = mem.readIntLittle(u32, nonce[0..4]);
-        c[2] = mem.readIntLittle(u32, nonce[4..8]);
-        c[3] = mem.readIntLittle(u32, nonce[8..12]);
-        ChaCha20Impl.chacha20Xor(out, in, keyToWords(key), c);
-    }
-};
-
-/// This is the original ChaCha20 before RFC 7539, which recommends using the
-/// orgininal version on applications such as disk or file encryption that might
-/// exceed the 256 GiB limit of the 96-bit nonce version.
-pub const ChaCha20With64BitNonce = struct {
-    pub fn xor(out: []u8, in: []const u8, counter: u64, key: [32]u8, nonce: [8]u8) void {
-        assert(in.len == out.len);
-        assert(counter +% (in.len >> 6) >= counter);
-
-        var cursor: usize = 0;
-        const k = keyToWords(key);
-        var c: [4]u32 = undefined;
-        c[0] = @truncate(u32, counter);
-        c[1] = @truncate(u32, counter >> 32);
-        c[2] = mem.readIntLittle(u32, nonce[0..4]);
-        c[3] = mem.readIntLittle(u32, nonce[4..8]);
-
-        const block_length = (1 << 6);
-        // The full block size is greater than the address space on a 32bit machine
-        const big_block = if (@sizeOf(usize) > 4) (block_length << 32) else maxInt(usize);
-
-        // first partial big block
-        if (((@intCast(u64, maxInt(u32) - @truncate(u32, counter)) + 1) << 6) < in.len) {
-            ChaCha20Impl.chacha20Xor(out[cursor..big_block], in[cursor..big_block], k, c);
-            cursor = big_block - cursor;
-            c[1] += 1;
-            if (comptime @sizeOf(usize) > 4) {
-                // A big block is giant: 256 GiB, but we can avoid this limitation
-                var remaining_blocks: u32 = @intCast(u32, (in.len / big_block));
-                var i: u32 = 0;
-                while (remaining_blocks > 0) : (remaining_blocks -= 1) {
-                    ChaCha20Impl.chacha20Xor(out[cursor .. cursor + big_block], in[cursor .. cursor + big_block], k, c);
-                    c[1] += 1; // upper 32-bit of counter, generic chacha20Xor() doesn't know about this.
-                    cursor += big_block;
+/// Derives the subkey and 12-byte nonce used by the 24-byte-nonce variants.
+/// The subkey is `hchacha20` applied to the first 16 nonce bytes and `key`; the
+/// returned nonce is four zero bytes followed by the last 8 bytes of `nonce`.
+fn extend(key: [32]u8, nonce: [24]u8, comptime rounds_nb: usize) struct { key: [32]u8, nonce: [12]u8 } {
+    const subkey = ChaChaImpl(rounds_nb).hchacha20(nonce[0..16].*, key);
+    var subnonce = [_]u8{0} ** 12;
+    mem.copy(u8, subnonce[4..], nonce[16..24]);
+    return .{
+        .key = subkey,
+        .nonce = subnonce,
+    };
+}
+
+fn ChaChaIETF(comptime rounds_nb: usize) type {
+    return struct {
+        /// Nonce length in bytes.
+        pub const nonce_length = 12;
+        /// Key length in bytes.
+        pub const key_length = 32;
+
+        /// Add the output of the ChaCha20 stream cipher to `in` and stores the result into `out`.
+        /// `in` and `out` must have the same length. `counter` is the initial 32-bit block
+        /// counter; `counter` plus the number of whole 64-byte blocks in `in` must not
+        /// exceed `maxInt(u32)`.
+        /// WARNING: This function doesn't provide authenticated encryption.
+        /// Using the AEAD or one of the `box` versions is usually preferred.
+        pub fn xor(out: []u8, in: []const u8, counter: u32, key: [key_length]u8, nonce: [nonce_length]u8) void {
+            assert(in.len == out.len);
+            // In Zig `-` binds tighter than `<<`, so `1 << 32 - 1` means `1 << 31`.
+            // Spell the limit as maxInt(u32) so the whole 32-bit counter range is usable
+            // and the subtraction cannot overflow for large `counter` values.
+            assert(in.len / 64 <= maxInt(u32) - counter);
+
+            // Word 0 is the block counter, words 1..3 are the little-endian nonce.
+            var d: [4]u32 = undefined;
+            d[0] = counter;
+            d[1] = mem.readIntLittle(u32, nonce[0..4]);
+            d[2] = mem.readIntLittle(u32, nonce[4..8]);
+            d[3] = mem.readIntLittle(u32, nonce[8..12]);
+            ChaChaImpl(rounds_nb).chacha20Xor(out, in, keyToWords(key), d);
+        }
+    };
+}
+
+fn ChaChaWith64BitNonce(comptime rounds_nb: usize) type {
+    return struct {
+        /// Nonce length in bytes.
+        pub const nonce_length = 8;
+        /// Key length in bytes.
+        pub const key_length = 32;
+
+        /// Add the output of the ChaCha20 stream cipher to `in` and stores the result into `out`.
+        /// `in` and `out` must have the same length. `counter` is the initial 64-bit block
+        /// counter; its low and high halves become words 0 and 1 of the cipher input.
+        /// WARNING: This function doesn't provide authenticated encryption.
+        /// Using the AEAD or one of the `box` versions is usually preferred.
+        pub fn xor(out: []u8, in: []const u8, counter: u64, key: [key_length]u8, nonce: [nonce_length]u8) void {
+            assert(in.len == out.len);
+            // In Zig `-` binds tighter than `<<`, so `1 << 64 - 1` means `1 << 63`.
+            // Use maxInt(u64) so the full 64-bit counter range is accepted and the
+            // subtraction cannot overflow for large `counter` values.
+            assert(in.len / 64 <= maxInt(u64) - counter);
+
+            var cursor: usize = 0;
+            const k = keyToWords(key);
+            var c: [4]u32 = undefined;
+            c[0] = @truncate(u32, counter);
+            c[1] = @truncate(u32, counter >> 32);
+            c[2] = mem.readIntLittle(u32, nonce[0..4]);
+            c[3] = mem.readIntLittle(u32, nonce[4..8]);
+
+            const block_length = (1 << 6);
+            // The full block size is greater than the address space on a 32bit machine
+            const big_block = if (@sizeOf(usize) > 4) (block_length << 32) else maxInt(usize);
+
+            // first partial big block
+            // NOTE(review): the first chunk is sliced as [0..big_block] rather than being
+            // sized by the number of blocks left before the low counter word wraps, and
+            // c[0] is not reset afterwards - confirm against chacha20Xor's counter handling.
+            if (((@intCast(u64, maxInt(u32) - @truncate(u32, counter)) + 1) << 6) < in.len) {
+                ChaChaImpl(rounds_nb).chacha20Xor(out[cursor..big_block], in[cursor..big_block], k, c);
+                cursor = big_block - cursor;
+                c[1] += 1;
+                if (comptime @sizeOf(usize) > 4) {
+                    // A big block is giant: 256 GiB, but we can avoid this limitation
+                    var remaining_blocks: u32 = @intCast(u32, (in.len / big_block));
+                    while (remaining_blocks > 0) : (remaining_blocks -= 1) {
+                        ChaChaImpl(rounds_nb).chacha20Xor(out[cursor .. cursor + big_block], in[cursor .. cursor + big_block], k, c);
+                        c[1] += 1; // upper 32-bit of counter, generic chacha20Xor() doesn't know about this.
+                        cursor += big_block;
+                    }
                 }
             }
+            // Whatever is left after the big blocks (or the whole input when no split happened).
+            ChaChaImpl(rounds_nb).chacha20Xor(out[cursor..], in[cursor..], k, c);
+        }
+    };
+}
+
+fn XChaChaIETF(comptime rounds_nb: usize) type {
+    return struct {
+        /// Nonce length in bytes.
+        pub const nonce_length = 24;
+        /// Key length in bytes.
+        pub const key_length = 32;
+
+        /// Add the output of the XChaCha20 stream cipher to `in` and stores the result into `out`.
+        /// The key and 24-byte nonce are first reduced with `extend`; the resulting subkey and
+        /// 12-byte nonce are then handed to `ChaChaIETF(rounds_nb).xor`.
+        /// WARNING: This function doesn't provide authenticated encryption.
+        /// Using the AEAD or one of the `box` versions is usually preferred.
+        pub fn xor(out: []u8, in: []const u8, counter: u32, key: [key_length]u8, nonce: [nonce_length]u8) void {
+            const derived = extend(key, nonce, rounds_nb);
+            const Inner = ChaChaIETF(rounds_nb);
+            Inner.xor(out, in, counter, derived.key, derived.nonce);
+        }
+    };
+}
+
+fn ChaChaPoly1305(comptime rounds_nb: usize) type {
+    return struct {
+        pub const tag_length = 16;
+        pub const nonce_length = 12;
+        pub const key_length = 32;
+
+        const Cipher = ChaChaIETF(rounds_nb);
+
+        /// Computes the Poly1305 tag over `ad` and `c` into `tag`.
+        /// The one-time MAC key is the first 32 keystream bytes at counter 0; `ad` and `c`
+        /// are each zero-padded to a multiple of 16 bytes and followed by both lengths
+        /// written as little-endian u64.
+        fn computeTag(tag: *[tag_length]u8, c: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
+            var poly_key = [_]u8{0} ** 32;
+            Cipher.xor(poly_key[0..], poly_key[0..], 0, k, npub);
+
+            var mac = Poly1305.init(poly_key[0..]);
+            const zeros = [_]u8{0} ** 16;
+            for ([_][]const u8{ ad, c }) |part| {
+                mac.update(part);
+                const rem = part.len % 16;
+                if (rem != 0) {
+                    mac.update(zeros[0 .. 16 - rem]);
+                }
+            }
+            var lens: [16]u8 = undefined;
+            mem.writeIntLittle(u64, lens[0..8], ad.len);
+            mem.writeIntLittle(u64, lens[8..16], c.len);
+            mac.update(lens[0..]);
+            mac.final(tag);
+        }
+
+        /// c: ciphertext: output buffer should be of size m.len
+        /// tag: authentication tag: output MAC
+        /// m: message
+        /// ad: Associated Data
+        /// npub: public nonce
+        /// k: private key
+        pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
+            assert(c.len == m.len);
+
+            // Encrypt starting at block counter 1; counter 0 is reserved for the MAC key.
+            Cipher.xor(c[0..m.len], m, 1, k, npub);
+            computeTag(tag, c[0..m.len], ad, npub, k);
+        }
+
+        /// m: message: output buffer should be of size c.len
+        /// c: ciphertext
+        /// tag: authentication tag
+        /// ad: Associated Data
+        /// npub: public nonce
+        /// k: private key
+        /// NOTE: the check of the authentication tag is currently not done in constant time
+        pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void {
+            assert(c.len == m.len);
+
+            var expected: [tag_length]u8 = undefined;
+            computeTag(&expected, c, ad, npub, k);
+
+            // Accumulate every byte difference before deciding, then reject on any mismatch.
+            var diff: u8 = 0;
+            for (expected) |byte, i| {
+                diff |= byte ^ tag[i];
+            }
+            if (diff != 0) {
+                return error.AuthenticationFailed;
+            }
+            // Only decrypt once the tag has been accepted.
+            Cipher.xor(m[0..c.len], c, 1, k, npub);
+        }
+    };
+}
+
+fn XChaChaPoly1305(comptime rounds_nb: usize) type {
+    return struct {
+        pub const tag_length = 16;
+        pub const nonce_length = 24;
+        pub const key_length = 32;
+
+        /// Encrypts `m` into `c` and writes the authentication tag to `tag`, after reducing
+        /// the key and 24-byte nonce with `extend`.
+        /// c: ciphertext: output buffer should be of size m.len
+        /// tag: authentication tag: output MAC
+        /// m: message
+        /// ad: Associated Data
+        /// npub: public nonce
+        /// k: private key
+        pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
+            const derived = extend(k, npub, rounds_nb);
+            ChaChaPoly1305(rounds_nb).encrypt(c, tag, m, ad, derived.nonce, derived.key);
         }
 
-        ChaCha20Impl.chacha20Xor(out[cursor..], in[cursor..], k, c);
+        /// Verifies `tag` and decrypts `c` into `m`, after reducing the key and 24-byte
+        /// nonce with `extend`. Errors from the inner `decrypt` are returned unchanged.
+        /// m: message: output buffer should be of size c.len
+        /// c: ciphertext
+        /// tag: authentication tag
+        /// ad: Associated Data
+        /// npub: public nonce
+        /// k: private key
+        pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void {
+            const derived = extend(k, npub, rounds_nb);
+            return ChaChaPoly1305(rounds_nb).decrypt(m, c, tag, ad, derived.nonce, derived.key);
+        }
+    };
+}
+
+// Round-trips a message through each AEAD construction and checks that a
+// modified ciphertext is rejected with error.AuthenticationFailed.
+test "chacha20 AEAD API" {
+    const aeads = [_]type{ ChaCha20Poly1305, XChaCha20Poly1305 };
+    const m = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
+    const ad = "Additional data";
+
+    inline for (aeads) |aead| {
+        const key = [_]u8{69} ** aead.key_length;
+        const nonce = [_]u8{42} ** aead.nonce_length;
+        var c: [m.len]u8 = undefined;
+        var tag: [aead.tag_length]u8 = undefined;
+        var out: [m.len]u8 = undefined;
+
+        aead.encrypt(c[0..], tag[0..], m, ad, nonce, key);
+        try aead.decrypt(out[0..], c[0..], tag, ad[0..], nonce, key);
+        testing.expectEqualSlices(u8, out[0..], m);
+        // Corrupt the first ciphertext byte. Wrapping add: a plain `+=` is
+        // overflow-checked and would trap if that byte happened to be 0xff.
+        c[0] +%= 1;
+        testing.expectError(error.AuthenticationFailed, aead.decrypt(out[0..], c[0..], tag, ad[0..], nonce, key));
     }
-};
+}
 
 // https://tools.ietf.org/html/rfc7539#section-2.4.2
 test "crypto.chacha20 test vector sunscreen" {
@@ -386,7 +629,7 @@ test "crypto.chacha20 test vector sunscreen" {
         0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42,
         0x87, 0x4d,
     };
-    const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
+    const m = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
     var result: [114]u8 = undefined;
     const key = [_]u8{
         0,  1,  2,  3,  4,  5,  6,  7,
@@ -400,13 +643,12 @@ test "crypto.chacha20 test vector sunscreen" {
         0, 0, 0, 0,
     };
 
-    ChaCha20IETF.xor(result[0..], input[0..], 1, key, nonce);
+    ChaCha20IETF.xor(result[0..], m[0..], 1, key, nonce);
     testing.expectEqualSlices(u8, &expected_result, &result);
 
-    // Chacha20 is self-reversing.
-    var plaintext: [114]u8 = undefined;
-    ChaCha20IETF.xor(plaintext[0..], result[0..], 1, key, nonce);
-    testing.expect(mem.order(u8, input, &plaintext) == .eq);
+    var m2: [114]u8 = undefined;
+    ChaCha20IETF.xor(m2[0..], result[0..], 1, key, nonce);
+    testing.expect(mem.order(u8, m, &m2) == .eq);
 }
 
 // https://tools.ietf.org/html/draft-agl-tls-chacha20poly1305-04#section-7
@@ -421,7 +663,7 @@ test "crypto.chacha20 test vector 1" {
         0x6a, 0x43, 0xb8, 0xf4, 0x15, 0x18, 0xa1, 0x1c,
         0xc3, 0x87, 0xb6, 0x69, 0xb2, 0xee, 0x65, 0x86,
     };
-    const input = [_]u8{
+    const m = [_]u8{
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -440,7 +682,7 @@ test "crypto.chacha20 test vector 1" {
     };
     const nonce = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 0 };
 
-    ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce);
+    ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce);
     testing.expectEqualSlices(u8, &expected_result, &result);
 }
 
@@ -455,7 +697,7 @@ test "crypto.chacha20 test vector 2" {
         0x53, 0xd7, 0x92, 0xb1, 0xc4, 0x3f, 0xea, 0x81,
         0x7e, 0x9a, 0xd2, 0x75, 0xae, 0x54, 0x69, 0x63,
     };
-    const input = [_]u8{
+    const m = [_]u8{
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -474,7 +716,7 @@ test "crypto.chacha20 test vector 2" {
     };
     const nonce = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 0 };
 
-    ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce);
+    ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce);
     testing.expectEqualSlices(u8, &expected_result, &result);
 }
 
@@ -489,7 +731,7 @@ test "crypto.chacha20 test vector 3" {
         0x52, 0x77, 0x06, 0x2e, 0xb7, 0xa0, 0x43, 0x3e,
         0x44, 0x5f, 0x41, 0xe3,
     };
-    const input = [_]u8{
+    const m = [_]u8{
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -508,7 +750,7 @@ test "crypto.chacha20 test vector 3" {
     };
     const nonce = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 1 };
 
-    ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce);
+    ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce);
     testing.expectEqualSlices(u8, &expected_result, &result);
 }
 
@@ -523,7 +765,7 @@ test "crypto.chacha20 test vector 4" {
         0x5d, 0xdc, 0x49, 0x7a, 0x0b, 0x46, 0x6e, 0x7d,
         0x6b, 0xbd, 0xb0, 0x04, 0x1b, 0x2f, 0x58, 0x6b,
     };
-    const input = [_]u8{
+    const m = [_]u8{
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -542,7 +784,7 @@ test "crypto.chacha20 test vector 4" {
     };
     const nonce = [_]u8{ 1, 0, 0, 0, 0, 0, 0, 0 };
 
-    ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce);
+    ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce);
     testing.expectEqualSlices(u8, &expected_result, &result);
 }
 
@@ -584,7 +826,7 @@ test "crypto.chacha20 test vector 5" {
         0x87, 0x46, 0xd4, 0x52, 0x4d, 0x38, 0x40, 0x7a,
         0x6d, 0xeb, 0x3a, 0xb7, 0x8f, 0xab, 0x78, 0xc9,
     };
-    const input = [_]u8{
+    const m = [_]u8{
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -614,147 +856,14 @@ test "crypto.chacha20 test vector 5" {
         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     };
 
-    ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce);
+    ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce);
     testing.expectEqualSlices(u8, &expected_result, &result);
 }
 
-pub const chacha20poly1305_tag_length = 16;
-
-fn chacha20poly1305SealDetached(ciphertext: []u8, tag: *[chacha20poly1305_tag_length]u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [12]u8) void {
-    assert(ciphertext.len == plaintext.len);
-
-    // derive poly1305 key
-    var polyKey = [_]u8{0} ** 32;
-    ChaCha20IETF.xor(polyKey[0..], polyKey[0..], 0, key, nonce);
-
-    // encrypt plaintext
-    ChaCha20IETF.xor(ciphertext[0..plaintext.len], plaintext, 1, key, nonce);
-
-    // construct mac
-    var mac = Poly1305.init(polyKey[0..]);
-    mac.update(data);
-    if (data.len % 16 != 0) {
-        const zeros = [_]u8{0} ** 16;
-        const padding = 16 - (data.len % 16);
-        mac.update(zeros[0..padding]);
-    }
-    mac.update(ciphertext[0..plaintext.len]);
-    if (plaintext.len % 16 != 0) {
-        const zeros = [_]u8{0} ** 16;
-        const padding = 16 - (plaintext.len % 16);
-        mac.update(zeros[0..padding]);
-    }
-    var lens: [16]u8 = undefined;
-    mem.writeIntLittle(u64, lens[0..8], data.len);
-    mem.writeIntLittle(u64, lens[8..16], plaintext.len);
-    mac.update(lens[0..]);
-    mac.final(tag);
-}
-
-fn chacha20poly1305Seal(ciphertextAndTag: []u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [12]u8) void {
-    return chacha20poly1305SealDetached(ciphertextAndTag[0..plaintext.len], ciphertextAndTag[plaintext.len..][0..chacha20poly1305_tag_length], plaintext, data, key, nonce);
-}
-
-/// Verifies and decrypts an authenticated message produced by chacha20poly1305SealDetached.
-fn chacha20poly1305OpenDetached(dst: []u8, ciphertext: []const u8, tag: *const [chacha20poly1305_tag_length]u8, data: []const u8, key: [32]u8, nonce: [12]u8) !void {
-    // split ciphertext and tag
-    assert(dst.len == ciphertext.len);
-
-    // derive poly1305 key
-    var polyKey = [_]u8{0} ** 32;
-    ChaCha20IETF.xor(polyKey[0..], polyKey[0..], 0, key, nonce);
-
-    // construct mac
-    var mac = Poly1305.init(polyKey[0..]);
-
-    mac.update(data);
-    if (data.len % 16 != 0) {
-        const zeros = [_]u8{0} ** 16;
-        const padding = 16 - (data.len % 16);
-        mac.update(zeros[0..padding]);
-    }
-    mac.update(ciphertext);
-    if (ciphertext.len % 16 != 0) {
-        const zeros = [_]u8{0} ** 16;
-        const padding = 16 - (ciphertext.len % 16);
-        mac.update(zeros[0..padding]);
-    }
-    var lens: [16]u8 = undefined;
-    mem.writeIntLittle(u64, lens[0..8], data.len);
-    mem.writeIntLittle(u64, lens[8..16], ciphertext.len);
-    mac.update(lens[0..]);
-    var computedTag: [16]u8 = undefined;
-    mac.final(computedTag[0..]);
-
-    // verify mac in constant time
-    // TODO: we can't currently guarantee that this will run in constant time.
-    // See https://github.com/ziglang/zig/issues/1776
-    var acc: u8 = 0;
-    for (computedTag) |_, i| {
-        acc |= computedTag[i] ^ tag[i];
-    }
-    if (acc != 0) {
-        return error.AuthenticationFailed;
-    }
-
-    // decrypt ciphertext
-    ChaCha20IETF.xor(dst[0..ciphertext.len], ciphertext, 1, key, nonce);
-}
-
-/// Verifies and decrypts an authenticated message produced by chacha20poly1305Seal.
-fn chacha20poly1305Open(dst: []u8, ciphertextAndTag: []const u8, data: []const u8, key: [32]u8, nonce: [12]u8) !void {
-    if (ciphertextAndTag.len < chacha20poly1305_tag_length) {
-        return error.InvalidMessage;
-    }
-    const ciphertextLen = ciphertextAndTag.len - chacha20poly1305_tag_length;
-    return try chacha20poly1305OpenDetached(dst, ciphertextAndTag[0..ciphertextLen], ciphertextAndTag[ciphertextLen..][0..chacha20poly1305_tag_length], data, key, nonce);
-}
-
-fn extend(key: [32]u8, nonce: [24]u8) struct { key: [32]u8, nonce: [12]u8 } {
-    var subnonce: [12]u8 = undefined;
-    mem.set(u8, subnonce[0..4], 0);
-    mem.copy(u8, subnonce[4..], nonce[16..24]);
-    return .{
-        .key = ChaCha20Impl.hchacha20(nonce[0..16].*, key),
-        .nonce = subnonce,
-    };
-}
-
-pub const XChaCha20IETF = struct {
-    pub fn xor(out: []u8, in: []const u8, counter: u32, key: [32]u8, nonce: [24]u8) void {
-        const extended = extend(key, nonce);
-        ChaCha20IETF.xor(out, in, counter, extended.key, extended.nonce);
-    }
-};
-
-pub const xchacha20poly1305_tag_length = 16;
-
-fn xchacha20poly1305SealDetached(ciphertext: []u8, tag: *[chacha20poly1305_tag_length]u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [24]u8) void {
-    const extended = extend(key, nonce);
-    return chacha20poly1305SealDetached(ciphertext, tag, plaintext, data, extended.key, extended.nonce);
-}
-
-fn xchacha20poly1305Seal(ciphertextAndTag: []u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [24]u8) void {
-    const extended = extend(key, nonce);
-    return chacha20poly1305Seal(ciphertextAndTag, plaintext, data, extended.key, extended.nonce);
-}
-
-/// Verifies and decrypts an authenticated message produced by xchacha20poly1305SealDetached.
-fn xchacha20poly1305OpenDetached(plaintext: []u8, ciphertext: []const u8, tag: *const [chacha20poly1305_tag_length]u8, data: []const u8, key: [32]u8, nonce: [24]u8) !void {
-    const extended = extend(key, nonce);
-    return try chacha20poly1305OpenDetached(plaintext, ciphertext, tag, data, extended.key, extended.nonce);
-}
-
-/// Verifies and decrypts an authenticated message produced by xchacha20poly1305Seal.
-fn xchacha20poly1305Open(ciphertextAndTag: []u8, msgAndTag: []const u8, data: []const u8, key: [32]u8, nonce: [24]u8) !void {
-    const extended = extend(key, nonce);
-    return try chacha20poly1305Open(ciphertextAndTag, msgAndTag, data, extended.key, extended.nonce);
-}
-
 test "seal" {
     {
-        const plaintext = "";
-        const data = "";
+        const m = "";
+        const ad = "";
         const key = [_]u8{
             0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
             0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
@@ -763,11 +872,11 @@ test "seal" {
         const exp_out = [_]u8{ 0xa0, 0x78, 0x4d, 0x7a, 0x47, 0x16, 0xf3, 0xfe, 0xb4, 0xf6, 0x4e, 0x7f, 0x4b, 0x39, 0xbf, 0x4 };
 
         var out: [exp_out.len]u8 = undefined;
-        chacha20poly1305Seal(out[0..], plaintext, data, key, nonce);
+        ChaCha20Poly1305.encrypt(out[0..m.len], out[m.len..], m, ad, nonce, key);
         testing.expectEqualSlices(u8, exp_out[0..], out[0..]);
     }
     {
-        const plaintext = [_]u8{
+        const m = [_]u8{
             0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
             0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
             0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
@@ -777,7 +886,7 @@ test "seal" {
             0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
             0x74, 0x2e,
         };
-        const data = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 };
+        const ad = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 };
         const key = [_]u8{
             0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
             0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
@@ -796,15 +905,15 @@ test "seal" {
         };
 
         var out: [exp_out.len]u8 = undefined;
-        chacha20poly1305Seal(out[0..], plaintext[0..], data[0..], key, nonce);
+        ChaCha20Poly1305.encrypt(out[0..m.len], out[m.len..], m[0..], ad[0..], nonce, key);
         testing.expectEqualSlices(u8, exp_out[0..], out[0..]);
     }
 }
 
 test "open" {
     {
-        const ciphertext = [_]u8{ 0xa0, 0x78, 0x4d, 0x7a, 0x47, 0x16, 0xf3, 0xfe, 0xb4, 0xf6, 0x4e, 0x7f, 0x4b, 0x39, 0xbf, 0x4 };
-        const data = "";
+        const c = [_]u8{ 0xa0, 0x78, 0x4d, 0x7a, 0x47, 0x16, 0xf3, 0xfe, 0xb4, 0xf6, 0x4e, 0x7f, 0x4b, 0x39, 0xbf, 0x4 };
+        const ad = "";
         const key = [_]u8{
             0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
             0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
@@ -813,11 +922,11 @@ test "open" {
         const exp_out = "";
 
         var out: [exp_out.len]u8 = undefined;
-        try chacha20poly1305Open(out[0..], ciphertext[0..], data, key, nonce);
+        try ChaCha20Poly1305.decrypt(out[0..], c[0..exp_out.len], c[exp_out.len..].*, ad[0..], nonce, key);
         testing.expectEqualSlices(u8, exp_out[0..], out[0..]);
     }
     {
-        const ciphertext = [_]u8{
+        const c = [_]u8{
             0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb, 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
             0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x8,  0xfe, 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
             0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12, 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
@@ -828,7 +937,7 @@ test "open" {
             0x61, 0x16, 0x1a, 0xe1, 0xb,  0x59, 0x4f, 0x9,  0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60,
             0x6,  0x91,
         };
-        const data = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 };
+        const ad = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 };
         const key = [_]u8{
             0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
             0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
@@ -846,126 +955,45 @@ test "open" {
         };
 
         var out: [exp_out.len]u8 = undefined;
-        try chacha20poly1305Open(out[0..], ciphertext[0..], data[0..], key, nonce);
+        try ChaCha20Poly1305.decrypt(out[0..], c[0..exp_out.len], c[exp_out.len..].*, ad[0..], nonce, key);
         testing.expectEqualSlices(u8, exp_out[0..], out[0..]);
 
         // corrupting the ciphertext, data, key, or nonce should cause a failure
-        var bad_ciphertext = ciphertext;
-        bad_ciphertext[0] ^= 1;
-        testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], bad_ciphertext[0..], data[0..], key, nonce));
-        var bad_data = data;
-        bad_data[0] ^= 1;
-        testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], ciphertext[0..], bad_data[0..], key, nonce));
+        var bad_c = c;
+        bad_c[0] ^= 1;
+        testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], bad_c[0..out.len], bad_c[out.len..].*, ad[0..], nonce, key));
+        var bad_ad = ad;
+        bad_ad[0] ^= 1;
+        testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], c[0..out.len], c[out.len..].*, bad_ad[0..], nonce, key));
         var bad_key = key;
         bad_key[0] ^= 1;
-        testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], ciphertext[0..], data[0..], bad_key, nonce));
+        testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], c[0..out.len], c[out.len..].*, ad[0..], nonce, bad_key));
         var bad_nonce = nonce;
         bad_nonce[0] ^= 1;
-        testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], ciphertext[0..], data[0..], key, bad_nonce));
-
-        // a short ciphertext should result in a different error
-        testing.expectError(error.InvalidMessage, chacha20poly1305Open(out[0..], "", data[0..], key, bad_nonce));
+        testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], c[0..out.len], c[out.len..].*, ad[0..], bad_nonce, key));
     }
 }
 
 test "crypto.xchacha20" {
     const key = [_]u8{69} ** 32;
     const nonce = [_]u8{42} ** 24;
-    const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
+    const m = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
     {
-        var ciphertext: [input.len]u8 = undefined;
-        XChaCha20IETF.xor(ciphertext[0..], input[0..], 0, key, nonce);
-        var buf: [2 * ciphertext.len]u8 = undefined;
-        testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&ciphertext)}), "E0A1BCF939654AFDBDC1746EC49832647C19D891F0D1A81FC0C1703B4514BDEA584B512F6908C2C5E9DD18D5CBC1805DE5803FE3B9CA5F193FB8359E91FAB0C3BB40309A292EB1CF49685C65C4A3ADF4F11DB0CD2B6B67FBC174BC2E860E8F769FD3565BBFAD1C845E05A0FED9BE167C240D");
+        var c: [m.len]u8 = undefined;
+        XChaCha20IETF.xor(c[0..], m[0..], 0, key, nonce);
+        var buf: [2 * c.len]u8 = undefined;
+        testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&c)}), "E0A1BCF939654AFDBDC1746EC49832647C19D891F0D1A81FC0C1703B4514BDEA584B512F6908C2C5E9DD18D5CBC1805DE5803FE3B9CA5F193FB8359E91FAB0C3BB40309A292EB1CF49685C65C4A3ADF4F11DB0CD2B6B67FBC174BC2E860E8F769FD3565BBFAD1C845E05A0FED9BE167C240D");
     }
     {
-        const data = "Additional data";
-        var ciphertext: [input.len + xchacha20poly1305_tag_length]u8 = undefined;
-        xchacha20poly1305Seal(ciphertext[0..], input, data, key, nonce);
-        var out: [input.len]u8 = undefined;
-        try xchacha20poly1305Open(out[0..], ciphertext[0..], data, key, nonce);
-        var buf: [2 * ciphertext.len]u8 = undefined;
-        testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&ciphertext)}), "994D2DD32333F48E53650C02C7A2ABB8E018B0836D7175AEC779F52E961780768F815C58F1AA52D211498DB89B9216763F569C9433A6BBFCEFB4D4A49387A4C5207FBB3B5A92B5941294DF30588C6740D39DC16FA1F0E634F7246CF7CDCB978E44347D89381B7A74EB7084F754B90BDE9AAF5A94B8F2A85EFD0B50692AE2D425E234");
-        testing.expectEqualSlices(u8, out[0..], input);
-        ciphertext[0] += 1;
-        testing.expectError(error.AuthenticationFailed, xchacha20poly1305Open(out[0..], ciphertext[0..], data, key, nonce));
-    }
-}
-
-pub const Chacha20Poly1305 = struct {
-    pub const tag_length = 16;
-    pub const nonce_length = 12;
-    pub const key_length = 32;
-
-    /// c: ciphertext: output buffer should be of size m.len
-    /// tag: authentication tag: output MAC
-    /// m: message
-    /// ad: Associated Data
-    /// npub: public nonce
-    /// k: private key
-    pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
-        assert(c.len == m.len);
-        return chacha20poly1305SealDetached(c, tag, m, ad, k, npub);
-    }
-
-    /// m: message: output buffer should be of size c.len
-    /// c: ciphertext
-    /// tag: authentication tag
-    /// ad: Associated Data
-    /// npub: public nonce
-    /// k: private key
-    /// NOTE: the check of the authentication tag is currently not done in constant time
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
-        assert(c.len == m.len);
-        return try chacha20poly1305OpenDetached(m, c, tag[0..], ad, k, npub);
-    }
-};
-
-pub const XChacha20Poly1305 = struct {
-    pub const tag_length = 16;
-    pub const nonce_length = 24;
-    pub const key_length = 32;
-
-    /// c: ciphertext: output buffer should be of size m.len
-    /// tag: authentication tag: output MAC
-    /// m: message
-    /// ad: Associated Data
-    /// npub: public nonce
-    /// k: private key
-    pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
-        assert(c.len == m.len);
-        return xchacha20poly1305SealDetached(c, tag, m, ad, k, npub);
-    }
-
-    /// m: message: output buffer should be of size c.len
-    /// c: ciphertext
-    /// tag: authentication tag
-    /// ad: Associated Data
-    /// npub: public nonce
-    /// k: private key
-    /// NOTE: the check of the authentication tag is currently not done in constant time
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
-        assert(c.len == m.len);
-        return try xchacha20poly1305OpenDetached(m, c, tag[0..], ad, k, npub);
-    }
-};
-
-test "chacha20 AEAD API" {
-    const aeads = [_]type{ Chacha20Poly1305, XChacha20Poly1305 };
-    const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
-    const data = "Additional data";
-
-    inline for (aeads) |aead| {
-        const key = [_]u8{69} ** aead.key_length;
-        const nonce = [_]u8{42} ** aead.nonce_length;
-        var ciphertext: [input.len]u8 = undefined;
-        var tag: [aead.tag_length]u8 = undefined;
-        var out: [input.len]u8 = undefined;
-
-        aead.encrypt(ciphertext[0..], tag[0..], input, data, nonce, key);
-        try aead.decrypt(out[0..], ciphertext[0..], tag, data[0..], nonce, key);
-        testing.expectEqualSlices(u8, out[0..], input);
-        ciphertext[0] += 1;
-        testing.expectError(error.AuthenticationFailed, aead.decrypt(out[0..], ciphertext[0..], tag, data[0..], nonce, key));
+        const ad = "Additional data";
+        var c: [m.len + XChaCha20Poly1305.tag_length]u8 = undefined;
+        XChaCha20Poly1305.encrypt(c[0..m.len], c[m.len..], m, ad, nonce, key);
+        var out: [m.len]u8 = undefined;
+        try XChaCha20Poly1305.decrypt(out[0..], c[0..m.len], c[m.len..].*, ad, nonce, key);
+        var buf: [2 * c.len]u8 = undefined;
+        testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&c)}), "994D2DD32333F48E53650C02C7A2ABB8E018B0836D7175AEC779F52E961780768F815C58F1AA52D211498DB89B9216763F569C9433A6BBFCEFB4D4A49387A4C5207FBB3B5A92B5941294DF30588C6740D39DC16FA1F0E634F7246CF7CDCB978E44347D89381B7A74EB7084F754B90BDE9AAF5A94B8F2A85EFD0B50692AE2D425E234");
+        testing.expectEqualSlices(u8, out[0..], m);
+        c[0] += 1;
+        testing.expectError(error.AuthenticationFailed, XChaCha20Poly1305.decrypt(out[0..], c[0..m.len], c[m.len..].*, ad, nonce, key));
     }
 }
diff --git a/lib/std/crypto/error.zig b/lib/std/crypto/error.zig
@@ -0,0 +1,34 @@
+pub const Error = error{
+    /// MAC verification failed - The tag doesn't verify for the given ciphertext and secret key
+    AuthenticationFailed,
+
+    /// The requested output length is too long for the chosen algorithm
+    OutputTooLong,
+
+    /// Finite field operation returned the identity element
+    IdentityElement,
+
+    /// Encoded input cannot be decoded
+    InvalidEncoding,
+
+    /// The signature doesn't verify for the given message and public key
+    SignatureVerificationFailed,
+
+    /// Both a public and secret key have been provided, but they are incompatible
+    KeyMismatch,
+
+    /// Encoded input is not in canonical form
+    NonCanonical,
+
+    /// Square root has no solutions
+    NotSquare,
+
+    /// Verification string doesn't match the provided password and parameters
+    PasswordVerificationFailed,
+
+    /// Parameters would be insecure to use
+    WeakParameters,
+
+    /// Public key would be insecure to use
+    WeakPublicKey,
+};
diff --git a/lib/std/crypto/gimli.zig b/lib/std/crypto/gimli.zig
@@ -20,6 +20,7 @@ const assert = std.debug.assert;
 const testing = std.testing;
 const htest = @import("test.zig");
 const Vector = std.meta.Vector;
+const Error = std.crypto.Error;
 
 pub const State = struct {
     pub const BLOCKBYTES = 48;
@@ -392,7 +393,7 @@ pub const Aead = struct {
     /// npub: public nonce
     /// k: private key
     /// NOTE: the check of the authentication tag is currently not done in constant time
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
+    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void {
         assert(c.len == m.len);
 
         var state = Aead.init(ad, npub, k);
@@ -429,7 +430,7 @@ pub const Aead = struct {
         // TODO: use a constant-time equality check here, see https://github.com/ziglang/zig/issues/1776
         if (!mem.eql(u8, buf[0..State.RATE], &tag)) {
             @memset(m.ptr, undefined, m.len);
-            return error.InvalidMessage;
+            return error.AuthenticationFailed;
         }
     }
 };
diff --git a/lib/std/crypto/isap.zig b/lib/std/crypto/isap.zig
@@ -3,6 +3,7 @@ const debug = std.debug;
 const mem = std.mem;
 const math = std.math;
 const testing = std.testing;
+const Error = std.crypto.Error;
 
 /// ISAPv2 is an authenticated encryption system hardened against side channels and fault attacks.
 /// https://csrc.nist.gov/CSRC/media/Projects/lightweight-cryptography/documents/round-2/spec-doc-rnd2/isap-spec-round2.pdf
@@ -217,7 +218,7 @@ pub const IsapA128A = struct {
         tag.* = mac(c, ad, npub, key);
     }
 
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) !void {
+    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) Error!void {
         var computed_tag = mac(c, ad, npub, key);
         var acc: u8 = 0;
         for (computed_tag) |_, j| {
diff --git a/lib/std/crypto/pbkdf2.zig b/lib/std/crypto/pbkdf2.zig
@@ -7,6 +7,7 @@
 const std = @import("std");
 const mem = std.mem;
 const maxInt = std.math.maxInt;
+const Error = std.crypto.Error;
 
 // RFC 2898 Section 5.2
 //
@@ -19,36 +20,28 @@ const maxInt = std.math.maxInt;
 // pseudorandom function. See Appendix B.1 for further discussion.)
 // PBKDF2 is recommended for new applications.
 //
-// PBKDF2 (P, S, c, dkLen)
+// PBKDF2 (P, S, c, dk_len)
 //
-// Options:        PRF        underlying pseudorandom function (hLen
+// Options:        PRF        underlying pseudorandom function (h_len
 //                            denotes the length in octets of the
 //                            pseudorandom function output)
 //
 // Input:          P          password, an octet string
 //                 S          salt, an octet string
 //                 c          iteration count, a positive integer
-//                 dkLen      intended length in octets of the derived
+//                 dk_len     intended length in octets of the derived
 //                            key, a positive integer, at most
-//                            (2^32 - 1) * hLen
+//                            (2^32 - 1) * h_len
 //
-// Output:         DK         derived key, a dkLen-octet string
+// Output:         DK         derived key, a dk_len-octet string
 
 // Based on Apple's CommonKeyDerivation, based originally on code by Damien Bergamini.
 
-pub const Pbkdf2Error = error{
-    /// At least one round is required
-    TooFewRounds,
-
-    /// Maximum length of the derived key is `maxInt(u32) * Prf.mac_length`
-    DerivedKeyTooLong,
-};
-
 /// Apply PBKDF2 to generate a key from a password.
 ///
 /// PBKDF2 is defined in RFC 2898, and is a recommendation of NIST SP 800-132.
 ///
-/// derivedKey: Slice of appropriate size for generated key. Generally 16 or 32 bytes in length.
+/// dk: Slice of appropriate size for generated key. Generally 16 or 32 bytes in length.
 ///             May be uninitialized. All bytes will be overwritten.
 ///             Maximum size is `maxInt(u32) * Hash.digest_length`
 ///             It is a programming error to pass buffer longer than the maximum size.
@@ -59,43 +52,38 @@ pub const Pbkdf2Error = error{
 ///
 /// rounds: Iteration count. Must be greater than 0. Common values range from 1,000 to 100,000.
 ///         Larger iteration counts improve security by increasing the time required to compute
-///         the derivedKey. It is common to tune this parameter to achieve approximately 100ms.
+///         the dk. It is common to tune this parameter to achieve approximately 100ms.
 ///
 /// Prf: Pseudo-random function to use. A common choice is `std.crypto.auth.hmac.HmacSha256`.
-pub fn pbkdf2(derivedKey: []u8, password: []const u8, salt: []const u8, rounds: u32, comptime Prf: type) Pbkdf2Error!void {
-    if (rounds < 1) return error.TooFewRounds;
+pub fn pbkdf2(dk: []u8, password: []const u8, salt: []const u8, rounds: u32, comptime Prf: type) Error!void {
+    if (rounds < 1) return error.WeakParameters;
 
-    const dkLen = derivedKey.len;
-    const hLen = Prf.mac_length;
-    comptime std.debug.assert(hLen >= 1);
+    const dk_len = dk.len;
+    const h_len = Prf.mac_length;
+    comptime std.debug.assert(h_len >= 1);
 
     // FromSpec:
     //
-    //   1. If dkLen > maxInt(u32) * hLen, output "derived key too long" and
+    //   1. If dk_len > maxInt(u32) * h_len, output "derived key too long" and
     //      stop.
     //
-    if (comptime (maxInt(usize) > maxInt(u32) * hLen) and (dkLen > @as(usize, maxInt(u32) * hLen))) {
-        // If maxInt(usize) is less than `maxInt(u32) * hLen` then dkLen is always inbounds
-        return error.DerivedKeyTooLong;
+    if (dk_len / h_len >= maxInt(u32)) {
+        // Counter starts at 1 and is 32 bit, so if we have to return more blocks, we would overflow
+        return error.OutputTooLong;
     }
 
     // FromSpec:
     //
-    //   2. Let l be the number of hLen-long blocks of bytes in the derived key,
+    //   2. Let l be the number of h_len-long blocks of bytes in the derived key,
     //      rounding up, and let r be the number of bytes in the last
     //      block
     //
 
-    // l will not overflow, proof:
-    // let `L(dkLen, hLen) = (dkLen + hLen - 1) / hLen`
-    // then `L^-1(l, hLen) = l*hLen - hLen + 1`
-    // 1) L^-1(maxInt(u32), hLen) <= maxInt(u32)*hLen
-    // 2) maxInt(u32)*hLen - hLen + 1 <= maxInt(u32)*hLen // subtract maxInt(u32)*hLen + 1
-    // 3) -hLen <= -1 // multiply by -1
-    // 4) hLen >= 1
-    const r_ = dkLen % hLen;
-    const l = @intCast(u32, (dkLen / hLen) + @as(u1, if (r_ == 0) 0 else 1)); // original: (dkLen + hLen - 1) / hLen
-    const r = if (r_ == 0) hLen else r_;
+    const blocks_count = @intCast(u32, std.math.divCeil(usize, dk_len, h_len) catch unreachable);
+    var r = dk_len % h_len;
+    if (r == 0) {
+        r = h_len;
+    }
 
     // FromSpec:
     //
@@ -125,37 +113,38 @@ pub fn pbkdf2(derivedKey: []u8, password: []const u8, salt: []const u8, rounds: 
     //  Here, INT (i) is a four-octet encoding of the integer i, most
     //  significant octet first.
     //
-    //  4. Concatenate the blocks and extract the first dkLen octets to
+    //  4. Concatenate the blocks and extract the first dk_len octets to
     //  produce a derived key DK:
     //
     //            DK = T_1 || T_2 ||  ...  || T_l<0..r-1>
-    var block: u32 = 0; // Spec limits to u32
-    while (block < l) : (block += 1) {
-        var prevBlock: [hLen]u8 = undefined;
-        var newBlock: [hLen]u8 = undefined;
+
+    var block: u32 = 0;
+    while (block < blocks_count) : (block += 1) {
+        var prev_block: [h_len]u8 = undefined;
+        var new_block: [h_len]u8 = undefined;
 
         // U_1 = PRF (P, S || INT (i))
-        const blockIndex = mem.toBytes(mem.nativeToBig(u32, block + 1)); // Block index starts at 0001
+        const block_index = mem.toBytes(mem.nativeToBig(u32, block + 1)); // Block index starts at 0001
         var ctx = Prf.init(password);
         ctx.update(salt);
-        ctx.update(blockIndex[0..]);
-        ctx.final(prevBlock[0..]);
+        ctx.update(block_index[0..]);
+        ctx.final(prev_block[0..]);
 
         // Choose portion of DK to write into (T_n) and initialize
-        const offset = block * hLen;
-        const blockLen = if (block != l - 1) hLen else r;
-        const dkBlock: []u8 = derivedKey[offset..][0..blockLen];
-        mem.copy(u8, dkBlock, prevBlock[0..dkBlock.len]);
+        const offset = block * h_len;
+        const block_len = if (block != blocks_count - 1) h_len else r;
+        const dk_block: []u8 = dk[offset..][0..block_len];
+        mem.copy(u8, dk_block, prev_block[0..dk_block.len]);
 
         var i: u32 = 1;
         while (i < rounds) : (i += 1) {
             // U_c = PRF (P, U_{c-1})
-            Prf.create(&newBlock, prevBlock[0..], password);
-            mem.copy(u8, prevBlock[0..], newBlock[0..]);
+            Prf.create(&new_block, prev_block[0..], password);
+            mem.copy(u8, prev_block[0..], new_block[0..]);
 
             // F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c
-            for (dkBlock) |_, j| {
-                dkBlock[j] ^= newBlock[j];
+            for (dk_block) |_, j| {
+                dk_block[j] ^= new_block[j];
             }
         }
     }
@@ -165,49 +154,50 @@ const htest = @import("test.zig");
 const HmacSha1 = std.crypto.auth.hmac.HmacSha1;
 
 // RFC 6070 PBKDF2 HMAC-SHA1 Test Vectors
+
 test "RFC 6070 one iteration" {
     const p = "password";
     const s = "salt";
     const c = 1;
-    const dkLen = 20;
+    const dk_len = 20;
 
-    var derivedKey: [dkLen]u8 = undefined;
+    var dk: [dk_len]u8 = undefined;
 
-    try pbkdf2(&derivedKey, p, s, c, HmacSha1);
+    try pbkdf2(&dk, p, s, c, HmacSha1);
 
     const expected = "0c60c80f961f0e71f3a9b524af6012062fe037a6";
 
-    htest.assertEqual(expected, derivedKey[0..]);
+    htest.assertEqual(expected, dk[0..]);
 }
 
 test "RFC 6070 two iterations" {
     const p = "password";
     const s = "salt";
     const c = 2;
-    const dkLen = 20;
+    const dk_len = 20;
 
-    var derivedKey: [dkLen]u8 = undefined;
+    var dk: [dk_len]u8 = undefined;
 
-    try pbkdf2(&derivedKey, p, s, c, HmacSha1);
+    try pbkdf2(&dk, p, s, c, HmacSha1);
 
     const expected = "ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957";
 
-    htest.assertEqual(expected, derivedKey[0..]);
+    htest.assertEqual(expected, dk[0..]);
 }
 
 test "RFC 6070 4096 iterations" {
     const p = "password";
     const s = "salt";
     const c = 4096;
-    const dkLen = 20;
+    const dk_len = 20;
 
-    var derivedKey: [dkLen]u8 = undefined;
+    var dk: [dk_len]u8 = undefined;
 
-    try pbkdf2(&derivedKey, p, s, c, HmacSha1);
+    try pbkdf2(&dk, p, s, c, HmacSha1);
 
     const expected = "4b007901b765489abead49d926f721d065a429c1";
 
-    htest.assertEqual(expected, derivedKey[0..]);
+    htest.assertEqual(expected, dk[0..]);
 }
 
 test "RFC 6070 16,777,216 iterations" {
@@ -219,48 +209,48 @@ test "RFC 6070 16,777,216 iterations" {
     const p = "password";
     const s = "salt";
     const c = 16777216;
-    const dkLen = 20;
+    const dk_len = 20;
 
-    var derivedKey = [_]u8{0} ** dkLen;
+    var dk = [_]u8{0} ** dk_len;
 
-    try pbkdf2(&derivedKey, p, s, c, HmacSha1);
+    try pbkdf2(&dk, p, s, c, HmacSha1);
 
     const expected = "eefe3d61cd4da4e4e9945b3d6ba2158c2634e984";
 
-    htest.assertEqual(expected, derivedKey[0..]);
+    htest.assertEqual(expected, dk[0..]);
 }
 
 test "RFC 6070 multi-block salt and password" {
     const p = "passwordPASSWORDpassword";
     const s = "saltSALTsaltSALTsaltSALTsaltSALTsalt";
     const c = 4096;
-    const dkLen = 25;
+    const dk_len = 25;
 
-    var derivedKey: [dkLen]u8 = undefined;
+    var dk: [dk_len]u8 = undefined;
 
-    try pbkdf2(&derivedKey, p, s, c, HmacSha1);
+    try pbkdf2(&dk, p, s, c, HmacSha1);
 
     const expected = "3d2eec4fe41c849b80c8d83662c0e44a8b291a964cf2f07038";
 
-    htest.assertEqual(expected, derivedKey[0..]);
+    htest.assertEqual(expected, dk[0..]);
 }
 
 test "RFC 6070 embedded NUL" {
     const p = "pass\x00word";
     const s = "sa\x00lt";
     const c = 4096;
-    const dkLen = 16;
+    const dk_len = 16;
 
-    var derivedKey: [dkLen]u8 = undefined;
+    var dk: [dk_len]u8 = undefined;
 
-    try pbkdf2(&derivedKey, p, s, c, HmacSha1);
+    try pbkdf2(&dk, p, s, c, HmacSha1);
 
     const expected = "56fa6aa75548099dcc37d7f03425e0c3";
 
-    htest.assertEqual(expected, derivedKey[0..]);
+    htest.assertEqual(expected, dk[0..]);
 }
 
-test "Very large dkLen" {
+test "Very large dk_len" {
     // This test allocates 8GB of memory and is expected to take several hours to run.
     if (true) {
         return error.SkipZigTest;
@@ -268,13 +258,13 @@ test "Very large dkLen" {
     const p = "password";
     const s = "salt";
     const c = 1;
-    const dkLen = 1 << 33;
+    const dk_len = 1 << 33;
 
-    var derivedKey = try std.testing.allocator.alloc(u8, dkLen);
+    var dk = try std.testing.allocator.alloc(u8, dk_len);
     defer {
-        std.testing.allocator.free(derivedKey);
+        std.testing.allocator.free(dk);
     }
 
-    try pbkdf2(derivedKey, p, s, c, HmacSha1);
     // Just verify this doesn't crash with an overflow
+    try pbkdf2(dk, p, s, c, HmacSha1);
 }
diff --git a/lib/std/crypto/salsa20.zig b/lib/std/crypto/salsa20.zig
@@ -15,6 +15,7 @@ const Vector = std.meta.Vector;
 const Poly1305 = crypto.onetimeauth.Poly1305;
 const Blake2b = crypto.hash.blake2.Blake2b;
 const X25519 = crypto.dh.X25519;
+const Error = crypto.Error;
 
 const Salsa20VecImpl = struct {
     const Lane = Vector(4, u32);
@@ -398,7 +399,7 @@ pub const XSalsa20Poly1305 = struct {
     /// ad: Associated Data
     /// npub: public nonce
     /// k: private key
-    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
+    pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void {
         debug.assert(c.len == m.len);
         const extended = extend(k, npub);
         var block0 = [_]u8{0} ** 64;
@@ -446,7 +447,7 @@ pub const SecretBox = struct {
 
     /// Verify and decrypt `c` using a nonce `npub` and a key `k`.
     /// `m` must be exactly `tag_length` smaller than `c`, as `c` includes an authentication tag in addition to the encrypted message.
-    pub fn open(m: []u8, c: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
+    pub fn open(m: []u8, c: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void {
         if (c.len < tag_length) {
             return error.AuthenticationFailed;
         }
@@ -481,20 +482,20 @@ pub const Box = struct {
     pub const KeyPair = X25519.KeyPair;
 
     /// Compute a secret suitable for `secretbox` given a recipient's public key and a sender's secret key.
-    pub fn createSharedSecret(public_key: [public_length]u8, secret_key: [secret_length]u8) ![shared_length]u8 {
+    pub fn createSharedSecret(public_key: [public_length]u8, secret_key: [secret_length]u8) Error![shared_length]u8 {
         const p = try X25519.scalarmult(secret_key, public_key);
         const zero = [_]u8{0} ** 16;
         return Salsa20Impl.hsalsa20(zero, p);
     }
 
     /// Encrypt and authenticate a message using a recipient's public key `public_key` and a sender's `secret_key`.
-    pub fn seal(c: []u8, m: []const u8, npub: [nonce_length]u8, public_key: [public_length]u8, secret_key: [secret_length]u8) !void {
+    pub fn seal(c: []u8, m: []const u8, npub: [nonce_length]u8, public_key: [public_length]u8, secret_key: [secret_length]u8) Error!void {
         const shared_key = try createSharedSecret(public_key, secret_key);
         return SecretBox.seal(c, m, npub, shared_key);
     }
 
     /// Verify and decrypt a message using a recipient's secret key `secret_key` and a sender's `public_key`.
-    pub fn open(m: []u8, c: []const u8, npub: [nonce_length]u8, public_key: [public_length]u8, secret_key: [secret_length]u8) !void {
+    pub fn open(m: []u8, c: []const u8, npub: [nonce_length]u8, public_key: [public_length]u8, secret_key: [secret_length]u8) Error!void {
         const shared_key = try createSharedSecret(public_key, secret_key);
         return SecretBox.open(m, c, npub, shared_key);
     }
@@ -527,7 +528,7 @@ pub const SealedBox = struct {
 
     /// Encrypt a message `m` for a recipient whose public key is `public_key`.
     /// `c` must be `seal_length` bytes larger than `m`, so that the required metadata can be added.
-    pub fn seal(c: []u8, m: []const u8, public_key: [public_length]u8) !void {
+    pub fn seal(c: []u8, m: []const u8, public_key: [public_length]u8) Error!void {
         debug.assert(c.len == m.len + seal_length);
         var ekp = try KeyPair.create(null);
         const nonce = createNonce(ekp.public_key, public_key);
@@ -538,7 +539,7 @@ pub const SealedBox = struct {
 
     /// Decrypt a message using a key pair.
     /// `m` must be exactly `seal_length` bytes smaller than `c`, as `c` also includes metadata.
-    pub fn open(m: []u8, c: []const u8, keypair: KeyPair) !void {
+    pub fn open(m: []u8, c: []const u8, keypair: KeyPair) Error!void {
         if (c.len < seal_length) {
             return error.AuthenticationFailed;
         }
diff --git a/lib/std/debug.zig b/lib/std/debug.zig
@@ -250,24 +250,6 @@ pub fn panicExtra(trace: ?*const builtin.StackTrace, first_trace_addr: ?usize, c
         resetSegfaultHandler();
     }
 
-    if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64)
-        nosuspend {
-            // As a workaround for not having threadlocal variable support in LLD for this target,
-            // we have a simpler panic implementation that does not use threadlocal variables.
-            // TODO https://github.com/ziglang/zig/issues/7527
-            const stderr = io.getStdErr().writer();
-            if (@atomicRmw(u8, &panicking, .Add, 1, .SeqCst) == 0) {
-                stderr.print("panic: " ++ format ++ "\n", args) catch os.abort();
-                if (trace) |t| {
-                    dumpStackTrace(t.*);
-                }
-                dumpCurrentStackTrace(first_trace_addr);
-            } else {
-                stderr.print("Panicked during a panic. Aborting.\n", .{}) catch os.abort();
-            }
-            os.abort();
-        };
-
     nosuspend switch (panic_stage) {
         0 => {
             panic_stage = 1;
diff --git a/lib/std/enums.zig b/lib/std/enums.zig
@@ -0,0 +1,1281 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+
+//! This module contains utilities and data structures for working with enums.
+
+const std = @import("std.zig");
+const assert = std.debug.assert;
+const testing = std.testing;
+const EnumField = std.builtin.TypeInfo.EnumField;
+
+/// Returns a struct type with one field per unique named enum element of E.
+/// If the enum is extern and has multiple names for the same value, only
+/// the first name is used.  Each field is of type Data and has the provided
+/// default, which may be undefined.
+pub fn EnumFieldStruct(comptime E: type, comptime Data: type, comptime field_default: ?Data) type {
+    const StructField = std.builtin.TypeInfo.StructField;
+    var fields: []const StructField = &[_]StructField{};
+    for (uniqueFields(E)) |field| { // only the field itself is needed, not its index
+        fields = fields ++ &[_]StructField{.{
+            .name = field.name,
+            .field_type = Data,
+            .default_value = field_default,
+            .is_comptime = false,
+            .alignment = if (@sizeOf(Data) > 0) @alignOf(Data) else 0, // zero-sized Data gets alignment 0
+        }};
+    }
+    return @Type(.{ .Struct = .{
+        .layout = .Auto,
+        .fields = fields,
+        .decls = &[_]std.builtin.TypeInfo.Declaration{},
+        .is_tuple = false,
+    }});
+}
+
+/// Maps each supplied field to the value of enum E that has the same name.
+/// Only the field names are used; the `value` members are ignored.
+/// The result has the same length and order as the input.
+pub fn valuesFromFields(comptime E: type, comptime fields: []const EnumField) []const E {
+    comptime {
+        var result: [fields.len]E = undefined;
+        for (fields) |f, i| {
+            result[i] = @field(E, f.name); // look the enum value up by name
+        }
+        return &result;
+    }
+}
+
+test "std.enums.valuesFromFields" {
+    const E = extern enum { a, b, c, d = 0 }; // d shares the value 0 with a
+    const fields = valuesFromFields(E, &[_]EnumField{
+        .{ .name = "b", .value = undefined }, // values are left undefined: only names are read
+        .{ .name = "a", .value = undefined },
+        .{ .name = "a", .value = undefined },
+        .{ .name = "d", .value = undefined },
+    });
+    testing.expectEqual(E.b, fields[0]);
+    testing.expectEqual(E.a, fields[1]);
+    testing.expectEqual(E.d, fields[2]); // a == d
+    testing.expectEqual(E.d, fields[3]);
+}
+
+/// Returns every named value of the given enum, in declaration
+/// order.  Names that alias the same value each get an entry.
+pub fn values(comptime E: type) []const E {
+    return comptime valuesFromFields(E, @typeInfo(E).Enum.fields);
+}
+
+test "std.enums.values" {
+    const E = extern enum { a, b, c, d = 0 }; // d aliases a, yet both names are listed
+    testing.expectEqualSlices(E, &.{.a, .b, .c, .d}, values(E));
+}
+
+/// Returns the set of all unique named values in the given enum, in
+/// declaration order.  For repeated values in extern enums, only the
+/// first name for each value is included (see uniqueFields).
+pub fn uniqueValues(comptime E: type) []const E {
+    return comptime valuesFromFields(E, uniqueFields(E));
+}
+
+test "std.enums.uniqueValues" {
+    const E = extern enum { a, b, c, d = 0, e, f = 3 }; // d repeats a (0), e repeats b (1)
+    testing.expectEqualSlices(E, &.{.a, .b, .c, .f}, uniqueValues(E));
+
+    const F = enum { a, b, c }; // non-extern enum: nothing is filtered out
+    testing.expectEqualSlices(F, &.{.a, .b, .c}, uniqueValues(F));
+}
+
+/// Returns the fields of the given enum that have distinct values, in
+/// declaration order.  For repeated values in extern enums, only the
+/// first name for each value is included.
+pub fn uniqueFields(comptime E: type) []const EnumField {
+    comptime {
+        const info = @typeInfo(E).Enum;
+        const raw_fields = info.fields;
+        // Only extern enums can contain duplicates,
+        // so fast path other types.
+        if (info.layout != .Extern) {
+            return raw_fields;
+        }
+
+        var unique_fields: []const EnumField = &[_]EnumField{};
+    outer:
+        for (raw_fields) |candidate| {
+            for (unique_fields) |u| {
+                if (u.value == candidate.value)
+                    continue :outer; // value already seen under an earlier name
+            }
+            unique_fields = unique_fields ++ &[_]EnumField{candidate};
+        }
+
+        return unique_fields;
+    }
+}
+
+/// Determines the length of a direct-mapped enum array, indexed by
+/// @intCast(usize, @enumToInt(enum_value)).  The enum must be exhaustive.
+/// If the enum contains any fields with values that are negative or cannot
+/// be represented by usize, a compile error is issued.  The max_unused_slots
+/// parameter limits the total number of items which have no matching enum key
+/// (holes in the enum numbering).  For example, if an enum has values 1, 2, 5,
+/// and 6, max_unused_slots must be at least 3, to allow unused slots 0, 3, and 4.
+fn directEnumArrayLen(comptime E: type, comptime max_unused_slots: comptime_int) comptime_int {
+    const info = @typeInfo(E).Enum;
+    if (!info.is_exhaustive) {
+        @compileError("Cannot create direct array of non-exhaustive enum "++@typeName(E));
+    }
+
+    var max_value: comptime_int = -1; // stays -1 when the enum has no fields, giving length 0
+    const max_usize: comptime_int = ~@as(usize, 0);
+    const fields = uniqueFields(E);
+    for (fields) |f| {
+        if (f.value < 0) {
+            @compileError("Cannot create a direct enum array for "++@typeName(E)++", field ."++f.name++" has a negative value.");
+        }
+        if (f.value > max_value) {
+            if (f.value > max_usize) {
+                @compileError("Cannot create a direct enum array for "++@typeName(E)++", field ."++f.name++" is larger than the max value of usize.");
+            }
+            max_value = f.value;
+        }
+    }
+
+    const unused_slots = max_value + 1 - fields.len; // array length minus the number of unique keys
+    if (unused_slots > max_unused_slots) {
+        const unused_str = std.fmt.comptimePrint("{d}", .{unused_slots});
+        const allowed_str = std.fmt.comptimePrint("{d}", .{max_unused_slots});
+        @compileError("Cannot create a direct enum array for "++@typeName(E)++". It would have "++unused_str++" unused slots, but only "++allowed_str++" are allowed.");
+    }
+
+    return max_value + 1;
+}
+
+/// Initializes an array of Data which can be indexed by
+/// @intCast(usize, @enumToInt(enum_value)).  The enum must be exhaustive.
+/// If the enum contains any fields with values that cannot be represented
+/// by usize, a compile error is issued.  The max_unused_slots parameter limits
+/// the total number of items which have no matching enum key (holes in the enum
+/// numbering).  So for example, if an enum has values 1, 2, 5, and 6, max_unused_slots
+/// must be at least 3, to allow unused slots 0, 3, and 4.
+/// The init_values parameter must be a struct with field names that match the enum values.
+/// If the enum has multiple fields with the same value, the name of the first one must
+/// be used.
+pub fn directEnumArray(
+    comptime E: type,
+    comptime Data: type,
+    comptime max_unused_slots: comptime_int,
+    init_values: EnumFieldStruct(E, Data, null),
+) [directEnumArrayLen(E, max_unused_slots)]Data {
+    return directEnumArrayDefault(E, Data, null, max_unused_slots, init_values); // delegates with a null default
+}
+
+test "std.enums.directEnumArray" {
+    const E = enum(i4) { a = 4, b = 6, c = 2 }; // max value 6 -> length 7; slots 0, 1, 3, 5 are holes
+    var runtime_false: bool = false; // a `var`, so init_values is not entirely comptime-known
+    const array = directEnumArray(E, bool, 4, .{
+        .a = true,
+        .b = runtime_false,
+        .c = true,
+    });
+
+    testing.expectEqual([7]bool, @TypeOf(array));
+    testing.expectEqual(true, array[4]);
+    testing.expectEqual(false, array[6]);
+    testing.expectEqual(true, array[2]);
+}
+
+/// Initializes an array of Data which can be indexed by
+/// @intCast(usize, @enumToInt(enum_value)).  The enum must be exhaustive.
+/// If the enum contains any fields with values that cannot be represented
+/// by usize, a compile error is issued.  The max_unused_slots parameter limits
+/// the number of slots with no matching enum key (holes in the enum numbering).
+/// For example, values 1, 2, 5, and 6 need max_unused_slots >= 3 (slots 0, 3, 4).
+/// If default is non-null, every slot starts out as that value and it is also the
+/// default of each init_values field; if it is null, unset slots stay undefined.
+/// The init_values field names must match the enum values.  If the enum has
+/// multiple fields with the same value, the name of the first one must be used.
+pub fn directEnumArrayDefault(
+    comptime E: type,
+    comptime Data: type,
+    comptime default: ?Data,
+    comptime max_unused_slots: comptime_int,
+    init_values: EnumFieldStruct(E, Data, default),
+) [directEnumArrayLen(E, max_unused_slots)]Data {
+    const len = comptime directEnumArrayLen(E, max_unused_slots);
+    var result: [len]Data = if (default) |d| [_]Data{d} ** len else undefined;
+    inline for (@typeInfo(@TypeOf(init_values)).Struct.fields) |f| { // index capture is not needed
+        const enum_value = @field(E, f.name);
+        const index = @intCast(usize, @enumToInt(enum_value));
+        result[index] = @field(init_values, f.name);
+    }
+    return result;
+}
+
+test "std.enums.directEnumArrayDefault" {
+    const E = enum(i4) { a = 4, b = 6, c = 2 };
+    var runtime_false: bool = false;
+    const array = directEnumArrayDefault(E, bool, false, 4, .{
+        .a = true,
+        .b = runtime_false,
+    }); // .c is omitted on purpose and is expected to take the default
+
+    testing.expectEqual([7]bool, @TypeOf(array));
+    testing.expectEqual(true, array[4]);
+    testing.expectEqual(false, array[6]);
+    testing.expectEqual(false, array[2]);
+}
+
+/// Cast an enum literal, enum value, or string to the value of enum E that has
+/// the same name.  A missing name or unsupported input type is a compile error.
+pub fn nameCast(comptime E: type, comptime value: anytype) E {
+    comptime {
+        const V = @TypeOf(value);
+        if (V == E) return value; // already the target type, nothing to look up
+        var name: ?[]const u8 = switch (@typeInfo(V)) {
+            .EnumLiteral, .Enum => @tagName(value),
+            .Pointer => if (std.meta.trait.isZigString(V)) value else null,
+            else => null, // any other type cannot supply a name
+        };
+        if (name) |n| {
+            if (@hasField(E, n)) {
+                return @field(E, n);
+            }
+            @compileError("Enum "++@typeName(E)++" has no field named "++n);
+        }
+        @compileError("Cannot cast from "++@typeName(@TypeOf(value))++" to "++@typeName(E));
+    }
+}
+
+test "std.enums.nameCast" {
+    const A = enum { a = 0, b = 1 };
+    const B = enum { a = 1, b = 0 }; // same names as A with swapped values
+    testing.expectEqual(A.a, nameCast(A, .a));
+    testing.expectEqual(A.a, nameCast(A, A.a));
+    testing.expectEqual(A.a, nameCast(A, B.a)); // matched by name, not by integer value
+    testing.expectEqual(A.a, nameCast(A, "a"));
+    testing.expectEqual(A.a, nameCast(A, @as(*const[1]u8, "a")));
+    testing.expectEqual(A.a, nameCast(A, @as([:0]const u8, "a")));
+    testing.expectEqual(A.a, nameCast(A, @as([]const u8, "a")));
+
+    testing.expectEqual(B.a, nameCast(B, .a));
+    testing.expectEqual(B.a, nameCast(B, A.a));
+    testing.expectEqual(B.a, nameCast(B, B.a));
+    testing.expectEqual(B.a, nameCast(B, "a"));
+
+    testing.expectEqual(B.b, nameCast(B, .b));
+    testing.expectEqual(B.b, nameCast(B, A.b));
+    testing.expectEqual(B.b, nameCast(B, B.b));
+    testing.expectEqual(B.b, nameCast(B, "b"));
+}
+
+/// A set of enum elements, backed by a bitfield.  If the enum
+/// is not dense, a mapping will be constructed from enum values
+/// to dense indices.  This type does no dynamic allocation and
+/// can be copied by value.  Built from IndexedSet and EnumIndexer.
+pub fn EnumSet(comptime E: type) type {
+    const mixin = struct {
+        fn EnumSetExt(comptime Self: type) type {
+            const Indexer = Self.Indexer;
+            return struct {
+                /// Initializes the set using a struct of bools
+                pub fn init(init_values: EnumFieldStruct(E, bool, false)) Self {
+                    var result = Self{};
+                    comptime var i: usize = 0;
+                    inline while (i < Self.len) : (i += 1) {
+                        comptime const key = Indexer.keyForIndex(i);
+                        comptime const tag = @tagName(key); // field name in init_values for this index
+                        if (@field(init_values, tag)) {
+                            result.bits.set(i);
+                        }
+                    }
+                    return result;
+                }
+            };
+        }
+    };
+    return IndexedSet(EnumIndexer(E), mixin.EnumSetExt);
+}
+
+/// A map keyed by an enum, backed by a bitfield and a dense array.
+/// If the enum is not dense, a mapping will be constructed from
+/// enum values to dense indices.  This type does no dynamic
+/// allocation and can be copied by value.  Built from IndexedMap and EnumIndexer.
+pub fn EnumMap(comptime E: type, comptime V: type) type {
+    const mixin = struct {
+        fn EnumMapExt(comptime Self: type) type {
+            const Indexer = Self.Indexer;
+            return struct {
+                /// Initializes the map using a sparse struct of optionals
+                pub fn init(init_values: EnumFieldStruct(E, ?V, @as(?V, null))) Self {
+                    var result = Self{};
+                    comptime var i: usize = 0;
+                    inline while (i < Self.len) : (i += 1) {
+                        comptime const key = Indexer.keyForIndex(i);
+                        comptime const tag = @tagName(key);
+                        if (@field(init_values, tag)) |*v| { // null fields are left out of the map
+                            result.bits.set(i);
+                            result.values[i] = v.*;
+                        }
+                    }
+                    return result;
+                }
+                /// Initializes a full mapping with all keys set to value.
+                /// Consider using EnumArray instead if the map will remain full.
+                pub fn initFull(value: V) Self {
+                    var result = Self{
+                        .bits = Self.BitSet.initFull(),
+                        .values = undefined,
+                    };
+                    std.mem.set(V, &result.values, value);
+                    return result;
+                }
+                /// Initializes a full mapping with supplied values.
+                /// Consider using EnumArray instead if the map will remain full.
+                pub fn initFullWith(init_values: EnumFieldStruct(E, V, @as(?V, null))) Self {
+                    return initFullWithDefault(@as(?V, null), init_values);
+                }
+                /// Initializes a full mapping with a provided default.
+                /// Consider using EnumArray instead if the map will remain full.
+                pub fn initFullWithDefault(comptime default: ?V, init_values: EnumFieldStruct(E, V, default)) Self {
+                    var result = Self{
+                        .bits = Self.BitSet.initFull(),
+                        .values = undefined,
+                    };
+                    comptime var i: usize = 0;
+                    inline while (i < Self.len) : (i += 1) {
+                        comptime const key = Indexer.keyForIndex(i);
+                        comptime const tag = @tagName(key);
+                        result.values[i] = @field(init_values, tag); // every index is assigned
+                    }
+                    return result;
+                }
+            };
+        }
+    };
+    return IndexedMap(EnumIndexer(E), V, mixin.EnumMapExt);
+}
+
+/// An array keyed by an enum, backed by a dense array.
+/// If the enum is not dense, a mapping will be constructed from
+/// enum values to dense indices.  This type does no dynamic
+/// allocation and can be copied by value.  Built from IndexedArray and EnumIndexer.
+pub fn EnumArray(comptime E: type, comptime V: type) type {
+    const mixin = struct {
+        fn EnumArrayExt(comptime Self: type) type {
+            const Indexer = Self.Indexer;
+            return struct {
+                /// Initializes all values in the enum array
+                pub fn init(init_values: EnumFieldStruct(E, V, @as(?V, null))) Self {
+                    return initDefault(@as(?V, null), init_values); // same as initDefault with a null default
+                }
+
+                /// Initializes values in the enum array, with the specified default.
+                pub fn initDefault(comptime default: ?V, init_values: EnumFieldStruct(E, V, default)) Self {
+                    var result = Self{ .values = undefined };
+                    comptime var i: usize = 0;
+                    inline while (i < Self.len) : (i += 1) {
+                        const key = comptime Indexer.keyForIndex(i);
+                        const tag = @tagName(key);
+                        result.values[i] = @field(init_values, tag); // every index is assigned
+                    }
+                    return result;
+                }
+            };
+        }
+    };
+    return IndexedArray(EnumIndexer(E), V, mixin.EnumArrayExt);
+}
+
+/// Pass this function as the Ext parameter to Indexed* if you
+/// do not want to attach any extensions.  It returns an empty
+/// struct for any Self.  The parameter was originally an optional,
+/// but optional generic functions seem to be broken at the moment.
+/// TODO: Once #8169 is fixed, consider switching this param
+/// back to an optional.
+pub fn NoExtension(comptime Self: type) type {
+    return NoExt;
+}
+const NoExt = struct{};
+
+/// A set type with an Indexer mapping from keys to indices.
+/// Presence or absence is stored as a dense bitfield.  This
+/// type does no allocation and can be copied by value.
+pub fn IndexedSet(comptime I: type, comptime Ext: fn(type)type) type {
+    comptime ensureIndexer(I);
+    return struct {
+        const Self = @This();
+
+        pub usingnamespace Ext(Self); // mixes the extension's declarations into this type
+
+        /// The indexing rules for converting between keys and indices.
+        pub const Indexer = I;
+        /// The element type for this set.
+        pub const Key = Indexer.Key;
+
+        const BitSet = std.StaticBitSet(Indexer.count);
+
+        /// The maximum number of items in this set.
+        pub const len = Indexer.count;
+
+        bits: BitSet = BitSet.initEmpty(),
+
+        /// Returns a set containing all possible keys.
+        pub fn initFull() Self {
+            return .{ .bits = BitSet.initFull() };
+        }
+
+        /// Returns the number of keys in the set.
+        pub fn count(self: Self) usize {
+            return self.bits.count();
+        }
+
+        /// Checks if a key is in the set.
+        pub fn contains(self: Self, key: Key) bool {
+            return self.bits.isSet(Indexer.indexOf(key));
+        }
+
+        /// Puts a key in the set.
+        pub fn insert(self: *Self, key: Key) void {
+            self.bits.set(Indexer.indexOf(key));
+        }
+
+        /// Removes a key from the set.
+        pub fn remove(self: *Self, key: Key) void {
+            self.bits.unset(Indexer.indexOf(key));
+        }
+
+        /// Changes the presence of a key in the set to match the passed bool.
+        pub fn setPresent(self: *Self, key: Key, present: bool) void {
+            self.bits.setValue(Indexer.indexOf(key), present);
+        }
+
+        /// Toggles the presence of a key in the set.  If the key is in
+        /// the set, removes it.  Otherwise adds it.
+        pub fn toggle(self: *Self, key: Key) void {
+            self.bits.toggle(Indexer.indexOf(key));
+        }
+
+        /// Toggles the presence of all keys in the passed set.
+        pub fn toggleSet(self: *Self, other: Self) void {
+            self.bits.toggleSet(other.bits);
+        }
+
+        /// Toggles all possible keys in the set.
+        pub fn toggleAll(self: *Self) void {
+            self.bits.toggleAll();
+        }
+
+        /// Adds all keys in the passed set to this set.
+        pub fn setUnion(self: *Self, other: Self) void {
+            self.bits.setUnion(other.bits);
+        }
+
+        /// Removes all keys which are not in the passed set.
+        pub fn setIntersection(self: *Self, other: Self) void {
+            self.bits.setIntersection(other.bits);
+        }
+
+        /// Returns an iterator over this set, which iterates in
+        /// index order.  Modifications to the set during iteration
+        /// may or may not be observed by the iterator, but will
+        /// not invalidate it.
+        pub fn iterator(self: *Self) Iterator {
+            return .{ .inner = self.bits.iterator(.{}) };
+        }
+
+        pub const Iterator = struct {
+            inner: BitSet.Iterator(.{}),
+
+            pub fn next(self: *Iterator) ?Key {
+                return if (self.inner.next()) |index|
+                    Indexer.keyForIndex(index) // translate the bit index back into a key
+                else null;
+            }
+        };
+    };
+}
+
+/// A map from keys to values, using an index lookup.  Uses a
+/// bitfield to track presence and a dense array of values.
+/// This type does no allocation and can be copied by value.
+pub fn IndexedMap(comptime I: type, comptime V: type, comptime Ext: fn(type)type) type {
+    comptime ensureIndexer(I);
+    return struct {
+        const Self = @This();
+
+        pub usingnamespace Ext(Self); // mixes the extension's declarations into this type
+
+        /// The index mapping for this map
+        pub const Indexer = I;
+        /// The key type used to index this map
+        pub const Key = Indexer.Key;
+        /// The value type stored in this map
+        pub const Value = V;
+        /// The number of possible keys in the map
+        pub const len = Indexer.count;
+
+        const BitSet = std.StaticBitSet(Indexer.count);
+
+        /// Bits determining whether items are in the map
+        bits: BitSet = BitSet.initEmpty(),
+        /// Values of items in the map.  If the associated
+        /// bit is zero, the value is undefined.
+        values: [Indexer.count]Value = undefined,
+
+        /// The number of items in the map.
+        pub fn count(self: Self) usize {
+            return self.bits.count();
+        }
+
+        /// Checks if the map contains an item.
+        pub fn contains(self: Self, key: Key) bool {
+            return self.bits.isSet(Indexer.indexOf(key));
+        }
+
+        /// Gets the value associated with a key.
+        /// If the key is not in the map, returns null.
+        pub fn get(self: Self, key: Key) ?Value {
+            const index = Indexer.indexOf(key);
+            return if (self.bits.isSet(index)) self.values[index] else null;
+        }
+
+        /// Gets the value associated with a key, which must
+        /// exist in the map.
+        pub fn getAssertContains(self: Self, key: Key) Value {
+            const index = Indexer.indexOf(key);
+            assert(self.bits.isSet(index));
+            return self.values[index];
+        }
+
+        /// Gets the address of the value associated with a key.
+        /// If the key is not in the map, returns null.
+        pub fn getPtr(self: *Self, key: Key) ?*Value {
+            const index = Indexer.indexOf(key);
+            return if (self.bits.isSet(index)) &self.values[index] else null;
+        }
+
+        /// Gets the address of the const value associated with a key.
+        /// If the key is not in the map, returns null.
+        pub fn getPtrConst(self: *const Self, key: Key) ?*const Value {
+            const index = Indexer.indexOf(key);
+            return if (self.bits.isSet(index)) &self.values[index] else null;
+        }
+
+        /// Gets the address of the value associated with a key.
+        /// The key must be present in the map.
+        pub fn getPtrAssertContains(self: *Self, key: Key) *Value {
+            const index = Indexer.indexOf(key);
+            assert(self.bits.isSet(index));
+            return &self.values[index];
+        }
+
+        /// Adds the key to the map with the supplied value.
+        /// If the key is already in the map, overwrites the value.
+        pub fn put(self: *Self, key: Key, value: Value) void {
+            const index = Indexer.indexOf(key);
+            self.bits.set(index);
+            self.values[index] = value;
+        }
+
+        /// Adds the key to the map with an undefined value.
+        /// If the key is already in the map, the value becomes undefined.
+        /// A pointer to the value is returned, which should be
+        /// used to initialize the value.
+        pub fn putUninitialized(self: *Self, key: Key) *Value {
+            const index = Indexer.indexOf(key);
+            self.bits.set(index);
+            self.values[index] = undefined;
+            return &self.values[index];
+        }
+
+        /// Sets the value associated with the key in the map,
+        /// and returns the old value.  If the key was not in
+        /// the map, returns null.
+        pub fn fetchPut(self: *Self, key: Key, value: Value) ?Value {
+            const index = Indexer.indexOf(key);
+            const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null; // read before overwriting
+            self.bits.set(index);
+            self.values[index] = value;
+            return result;
+        }
+
+        /// Removes a key from the map.  If the key was not in the map,
+        /// does nothing.
+        pub fn remove(self: *Self, key: Key) void {
+            const index = Indexer.indexOf(key);
+            self.bits.unset(index);
+            self.values[index] = undefined;
+        }
+
+        /// Removes a key from the map, and returns the old value.
+        /// If the key was not in the map, returns null.
+        pub fn fetchRemove(self: *Self, key: Key) ?Value {
+            const index = Indexer.indexOf(key);
+            const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null; // read before clearing
+            self.bits.unset(index);
+            self.values[index] = undefined;
+            return result;
+        }
+
+        /// Returns an iterator over the map, which visits items in index order.
+        /// Modifications to the underlying map may or may not be observed by
+        /// the iterator, but will not invalidate it.
+        pub fn iterator(self: *Self) Iterator {
+            return .{
+                .inner = self.bits.iterator(.{}),
+                .values = &self.values,
+            };
+        }
+
+        /// An entry in the map.
+        pub const Entry = struct {
+            /// The key associated with this entry.
+            /// Modifying this key will not change the map.
+            key: Key,
+
+            /// A pointer to the value in the map associated
+            /// with this key.  Modifications through this
+            /// pointer will modify the underlying data.
+            value: *Value,
+        };
+
+        pub const Iterator = struct {
+            inner: BitSet.Iterator(.{}),
+            values: *[Indexer.count]Value,
+
+            pub fn next(self: *Iterator) ?Entry {
+                return if (self.inner.next()) |index|
+                    Entry{
+                        .key = Indexer.keyForIndex(index),
+                        .value = &self.values[index], // points into the map's own value array
+                    }
+                else null;
+            }
+        };
+    };
+}
+
+/// A dense array of values, using an indexed lookup.
+/// This type does no allocation and can be copied by value.
+pub fn IndexedArray(comptime I: type, comptime V: type, comptime Ext: fn(type)type) type {
+    comptime ensureIndexer(I);
+    return struct {
+        const Self = @This();
+
+        pub usingnamespace Ext(Self); // mixes the extension's declarations into this type
+
+        /// The index mapping for this array
+        pub const Indexer = I;
+        /// The key type used to index this array
+        pub const Key = Indexer.Key;
+        /// The value type stored in this array
+        pub const Value = V;
+        /// The number of possible keys in the array
+        pub const len = Indexer.count;
+
+        values: [Indexer.count]Value,
+
+        pub fn initUndefined() Self { // returns an array whose values are all undefined
+            return Self{ .values = undefined };
+        }
+
+        pub fn initFill(v: Value) Self { // returns an array with every slot set to v
+            var self: Self = undefined;
+            std.mem.set(Value, &self.values, v);
+            return self;
+        }
+
+        /// Returns the value in the array associated with a key.
+        pub fn get(self: Self, key: Key) Value {
+            return self.values[Indexer.indexOf(key)];
+        }
+
+        /// Returns a pointer to the slot in the array associated with a key.
+        pub fn getPtr(self: *Self, key: Key) *Value {
+            return &self.values[Indexer.indexOf(key)];
+        }
+
+        /// Returns a const pointer to the slot in the array associated with a key.
+        pub fn getPtrConst(self: *const Self, key: Key) *const Value {
+            return &self.values[Indexer.indexOf(key)];
+        }
+
+        /// Sets the value in the slot associated with a key.
+        pub fn set(self: *Self, key: Key, value: Value) void {
+            self.values[Indexer.indexOf(key)] = value;
+        }
+
+        /// Iterates over the items in the array, in index order.
+        pub fn iterator(self: *Self) Iterator {
+            return .{
+                .values = &self.values,
+            };
+        }
+
+        /// An entry in the array.
+        pub const Entry = struct {
+            /// The key associated with this entry.
+            /// Modifying this key will not change the array.
+            key: Key,
+
+            /// A pointer to the value in the array associated
+            /// with this key.  Modifications through this
+            /// pointer will modify the underlying data.
+            value: *Value,
+        };
+
+        pub const Iterator = struct {
+            index: usize = 0,
+            values: *[Indexer.count]Value,
+
+            pub fn next(self: *Iterator) ?Entry {
+                const index = self.index;
+                if (index < Indexer.count) {
+                    self.index += 1; // advance past the entry being returned
+                    return Entry{
+                        .key = Indexer.keyForIndex(index),
+                        .value = &self.values[index],
+                    };
+                }
+                return null;
+            }
+        };
+    };
+}
+
+/// Verifies that a type is a valid Indexer, providing a helpful
+/// compile error if not.  An Indexer maps a comptime known set
+/// of keys to a dense set of zero-based indices.
+/// The indexer interface must look like this:
+/// ```
+/// struct {
+///     /// The key type which this indexer converts to indices
+///     pub const Key: type,
+///     /// The number of indexes in the dense mapping
+///     pub const count: usize,
+///     /// Converts from a key to an index
+///     pub fn indexOf(Key) usize;
+///     /// Converts from an index to a key
+///     pub fn keyForIndex(usize) Key;
+/// }
+/// ```
+pub fn ensureIndexer(comptime T: type) void {
+    comptime {
+        if (!@hasDecl(T, "Key")) @compileError("Indexer must have decl Key: type.");
+        if (@TypeOf(T.Key) != type) @compileError("Indexer.Key must be a type.");
+        if (!@hasDecl(T, "count")) @compileError("Indexer must have decl count: usize.");
+        if (@TypeOf(T.count) != usize) @compileError("Indexer.count must be a usize.");
+        if (!@hasDecl(T, "indexOf")) @compileError("Indexer must have decl indexOf: fn(Key)usize.");
+        if (@TypeOf(T.indexOf) != fn(T.Key)usize) @compileError("Indexer.indexOf must be a fn(Key)usize.");
+        if (!@hasDecl(T, "keyForIndex")) @compileError("Indexer must have decl keyForIndex: fn(usize)Key.");
+        if (@TypeOf(T.keyForIndex) != fn(usize)T.Key) @compileError("Indexer.keyForIndex must be a fn(usize)Key.");
+    }
+}
+
+test "std.enums.ensureIndexer" {
+    ensureIndexer(struct { // a minimal type with all four required declarations
+        pub const Key = u32;
+        pub const count: usize = 8;
+        pub fn indexOf(k: Key) usize {
+            return @intCast(usize, k);
+        }
+        pub fn keyForIndex(index: usize) Key {
+            return @intCast(Key, index);
+        }
+    });
+}
+
+fn ascByValue(ctx: void, comptime a: EnumField, comptime b: EnumField) bool { // sort comparator; ctx is unused
+    return a.value < b.value; // true when a's value is strictly lower than b's
+}
+pub fn EnumIndexer(comptime E: type) type { // builds an Indexer (see ensureIndexer) for an exhaustive enum E
+    if (!@typeInfo(E).Enum.is_exhaustive) {
+        @compileError("Cannot create an enum indexer for a non-exhaustive enum.");
+    }
+
+    const const_fields = uniqueFields(E);
+    var fields = const_fields[0..const_fields.len].*; // mutable copy so it can be sorted below
+    if (fields.len == 0) {
+        return struct { // empty enum: no keys and no indices, so both conversions are unreachable
+            pub const Key = E;
+            pub const count: usize = 0;
+            pub fn indexOf(e: E) usize { unreachable; }
+            pub fn keyForIndex(i: usize) E { unreachable; }
+        };
+    }
+    std.sort.sort(EnumField, &fields, {}, ascByValue);
+    const min = fields[0].value;
+    const max = fields[fields.len-1].value;
+    if (max - min == fields.len-1) { // unique values form a contiguous range: map by offset from min
+        return struct {
+            pub const Key = E;
+            pub const count = fields.len;
+            pub fn indexOf(e: E) usize {
+                return @intCast(usize, @enumToInt(e) - min); // NOTE(review): confirm this subtraction cannot overflow the tag type
+            }
+            pub fn keyForIndex(i: usize) E {
+                // TODO fix addition semantics.  This calculation
+                // gives up some safety to avoid artificially limiting
+                // the range of signed enum values to max_isize.
+                const enum_value = if (min < 0) @bitCast(isize, i) +% min else i + min;
+                return @intToEnum(E, @intCast(std.meta.Tag(E), enum_value));
+            }
+        };
+    }
+
+    const keys = valuesFromFields(E, &fields); // sparse enum: keys in ascending value order
+
+    return struct {
+        pub const Key = E;
+        pub const count = fields.len;
+        pub fn indexOf(e: E) usize {
+            for (keys) |k, i| { // linear search through the sorted keys
+                if (k == e) return i;
+            }
+            unreachable;
+        }
+        pub fn keyForIndex(i: usize) E {
+            return keys[i];
+        }
+    };
+}
+
+test "std.enums.EnumIndexer dense zeroed" {
+    const E = enum{ b = 1, a = 0, c = 2 };
+    const Indexer = EnumIndexer(E);
+    ensureIndexer(Indexer);
+    testing.expectEqual(E, Indexer.Key);
+    testing.expectEqual(@as(usize, 3), Indexer.count);
+
+    testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a));
+    testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b));
+    testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c));
+
+    testing.expectEqual(E.a, Indexer.keyForIndex(0));
+    testing.expectEqual(E.b, Indexer.keyForIndex(1));
+    testing.expectEqual(E.c, Indexer.keyForIndex(2));
+}
+
+test "std.enums.EnumIndexer dense positive" {
+    const E = enum(u4) { c = 6, a = 4, b = 5 };
+    const Indexer = EnumIndexer(E);
+    ensureIndexer(Indexer);
+    testing.expectEqual(E, Indexer.Key);
+    testing.expectEqual(@as(usize, 3), Indexer.count);
+
+    testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a));
+    testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b));
+    testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c));
+
+    testing.expectEqual(E.a, Indexer.keyForIndex(0));
+    testing.expectEqual(E.b, Indexer.keyForIndex(1));
+    testing.expectEqual(E.c, Indexer.keyForIndex(2));
+}
+
+test "std.enums.EnumIndexer dense negative" {
+    const E = enum(i4) { a = -6, c = -4, b = -5 };
+    const Indexer = EnumIndexer(E);
+    ensureIndexer(Indexer);
+    testing.expectEqual(E, Indexer.Key);
+    testing.expectEqual(@as(usize, 3), Indexer.count);
+
+    testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a));
+    testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b));
+    testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c));
+
+    testing.expectEqual(E.a, Indexer.keyForIndex(0));
+    testing.expectEqual(E.b, Indexer.keyForIndex(1));
+    testing.expectEqual(E.c, Indexer.keyForIndex(2));
+}
+
+test "std.enums.EnumIndexer sparse" {
+    const E = enum(i4) { a = -2, c = 6, b = 4 };
+    const Indexer = EnumIndexer(E);
+    ensureIndexer(Indexer);
+    testing.expectEqual(E, Indexer.Key);
+    testing.expectEqual(@as(usize, 3), Indexer.count);
+
+    testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a));
+    testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b));
+    testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c));
+
+    testing.expectEqual(E.a, Indexer.keyForIndex(0));
+    testing.expectEqual(E.b, Indexer.keyForIndex(1));
+    testing.expectEqual(E.c, Indexer.keyForIndex(2));
+}
+
+test "std.enums.EnumIndexer repeats" {
+    const E = extern enum{ a = -2, c = 6, b = 4, b2 = 4 };
+    const Indexer = EnumIndexer(E);
+    ensureIndexer(Indexer);
+    testing.expectEqual(E, Indexer.Key);
+    testing.expectEqual(@as(usize, 3), Indexer.count);
+
+    testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a));
+    testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b));
+    testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c));
+
+    testing.expectEqual(E.a, Indexer.keyForIndex(0));
+    testing.expectEqual(E.b, Indexer.keyForIndex(1));
+    testing.expectEqual(E.c, Indexer.keyForIndex(2));
+}
+
+test "std.enums.EnumSet" {
+    const E = extern enum { a, b, c, d, e = 0 };
+    const Set = EnumSet(E);
+    testing.expectEqual(E, Set.Key);
+    testing.expectEqual(EnumIndexer(E), Set.Indexer);
+    testing.expectEqual(@as(usize, 4), Set.len);
+
+    // Empty sets
+    const empty = Set{};
+    comptime testing.expect(empty.count() == 0);
+
+    var empty_b = Set.init(.{});
+    testing.expect(empty_b.count() == 0);
+
+    const empty_c = comptime Set.init(.{});
+    comptime testing.expect(empty_c.count() == 0);
+
+    const full = Set.initFull();
+    testing.expect(full.count() == Set.len);
+
+    const full_b = comptime Set.initFull();
+    comptime testing.expect(full_b.count() == Set.len);
+
+    testing.expectEqual(false, empty.contains(.a));
+    testing.expectEqual(false, empty.contains(.b));
+    testing.expectEqual(false, empty.contains(.c));
+    testing.expectEqual(false, empty.contains(.d));
+    testing.expectEqual(false, empty.contains(.e));
+    {
+        var iter = empty_b.iterator();
+        testing.expectEqual(@as(?E, null), iter.next());
+    }
+
+    var mut = Set.init(.{
+        .a=true, .c=true,
+    });
+    testing.expectEqual(@as(usize, 2), mut.count());
+    testing.expectEqual(true, mut.contains(.a));
+    testing.expectEqual(false, mut.contains(.b));
+    testing.expectEqual(true, mut.contains(.c));
+    testing.expectEqual(false, mut.contains(.d));
+    testing.expectEqual(true, mut.contains(.e)); // aliases a
+    {
+        var it = mut.iterator();
+        testing.expectEqual(@as(?E, .a), it.next());
+        testing.expectEqual(@as(?E, .c), it.next());
+        testing.expectEqual(@as(?E, null), it.next());
+    }
+
+    mut.toggleAll();
+    testing.expectEqual(@as(usize, 2), mut.count());
+    testing.expectEqual(false, mut.contains(.a));
+    testing.expectEqual(true, mut.contains(.b));
+    testing.expectEqual(false, mut.contains(.c));
+    testing.expectEqual(true, mut.contains(.d));
+    testing.expectEqual(false, mut.contains(.e)); // aliases a
+    {
+        var it = mut.iterator();
+        testing.expectEqual(@as(?E, .b), it.next());
+        testing.expectEqual(@as(?E, .d), it.next());
+        testing.expectEqual(@as(?E, null), it.next());
+    }
+
+    mut.toggleSet(Set.init(.{ .a=true, .b=true }));
+    testing.expectEqual(@as(usize, 2), mut.count());
+    testing.expectEqual(true, mut.contains(.a));
+    testing.expectEqual(false, mut.contains(.b));
+    testing.expectEqual(false, mut.contains(.c));
+    testing.expectEqual(true, mut.contains(.d));
+    testing.expectEqual(true, mut.contains(.e)); // aliases a
+
+    mut.setUnion(Set.init(.{ .a=true, .b=true }));
+    testing.expectEqual(@as(usize, 3), mut.count());
+    testing.expectEqual(true, mut.contains(.a));
+    testing.expectEqual(true, mut.contains(.b));
+    testing.expectEqual(false, mut.contains(.c));
+    testing.expectEqual(true, mut.contains(.d));
+
+    mut.remove(.c);
+    mut.remove(.b);
+    testing.expectEqual(@as(usize, 2), mut.count());
+    testing.expectEqual(true, mut.contains(.a));
+    testing.expectEqual(false, mut.contains(.b));
+    testing.expectEqual(false, mut.contains(.c));
+    testing.expectEqual(true, mut.contains(.d));
+
+    mut.setIntersection(Set.init(.{ .a=true, .b=true }));
+    testing.expectEqual(@as(usize, 1), mut.count());
+    testing.expectEqual(true, mut.contains(.a));
+    testing.expectEqual(false, mut.contains(.b));
+    testing.expectEqual(false, mut.contains(.c));
+    testing.expectEqual(false, mut.contains(.d));
+
+    mut.insert(.a);
+    mut.insert(.b);
+    testing.expectEqual(@as(usize, 2), mut.count());
+    testing.expectEqual(true, mut.contains(.a));
+    testing.expectEqual(true, mut.contains(.b));
+    testing.expectEqual(false, mut.contains(.c));
+    testing.expectEqual(false, mut.contains(.d));
+
+    mut.setPresent(.a, false);
+    mut.toggle(.b);
+    mut.toggle(.c);
+    mut.setPresent(.d, true);
+    testing.expectEqual(@as(usize, 2), mut.count());
+    testing.expectEqual(false, mut.contains(.a));
+    testing.expectEqual(false, mut.contains(.b));
+    testing.expectEqual(true, mut.contains(.c));
+    testing.expectEqual(true, mut.contains(.d));
+}
+
+test "std.enums.EnumArray void" {
+    const E = extern enum { a, b, c, d, e = 0 };
+    const ArrayVoid = EnumArray(E, void);
+    testing.expectEqual(E, ArrayVoid.Key);
+    testing.expectEqual(EnumIndexer(E), ArrayVoid.Indexer);
+    testing.expectEqual(void, ArrayVoid.Value);
+    testing.expectEqual(@as(usize, 4), ArrayVoid.len);
+
+    const undef = ArrayVoid.initUndefined();
+    var inst = ArrayVoid.initFill({});
+    const inst2 = ArrayVoid.init(.{ .a = {}, .b = {}, .c = {}, .d = {} });
+    const inst3 = ArrayVoid.initDefault({}, .{});
+
+    _ = inst.get(.a);
+    _ = inst.getPtr(.b);
+    _ = inst.getPtrConst(.c);
+    inst.set(.a, {});
+
+    var it = inst.iterator();
+    testing.expectEqual(E.a, it.next().?.key);
+    testing.expectEqual(E.b, it.next().?.key);
+    testing.expectEqual(E.c, it.next().?.key);
+    testing.expectEqual(E.d, it.next().?.key);
+    testing.expect(it.next() == null);
+}
+
+test "std.enums.EnumArray sized" {
+    const E = extern enum { a, b, c, d, e = 0 };
+    const Array = EnumArray(E, usize);
+    testing.expectEqual(E, Array.Key);
+    testing.expectEqual(EnumIndexer(E), Array.Indexer);
+    testing.expectEqual(usize, Array.Value);
+    testing.expectEqual(@as(usize, 4), Array.len);
+
+    const undef = Array.initUndefined();
+    var inst = Array.initFill(5);
+    const inst2 = Array.init(.{ .a = 1, .b = 2, .c = 3, .d = 4 });
+    const inst3 = Array.initDefault(6, .{.b = 4, .c = 2});
+
+    testing.expectEqual(@as(usize, 5), inst.get(.a));
+    testing.expectEqual(@as(usize, 5), inst.get(.b));
+    testing.expectEqual(@as(usize, 5), inst.get(.c));
+    testing.expectEqual(@as(usize, 5), inst.get(.d));
+
+    testing.expectEqual(@as(usize, 1), inst2.get(.a));
+    testing.expectEqual(@as(usize, 2), inst2.get(.b));
+    testing.expectEqual(@as(usize, 3), inst2.get(.c));
+    testing.expectEqual(@as(usize, 4), inst2.get(.d));
+
+    testing.expectEqual(@as(usize, 6), inst3.get(.a));
+    testing.expectEqual(@as(usize, 4), inst3.get(.b));
+    testing.expectEqual(@as(usize, 2), inst3.get(.c));
+    testing.expectEqual(@as(usize, 6), inst3.get(.d));
+
+    testing.expectEqual(&inst.values[0], inst.getPtr(.a));
+    testing.expectEqual(&inst.values[1], inst.getPtr(.b));
+    testing.expectEqual(&inst.values[2], inst.getPtr(.c));
+    testing.expectEqual(&inst.values[3], inst.getPtr(.d));
+
+    testing.expectEqual(@as(*const usize, &inst.values[0]), inst.getPtrConst(.a));
+    testing.expectEqual(@as(*const usize, &inst.values[1]), inst.getPtrConst(.b));
+    testing.expectEqual(@as(*const usize, &inst.values[2]), inst.getPtrConst(.c));
+    testing.expectEqual(@as(*const usize, &inst.values[3]), inst.getPtrConst(.d));
+
+    inst.set(.c, 8);
+    testing.expectEqual(@as(usize, 5), inst.get(.a));
+    testing.expectEqual(@as(usize, 5), inst.get(.b));
+    testing.expectEqual(@as(usize, 8), inst.get(.c));
+    testing.expectEqual(@as(usize, 5), inst.get(.d));
+
+    var it = inst.iterator();
+    const Entry = Array.Entry;
+    testing.expectEqual(@as(?Entry, Entry{
+        .key = .a,
+        .value = &inst.values[0],
+    }), it.next());
+    testing.expectEqual(@as(?Entry, Entry{
+        .key = .b,
+        .value = &inst.values[1],
+    }), it.next());
+    testing.expectEqual(@as(?Entry, Entry{
+        .key = .c,
+        .value = &inst.values[2],
+    }), it.next());
+    testing.expectEqual(@as(?Entry, Entry{
+        .key = .d,
+        .value = &inst.values[3],
+    }), it.next());
+    testing.expectEqual(@as(?Entry, null), it.next());
+}
+
+test "std.enums.EnumMap void" {
+    const E = extern enum { a, b, c, d, e = 0 };
+    const Map = EnumMap(E, void);
+    testing.expectEqual(E, Map.Key);
+    testing.expectEqual(EnumIndexer(E), Map.Indexer);
+    testing.expectEqual(void, Map.Value);
+    testing.expectEqual(@as(usize, 4), Map.len);
+
+    const b = Map.initFull({});
+    testing.expectEqual(@as(usize, 4), b.count());
+
+    const c = Map.initFullWith(.{ .a = {}, .b = {}, .c = {}, .d = {} });
+    testing.expectEqual(@as(usize, 4), c.count());
+
+    const d = Map.initFullWithDefault({}, .{ .b = {} });
+    testing.expectEqual(@as(usize, 4), d.count());
+
+    var a = Map.init(.{ .b = {}, .d = {} });
+    testing.expectEqual(@as(usize, 2), a.count());
+    testing.expectEqual(false, a.contains(.a));
+    testing.expectEqual(true, a.contains(.b));
+    testing.expectEqual(false, a.contains(.c));
+    testing.expectEqual(true, a.contains(.d));
+    testing.expect(a.get(.a) == null);
+    testing.expect(a.get(.b) != null);
+    testing.expect(a.get(.c) == null);
+    testing.expect(a.get(.d) != null);
+    testing.expect(a.getPtr(.a) == null);
+    testing.expect(a.getPtr(.b) != null);
+    testing.expect(a.getPtr(.c) == null);
+    testing.expect(a.getPtr(.d) != null);
+    testing.expect(a.getPtrConst(.a) == null);
+    testing.expect(a.getPtrConst(.b) != null);
+    testing.expect(a.getPtrConst(.c) == null);
+    testing.expect(a.getPtrConst(.d) != null);
+    _ = a.getPtrAssertContains(.b);
+    _ = a.getAssertContains(.d);
+
+    a.put(.a, {});
+    a.put(.a, {});
+    a.putUninitialized(.c).* = {};
+    a.putUninitialized(.c).* = {};
+
+    testing.expectEqual(@as(usize, 4), a.count());
+    testing.expect(a.get(.a) != null);
+    testing.expect(a.get(.b) != null);
+    testing.expect(a.get(.c) != null);
+    testing.expect(a.get(.d) != null);
+
+    a.remove(.a);
+    _ = a.fetchRemove(.c);
+
+    var iter = a.iterator();
+    const Entry = Map.Entry;
+    testing.expectEqual(E.b, iter.next().?.key);
+    testing.expectEqual(E.d, iter.next().?.key);
+    testing.expect(iter.next() == null);
+}
+
+test "std.enums.EnumMap sized" {
+    const E = extern enum { a, b, c, d, e = 0 };
+    const Map = EnumMap(E, usize);
+    testing.expectEqual(E, Map.Key);
+    testing.expectEqual(EnumIndexer(E), Map.Indexer);
+    testing.expectEqual(usize, Map.Value);
+    testing.expectEqual(@as(usize, 4), Map.len);
+
+    const b = Map.initFull(5);
+    testing.expectEqual(@as(usize, 4), b.count());
+    testing.expect(b.contains(.a));
+    testing.expect(b.contains(.b));
+    testing.expect(b.contains(.c));
+    testing.expect(b.contains(.d));
+    testing.expectEqual(@as(?usize, 5), b.get(.a));
+    testing.expectEqual(@as(?usize, 5), b.get(.b));
+    testing.expectEqual(@as(?usize, 5), b.get(.c));
+    testing.expectEqual(@as(?usize, 5), b.get(.d));
+
+    const c = Map.initFullWith(.{ .a = 1, .b = 2, .c = 3, .d = 4 });
+    testing.expectEqual(@as(usize, 4), c.count());
+    testing.expect(c.contains(.a));
+    testing.expect(c.contains(.b));
+    testing.expect(c.contains(.c));
+    testing.expect(c.contains(.d));
+    testing.expectEqual(@as(?usize, 1), c.get(.a));
+    testing.expectEqual(@as(?usize, 2), c.get(.b));
+    testing.expectEqual(@as(?usize, 3), c.get(.c));
+    testing.expectEqual(@as(?usize, 4), c.get(.d));
+
+    const d = Map.initFullWithDefault(6, .{ .b = 2, .c = 4 });
+    testing.expectEqual(@as(usize, 4), d.count());
+    testing.expect(d.contains(.a));
+    testing.expect(d.contains(.b));
+    testing.expect(d.contains(.c));
+    testing.expect(d.contains(.d));
+    testing.expectEqual(@as(?usize, 6), d.get(.a));
+    testing.expectEqual(@as(?usize, 2), d.get(.b));
+    testing.expectEqual(@as(?usize, 4), d.get(.c));
+    testing.expectEqual(@as(?usize, 6), d.get(.d));
+
+    var a = Map.init(.{ .b = 2, .d = 4 });
+    testing.expectEqual(@as(usize, 2), a.count());
+    testing.expectEqual(false, a.contains(.a));
+    testing.expectEqual(true, a.contains(.b));
+    testing.expectEqual(false, a.contains(.c));
+    testing.expectEqual(true, a.contains(.d));
+
+    testing.expectEqual(@as(?usize, null), a.get(.a));
+    testing.expectEqual(@as(?usize, 2), a.get(.b));
+    testing.expectEqual(@as(?usize, null), a.get(.c));
+    testing.expectEqual(@as(?usize, 4), a.get(.d));
+
+    testing.expectEqual(@as(?*usize, null), a.getPtr(.a));
+    testing.expectEqual(@as(?*usize, &a.values[1]), a.getPtr(.b));
+    testing.expectEqual(@as(?*usize, null), a.getPtr(.c));
+    testing.expectEqual(@as(?*usize, &a.values[3]), a.getPtr(.d));
+
+    testing.expectEqual(@as(?*const usize, null), a.getPtrConst(.a));
+    testing.expectEqual(@as(?*const usize, &a.values[1]), a.getPtrConst(.b));
+    testing.expectEqual(@as(?*const usize, null), a.getPtrConst(.c));
+    testing.expectEqual(@as(?*const usize, &a.values[3]), a.getPtrConst(.d));
+
+    testing.expectEqual(@as(*const usize, &a.values[1]), a.getPtrAssertContains(.b));
+    testing.expectEqual(@as(*const usize, &a.values[3]), a.getPtrAssertContains(.d));
+    testing.expectEqual(@as(usize, 2), a.getAssertContains(.b));
+    testing.expectEqual(@as(usize, 4), a.getAssertContains(.d));
+
+    a.put(.a, 3);
+    a.put(.a, 5);
+    a.putUninitialized(.c).* = 7;
+    a.putUninitialized(.c).* = 9;
+
+    testing.expectEqual(@as(usize, 4), a.count());
+    testing.expectEqual(@as(?usize, 5), a.get(.a));
+    testing.expectEqual(@as(?usize, 2), a.get(.b));
+    testing.expectEqual(@as(?usize, 9), a.get(.c));
+    testing.expectEqual(@as(?usize, 4), a.get(.d));
+
+    a.remove(.a);
+    testing.expectEqual(@as(?usize, null), a.fetchRemove(.a));
+    testing.expectEqual(@as(?usize, 9), a.fetchRemove(.c));
+    a.remove(.c);
+
+    var iter = a.iterator();
+    const Entry = Map.Entry;
+    testing.expectEqual(@as(?Entry, Entry{
+        .key = .b, .value = &a.values[1],
+    }), iter.next());
+    testing.expectEqual(@as(?Entry, Entry{
+        .key = .d, .value = &a.values[3],
+    }), iter.next());
+    testing.expectEqual(@as(?Entry, null), iter.next());
+}
diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig
@@ -1250,9 +1250,9 @@ fn formatDuration(ns: u64, comptime fmt: []const u8, options: std.fmt.FormatOpti
         const kunits = ns_remaining * 1000 / unit.ns;
         if (kunits >= 1000) {
             try formatInt(kunits / 1000, 10, false, .{}, writer);
-            if (kunits > 1000) {
+            const frac = kunits % 1000;
+            if (frac > 0) {
                 // Write up to 3 decimal places
-                const frac = kunits % 1000;
                 var buf = [_]u8{ '.', 0, 0, 0 };
                 _ = formatIntBuf(buf[1..], frac, 10, false, .{ .fill = '0', .width = 3 });
                 var end: usize = 4;
@@ -1286,9 +1286,14 @@ test "fmtDuration" {
         .{ .s = "1us", .d = std.time.ns_per_us },
         .{ .s = "1.45us", .d = 1450 },
         .{ .s = "1.5us", .d = 3 * std.time.ns_per_us / 2 },
+        .{ .s = "14.5us", .d = 14500 },
+        .{ .s = "145us", .d = 145000 },
         .{ .s = "999.999us", .d = std.time.ns_per_ms - 1 },
         .{ .s = "1ms", .d = std.time.ns_per_ms + 1 },
         .{ .s = "1.5ms", .d = 3 * std.time.ns_per_ms / 2 },
+        .{ .s = "1.11ms", .d = 1110000 },
+        .{ .s = "1.111ms", .d = 1111000 },
+        .{ .s = "1.111ms", .d = 1111100 },
         .{ .s = "999.999ms", .d = std.time.ns_per_s - 1 },
         .{ .s = "1s", .d = std.time.ns_per_s },
         .{ .s = "59.999s", .d = std.time.ns_per_min - 1 },
diff --git a/lib/std/fs.zig b/lib/std/fs.zig
@@ -50,13 +50,13 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
     else => @compileError("Unsupported OS"),
 };
 
-pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*;
 
 /// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem.
-pub const base64_encoder = base64.Base64Encoder.init(base64_alphabet, base64.standard_pad_char);
+pub const base64_encoder = base64.Base64Encoder.init(base64_alphabet, null);
 
 /// Base64 decoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem.
-pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, base64.standard_pad_char);
+pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, null);
 
 /// Whether or not async file system syscalls need a dedicated thread because the operating
 /// system does not support non-blocking I/O on the file system.
@@ -77,7 +77,7 @@ pub fn atomicSymLink(allocator: *Allocator, existing_path: []const u8, new_path:
     const dirname = path.dirname(new_path) orelse ".";
 
     var rand_buf: [AtomicFile.RANDOM_BYTES]u8 = undefined;
-    const tmp_path = try allocator.alloc(u8, dirname.len + 1 + base64.Base64Encoder.calcSize(rand_buf.len));
+    const tmp_path = try allocator.alloc(u8, dirname.len + 1 + base64_encoder.calcSize(rand_buf.len));
     defer allocator.free(tmp_path);
     mem.copy(u8, tmp_path[0..], dirname);
     tmp_path[dirname.len] = path.sep;
@@ -142,7 +142,7 @@ pub const AtomicFile = struct {
     const InitError = File.OpenError;
 
     const RANDOM_BYTES = 12;
-    const TMP_PATH_LEN = base64.Base64Encoder.calcSize(RANDOM_BYTES);
+    const TMP_PATH_LEN = base64_encoder.calcSize(RANDOM_BYTES);
 
     /// Note that the `Dir.atomicFile` API may be more handy than this lower-level function.
     pub fn init(
diff --git a/lib/std/fs/path.zig b/lib/std/fs/path.zig
@@ -92,7 +92,7 @@ pub fn join(allocator: *Allocator, paths: []const []const u8) ![]u8 {
 /// Naively combines a series of paths with the native path seperator and null terminator.
 /// Allocates memory for the result, which must be freed by the caller.
 pub fn joinZ(allocator: *Allocator, paths: []const []const u8) ![:0]u8 {
-    const out = joinSepMaybeZ(allocator, sep, isSep, paths, true);
+    const out = try joinSepMaybeZ(allocator, sep, isSep, paths, true);
     return out[0 .. out.len - 1 :0];
 }
 
@@ -119,6 +119,16 @@ fn testJoinMaybeZPosix(paths: []const []const u8, expected: []const u8, zero: bo
 }
 
 test "join" {
+    {
+        const actual: []u8 = try join(testing.allocator, &[_][]const u8{});
+        defer testing.allocator.free(actual);
+        testing.expectEqualSlices(u8, "", actual);
+    }
+    {
+        const actual: [:0]u8 = try joinZ(testing.allocator, &[_][]const u8{});
+        defer testing.allocator.free(actual);
+        testing.expectEqualSlices(u8, "", actual);
+    }
     for (&[_]bool{ false, true }) |zero| {
         testJoinMaybeZWindows(&[_][]const u8{}, "", zero);
         testJoinMaybeZWindows(&[_][]const u8{ "c:\\a\\b", "c" }, "c:\\a\\b\\c", zero);
diff --git a/lib/std/hash/auto_hash.zig b/lib/std/hash/auto_hash.zig
@@ -95,7 +95,7 @@ pub fn hash(hasher: anytype, key: anytype, comptime strat: HashStrategy) void {
         .EnumLiteral,
         .Frame,
         .Float,
-        => @compileError("cannot hash this type"),
+        => @compileError("unable to hash type " ++ @typeName(Key)),
 
         // Help the optimizer see that hashing an int is easy by inlining!
         // TODO Check if the situation is better after #561 is resolved.
diff --git a/lib/std/macho.zig b/lib/std/macho.zig
@@ -1227,6 +1227,24 @@ pub const S_ATTR_EXT_RELOC = 0x200;
 /// section has local relocation entries
 pub const S_ATTR_LOC_RELOC = 0x100;
 
+/// template of initial values for TLVs
+pub const S_THREAD_LOCAL_REGULAR = 0x11;
+
+/// template of initial values for TLVs
+pub const S_THREAD_LOCAL_ZEROFILL = 0x12;
+
+/// TLV descriptors
+pub const S_THREAD_LOCAL_VARIABLES = 0x13;
+
+/// pointers to TLV descriptors
+pub const S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14;
+
+/// functions to call to initialize TLV values
+pub const S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15;
+
+/// 32-bit offsets to initializers
+pub const S_INIT_FUNC_OFFSETS = 0x16;
+
 pub const cpu_type_t = integer_t;
 pub const cpu_subtype_t = integer_t;
 pub const integer_t = c_int;
@@ -1422,6 +1440,14 @@ pub const EXPORT_SYMBOL_FLAGS_KIND_WEAK_DEFINITION: u8 = 0x04;
 pub const EXPORT_SYMBOL_FLAGS_REEXPORT: u8 = 0x08;
 pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: u8 = 0x10;
 
+// An indirect symbol table entry is simply a 32bit index into the symbol table
+// to the symbol that the pointer or stub is referring to.  Unless it is for a
+// non-lazy symbol pointer section for a defined symbol which strip(1) has
+// removed.  In which case it has the value INDIRECT_SYMBOL_LOCAL.  If the
+// symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that.
+pub const INDIRECT_SYMBOL_LOCAL: u32 = 0x80000000;
+pub const INDIRECT_SYMBOL_ABS: u32 = 0x40000000;
+
 // Codesign consts and structs taken from:
 // https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/kern/cs_blobs.h.auto.html
 
@@ -1589,3 +1615,17 @@ pub const GenericBlob = extern struct {
     /// Total length of blob
     length: u32,
 };
+
+/// The LC_DATA_IN_CODE load command uses a linkedit_data_command
+/// to point to an array of data_in_code_entry entries. Each entry
+/// describes a range of data in a code section.
+pub const data_in_code_entry = extern struct {
+    /// From mach_header to start of data range.
+    offset: u32,
+
+    /// Number of bytes in data range.
+    length: u16,
+
+    /// A DICE_KIND value.
+    kind: u16,
+};
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
@@ -1373,6 +1373,20 @@ test "mem.tokenize (multibyte)" {
     testing.expect(it.next() == null);
 }
 
+test "mem.tokenize (reset)" {
+    var it = tokenize("   abc def   ghi  ", " ");
+    testing.expect(eql(u8, it.next().?, "abc"));
+    testing.expect(eql(u8, it.next().?, "def"));
+    testing.expect(eql(u8, it.next().?, "ghi"));
+
+    it.reset();
+
+    testing.expect(eql(u8, it.next().?, "abc"));
+    testing.expect(eql(u8, it.next().?, "def"));
+    testing.expect(eql(u8, it.next().?, "ghi"));
+    testing.expect(it.next() == null);
+}
+
 /// Returns an iterator that iterates over the slices of `buffer` that
 /// are separated by bytes in `delimiter`.
 /// split("abc|def||ghi", "|")
@@ -1471,6 +1485,11 @@ pub const TokenIterator = struct {
         return self.buffer[index..];
     }
 
+    /// Resets the iterator to the initial token.
+    pub fn reset(self: *TokenIterator) void {
+        self.index = 0;
+    }
+
     fn isSplitByte(self: TokenIterator, byte: u8) bool {
         for (self.delimiter_bytes) |delimiter_byte| {
             if (byte == delimiter_byte) {
diff --git a/lib/std/meta.zig b/lib/std/meta.zig
@@ -888,19 +888,20 @@ pub fn Vector(comptime len: u32, comptime child: type) type {
 /// Given a type and value, cast the value to the type as c would.
 /// This is for translate-c and is not intended for general use.
 pub fn cast(comptime DestType: type, target: anytype) DestType {
-    const TargetType = @TypeOf(target);
+    // this function should behave like transCCast in translate-c, except it's for macros
+    const SourceType = @TypeOf(target);
     switch (@typeInfo(DestType)) {
-        .Pointer => |dest_ptr| {
-            switch (@typeInfo(TargetType)) {
+        .Pointer => {
+            switch (@typeInfo(SourceType)) {
                 .Int, .ComptimeInt => {
                     return @intToPtr(DestType, target);
                 },
-                .Pointer => |ptr| {
-                    return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target));
+                .Pointer => {
+                    return castPtr(DestType, target);
                 },
                 .Optional => |opt| {
                     if (@typeInfo(opt.child) == .Pointer) {
-                        return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target));
+                        return castPtr(DestType, target);
                     }
                 },
                 else => {},
@@ -908,17 +909,16 @@ pub fn cast(comptime DestType: type, target: anytype) DestType {
         },
         .Optional => |dest_opt| {
             if (@typeInfo(dest_opt.child) == .Pointer) {
-                const dest_ptr = @typeInfo(dest_opt.child).Pointer;
-                switch (@typeInfo(TargetType)) {
+                switch (@typeInfo(SourceType)) {
                     .Int, .ComptimeInt => {
                         return @intToPtr(DestType, target);
                     },
                     .Pointer => {
-                        return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target));
+                        return castPtr(DestType, target);
                     },
                     .Optional => |target_opt| {
                         if (@typeInfo(target_opt.child) == .Pointer) {
-                            return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target));
+                            return castPtr(DestType, target);
                         }
                     },
                     else => {},
@@ -926,25 +926,25 @@ pub fn cast(comptime DestType: type, target: anytype) DestType {
             }
         },
         .Enum => {
-            if (@typeInfo(TargetType) == .Int or @typeInfo(TargetType) == .ComptimeInt) {
+            if (@typeInfo(SourceType) == .Int or @typeInfo(SourceType) == .ComptimeInt) {
                 return @intToEnum(DestType, target);
             }
         },
-        .Int, .ComptimeInt => {
-            switch (@typeInfo(TargetType)) {
+        .Int => {
+            switch (@typeInfo(SourceType)) {
                 .Pointer => {
-                    return @intCast(DestType, @ptrToInt(target));
+                    return castInt(DestType, @ptrToInt(target));
                 },
                 .Optional => |opt| {
                     if (@typeInfo(opt.child) == .Pointer) {
-                        return @intCast(DestType, @ptrToInt(target));
+                        return castInt(DestType, @ptrToInt(target));
                     }
                 },
                 .Enum => {
-                    return @intCast(DestType, @enumToInt(target));
+                    return castInt(DestType, @enumToInt(target));
                 },
-                .Int, .ComptimeInt => {
-                    return @intCast(DestType, target);
+                .Int => {
+                    return castInt(DestType, target);
                 },
                 else => {},
             }
@@ -954,6 +954,34 @@ pub fn cast(comptime DestType: type, target: anytype) DestType {
     return @as(DestType, target);
 }
 
+fn castInt(comptime DestType: type, target: anytype) DestType {
+    const dst_bits = @typeInfo(DestType).Int.bits;
+    const src = @typeInfo(@TypeOf(target)).Int;
+    // Intermediate type: destination width, source signedness; reinterpreted at the end.
+    const Mid = Int(src.signedness, dst_bits);
+    if (dst_bits < src.bits) return @bitCast(DestType, @truncate(Mid, target));
+    // Not narrowing: coerce to the (same or wider) intermediate before reinterpreting.
+    return @bitCast(DestType, @as(Mid, target));
+}
+
+fn castPtr(comptime DestType: type, target: anytype) DestType {
+    const to = ptrInfo(DestType);
+    const from = ptrInfo(@TypeOf(target));
+    // Dropping const or volatile is routed through an integer round-trip
+    // instead of @ptrCast; every other cast goes through @ptrCast/@alignCast.
+    const strips = (from.is_const and !to.is_const) or (from.is_volatile and !to.is_volatile);
+    if (strips) return @intToPtr(DestType, @ptrToInt(target));
+    return @ptrCast(DestType, @alignCast(to.alignment, target));
+}
+
+fn ptrInfo(comptime PtrType: type) TypeInfo.Pointer {
+    // Unwrap `?*T` to the pointer it wraps; a plain pointer type is described directly.
+    const info = @typeInfo(PtrType);
+    if (info == .Pointer) return info.Pointer;
+    if (info == .Optional) return @typeInfo(info.Optional.child).Pointer;
+    unreachable;
+}
+
 test "std.meta.cast" {
     const E = enum(u2) {
         Zero,
@@ -977,6 +1005,11 @@ test "std.meta.cast" {
     testing.expectEqual(@as(u32, 4), cast(u32, @intToPtr(?*u32, 4)));
     testing.expectEqual(@as(u32, 10), cast(u32, @as(u64, 10)));
     testing.expectEqual(@as(u8, 2), cast(u8, E.Two));
+
+    testing.expectEqual(@bitCast(i32, @as(u32, 0x8000_0000)), cast(i32, @as(u32, 0x8000_0000)));
+
+    testing.expectEqual(@intToPtr(*u8, 2), cast(*u8, @intToPtr(*const u8, 2)));
+    testing.expectEqual(@intToPtr(*u8, 2), cast(*u8, @intToPtr(*volatile u8, 2)));
 }
 
 /// Given a value returns its size as C's sizeof operator would.
diff --git a/lib/std/meta/trait.zig b/lib/std/meta/trait.zig
@@ -408,6 +408,84 @@ test "std.meta.trait.isTuple" {
     testing.expect(isTuple(@TypeOf(t3)));
 }
 
+/// Returns true if the passed type will coerce to []const u8.
+/// Any of the following are considered strings:
+/// ```
+/// []const u8, [:S]const u8, *const [N]u8, *const [N:S]u8,
+/// []u8, [:S]u8, *[N]u8, *[N:S]u8.
+/// ```
+/// These types are not considered strings:
+/// ```
+/// u8, [N]u8, [*]const u8, [*:0]const u8,
+/// [*]const [N]u8, []const u16, []const i8,
+/// *const u8, ?[]const u8, ?*const [N]u8.
+/// ```
+pub fn isZigString(comptime T: type) bool {
+    comptime {
+        // Only pointer types can be strings, no optionals
+        const info = @typeInfo(T);
+        if (info != .Pointer) return false;
+
+        const ptr = &info.Pointer;
+        // Reject volatile and allowzero pointers: those qualifiers would prevent coercion to []const u8
+        if (ptr.is_volatile or ptr.is_allowzero) return false;
+
+        // If it's already a slice, simple check.
+        if (ptr.size == .Slice) {
+            return ptr.child == u8; // the sentinel (if any) and constness are not inspected
+        }
+
+        // Otherwise check if it's an array type that coerces to slice.
+        if (ptr.size == .One) {
+            const child = @typeInfo(ptr.child);
+            if (child == .Array) {
+                const arr = &child.Array;
+                return arr.child == u8; // array length and sentinel are not inspected
+            }
+        }
+
+        return false; // many-item and C pointers, and single pointers to non-arrays, end up here
+    }
+}
+
+test "std.meta.trait.isZigString" {
+    testing.expect(isZigString([]const u8));
+    testing.expect(isZigString([]u8));
+    testing.expect(isZigString([:0]const u8));
+    testing.expect(isZigString([:0]u8));
+    testing.expect(isZigString([:5]const u8));
+    testing.expect(isZigString([:5]u8));
+    testing.expect(isZigString(*const [0]u8));
+    testing.expect(isZigString(*[0]u8));
+    testing.expect(isZigString(*const [0:0]u8));
+    testing.expect(isZigString(*[0:0]u8));
+    testing.expect(isZigString(*const [0:5]u8));
+    testing.expect(isZigString(*[0:5]u8));
+    testing.expect(isZigString(*const [10]u8));
+    testing.expect(isZigString(*[10]u8));
+    testing.expect(isZigString(*const [10:0]u8));
+    testing.expect(isZigString(*[10:0]u8));
+    testing.expect(isZigString(*const [10:5]u8));
+    testing.expect(isZigString(*[10:5]u8));
+
+    testing.expect(!isZigString(u8));
+    testing.expect(!isZigString([4]u8));
+    testing.expect(!isZigString([4:0]u8));
+    testing.expect(!isZigString([*]const u8));
+    testing.expect(!isZigString([*]const [4]u8));
+    testing.expect(!isZigString([*c]const u8));
+    testing.expect(!isZigString([*c]const [4]u8));
+    testing.expect(!isZigString([*:0]const u8));
+    testing.expect(!isZigString([*:0]const u8)); // NOTE(review): exact duplicate of the previous assertion; a different sentinel or element type was probably intended
+    testing.expect(!isZigString(*[]const u8));
+    testing.expect(!isZigString(?[]const u8));
+    testing.expect(!isZigString(?*const [4]u8));
+    testing.expect(!isZigString([]allowzero u8));
+    testing.expect(!isZigString([]volatile u8));
+    testing.expect(!isZigString(*allowzero [4]u8));
+    testing.expect(!isZigString(*volatile [4]u8));
+}
+
 pub fn hasDecls(comptime T: type, comptime names: anytype) bool {
     inline for (names) |name| {
         if (!@hasDecl(T, name))
diff --git a/lib/std/os.zig b/lib/std/os.zig
@@ -2879,7 +2879,7 @@ pub fn bind(sock: socket_t, addr: *const sockaddr, len: socklen_t) BindError!voi
     unreachable;
 }
 
-const ListenError = error{
+pub const ListenError = error{
     /// Another socket is already listening on the same port.
     /// For Internet domain sockets, the  socket referred to by sockfd had not previously
     /// been bound to an address and, upon attempting to bind it to an ephemeral port, it
@@ -5610,6 +5610,7 @@ pub fn recvfrom(
                 EAGAIN => return error.WouldBlock,
                 ENOMEM => return error.SystemResources,
                 ECONNREFUSED => return error.ConnectionRefused,
+                ECONNRESET => return error.ConnectionResetByPeer,
                 else => |err| return unexpectedErrno(err),
             }
         }
@@ -5827,7 +5828,7 @@ pub fn tcsetattr(handle: fd_t, optional_action: TCSA, termios_p: termios) Termio
     }
 }
 
-const IoCtl_SIOCGIFINDEX_Error = error{
+pub const IoCtl_SIOCGIFINDEX_Error = error{
     FileSystem,
     InterfaceNotFound,
 } || UnexpectedError;
diff --git a/lib/std/os/linux/io_uring.zig b/lib/std/os/linux/io_uring.zig
@@ -1353,7 +1353,7 @@ test "timeout (after a relative time)" {
         .res = -linux.ETIME,
         .flags = 0,
     }, cqe);
-    testing.expectWithinMargin(@intToFloat(f64, ms), @intToFloat(f64, stopped - started), margin);
+    testing.expectApproxEqAbs(@intToFloat(f64, ms), @intToFloat(f64, stopped - started), margin);
 }
 
 test "timeout (after a number of completions)" {
diff --git a/lib/std/os/linux/mips.zig b/lib/std/os/linux/mips.zig
@@ -115,6 +115,9 @@ pub fn syscall5(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize,
     );
 }
 
+// NOTE: The o32 calling convention requires the caller to reserve 16 bytes for
+// the first four arguments even though they're passed in $a0-$a3.
+
 pub fn syscall6(
     number: SYS,
     arg1: usize,
@@ -146,6 +149,40 @@ pub fn syscall6(
     );
 }
 
+pub fn syscall7( // seven-argument raw syscall: arg1-arg4 go in registers, arg5-arg7 are stored on the stack by the template
+    number: SYS,
+    arg1: usize,
+    arg2: usize,
+    arg3: usize,
+    arg4: usize,
+    arg5: usize,
+    arg6: usize,
+    arg7: usize,
+) usize {
+    return asm volatile (
+        \\ .set noat
+        \\ subu $sp, $sp, 32
+        \\ sw %[arg5], 16($sp)
+        \\ sw %[arg6], 20($sp)
+        \\ sw %[arg7], 24($sp)
+        \\ syscall
+        \\ addu $sp, $sp, 32
+        \\ blez $7, 1f
+        \\ subu $2, $0, $2
+        \\ 1:
+        : [ret] "={$2}" (-> usize) // result is read from $2; the blez/subu pair negates it when $7 > 0 (presumably the kernel's error flag - confirm)
+        : [number] "{$2}" (@enumToInt(number)), // syscall number is passed in $2
+          [arg1] "{$4}" (arg1), // arg1-arg4 are pinned to $4-$7
+          [arg2] "{$5}" (arg2),
+          [arg3] "{$6}" (arg3),
+          [arg4] "{$7}" (arg4),
+          [arg5] "r" (arg5), // any general register; the template stores it at 16($sp)
+          [arg6] "r" (arg6), // stored at 20($sp)
+          [arg7] "r" (arg7) // stored at 24($sp)
+        : "memory", "cc", "$7" // $7 is declared clobbered because it is read back after the syscall
+    );
+}
+
 /// This matches the libc clone function.
 pub extern fn clone(func: fn (arg: usize) callconv(.C) u8, stack: usize, flags: u32, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize;
 
diff --git a/lib/std/os/uefi/tables/boot_services.zig b/lib/std/os/uefi/tables/boot_services.zig
@@ -78,7 +78,8 @@ pub const BootServices = extern struct {
     /// Returns an array of handles that support a specified protocol.
     locateHandle: fn (LocateSearchType, ?*align(8) const Guid, ?*const c_void, *usize, [*]Handle) callconv(.C) Status,
 
-    locateDevicePath: Status, // TODO
+    /// Locates the handle to a device on the device path that supports the specified protocol
+    locateDevicePath: fn (*align(8) const Guid, **const DevicePathProtocol, *?Handle) callconv(.C) Status,
     installConfigurationTable: Status, // TODO
 
     /// Loads an EFI image into memory.
diff --git a/lib/std/os/windows/user32.zig b/lib/std/os/windows/user32.zig
@@ -373,7 +373,7 @@ pub fn createWindowExA(dwExStyle: u32, lpClassName: [*:0]const u8, lpWindowName:
 }
 
 pub extern "user32" fn CreateWindowExW(dwExStyle: DWORD, lpClassName: [*:0]const u16, lpWindowName: [*:0]const u16, dwStyle: DWORD, X: i32, Y: i32, nWidth: i32, nHeight: i32, hWindParent: ?HWND, hMenu: ?HMENU, hInstance: HINSTANCE, lpParam: ?LPVOID) callconv(WINAPI) ?HWND;
-pub var pfnCreateWindowExW: @TypeOf(RegisterClassExW) = undefined;
+pub var pfnCreateWindowExW: @TypeOf(CreateWindowExW) = undefined;
 pub fn createWindowExW(dwExStyle: u32, lpClassName: [*:0]const u16, lpWindowName: [*:0]const u16, dwStyle: u32, X: i32, Y: i32, nWidth: i32, nHeight: i32, hWindParent: ?HWND, hMenu: ?HMENU, hInstance: HINSTANCE, lpParam: ?*c_void) !HWND {
     const function = selectSymbol(CreateWindowExW, pfnCreateWindowExW, .win2k);
     const window = function(dwExStyle, lpClassName, lpWindowName, dwStyle, X, Y, nWidth, nHeight, hWindParent, hMenu, hInstance, lpParam);
diff --git a/lib/std/special/build_runner.zig b/lib/std/special/build_runner.zig
@@ -60,6 +60,7 @@ pub fn main() !void {
     const stderr_stream = io.getStdErr().writer();
     const stdout_stream = io.getStdOut().writer();
 
+    var install_prefix: ?[]const u8 = null;
     while (nextArg(args, &arg_idx)) |arg| {
         if (mem.startsWith(u8, arg, "-D")) {
             const option_contents = arg[2..];
@@ -82,7 +83,7 @@ pub fn main() !void {
             } else if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) {
                 return usage(builder, false, stdout_stream);
             } else if (mem.eql(u8, arg, "--prefix")) {
-                builder.install_prefix = nextArg(args, &arg_idx) orelse {
+                install_prefix = nextArg(args, &arg_idx) orelse {
                     warn("Expected argument after --prefix\n\n", .{});
                     return usageAndErr(builder, false, stderr_stream);
                 };
@@ -134,7 +135,7 @@ pub fn main() !void {
         }
     }
 
-    builder.resolveInstallPrefix();
+    builder.resolveInstallPrefix(install_prefix);
     try runBuild(builder);
 
     if (builder.validateUserInputDidItFail())
@@ -162,8 +163,7 @@ fn runBuild(builder: *Builder) anyerror!void {
 fn usage(builder: *Builder, already_ran_build: bool, out_stream: anytype) !void {
     // run the build script to collect the options
     if (!already_ran_build) {
-        builder.setInstallPrefix(null);
-        builder.resolveInstallPrefix();
+        builder.resolveInstallPrefix(null);
         try runBuild(builder);
     }
 
diff --git a/lib/std/std.zig b/lib/std/std.zig
@@ -20,6 +20,9 @@ pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringM
 pub const DynLib = @import("dynamic_library.zig").DynLib;
 pub const DynamicBitSet = bit_set.DynamicBitSet;
 pub const DynamicBitSetUnmanaged = bit_set.DynamicBitSetUnmanaged;
+pub const EnumArray = enums.EnumArray;
+pub const EnumMap = enums.EnumMap;
+pub const EnumSet = enums.EnumSet;
 pub const HashMap = hash_map.HashMap;
 pub const HashMapUnmanaged = hash_map.HashMapUnmanaged;
 pub const MultiArrayList = @import("multi_array_list.zig").MultiArrayList;
@@ -54,6 +57,7 @@ pub const cstr = @import("cstr.zig");
 pub const debug = @import("debug.zig");
 pub const dwarf = @import("dwarf.zig");
 pub const elf = @import("elf.zig");
+pub const enums = @import("enums.zig");
 pub const event = @import("event.zig");
 pub const fifo = @import("fifo.zig");
 pub const fmt = @import("fmt.zig");
diff --git a/lib/std/testing.zig b/lib/std/testing.zig
@@ -200,67 +200,69 @@ pub fn expectFmt(expected: []const u8, comptime template: []const u8, args: anyt
     return error.TestFailed;
 }
 
-/// This function is intended to be used only in tests. When the actual value is not
-/// within the margin of the expected value,
-/// prints diagnostics to stderr to show exactly how they are not equal, then aborts.
+pub const expectWithinMargin = @compileError("expectWithinMargin is deprecated, use expectApproxEqAbs or expectApproxEqRel");
+pub const expectWithinEpsilon = @compileError("expectWithinEpsilon is deprecated, use expectApproxEqAbs or expectApproxEqRel");
+
+/// This function is intended to be used only in tests. When the actual value is
+/// not approximately equal to the expected value, prints diagnostics to stderr
+/// to show exactly how they are not equal, then aborts.
+/// See `math.approxEqAbs` for more information on the tolerance parameter.
 /// The types must be floating point
-pub fn expectWithinMargin(expected: anytype, actual: @TypeOf(expected), margin: @TypeOf(expected)) void {
-    std.debug.assert(margin >= 0.0);
+pub fn expectApproxEqAbs(expected: anytype, actual: @TypeOf(expected), tolerance: @TypeOf(expected)) void {
+    const T = @TypeOf(expected);
+
+    switch (@typeInfo(T)) {
+        .Float => if (!math.approxEqAbs(T, expected, actual, tolerance))
+            std.debug.panic("actual {}, not within absolute tolerance {} of expected {}", .{ actual, tolerance, expected }),
+
+        .ComptimeFloat => @compileError("Cannot approximately compare two comptime_float values"),
 
-    switch (@typeInfo(@TypeOf(actual))) {
-        .Float,
-        .ComptimeFloat,
-        => {
-            if (@fabs(expected - actual) > margin) {
-                std.debug.panic("actual {}, not within margin {} of expected {}", .{ actual, margin, expected });
-            }
-        },
         else => @compileError("Unable to compare non floating point values"),
     }
 }
 
-test "expectWithinMargin" {
+test "expectApproxEqAbs" {
     inline for ([_]type{ f16, f32, f64, f128 }) |T| {
         const pos_x: T = 12.0;
         const pos_y: T = 12.06;
         const neg_x: T = -12.0;
         const neg_y: T = -12.06;
 
-        expectWithinMargin(pos_x, pos_y, 0.1);
-        expectWithinMargin(neg_x, neg_y, 0.1);
+        expectApproxEqAbs(pos_x, pos_y, 0.1);
+        expectApproxEqAbs(neg_x, neg_y, 0.1);
     }
 }
 
-/// This function is intended to be used only in tests. When the actual value is not
-/// within the epsilon of the expected value,
-/// prints diagnostics to stderr to show exactly how they are not equal, then aborts.
+/// This function is intended to be used only in tests. When the actual value is
+/// not approximately equal to the expected value, prints diagnostics to stderr
+/// to show exactly how they are not equal, then aborts.
+/// See `math.approxEqRel` for more information on the tolerance parameter.
 /// The types must be floating point
-pub fn expectWithinEpsilon(expected: anytype, actual: @TypeOf(expected), epsilon: @TypeOf(expected)) void {
-    std.debug.assert(epsilon >= 0.0 and epsilon <= 1.0);
+pub fn expectApproxEqRel(expected: anytype, actual: @TypeOf(expected), tolerance: @TypeOf(expected)) void {
+    const T = @TypeOf(expected);
+
+    switch (@typeInfo(T)) {
+        .Float => if (!math.approxEqRel(T, expected, actual, tolerance))
+            std.debug.panic("actual {}, not within relative tolerance {} of expected {}", .{ actual, tolerance, expected }),
+
+        .ComptimeFloat => @compileError("Cannot approximately compare two comptime_float values"),
 
-    // Relative epsilon test.
-    const margin = math.max(math.fabs(expected), math.fabs(actual)) * epsilon;
-    switch (@typeInfo(@TypeOf(actual))) {
-        .Float,
-        .ComptimeFloat,
-        => {
-            if (@fabs(expected - actual) > margin) {
-                std.debug.panic("actual {}, not within epsilon {}, of expected {}", .{ actual, epsilon, expected });
-            }
-        },
         else => @compileError("Unable to compare non floating point values"),
     }
 }
 
-test "expectWithinEpsilon" {
+test "expectApproxEqRel" {
     inline for ([_]type{ f16, f32, f64, f128 }) |T| {
+        const eps_value = comptime math.epsilon(T);
+        const sqrt_eps_value = comptime math.sqrt(eps_value);
+
         const pos_x: T = 12.0;
-        const pos_y: T = 13.2;
+        const pos_y: T = pos_x + 2 * eps_value;
         const neg_x: T = -12.0;
-        const neg_y: T = -13.2;
+        const neg_y: T = neg_x - 2 * eps_value;
 
-        expectWithinEpsilon(pos_x, pos_y, 0.1);
-        expectWithinEpsilon(neg_x, neg_y, 0.1);
+        expectApproxEqRel(pos_x, pos_y, sqrt_eps_value);
+        expectApproxEqRel(neg_x, neg_y, sqrt_eps_value);
     }
 }
 
@@ -296,7 +298,7 @@ pub const TmpDir = struct {
     sub_path: [sub_path_len]u8,
 
     const random_bytes_count = 12;
-    const sub_path_len = std.base64.Base64Encoder.calcSize(random_bytes_count);
+    const sub_path_len = std.fs.base64_encoder.calcSize(random_bytes_count);
 
     pub fn cleanup(self: *TmpDir) void {
         self.dir.close();
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
@@ -4,6 +4,31 @@
 // The MIT license requires this copyright notice to be included in all copies
 // and substantial portions of the software.
 
+test "zig fmt: respect line breaks in struct field value declaration" {
+    try testCanonical(
+        \\const Foo = struct {
+        \\    bar: u32 =
+        \\        42,
+        \\    bar: u32 =
+        \\        // a comment
+        \\        42,
+        \\    bar: u32 =
+        \\        42,
+        \\    // a comment
+        \\    bar: []const u8 =
+        \\        \\ foo
+        \\        \\ bar
+        \\        \\ baz
+        \\    ,
+        \\    bar: u32 =
+        \\        blk: {
+        \\            break :blk 42;
+        \\        },
+        \\};
+        \\
+    );
+}
+
 // TODO Remove this after zig 0.9.0 is released.
 test "zig fmt: rewrite inline functions as callconv(.Inline)" {
     try testTransform(
@@ -3038,6 +3063,54 @@ test "zig fmt: switch" {
         \\}
         \\
     );
+
+    try testTransform(
+        \\test {
+        \\    switch (x) {
+        \\        foo =>
+        \\            "bar",
+        \\    }
+        \\}
+        \\
+    ,
+        \\test {
+        \\    switch (x) {
+        \\        foo => "bar",
+        \\    }
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: switch multiline string" {
+    try testCanonical(
+        \\test "switch multiline string" {
+        \\    const x: u32 = 0;
+        \\    const str = switch (x) {
+        \\        1 => "one",
+        \\        2 =>
+        \\        \\ Comma after the multiline string
+        \\        \\ is needed
+        \\        ,
+        \\        3 => "three",
+        \\        else => "else",
+        \\    };
+        \\
+        \\    const Union = union(enum) {
+        \\        Int: i64,
+        \\        Float: f64,
+        \\    };
+        \\
+        \\    const str = switch (u) {
+        \\        Union.Int => |int|
+        \\        \\ Comma after the multiline string
+        \\        \\ is needed
+        \\        ,
+        \\        Union.Float => |*float| unreachable,
+        \\    };
+        \\}
+        \\
+    );
 }
 
 test "zig fmt: while" {
@@ -3068,6 +3141,11 @@ test "zig fmt: while" {
         \\    while (i < 10) : ({
         \\        i += 1;
         \\        j += 1;
+        \\    }) continue;
+        \\
+        \\    while (i < 10) : ({
+        \\        i += 1;
+        \\        j += 1;
         \\    }) {
         \\        continue;
         \\    }
@@ -3184,6 +3262,156 @@ test "zig fmt: for" {
     );
 }
 
+test "zig fmt: for if" {
+    try testCanonical(
+        \\test {
+        \\    for (a) |x| if (x) f(x);
+        \\
+        \\    for (a) |x| if (x)
+        \\        f(x);
+        \\
+        \\    for (a) |x| if (x) {
+        \\        f(x);
+        \\    };
+        \\
+        \\    for (a) |x|
+        \\        if (x)
+        \\            f(x);
+        \\
+        \\    for (a) |x|
+        \\        if (x) {
+        \\            f(x);
+        \\        };
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: if for" {
+    try testCanonical(
+        \\test {
+        \\    if (a) for (x) |x| f(x);
+        \\
+        \\    if (a) for (x) |x|
+        \\        f(x);
+        \\
+        \\    if (a) for (x) |x| {
+        \\        f(x);
+        \\    };
+        \\
+        \\    if (a)
+        \\        for (x) |x|
+        \\            f(x);
+        \\
+        \\    if (a)
+        \\        for (x) |x| {
+        \\            f(x);
+        \\        };
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: while if" {
+    try testCanonical(
+        \\test {
+        \\    while (a) if (x) f(x);
+        \\
+        \\    while (a) if (x)
+        \\        f(x);
+        \\
+        \\    while (a) if (x) {
+        \\        f(x);
+        \\    };
+        \\
+        \\    while (a)
+        \\        if (x)
+        \\            f(x);
+        \\
+        \\    while (a)
+        \\        if (x) {
+        \\            f(x);
+        \\        };
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: if while" {
+    try testCanonical(
+        \\test {
+        \\    if (a) while (x) : (cont) f(x);
+        \\
+        \\    if (a) while (x) : (cont)
+        \\        f(x);
+        \\
+        \\    if (a) while (x) : (cont) {
+        \\        f(x);
+        \\    };
+        \\
+        \\    if (a)
+        \\        while (x) : (cont)
+        \\            f(x);
+        \\
+        \\    if (a)
+        \\        while (x) : (cont) {
+        \\            f(x);
+        \\        };
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: while for" {
+    try testCanonical(
+        \\test {
+        \\    while (a) for (x) |x| f(x);
+        \\
+        \\    while (a) for (x) |x|
+        \\        f(x);
+        \\
+        \\    while (a) for (x) |x| {
+        \\        f(x);
+        \\    };
+        \\
+        \\    while (a)
+        \\        for (x) |x|
+        \\            f(x);
+        \\
+        \\    while (a)
+        \\        for (x) |x| {
+        \\            f(x);
+        \\        };
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: for while" {
+    try testCanonical(
+        \\test {
+        \\    for (a) |a| while (x) |x| f(x);
+        \\
+        \\    for (a) |a| while (x) |x|
+        \\        f(x);
+        \\
+        \\    for (a) |a| while (x) |x| {
+        \\        f(x);
+        \\    };
+        \\
+        \\    for (a) |a|
+        \\        while (x) |x|
+        \\            f(x);
+        \\
+        \\    for (a) |a|
+        \\        while (x) |x| {
+        \\            f(x);
+        \\        };
+        \\}
+        \\
+    );
+}
+
 test "zig fmt: if" {
     try testCanonical(
         \\test "if" {
@@ -3233,6 +3461,82 @@ test "zig fmt: if" {
     );
 }
 
+test "zig fmt: fix single statement if/for/while line breaks" {
+    try testTransform(
+        \\test {
+        \\    if (cond) a
+        \\    else b;
+        \\
+        \\    if (cond)
+        \\        a
+        \\    else b;
+        \\
+        \\    for (xs) |x| foo()
+        \\    else bar();
+        \\
+        \\    for (xs) |x|
+        \\        foo()
+        \\    else bar();
+        \\
+        \\    while (a) : (b) foo()
+        \\    else bar();
+        \\
+        \\    while (a) : (b)
+        \\        foo()
+        \\    else bar();
+        \\}
+        \\
+    ,
+        \\test {
+        \\    if (cond) a else b;
+        \\
+        \\    if (cond)
+        \\        a
+        \\    else
+        \\        b;
+        \\
+        \\    for (xs) |x| foo() else bar();
+        \\
+        \\    for (xs) |x|
+        \\        foo()
+        \\    else
+        \\        bar();
+        \\
+        \\    while (a) : (b) foo() else bar();
+        \\
+        \\    while (a) : (b)
+        \\        foo()
+        \\    else
+        \\        bar();
+        \\}
+        \\
+    );
+}
+
+test "zig fmt: anon struct/array literal in if" {
+    try testCanonical(
+        \\test {
+        \\    const a = if (cond) .{
+        \\        1, 2,
+        \\        3, 4,
+        \\    } else .{
+        \\        1,
+        \\        2,
+        \\        3,
+        \\    };
+        \\
+        \\    const rl_and_tag: struct { rl: ResultLoc, tag: zir.Inst.Tag } = if (any_payload_is_ref) .{
+        \\        .rl = .ref,
+        \\        .tag = .switchbr_ref,
+        \\    } else .{
+        \\        .rl = .none,
+        \\        .tag = .switchbr,
+        \\    };
+        \\}
+        \\
+    );
+}
+
 test "zig fmt: defer" {
     try testCanonical(
         \\test "defer" {
@@ -3820,6 +4124,7 @@ test "zig fmt: comments in ternary ifs" {
         \\    // Comment
         \\    1
         \\else
+        \\    // Comment
         \\    0;
         \\
         \\pub extern "c" fn printf(format: [*:0]const u8, ...) c_int;
@@ -3827,6 +4132,20 @@ test "zig fmt: comments in ternary ifs" {
     );
 }
 
+test "zig fmt: while statement in blockless if" {
+    try testCanonical(
+        \\pub fn main() void {
+        \\    const zoom_node = if (focused_node == layout_first)
+        \\        while (it.next()) |node| {
+        \\            if (!node.view.pending.float and !node.view.pending.fullscreen) break node;
+        \\        } else null
+        \\    else
+        \\        focused_node;
+        \\}
+        \\
+    );
+}
+
 test "zig fmt: test comments in field access chain" {
     try testCanonical(
         \\pub const str = struct {
diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig
@@ -1018,147 +1018,14 @@ fn renderWhile(gpa: *Allocator, ais: *Ais, tree: ast.Tree, while_node: ast.full.
         try renderToken(ais, tree, inline_token, .space); // inline
     }
 
-    try renderToken(ais, tree, while_node.ast.while_token, .space); // if
+    try renderToken(ais, tree, while_node.ast.while_token, .space); // if/for/while
     try renderToken(ais, tree, while_node.ast.while_token + 1, .none); // lparen
     try renderExpression(gpa, ais, tree, while_node.ast.cond_expr, .none); // condition
 
-    const then_tag = node_tags[while_node.ast.then_expr];
-    if (nodeIsBlock(then_tag) and !nodeIsIf(then_tag)) {
-        if (while_node.payload_token) |payload_token| {
-            try renderToken(ais, tree, payload_token - 2, .space); // rparen
-            try renderToken(ais, tree, payload_token - 1, .none); // |
-            const ident = blk: {
-                if (token_tags[payload_token] == .asterisk) {
-                    try renderToken(ais, tree, payload_token, .none); // *
-                    break :blk payload_token + 1;
-                } else {
-                    break :blk payload_token;
-                }
-            };
-            try renderToken(ais, tree, ident, .none); // identifier
-            const pipe = blk: {
-                if (token_tags[ident + 1] == .comma) {
-                    try renderToken(ais, tree, ident + 1, .space); // ,
-                    try renderToken(ais, tree, ident + 2, .none); // index
-                    break :blk ident + 3;
-                } else {
-                    break :blk ident + 1;
-                }
-            };
-            const brace_space = if (while_node.ast.cont_expr == 0 and ais.isLineOverIndented())
-                Space.newline
-            else
-                Space.space;
-            try renderToken(ais, tree, pipe, brace_space); // |
-        } else {
-            const rparen = tree.lastToken(while_node.ast.cond_expr) + 1;
-            const brace_space = if (while_node.ast.cont_expr == 0 and ais.isLineOverIndented())
-                Space.newline
-            else
-                Space.space;
-            try renderToken(ais, tree, rparen, brace_space); // rparen
-        }
-        if (while_node.ast.cont_expr != 0) {
-            const rparen = tree.lastToken(while_node.ast.cont_expr) + 1;
-            const lparen = tree.firstToken(while_node.ast.cont_expr) - 1;
-            try renderToken(ais, tree, lparen - 1, .space); // :
-            try renderToken(ais, tree, lparen, .none); // lparen
-            try renderExpression(gpa, ais, tree, while_node.ast.cont_expr, .none);
-            const brace_space: Space = if (ais.isLineOverIndented()) .newline else .space;
-            try renderToken(ais, tree, rparen, brace_space); // rparen
-        }
-        if (while_node.ast.else_expr != 0) {
-            try renderExpression(gpa, ais, tree, while_node.ast.then_expr, Space.space);
-            try renderToken(ais, tree, while_node.else_token, .space); // else
-            if (while_node.error_token) |error_token| {
-                try renderToken(ais, tree, error_token - 1, .none); // |
-                try renderToken(ais, tree, error_token, .none); // identifier
-                try renderToken(ais, tree, error_token + 1, .space); // |
-            }
-            return renderExpression(gpa, ais, tree, while_node.ast.else_expr, space);
-        } else {
-            return renderExpression(gpa, ais, tree, while_node.ast.then_expr, space);
-        }
-    }
-
-    const rparen = tree.lastToken(while_node.ast.cond_expr) + 1;
-    const last_then_token = tree.lastToken(while_node.ast.then_expr);
-    const src_has_newline = !tree.tokensOnSameLine(rparen, last_then_token);
-
-    if (src_has_newline) {
-        if (while_node.payload_token) |payload_token| {
-            try renderToken(ais, tree, payload_token - 2, .space); // rparen
-            try renderToken(ais, tree, payload_token - 1, .none); // |
-            const ident = blk: {
-                if (token_tags[payload_token] == .asterisk) {
-                    try renderToken(ais, tree, payload_token, .none); // *
-                    break :blk payload_token + 1;
-                } else {
-                    break :blk payload_token;
-                }
-            };
-            try renderToken(ais, tree, ident, .none); // identifier
-            const pipe = blk: {
-                if (token_tags[ident + 1] == .comma) {
-                    try renderToken(ais, tree, ident + 1, .space); // ,
-                    try renderToken(ais, tree, ident + 2, .none); // index
-                    break :blk ident + 3;
-                } else {
-                    break :blk ident + 1;
-                }
-            };
-            const after_space: Space = if (while_node.ast.cont_expr != 0) .space else .newline;
-            try renderToken(ais, tree, pipe, after_space); // |
-        } else {
-            ais.pushIndent();
-            const after_space: Space = if (while_node.ast.cont_expr != 0) .space else .newline;
-            try renderToken(ais, tree, rparen, after_space); // rparen
-            ais.popIndent();
-        }
-        if (while_node.ast.cont_expr != 0) {
-            const cont_rparen = tree.lastToken(while_node.ast.cont_expr) + 1;
-            const cont_lparen = tree.firstToken(while_node.ast.cont_expr) - 1;
-            try renderToken(ais, tree, cont_lparen - 1, .space); // :
-            try renderToken(ais, tree, cont_lparen, .none); // lparen
-            try renderExpression(gpa, ais, tree, while_node.ast.cont_expr, .none);
-            try renderToken(ais, tree, cont_rparen, .newline); // rparen
-        }
-        if (while_node.ast.else_expr != 0) {
-            ais.pushIndent();
-            try renderExpression(gpa, ais, tree, while_node.ast.then_expr, Space.newline);
-            ais.popIndent();
-            const else_is_block = nodeIsBlock(node_tags[while_node.ast.else_expr]);
-            if (else_is_block) {
-                try renderToken(ais, tree, while_node.else_token, .space); // else
-                if (while_node.error_token) |error_token| {
-                    try renderToken(ais, tree, error_token - 1, .none); // |
-                    try renderToken(ais, tree, error_token, .none); // identifier
-                    try renderToken(ais, tree, error_token + 1, .space); // |
-                }
-                return renderExpression(gpa, ais, tree, while_node.ast.else_expr, space);
-            } else {
-                if (while_node.error_token) |error_token| {
-                    try renderToken(ais, tree, while_node.else_token, .space); // else
-                    try renderToken(ais, tree, error_token - 1, .none); // |
-                    try renderToken(ais, tree, error_token, .none); // identifier
-                    try renderToken(ais, tree, error_token + 1, .space); // |
-                } else {
-                    try renderToken(ais, tree, while_node.else_token, .newline); // else
-                }
-                try renderExpressionIndented(gpa, ais, tree, while_node.ast.else_expr, space);
-                return;
-            }
-        } else {
-            try renderExpressionIndented(gpa, ais, tree, while_node.ast.then_expr, space);
-            return;
-        }
-    }
-
-    // Render everything on a single line.
+    var last_prefix_token = tree.lastToken(while_node.ast.cond_expr) + 1; // rparen
 
     if (while_node.payload_token) |payload_token| {
-        assert(payload_token - 2 == rparen);
-        try renderToken(ais, tree, payload_token - 2, .space); // )
+        try renderToken(ais, tree, last_prefix_token, .space);
         try renderToken(ais, tree, payload_token - 1, .none); // |
         const ident = blk: {
             if (token_tags[payload_token] == .asterisk) {
@@ -1178,33 +1045,67 @@ fn renderWhile(gpa: *Allocator, ais: *Ais, tree: ast.Tree, while_node: ast.full.
                 break :blk ident + 1;
             }
         };
-        try renderToken(ais, tree, pipe, .space); // |
-    } else {
-        try renderToken(ais, tree, rparen, .space); // )
+        last_prefix_token = pipe;
     }
 
     if (while_node.ast.cont_expr != 0) {
-        const cont_rparen = tree.lastToken(while_node.ast.cont_expr) + 1;
-        const cont_lparen = tree.firstToken(while_node.ast.cont_expr) - 1;
-        try renderToken(ais, tree, cont_lparen - 1, .space); // :
-        try renderToken(ais, tree, cont_lparen, .none); // lparen
+        try renderToken(ais, tree, last_prefix_token, .space);
+        const lparen = tree.firstToken(while_node.ast.cont_expr) - 1;
+        try renderToken(ais, tree, lparen - 1, .space); // :
+        try renderToken(ais, tree, lparen, .none); // lparen
         try renderExpression(gpa, ais, tree, while_node.ast.cont_expr, .none);
-        try renderToken(ais, tree, cont_rparen, .space); // rparen
+        last_prefix_token = tree.lastToken(while_node.ast.cont_expr) + 1; // rparen
+    }
+
+    const then_expr_is_block = nodeIsBlock(node_tags[while_node.ast.then_expr]);
+    const indent_then_expr = !then_expr_is_block and
+        !tree.tokensOnSameLine(last_prefix_token, tree.firstToken(while_node.ast.then_expr));
+    if (indent_then_expr or (then_expr_is_block and ais.isLineOverIndented())) {
+        ais.pushIndentNextLine();
+        try renderToken(ais, tree, last_prefix_token, .newline);
+        ais.popIndent();
+    } else {
+        try renderToken(ais, tree, last_prefix_token, .space);
     }
 
     if (while_node.ast.else_expr != 0) {
-        try renderExpression(gpa, ais, tree, while_node.ast.then_expr, .space);
-        try renderToken(ais, tree, while_node.else_token, .space); // else
+        const first_else_expr_tok = tree.firstToken(while_node.ast.else_expr);
+
+        if (indent_then_expr) {
+            ais.pushIndent();
+            try renderExpression(gpa, ais, tree, while_node.ast.then_expr, .newline);
+            ais.popIndent();
+        } else {
+            try renderExpression(gpa, ais, tree, while_node.ast.then_expr, .space);
+        }
+
+        var last_else_token = while_node.else_token;
 
         if (while_node.error_token) |error_token| {
+            try renderToken(ais, tree, while_node.else_token, .space); // else
             try renderToken(ais, tree, error_token - 1, .none); // |
             try renderToken(ais, tree, error_token, .none); // identifier
-            try renderToken(ais, tree, error_token + 1, .space); // |
+            last_else_token = error_token + 1; // |
         }
 
-        return renderExpression(gpa, ais, tree, while_node.ast.else_expr, space);
+        const indent_else_expr = indent_then_expr and
+            !nodeIsBlock(node_tags[while_node.ast.else_expr]) and
+            !nodeIsIfForWhileSwitch(node_tags[while_node.ast.else_expr]);
+        if (indent_else_expr) {
+            ais.pushIndentNextLine();
+            try renderToken(ais, tree, last_else_token, .newline);
+            ais.popIndent();
+            try renderExpressionIndented(gpa, ais, tree, while_node.ast.else_expr, space);
+        } else {
+            try renderToken(ais, tree, last_else_token, .space);
+            try renderExpression(gpa, ais, tree, while_node.ast.else_expr, space);
+        }
     } else {
-        return renderExpression(gpa, ais, tree, while_node.ast.then_expr, space);
+        if (indent_then_expr) {
+            try renderExpressionIndented(gpa, ais, tree, while_node.ast.then_expr, space);
+        } else {
+            try renderExpression(gpa, ais, tree, while_node.ast.then_expr, space);
+        }
     }
 }
 
@@ -1258,8 +1159,29 @@ fn renderContainerField(
         try renderToken(ais, tree, rparen_token, .space); // )
     }
     const eq_token = tree.firstToken(field.ast.value_expr) - 1;
-    try renderToken(ais, tree, eq_token, .space); // =
-    return renderExpressionComma(gpa, ais, tree, field.ast.value_expr, space); // value
+    const eq_space: Space = if (tree.tokensOnSameLine(eq_token, eq_token + 1)) .space else .newline;
+    {
+        ais.pushIndent();
+        try renderToken(ais, tree, eq_token, eq_space); // =
+        ais.popIndent();
+    }
+
+    if (eq_space == .space)
+        return renderExpressionComma(gpa, ais, tree, field.ast.value_expr, space); // value
+
+    const token_tags = tree.tokens.items(.tag);
+    const maybe_comma = tree.lastToken(field.ast.value_expr) + 1;
+
+    if (token_tags[maybe_comma] == .comma) {
+        ais.pushIndent();
+        try renderExpression(gpa, ais, tree, field.ast.value_expr, .none); // value
+        ais.popIndent();
+        try renderToken(ais, tree, maybe_comma, space);
+    } else {
+        ais.pushIndent();
+        try renderExpression(gpa, ais, tree, field.ast.value_expr, space); // value
+        ais.popIndent();
+    }
 }
 
 fn renderBuiltinCall(
@@ -1522,6 +1444,7 @@ fn renderSwitchCase(
     switch_case: ast.full.SwitchCase,
     space: Space,
 ) Error!void {
+    const node_tags = tree.nodes.items(.tag);
     const token_tags = tree.tokens.items(.tag);
     const trailing_comma = token_tags[switch_case.ast.arrow_token - 1] == .comma;
 
@@ -1544,17 +1467,23 @@ fn renderSwitchCase(
     }
 
     // Render the arrow and everything after it
-    try renderToken(ais, tree, switch_case.ast.arrow_token, .space);
+    const pre_target_space = if (node_tags[switch_case.ast.target_expr] == .multiline_string_literal)
+        // Newline gets inserted when rendering the target expr.
+        Space.none
+    else
+        Space.space;
+    const after_arrow_space: Space = if (switch_case.payload_token == null) pre_target_space else .space;
+    try renderToken(ais, tree, switch_case.ast.arrow_token, after_arrow_space);
 
     if (switch_case.payload_token) |payload_token| {
         try renderToken(ais, tree, payload_token - 1, .none); // pipe
         if (token_tags[payload_token] == .asterisk) {
             try renderToken(ais, tree, payload_token, .none); // asterisk
             try renderToken(ais, tree, payload_token + 1, .none); // identifier
-            try renderToken(ais, tree, payload_token + 2, .space); // pipe
+            try renderToken(ais, tree, payload_token + 2, pre_target_space); // pipe
         } else {
             try renderToken(ais, tree, payload_token, .none); // identifier
-            try renderToken(ais, tree, payload_token + 1, .space); // pipe
+            try renderToken(ais, tree, payload_token + 1, pre_target_space); // pipe
         }
     }
 
@@ -2493,6 +2422,21 @@ fn nodeIsBlock(tag: ast.Node.Tag) bool {
         .block_semicolon,
         .block_two,
         .block_two_semicolon,
+        .struct_init_dot,
+        .struct_init_dot_comma,
+        .struct_init_dot_two,
+        .struct_init_dot_two_comma,
+        .array_init_dot,
+        .array_init_dot_comma,
+        .array_init_dot_two,
+        .array_init_dot_two_comma,
+        => true,
+        else => false,
+    };
+}
+
+fn nodeIsIfForWhileSwitch(tag: ast.Node.Tag) bool {
+    return switch (tag) {
         .@"if",
         .if_simple,
         .@"for",
@@ -2507,13 +2451,6 @@ fn nodeIsBlock(tag: ast.Node.Tag) bool {
     };
 }
 
-fn nodeIsIf(tag: ast.Node.Tag) bool {
-    return switch (tag) {
-        .@"if", .if_simple => true,
-        else => false,
-    };
-}
-
 fn nodeCausesSliceOpSpace(tag: ast.Node.Tag) bool {
     return switch (tag) {
         .@"catch",
diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig
@@ -477,7 +477,7 @@ pub const list = list: {
             "@intCast",
             .{
                 .tag = .int_cast,
-                .param_count = 1,
+                .param_count = 2,
             },
         },
         .{
diff --git a/src/Compilation.zig b/src/Compilation.zig
@@ -3180,7 +3180,11 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node
             id_symlink_basename,
             &prev_digest_buf,
         ) catch |err| blk: {
-            log.debug("stage1 {s} new_digest={} error: {s}", .{ mod.root_pkg.root_src_path, digest, @errorName(err) });
+            log.debug("stage1 {s} new_digest={s} error: {s}", .{
+                mod.root_pkg.root_src_path,
+                std.fmt.fmtSliceHexLower(&digest),
+                @errorName(err),
+            });
             // Handle this as a cache miss.
             break :blk prev_digest_buf[0..0];
         };
@@ -3188,10 +3192,13 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node
             if (!mem.eql(u8, prev_digest[0..digest.len], &digest))
                 break :hit;
 
-            log.debug("stage1 {s} digest={} match - skipping invocation", .{ mod.root_pkg.root_src_path, digest });
+            log.debug("stage1 {s} digest={s} match - skipping invocation", .{
+                mod.root_pkg.root_src_path,
+                std.fmt.fmtSliceHexLower(&digest),
+            });
             var flags_bytes: [1]u8 = undefined;
             _ = std.fmt.hexToBytes(&flags_bytes, prev_digest[digest.len..]) catch {
-                log.warn("bad cache stage1 digest: '{s}'", .{prev_digest});
+                log.warn("bad cache stage1 digest: '{s}'", .{std.fmt.fmtSliceHexLower(prev_digest)});
                 break :hit;
             };
 
@@ -3211,7 +3218,11 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node
             mod.stage1_flags = @bitCast(@TypeOf(mod.stage1_flags), flags_bytes[0]);
             return;
         }
-        log.debug("stage1 {s} prev_digest={} new_digest={}", .{ mod.root_pkg.root_src_path, prev_digest, digest });
+        log.debug("stage1 {s} prev_digest={s} new_digest={s}", .{
+            mod.root_pkg.root_src_path,
+            std.fmt.fmtSliceHexLower(prev_digest),
+            std.fmt.fmtSliceHexLower(&digest),
+        });
         man.unhit(prev_hash_state, input_file_count);
     }
 
@@ -3358,8 +3369,8 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node
     // Update the small file with the digest. If it fails we can continue; it only
     // means that the next invocation will have an unnecessary cache miss.
     const stage1_flags_byte = @bitCast(u8, mod.stage1_flags);
-    log.debug("stage1 {s} final digest={} flags={x}", .{
-        mod.root_pkg.root_src_path, digest, stage1_flags_byte,
+    log.debug("stage1 {s} final digest={s} flags={x}", .{
+        mod.root_pkg.root_src_path, std.fmt.fmtSliceHexLower(&digest), stage1_flags_byte,
     });
     var digest_plus_flags: [digest.len + 2]u8 = undefined;
     digest_plus_flags[0..digest.len].* = digest;
diff --git a/src/clang.zig b/src/clang.zig
@@ -537,6 +537,11 @@ pub const FunctionType = opaque {
     extern fn ZigClangFunctionType_getReturnType(*const FunctionType) QualType;
 };
 
+pub const GenericSelectionExpr = opaque {
+    pub const getResultExpr = ZigClangGenericSelectionExpr_getResultExpr;
+    extern fn ZigClangGenericSelectionExpr_getResultExpr(*const GenericSelectionExpr) *const Expr;
+};
+
 pub const IfStmt = opaque {
     pub const getThen = ZigClangIfStmt_getThen;
     extern fn ZigClangIfStmt_getThen(*const IfStmt) *const Stmt;
diff --git a/src/clang_options_data.zig b/src/clang_options_data.zig
@@ -2415,7 +2415,14 @@ flagpd1("dwarf-ext-refs"),
 sepd1("dylib_file"),
 flagpd1("dylinker"),
 flagpd1("dynamic"),
-flagpd1("dynamiclib"),
+.{
+    .name = "dynamiclib",
+    .syntax = .flag,
+    .zig_equivalent = .shared,
+    .pd1 = true,
+    .pd2 = false,
+    .psl = false,
+},
 flagpd1("emit-ast"),
 flagpd1("emit-codegen-only"),
 flagpd1("emit-header-module"),
diff --git a/src/codegen.zig b/src/codegen.zig
@@ -2132,9 +2132,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 if (inst.func.value()) |func_value| {
                     if (func_value.castTag(.function)) |func_payload| {
                         const func = func_payload.data;
-                        const text_segment = &macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment;
-                        const got = &text_segment.sections.items[macho_file.got_section_index.?];
-                        const got_addr = got.addr + func.owner_decl.link.macho.offset_table_index * @sizeOf(u64);
+                        const got_addr = blk: {
+                            const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
+                            const got = seg.sections.items[macho_file.got_section_index.?];
+                            break :blk got.addr + func.owner_decl.link.macho.offset_table_index * @sizeOf(u64);
+                        };
+                        log.debug("got_addr = 0x{x}", .{got_addr});
                         switch (arch) {
                             .x86_64 => {
                                 try self.genSetReg(inst.base.src, Type.initTag(.u32), .rax, .{ .memory = got_addr });
@@ -2152,8 +2155,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         const decl = func_payload.data;
                         const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name});
                         defer self.bin_file.allocator.free(decl_name);
-                        const already_defined = macho_file.extern_lazy_symbols.contains(decl_name);
-                        const symbol: u32 = if (macho_file.extern_lazy_symbols.getIndex(decl_name)) |index|
+                        const already_defined = macho_file.lazy_imports.contains(decl_name);
+                        const symbol: u32 = if (macho_file.lazy_imports.getIndex(decl_name)) |index|
                             @intCast(u32, index)
                         else
                             try macho_file.addExternSymbol(decl_name);
@@ -3111,7 +3114,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                             4, 8 => {
                                 const offset = if (math.cast(i9, adj_off)) |imm|
                                     Instruction.LoadStoreOffset.imm_post_index(-imm)
-                                else |_| Instruction.LoadStoreOffset.reg(try self.copyToTmpRegister(src, Type.initTag(.u64), MCValue{ .immediate = adj_off }));
+                                else |_|
+                                    Instruction.LoadStoreOffset.reg(try self.copyToTmpRegister(src, Type.initTag(.u64), MCValue{ .immediate = adj_off }));
                                 const rn: Register = switch (arch) {
                                     .aarch64, .aarch64_be => .x29,
                                     .aarch64_32 => .w29,
@@ -3302,80 +3306,32 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     },
                     .memory => |addr| {
                         if (self.bin_file.options.pie) {
-                            // For MachO, the binary, with the exception of object files, has to be a PIE.
-                            // Therefore we cannot load an absolute address.
-                            // Instead, we need to make use of PC-relative addressing.
-                            if (reg.id() == 0) { // x0 is special-cased
-                                // TODO This needs to be optimised in the stack usage (perhaps use a shadow stack
-                                // like described here:
-                                // https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/using-the-stack-in-aarch64-implementing-push-and-pop)
-                                // str x28, [sp, #-16]
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.x28, Register.sp, .{
-                                    .offset = Instruction.LoadStoreOffset.imm_pre_index(-16),
-                                }).toU32());
-                                // adr x28, #8
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.adr(.x28, 8).toU32());
-                                if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                                    try macho_file.pie_fixups.append(self.bin_file.allocator, .{
-                                        .address = addr,
-                                        .start = self.code.items.len,
-                                        .len = 4,
-                                    });
-                                } else {
-                                    return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{});
-                                }
-                                // b [label]
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(0).toU32());
-                                // mov r, x0
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(
-                                    reg,
-                                    .xzr,
-                                    .x0,
-                                    Instruction.Shift.none,
-                                ).toU32());
-                                // ldr x28, [sp], #16
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.x28, .{
-                                    .register = .{
-                                        .rn = Register.sp,
-                                        .offset = Instruction.LoadStoreOffset.imm_post_index(16),
-                                    },
-                                }).toU32());
+                            // PC-relative displacement to the entry in the GOT table.
+                            // TODO we should come up with our own, backend independent relocation types
+                            // which each backend (Elf, MachO, etc.) would then translate into an actual
+                            // fixup when linking.
+                            // adrp reg, pages
+                            if (self.bin_file.cast(link.File.MachO)) |macho_file| {
+                                try macho_file.pie_fixups.append(self.bin_file.allocator, .{
+                                    .target_addr = addr,
+                                    .offset = self.code.items.len,
+                                    .size = 4,
+                                });
                             } else {
-                                // stp x0, x28, [sp, #-16]
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.stp(
-                                    .x0,
-                                    .x28,
-                                    Register.sp,
-                                    Instruction.LoadStorePairOffset.pre_index(-16),
-                                ).toU32());
-                                // adr x28, #8
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.adr(.x28, 8).toU32());
-                                if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                                    try macho_file.pie_fixups.append(self.bin_file.allocator, .{
-                                        .address = addr,
-                                        .start = self.code.items.len,
-                                        .len = 4,
-                                    });
-                                } else {
-                                    return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{});
-                                }
-                                // b [label]
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(0).toU32());
-                                // mov r, x0
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(
-                                    reg,
-                                    .xzr,
-                                    .x0,
-                                    Instruction.Shift.none,
-                                ).toU32());
-                                // ldp x0, x28, [sp, #16]
-                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldp(
-                                    .x0,
-                                    .x28,
-                                    Register.sp,
-                                    Instruction.LoadStorePairOffset.post_index(16),
-                                ).toU32());
+                                return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{});
                             }
+                            mem.writeIntLittle(
+                                u32,
+                                try self.code.addManyAsArray(4),
+                                Instruction.adrp(reg, 0).toU32(),
+                            );
+                            // ldr reg, reg, offset
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(reg, .{
+                                .register = .{
+                                    .rn = reg,
+                                    .offset = Instruction.LoadStoreOffset.imm(0),
+                                },
+                            }).toU32());
                         } else {
                             // The value is in memory at a hard-coded address.
                             // If the type is a pointer, it means the pointer address is at this memory location.
@@ -3559,62 +3515,31 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     },
                     .memory => |x| {
                         if (self.bin_file.options.pie) {
-                            // For MachO, the binary, with the exception of object files, has to be a PIE.
-                            // Therefore, we cannot load an absolute address.
-                            assert(x > math.maxInt(u32)); // 32bit direct addressing is not supported by MachO.
-                            // The plan here is to use unconditional relative jump to GOT entry, where we store
-                            // pre-calculated and stored effective address to load into the target register.
-                            // We leave the actual displacement information empty (0-padded) and fixing it up
-                            // later in the linker.
-                            if (reg.id() == 0) { // %rax is special-cased
-                                try self.code.ensureCapacity(self.code.items.len + 5);
-                                if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                                    try macho_file.pie_fixups.append(self.bin_file.allocator, .{
-                                        .address = x,
-                                        .start = self.code.items.len,
-                                        .len = 5,
-                                    });
-                                } else {
-                                    return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{});
-                                }
-                                // call [label]
-                                self.code.appendSliceAssumeCapacity(&[_]u8{
-                                    0xE8,
-                                    0x0,
-                                    0x0,
-                                    0x0,
-                                    0x0,
+                            // RIP-relative displacement to the entry in the GOT table.
+                            // TODO we should come up with our own, backend independent relocation types
+                            // which each backend (Elf, MachO, etc.) would then translate into an actual
+                            // fixup when linking.
+                            if (self.bin_file.cast(link.File.MachO)) |macho_file| {
+                                try macho_file.pie_fixups.append(self.bin_file.allocator, .{
+                                    .target_addr = x,
+                                    .offset = self.code.items.len + 3,
+                                    .size = 4,
                                 });
                             } else {
-                                try self.code.ensureCapacity(self.code.items.len + 10);
-                                // push %rax
-                                self.code.appendSliceAssumeCapacity(&[_]u8{0x50});
-                                if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                                    try macho_file.pie_fixups.append(self.bin_file.allocator, .{
-                                        .address = x,
-                                        .start = self.code.items.len,
-                                        .len = 5,
-                                    });
-                                } else {
-                                    return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{});
-                                }
-                                // call [label]
-                                self.code.appendSliceAssumeCapacity(&[_]u8{
-                                    0xE8,
-                                    0x0,
-                                    0x0,
-                                    0x0,
-                                    0x0,
-                                });
-                                // mov %r, %rax
-                                self.code.appendSliceAssumeCapacity(&[_]u8{
-                                    0x48,
-                                    0x89,
-                                    0xC0 | @as(u8, reg.id()),
-                                });
-                                // pop %rax
-                                self.code.appendSliceAssumeCapacity(&[_]u8{0x58});
+                                return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{});
                             }
+                            try self.code.ensureCapacity(self.code.items.len + 7);
+                            self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
+                            self.code.appendSliceAssumeCapacity(&[_]u8{
+                                0x8D,
+                                0x05 | (@as(u8, reg.id() & 0b111) << 3),
+                            });
+                            mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), 0);
+
+                            try self.code.ensureCapacity(self.code.items.len + 3);
+                            self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() });
+                            const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
+                            self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
                         } else if (x <= math.maxInt(u32)) {
                             // Moving from memory to a register is a variant of `8B /r`.
                             // Since we're using 64-bit moves, we require a REX.
@@ -3777,9 +3702,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                             return MCValue{ .memory = got_addr };
                         } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
                             const decl = payload.data;
-                            const text_segment = &macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment;
-                            const got = &text_segment.sections.items[macho_file.got_section_index.?];
-                            const got_addr = got.addr + decl.link.macho.offset_table_index * ptr_bytes;
+                            const got_addr = blk: {
+                                const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
+                                const got = seg.sections.items[macho_file.got_section_index.?];
+                                break :blk got.addr + decl.link.macho.offset_table_index * ptr_bytes;
+                            };
                             return MCValue{ .memory = got_addr };
                         } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
                             const decl = payload.data;
diff --git a/src/codegen/aarch64.zig b/src/codegen/aarch64.zig
@@ -221,7 +221,8 @@ pub const Instruction = union(enum) {
         offset: u12,
         opc: u2,
         op1: u2,
-        fixed: u4 = 0b111_0,
+        v: u1,
+        fixed: u3 = 0b111,
         size: u2,
     },
     LoadStorePairOfRegisters: packed struct {
@@ -505,6 +506,7 @@ pub const Instruction = union(enum) {
                         .offset = offset.toU12(),
                         .opc = opc,
                         .op1 = op1,
+                        .v = 0,
                         .size = 0b10,
                     },
                 };
@@ -517,6 +519,7 @@ pub const Instruction = union(enum) {
                         .offset = offset.toU12(),
                         .opc = opc,
                         .op1 = op1,
+                        .v = 0,
                         .size = 0b11,
                     },
                 };
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
@@ -222,7 +222,7 @@ pub const LLVMIRModule = struct {
 
         var error_message: [*:0]const u8 = undefined;
         var target: *const llvm.Target = undefined;
-        if (llvm.Target.getFromTriple(llvm_target_triple.ptr, &target, &error_message)) {
+        if (llvm.Target.getFromTriple(llvm_target_triple.ptr, &target, &error_message).toBool()) {
             defer llvm.disposeMessage(error_message);
 
             const stderr = std.io.getStdErr().writer();
@@ -306,7 +306,7 @@ pub const LLVMIRModule = struct {
             // verifyModule always allocs the error_message even if there is no error
             defer llvm.disposeMessage(error_message);
 
-            if (self.llvm_module.verify(.ReturnStatus, &error_message)) {
+            if (self.llvm_module.verify(.ReturnStatus, &error_message).toBool()) {
                 const stderr = std.io.getStdErr().writer();
                 try stderr.print("broken LLVM module found: {s}\nThis is a bug in the Zig compiler.", .{error_message});
                 return error.BrokenLLVMModule;
@@ -322,7 +322,7 @@ pub const LLVMIRModule = struct {
             object_pathZ.ptr,
             .ObjectFile,
             &error_message,
-        )) {
+        ).toBool()) {
             defer llvm.disposeMessage(error_message);
 
             const stderr = std.io.getStdErr().writer();
@@ -617,7 +617,7 @@ pub const LLVMIRModule = struct {
 
             var indices: [2]*const llvm.Value = .{
                 index_type.constNull(),
-                index_type.constInt(1, false),
+                index_type.constInt(1, .False),
             };
 
             return self.builder.buildLoad(self.builder.buildInBoundsGEP(operand, &indices, 2, ""), "");
@@ -679,7 +679,7 @@ pub const LLVMIRModule = struct {
         const signed = inst.base.ty.isSignedInt();
         // TODO: Should we use intcast here or just a simple bitcast?
         //       LLVM does truncation vs bitcast (+signed extension) in the intcast depending on the sizes
-        return self.builder.buildIntCast2(val, try self.getLLVMType(inst.base.ty, inst.base.src), signed, "");
+        return self.builder.buildIntCast2(val, try self.getLLVMType(inst.base.ty, inst.base.src), llvm.Bool.fromBool(signed), "");
     }
 
     fn genBitCast(self: *LLVMIRModule, inst: *Inst.UnOp) !?*const llvm.Value {
@@ -785,7 +785,7 @@ pub const LLVMIRModule = struct {
                 if (bigint.limbs.len != 1) {
                     return self.fail(src, "TODO implement bigger bigint", .{});
                 }
-                const llvm_int = llvm_type.constInt(bigint.limbs[0], false);
+                const llvm_int = llvm_type.constInt(bigint.limbs[0], .False);
                 if (!bigint.positive) {
                     return llvm.constNeg(llvm_int);
                 }
@@ -823,7 +823,7 @@ pub const LLVMIRModule = struct {
                         return self.fail(src, "TODO handle other sentinel values", .{});
                     } else false;
 
-                    return self.context.constString(payload.data.ptr, @intCast(c_uint, payload.data.len), !zero_sentinel);
+                    return self.context.constString(payload.data.ptr, @intCast(c_uint, payload.data.len), llvm.Bool.fromBool(!zero_sentinel));
                 } else {
                     return self.fail(src, "TODO handle more array values", .{});
                 }
@@ -839,13 +839,13 @@ pub const LLVMIRModule = struct {
                             llvm_child_type.constNull(),
                             self.context.intType(1).constNull(),
                         };
-                        return self.context.constStruct(&optional_values, 2, false);
+                        return self.context.constStruct(&optional_values, 2, .False);
                     } else {
                         var optional_values: [2]*const llvm.Value = .{
                             try self.genTypedValue(src, .{ .ty = child_type, .val = tv.val }),
                             self.context.intType(1).constAllOnes(),
                         };
-                        return self.context.constStruct(&optional_values, 2, false);
+                        return self.context.constStruct(&optional_values, 2, .False);
                     }
                 } else {
                     return self.fail(src, "TODO implement const of optional pointer", .{});
@@ -885,7 +885,7 @@ pub const LLVMIRModule = struct {
                         try self.getLLVMType(child_type, src),
                         self.context.intType(1),
                     };
-                    return self.context.structType(&optional_types, 2, false);
+                    return self.context.structType(&optional_types, 2, .False);
                 } else {
                     return self.fail(src, "TODO implement optional pointers as actual pointers", .{});
                 }
@@ -937,7 +937,7 @@ pub const LLVMIRModule = struct {
             try self.getLLVMType(return_type, src),
             if (fn_param_len == 0) null else llvm_param.ptr,
             @intCast(c_uint, fn_param_len),
-            false,
+            .False,
         );
         const llvm_fn = self.llvm_module.addFunction(func.name, fn_type);
 
diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig
@@ -1,7 +1,20 @@
 //! We do this instead of @cImport because the self-hosted compiler is easier
 //! to bootstrap if it does not depend on translate-c.
 
-const LLVMBool = bool;
+/// Stand-in for LLVM-C's `LLVMBool`, represented as a C int. Every value other than
+/// `.False` counts as true, so call `toBool()` instead of comparing against `.True`.
+pub const Bool = enum(c_int) {
+    False,
+    True,
+    _,
+
+    /// Converts a Zig `bool` into the matching `Bool` tag.
+    pub fn fromBool(b: bool) Bool {
+        return if (b) Bool.True else Bool.False;
+    }
+
+    /// Converts back to a Zig `bool`; only `.False` maps to `false`.
+    pub fn toBool(b: Bool) bool {
+        return switch (b) {
+            .False => false,
+            else => true,
+        };
+    }
+};
 pub const AttributeIndex = c_uint;
 
 /// Make sure to use the *InContext functions instead of the global ones.
@@ -22,13 +35,13 @@ pub const Context = opaque {
     extern fn LLVMVoidTypeInContext(C: *const Context) *const Type;
 
     pub const structType = LLVMStructTypeInContext;
-    extern fn LLVMStructTypeInContext(C: *const Context, ElementTypes: [*]*const Type, ElementCount: c_uint, Packed: LLVMBool) *const Type;
+    extern fn LLVMStructTypeInContext(C: *const Context, ElementTypes: [*]*const Type, ElementCount: c_uint, Packed: Bool) *const Type;
 
     pub const constString = LLVMConstStringInContext;
-    extern fn LLVMConstStringInContext(C: *const Context, Str: [*]const u8, Length: c_uint, DontNullTerminate: LLVMBool) *const Value;
+    extern fn LLVMConstStringInContext(C: *const Context, Str: [*]const u8, Length: c_uint, DontNullTerminate: Bool) *const Value;
 
     pub const constStruct = LLVMConstStructInContext;
-    extern fn LLVMConstStructInContext(C: *const Context, ConstantVals: [*]*const Value, Count: c_uint, Packed: LLVMBool) *const Value;
+    extern fn LLVMConstStructInContext(C: *const Context, ConstantVals: [*]*const Value, Count: c_uint, Packed: Bool) *const Value;
 
     pub const createBasicBlock = LLVMCreateBasicBlockInContext;
     extern fn LLVMCreateBasicBlockInContext(C: *const Context, Name: [*:0]const u8) *const BasicBlock;
@@ -59,7 +72,7 @@ pub const Value = opaque {
 
 pub const Type = opaque {
     pub const functionType = LLVMFunctionType;
-    extern fn LLVMFunctionType(ReturnType: *const Type, ParamTypes: ?[*]*const Type, ParamCount: c_uint, IsVarArg: LLVMBool) *const Type;
+    extern fn LLVMFunctionType(ReturnType: *const Type, ParamTypes: ?[*]*const Type, ParamCount: c_uint, IsVarArg: Bool) *const Type;
 
     pub const constNull = LLVMConstNull;
     extern fn LLVMConstNull(Ty: *const Type) *const Value;
@@ -68,7 +81,7 @@ pub const Type = opaque {
     extern fn LLVMConstAllOnes(Ty: *const Type) *const Value;
 
     pub const constInt = LLVMConstInt;
-    extern fn LLVMConstInt(IntTy: *const Type, N: c_ulonglong, SignExtend: LLVMBool) *const Value;
+    extern fn LLVMConstInt(IntTy: *const Type, N: c_ulonglong, SignExtend: Bool) *const Value;
 
     pub const constArray = LLVMConstArray;
     extern fn LLVMConstArray(ElementTy: *const Type, ConstantVals: ?[*]*const Value, Length: c_uint) *const Value;
@@ -91,7 +104,7 @@ pub const Module = opaque {
     extern fn LLVMDisposeModule(*const Module) void;
 
     pub const verify = LLVMVerifyModule;
-    extern fn LLVMVerifyModule(*const Module, Action: VerifierFailureAction, OutMessage: *[*:0]const u8) LLVMBool;
+    extern fn LLVMVerifyModule(*const Module, Action: VerifierFailureAction, OutMessage: *[*:0]const u8) Bool;
 
     pub const addFunction = LLVMAddFunction;
     extern fn LLVMAddFunction(*const Module, Name: [*:0]const u8, FunctionTy: *const Type) *const Value;
@@ -191,7 +204,7 @@ pub const Builder = opaque {
     extern fn LLVMBuildNUWSub(*const Builder, LHS: *const Value, RHS: *const Value, Name: [*:0]const u8) *const Value;
 
     pub const buildIntCast2 = LLVMBuildIntCast2;
-    extern fn LLVMBuildIntCast2(*const Builder, Val: *const Value, DestTy: *const Type, IsSigned: LLVMBool, Name: [*:0]const u8) *const Value;
+    extern fn LLVMBuildIntCast2(*const Builder, Val: *const Value, DestTy: *const Type, IsSigned: Bool, Name: [*:0]const u8) *const Value;
 
     pub const buildBitCast = LLVMBuildBitCast;
     extern fn LLVMBuildBitCast(*const Builder, Val: *const Value, DestTy: *const Type, Name: [*:0]const u8) *const Value;
@@ -258,7 +271,7 @@ pub const TargetMachine = opaque {
         Filename: [*:0]const u8,
         codegen: CodeGenFileType,
         ErrorMessage: *[*:0]const u8,
-    ) LLVMBool;
+    ) Bool;
 };
 
 pub const CodeMode = extern enum {
@@ -295,7 +308,7 @@ pub const CodeGenFileType = extern enum {
 
 pub const Target = opaque {
     pub const getFromTriple = LLVMGetTargetFromTriple;
-    extern fn LLVMGetTargetFromTriple(Triple: [*:0]const u8, T: **const Target, ErrorMessage: *[*:0]const u8) LLVMBool;
+    extern fn LLVMGetTargetFromTriple(Triple: [*:0]const u8, T: **const Target, ErrorMessage: *[*:0]const u8) Bool;
 };
 
 extern fn LLVMInitializeAArch64TargetInfo() void;
diff --git a/src/codegen/wasm.zig b/src/codegen/wasm.zig
@@ -95,7 +95,7 @@ pub const Context = struct {
         return switch (ty.tag()) {
             .f32 => wasm.valtype(.f32),
             .f64 => wasm.valtype(.f64),
-            .u32, .i32 => wasm.valtype(.i32),
+            .u32, .i32, .bool => wasm.valtype(.i32),
             .u64, .i64 => wasm.valtype(.i64),
             else => self.fail(src, "TODO - Wasm genValtype for type '{s}'", .{ty.tag()}),
         };
@@ -208,6 +208,7 @@ pub const Context = struct {
             .alloc => self.genAlloc(inst.castTag(.alloc).?),
             .arg => self.genArg(inst.castTag(.arg).?),
             .block => self.genBlock(inst.castTag(.block).?),
+            .breakpoint => self.genBreakpoint(inst.castTag(.breakpoint).?),
             .br => self.genBr(inst.castTag(.br).?),
             .call => self.genCall(inst.castTag(.call).?),
             .cmp_eq => self.genCmp(inst.castTag(.cmp_eq).?, .eq),
@@ -221,9 +222,11 @@ pub const Context = struct {
             .dbg_stmt => WValue.none,
             .load => self.genLoad(inst.castTag(.load).?),
             .loop => self.genLoop(inst.castTag(.loop).?),
+            .not => self.genNot(inst.castTag(.not).?),
             .ret => self.genRet(inst.castTag(.ret).?),
             .retvoid => WValue.none,
             .store => self.genStore(inst.castTag(.store).?),
+            .unreach => self.genUnreachable(inst.castTag(.unreach).?),
             else => self.fail(inst.src, "TODO: Implement wasm inst: {s}", .{inst.tag}),
         };
     }
@@ -329,7 +332,7 @@ pub const Context = struct {
                 try writer.writeByte(wasm.opcode(.i32_const));
                 try leb.writeILEB128(writer, inst.val.toUnsignedInt());
             },
-            .i32 => {
+            .i32, .bool => {
                 try writer.writeByte(wasm.opcode(.i32_const));
                 try leb.writeILEB128(writer, inst.val.toSignedInt());
             },
@@ -414,7 +417,14 @@ pub const Context = struct {
 
         // insert blocks at the position of `offset` so
         // the condition can jump to it
-        const offset = condition.code_offset;
+        const offset = switch (condition) {
+            .code_offset => |offset| offset,
+            else => blk: {
+                const offset = self.code.items.len;
+                try self.emitWValue(condition);
+                break :blk offset;
+            },
+        };
         const block_ty = try self.genBlockType(condbr.base.src, condbr.base.ty);
         try self.startBlock(.block, block_ty, offset);
 
@@ -523,4 +533,32 @@ pub const Context = struct {
 
         return .none;
     }
+
+    /// Emits code for a boolean `not` and returns the code offset at which the
+    /// emitted expression begins.
+    fn genNot(self: *Context, not: *Inst.UnOp) InnerError!WValue {
+        // Record the start position before anything for this instruction is emitted.
+        const start = self.code.items.len;
+
+        const value = self.resolveInst(not.operand);
+        try self.emitWValue(value);
+
+        // wasm has neither booleans nor a `not` instruction, so the same logic
+        // is expressed as `operand == 0`.
+        const out = self.code.writer();
+        try out.writeByte(wasm.opcode(.i32_const));
+        try leb.writeILEB128(out, @as(i32, 0));
+        try out.writeByte(wasm.opcode(.i32_eq));
+
+        return WValue{ .code_offset = start };
+    }
+
+    /// Breakpoints produce no code for now.
+    fn genBreakpoint(self: *Context, breakpoint: *Inst.NoOp) InnerError!WValue {
+        // wasm itself does not support this; it can be implemented once DWARF
+        // is supported for wasm.
+        return WValue.none;
+    }
+
+    /// Appends the wasm `unreachable` opcode to the code buffer; yields no value.
+    fn genUnreachable(self: *Context, unreach: *Inst.NoOp) InnerError!WValue {
+        const trap = wasm.opcode(.@"unreachable");
+        try self.code.append(trap);
+        return WValue.none;
+    }
 };
diff --git a/src/config.zig.in b/src/config.zig.in
@@ -1,7 +1,7 @@
 pub const have_llvm = true;
 pub const version: [:0]const u8 = "@ZIG_VERSION@";
 pub const semver = try @import("std").SemanticVersion.parse(version);
-pub const enable_logging: bool = false;
+pub const enable_logging: bool = @ZIG_ENABLE_LOGGING_BOOL@;
 pub const enable_tracy = false;
 pub const is_stage1 = true;
 pub const skip_non_native = false;
diff --git a/src/introspect.zig b/src/introspect.zig
@@ -61,6 +61,14 @@ pub fn findZigLibDirFromSelfExe(
 
 /// Caller owns returned memory.
 pub fn resolveGlobalCacheDir(allocator: *mem.Allocator) ![]u8 {
+    if (std.process.getEnvVarOwned(allocator, "ZIG_GLOBAL_CACHE_DIR")) |value| {
+        if (value.len > 0) {
+            return value;
+        } else {
+            allocator.free(value);
+        }
+    } else |_| {}
+
     const appname = "zig";
 
     if (std.Target.current.os.tag != .windows) {
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
@@ -11,7 +11,9 @@ const codegen = @import("../codegen.zig");
 const aarch64 = @import("../codegen/aarch64.zig");
 const math = std.math;
 const mem = std.mem;
+const meta = std.meta;
 
+const bind = @import("MachO/bind.zig");
 const trace = @import("../tracy.zig").trace;
 const build_options = @import("build_options");
 const Module = @import("../Module.zig");
@@ -24,9 +26,9 @@ const target_util = @import("../target.zig");
 const DebugSymbols = @import("MachO/DebugSymbols.zig");
 const Trie = @import("MachO/Trie.zig");
 const CodeSignature = @import("MachO/CodeSignature.zig");
+const Zld = @import("MachO/Zld.zig");
 
 usingnamespace @import("MachO/commands.zig");
-usingnamespace @import("MachO/imports.zig");
 
 pub const base_tag: File.Tag = File.Tag.macho;
 
@@ -87,14 +89,12 @@ code_signature_cmd_index: ?u16 = null,
 
 /// Index into __TEXT,__text section.
 text_section_index: ?u16 = null,
-/// Index into __TEXT,__ziggot section.
-got_section_index: ?u16 = null,
 /// Index into __TEXT,__stubs section.
 stubs_section_index: ?u16 = null,
 /// Index into __TEXT,__stub_helper section.
 stub_helper_section_index: ?u16 = null,
 /// Index into __DATA_CONST,__got section.
-data_got_section_index: ?u16 = null,
+got_section_index: ?u16 = null,
 /// Index into __DATA,__la_symbol_ptr section.
 la_symbol_ptr_section_index: ?u16 = null,
 /// Index into __DATA,__data section.
@@ -104,16 +104,16 @@ entry_addr: ?u64 = null,
 
 /// Table of all local symbols
 /// Internally references string table for names (which are optional).
-local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+locals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
 /// Table of all global symbols
-global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+globals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
 /// Table of all extern nonlazy symbols, indexed by name.
-extern_nonlazy_symbols: std.StringArrayHashMapUnmanaged(ExternSymbol) = .{},
+nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
 /// Table of all extern lazy symbols, indexed by name.
-extern_lazy_symbols: std.StringArrayHashMapUnmanaged(ExternSymbol) = .{},
+lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
 
-local_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
-global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
+locals_free_list: std.ArrayListUnmanaged(u32) = .{},
+globals_free_list: std.ArrayListUnmanaged(u32) = .{},
 offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},
 
 stub_helper_stubs_start_off: ?u64 = null,
@@ -122,8 +122,8 @@ stub_helper_stubs_start_off: ?u64 = null,
 string_table: std.ArrayListUnmanaged(u8) = .{},
 string_table_directory: std.StringHashMapUnmanaged(u32) = .{},
 
-/// Table of trampolines to the actual symbols in __text section.
-offset_table: std.ArrayListUnmanaged(u64) = .{},
+/// Table of GOT entries.
+offset_table: std.ArrayListUnmanaged(GOTEntry) = .{},
 
 error_flags: File.ErrorFlags = File.ErrorFlags{},
 
@@ -154,14 +154,19 @@ string_table_needs_relocation: bool = false,
 /// allocate a fresh text block, which will have ideal capacity, and then grow it
 /// by 1 byte. It will then have -1 overcapacity.
 text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{},
+
 /// Pointer to the last allocated text block
 last_text_block: ?*TextBlock = null,
+
 /// A list of all PIE fixups required for this run of the linker.
 /// Warning, this is currently NOT thread-safe. See the TODO below.
 /// TODO Move this list inside `updateDecl` where it should be allocated
 /// prior to calling `generateSymbol`, and then immediately deallocated
 /// rather than sitting in the global scope.
-pie_fixups: std.ArrayListUnmanaged(PieFixup) = .{},
+/// TODO We should also rewrite this using generic relocations common to all
+/// backends.
+pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{},
+
 /// A list of all stub (extern decls) fixups required for this run of the linker.
 /// Warning, this is currently NOT thread-safe. See the TODO below.
 /// TODO Move this list inside `updateDecl` where it should be allocated
@@ -169,14 +174,42 @@ pie_fixups: std.ArrayListUnmanaged(PieFixup) = .{},
 /// rather than sitting in the global scope.
 stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{},
 
-pub const PieFixup = struct {
-    /// Target address we wanted to address in absolute terms.
-    address: u64,
-    /// Where in the byte stream we should perform the fixup.
-    start: usize,
-    /// The length of the byte stream. For x86_64, this will be
-    /// variable. For aarch64, it will be fixed at 4 bytes.
-    len: usize,
+pub const GOTEntry = struct {
+    /// Whether this GOT entry is a local pointer or an extern (nonlazy) import.
+    kind: enum {
+        Local,
+        Extern,
+    },
+
+    /// Id of the macho.nlist_64 in the respective table: either locals or nonlazy imports.
+    /// TODO I'm more and more inclined to just manage a single, or at most two, symbol tables
+    /// rather than 4 as we currently do, but I'll follow up in a future PR.
+    symbol: u32,
+
+    /// Index of this entry in the GOT.
+    index: u32,
+};
+
+pub const Import = struct {
+    /// MachO symbol table entry (nlist_64) for this import.
+    symbol: macho.nlist_64,
+
+    /// Ordinal of the dynamic library in which this symbol can be found.
+    dylib_ordinal: i64,
+
+    /// Index of this import within the import list.
+    index: u32,
+};
+
+pub const PIEFixup = struct {
+    /// Target VM address that this relocation refers to.
+    target_addr: u64,
+
+    /// Offset within the byte stream where the fixup is performed.
+    offset: usize,
+
+    /// Size of the relocation.
+    size: usize,
 };
 
 pub const StubFixup = struct {
@@ -260,9 +293,9 @@ pub const TextBlock = struct {
     /// File offset relocation happens transparently, so it is not included in
     /// this calculation.
     fn capacity(self: TextBlock, macho_file: MachO) u64 {
-        const self_sym = macho_file.local_symbols.items[self.local_sym_index];
+        const self_sym = macho_file.locals.items[self.local_sym_index];
         if (self.next) |next| {
-            const next_sym = macho_file.local_symbols.items[next.local_sym_index];
+            const next_sym = macho_file.locals.items[next.local_sym_index];
             return next_sym.n_value - self_sym.n_value;
         } else {
             // We are the last block.
@@ -274,8 +307,8 @@ pub const TextBlock = struct {
     fn freeListEligible(self: TextBlock, macho_file: MachO) bool {
         // No need to keep a free list node for the last block.
         const next = self.next orelse return false;
-        const self_sym = macho_file.local_symbols.items[self.local_sym_index];
-        const next_sym = macho_file.local_symbols.items[next.local_sym_index];
+        const self_sym = macho_file.locals.items[self.local_sym_index];
+        const next_sym = macho_file.locals.items[next.local_sym_index];
         const cap = next_sym.n_value - self_sym.n_value;
         const ideal_cap = padToIdeal(self.size);
         if (cap <= ideal_cap) return false;
@@ -344,7 +377,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
     };
 
     // Index 0 is always a null symbol.
-    try self.local_symbols.append(allocator, .{
+    try self.locals.append(allocator, .{
         .n_strx = 0,
         .n_type = 0,
         .n_sect = 0,
@@ -600,7 +633,74 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
         if (!mem.eql(u8, the_object_path, full_out_path)) {
             try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{});
         }
-    } else {
+    } else outer: {
+        const use_zld = blk: {
+            if (self.base.options.is_native_os and self.base.options.system_linker_hack) {
+                // If the user forces the use of ld64, make sure we are running native!
+                break :blk false;
+            }
+
+            if (self.base.options.target.cpu.arch == .aarch64) {
+                // On aarch64, always use zld.
+                break :blk true;
+            }
+
+            if (self.base.options.link_libcpp or
+                self.base.options.output_mode == .Lib or
+                self.base.options.linker_script != null)
+            {
+                // Fallback to LLD in this handful of cases on x86_64 only.
+                break :blk false;
+            }
+
+            break :blk true;
+        };
+
+        if (use_zld) {
+            var zld = Zld.init(self.base.allocator);
+            defer zld.deinit();
+            zld.arch = target.cpu.arch;
+
+            var input_files = std.ArrayList([]const u8).init(self.base.allocator);
+            defer input_files.deinit();
+            // Positional arguments to the linker such as object files.
+            try input_files.appendSlice(self.base.options.objects);
+            for (comp.c_object_table.items()) |entry| {
+                try input_files.append(entry.key.status.success.object_path);
+            }
+            if (module_obj_path) |p| {
+                try input_files.append(p);
+            }
+            try input_files.append(comp.compiler_rt_static_lib.?.full_object_path);
+            // libc++ dep
+            if (self.base.options.link_libcpp) {
+                try input_files.append(comp.libcxxabi_static_lib.?.full_object_path);
+                try input_files.append(comp.libcxx_static_lib.?.full_object_path);
+            }
+
+            if (self.base.options.verbose_link) {
+                var argv = std.ArrayList([]const u8).init(self.base.allocator);
+                defer argv.deinit();
+
+                try argv.append("zig");
+                try argv.append("ld");
+
+                // Use `appendSlice` rather than `ensureCapacity` + `appendAssumeCapacity`:
+                // `ensureCapacity` takes a *total* capacity, which would not account for
+                // the two entries already in the list.
+                try argv.appendSlice(input_files.items);
+
+                try argv.append("-o");
+                try argv.append(full_out_path);
+
+                Compilation.dump_argv(argv.items);
+            }
+
+            try zld.link(input_files.items, full_out_path);
+
+            break :outer;
+        }
+
         // Create an LLD command line and invoke it.
         var argv = std.ArrayList([]const u8).init(self.base.allocator);
         defer argv.deinit();
@@ -644,9 +744,7 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
             try argv.append("defs");
         }
 
-        if (is_dyn_lib) {
-            try argv.append("-static");
-        } else {
+        if (is_exe_or_dyn_lib) {
             try argv.append("-dynamic");
         }
 
@@ -836,7 +934,7 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
                         }
                     },
                     else => {
-                        log.err("{s} terminated", .{ argv.items[0] });
+                        log.err("{s} terminated", .{argv.items[0]});
                         return error.LLDCrashed;
                     },
                 }
@@ -873,119 +971,6 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
                     log.warn("unexpected LLD stderr:\n{s}", .{stderr});
                 }
             }
-
-            // At this stage, LLD has done its job. It is time to patch the resultant
-            // binaries up!
-            const out_file = try directory.handle.openFile(self.base.options.emit.?.sub_path, .{ .write = true });
-            try self.parseFromFile(out_file);
-
-            if (self.libsystem_cmd_index == null and self.header.?.filetype == macho.MH_EXECUTE) {
-                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-                const text_section = text_segment.sections.items[self.text_section_index.?];
-                const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64);
-                const needed_size = padToIdeal(@sizeOf(macho.linkedit_data_command));
-
-                if (needed_size + after_last_cmd_offset > text_section.offset) {
-                    log.err("Unable to extend padding between the end of load commands and start of __text section.", .{});
-                    log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size});
-                    log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{});
-                    return error.NotEnoughPadding;
-                }
-
-                // Calculate next available dylib ordinal.
-                const next_ordinal = blk: {
-                    var ordinal: u32 = 1;
-                    for (self.load_commands.items) |cmd| {
-                        switch (cmd) {
-                            .Dylib => ordinal += 1,
-                            else => {},
-                        }
-                    }
-                    break :blk ordinal;
-                };
-
-                // Add load dylib load command
-                self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
-                const cmdsize = @intCast(u32, mem.alignForwardGeneric(
-                    u64,
-                    @sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH),
-                    @sizeOf(u64),
-                ));
-                // TODO Find a way to work out runtime version from the OS version triple stored in std.Target.
-                // In the meantime, we're gonna hardcode to the minimum compatibility version of 0.0.0.
-                const min_version = 0x0;
-                var dylib_cmd = emptyGenericCommandWithData(macho.dylib_command{
-                    .cmd = macho.LC_LOAD_DYLIB,
-                    .cmdsize = cmdsize,
-                    .dylib = .{
-                        .name = @sizeOf(macho.dylib_command),
-                        .timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files
-                        .current_version = min_version,
-                        .compatibility_version = min_version,
-                    },
-                });
-                dylib_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name);
-                mem.set(u8, dylib_cmd.data, 0);
-                mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH));
-                try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
-                self.header_dirty = true;
-                self.load_commands_dirty = true;
-
-                if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) {
-                    log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{});
-                    log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{});
-                    return error.NoSymbolTableFound;
-                }
-
-                // Patch dyld info
-                try self.fixupBindInfo(next_ordinal);
-                try self.fixupLazyBindInfo(next_ordinal);
-
-                // Write updated load commands and the header
-                try self.writeLoadCommands();
-                try self.writeHeader();
-
-                assert(!self.header_dirty);
-                assert(!self.load_commands_dirty);
-            }
-            if (self.code_signature_cmd_index == null) outer: {
-                if (target.cpu.arch != .aarch64) break :outer; // This is currently needed only for aarch64 targets.
-                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-                const text_section = text_segment.sections.items[self.text_section_index.?];
-                const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64);
-                const needed_size = padToIdeal(@sizeOf(macho.linkedit_data_command));
-
-                if (needed_size + after_last_cmd_offset > text_section.offset) {
-                    log.err("Unable to extend padding between the end of load commands and start of __text section.", .{});
-                    log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size});
-                    log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{});
-                    return error.NotEnoughPadding;
-                }
-
-                // Add code signature load command
-                self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len);
-                try self.load_commands.append(self.base.allocator, .{
-                    .LinkeditData = .{
-                        .cmd = macho.LC_CODE_SIGNATURE,
-                        .cmdsize = @sizeOf(macho.linkedit_data_command),
-                        .dataoff = 0,
-                        .datasize = 0,
-                    },
-                });
-                self.header_dirty = true;
-                self.load_commands_dirty = true;
-
-                // Pad out space for code signature
-                try self.writeCodeSignaturePadding();
-                // Write updated load commands and the header
-                try self.writeLoadCommands();
-                try self.writeHeader();
-                // Generate adhoc code signature
-                try self.writeCodeSignature();
-
-                assert(!self.header_dirty);
-                assert(!self.load_commands_dirty);
-            }
         }
     }
 
@@ -1021,14 +1006,14 @@ pub fn deinit(self: *MachO) void {
     if (self.d_sym) |*ds| {
         ds.deinit(self.base.allocator);
     }
-    for (self.extern_lazy_symbols.items()) |*entry| {
+    for (self.lazy_imports.items()) |*entry| {
         self.base.allocator.free(entry.key);
     }
-    self.extern_lazy_symbols.deinit(self.base.allocator);
-    for (self.extern_nonlazy_symbols.items()) |*entry| {
+    self.lazy_imports.deinit(self.base.allocator);
+    for (self.nonlazy_imports.items()) |*entry| {
         self.base.allocator.free(entry.key);
     }
-    self.extern_nonlazy_symbols.deinit(self.base.allocator);
+    self.nonlazy_imports.deinit(self.base.allocator);
     self.pie_fixups.deinit(self.base.allocator);
     self.stub_fixups.deinit(self.base.allocator);
     self.text_block_free_list.deinit(self.base.allocator);
@@ -1042,10 +1027,10 @@ pub fn deinit(self: *MachO) void {
     }
     self.string_table_directory.deinit(self.base.allocator);
     self.string_table.deinit(self.base.allocator);
-    self.global_symbols.deinit(self.base.allocator);
-    self.global_symbol_free_list.deinit(self.base.allocator);
-    self.local_symbols.deinit(self.base.allocator);
-    self.local_symbol_free_list.deinit(self.base.allocator);
+    self.globals.deinit(self.base.allocator);
+    self.globals_free_list.deinit(self.base.allocator);
+    self.locals.deinit(self.base.allocator);
+    self.locals_free_list.deinit(self.base.allocator);
     for (self.load_commands.items) |*lc| {
         lc.deinit(self.base.allocator);
     }
@@ -1100,7 +1085,7 @@ fn shrinkTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64) vo
 }
 
 fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 {
-    const sym = self.local_symbols.items[text_block.local_sym_index];
+    const sym = self.locals.items[text_block.local_sym_index];
     const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value;
     const need_realloc = !align_ok or new_block_size > text_block.capacity(self.*);
     if (!need_realloc) return sym.n_value;
@@ -1110,34 +1095,41 @@ fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alig
 pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {
     if (decl.link.macho.local_sym_index != 0) return;
 
-    try self.local_symbols.ensureCapacity(self.base.allocator, self.local_symbols.items.len + 1);
+    try self.locals.ensureCapacity(self.base.allocator, self.locals.items.len + 1);
     try self.offset_table.ensureCapacity(self.base.allocator, self.offset_table.items.len + 1);
 
-    if (self.local_symbol_free_list.popOrNull()) |i| {
+    if (self.locals_free_list.popOrNull()) |i| {
         log.debug("reusing symbol index {d} for {s}", .{ i, decl.name });
         decl.link.macho.local_sym_index = i;
     } else {
-        log.debug("allocating symbol index {d} for {s}", .{ self.local_symbols.items.len, decl.name });
-        decl.link.macho.local_sym_index = @intCast(u32, self.local_symbols.items.len);
-        _ = self.local_symbols.addOneAssumeCapacity();
+        log.debug("allocating symbol index {d} for {s}", .{ self.locals.items.len, decl.name });
+        decl.link.macho.local_sym_index = @intCast(u32, self.locals.items.len);
+        _ = self.locals.addOneAssumeCapacity();
     }
 
     if (self.offset_table_free_list.popOrNull()) |i| {
+        log.debug("reusing offset table entry index {d} for {s}", .{ i, decl.name });
         decl.link.macho.offset_table_index = i;
     } else {
+        log.debug("allocating offset table entry index {d} for {s}", .{ self.offset_table.items.len, decl.name });
         decl.link.macho.offset_table_index = @intCast(u32, self.offset_table.items.len);
         _ = self.offset_table.addOneAssumeCapacity();
         self.offset_table_count_dirty = true;
+        self.rebase_info_dirty = true;
     }
 
-    self.local_symbols.items[decl.link.macho.local_sym_index] = .{
+    self.locals.items[decl.link.macho.local_sym_index] = .{
         .n_strx = 0,
         .n_type = 0,
         .n_sect = 0,
         .n_desc = 0,
         .n_value = 0,
     };
-    self.offset_table.items[decl.link.macho.offset_table_index] = 0;
+    self.offset_table.items[decl.link.macho.offset_table_index] = .{
+        .kind = .Local,
+        .symbol = decl.link.macho.local_sym_index,
+        .index = decl.link.macho.offset_table_index,
+    };
 }
 
 pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
@@ -1180,8 +1172,9 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         .externally_managed => |x| x,
         .appended => code_buffer.items,
         .fail => |em| {
-            // Clear any PIE fixups and stub fixups for this decl.
+            // Clear any PIE fixups for this decl.
             self.pie_fixups.shrinkRetainingCapacity(0);
+            // Clear any stub fixups for this decl.
             self.stub_fixups.shrinkRetainingCapacity(0);
             decl.analysis = .codegen_failure;
             try module.failed_decls.put(module.gpa, decl, em);
@@ -1191,7 +1184,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
 
     const required_alignment = typed_value.ty.abiAlignment(self.base.options.target);
     assert(decl.link.macho.local_sym_index != 0); // Caller forgot to call allocateDeclIndexes()
-    const symbol = &self.local_symbols.items[decl.link.macho.local_sym_index];
+    const symbol = &self.locals.items[decl.link.macho.local_sym_index];
 
     if (decl.link.macho.size != 0) {
         const capacity = decl.link.macho.capacity(self.*);
@@ -1200,9 +1193,12 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
             const vaddr = try self.growTextBlock(&decl.link.macho, code.len, required_alignment);
             log.debug("growing {s} from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr });
             if (vaddr != symbol.n_value) {
-                symbol.n_value = vaddr;
                 log.debug(" (writing new offset table entry)", .{});
-                self.offset_table.items[decl.link.macho.offset_table_index] = vaddr;
+                self.offset_table.items[decl.link.macho.offset_table_index] = .{
+                    .kind = .Local,
+                    .symbol = decl.link.macho.local_sym_index,
+                    .index = decl.link.macho.offset_table_index,
+                };
                 try self.writeOffsetTableEntry(decl.link.macho.offset_table_index);
             }
         } else if (code.len < decl.link.macho.size) {
@@ -1231,7 +1227,11 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
             .n_desc = 0,
             .n_value = addr,
         };
-        self.offset_table.items[decl.link.macho.offset_table_index] = addr;
+        self.offset_table.items[decl.link.macho.offset_table_index] = .{
+            .kind = .Local,
+            .symbol = decl.link.macho.local_sym_index,
+            .index = decl.link.macho.offset_table_index,
+        };
 
         try self.writeLocalSymbol(decl.link.macho.local_sym_index);
         if (self.d_sym) |*ds|
@@ -1239,30 +1239,48 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         try self.writeOffsetTableEntry(decl.link.macho.offset_table_index);
     }
 
-    // Perform PIE fixups (if any)
-    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    const got_section = text_segment.sections.items[self.got_section_index.?];
+    // Calculate displacements to target addr (if any).
     while (self.pie_fixups.popOrNull()) |fixup| {
-        const target_addr = fixup.address;
-        const this_addr = symbol.n_value + fixup.start;
+        assert(fixup.size == 4);
+        const this_addr = symbol.n_value + fixup.offset;
+        const target_addr = fixup.target_addr;
+
         switch (self.base.options.target.cpu.arch) {
             .x86_64 => {
-                assert(target_addr >= this_addr + fixup.len);
-                const displacement = try math.cast(u32, target_addr - this_addr - fixup.len);
-                var placeholder = code_buffer.items[fixup.start + fixup.len - @sizeOf(u32) ..][0..@sizeOf(u32)];
-                mem.writeIntSliceLittle(u32, placeholder, displacement);
+                const displacement = try math.cast(u32, target_addr - this_addr - 4);
+                mem.writeIntLittle(u32, code_buffer.items[fixup.offset..][0..4], displacement);
             },
             .aarch64 => {
-                assert(target_addr >= this_addr);
-                const displacement = try math.cast(u27, target_addr - this_addr);
-                var placeholder = code_buffer.items[fixup.start..][0..fixup.len];
-                mem.writeIntSliceLittle(u32, placeholder, aarch64.Instruction.b(@as(i28, displacement)).toU32());
+                // TODO optimize instruction based on jump length (use ldr(literal) + nop if possible).
+                {
+                    const inst = code_buffer.items[fixup.offset..][0..4];
+                    var parsed = mem.bytesAsValue(meta.TagPayload(
+                        aarch64.Instruction,
+                        aarch64.Instruction.PCRelativeAddress,
+                    ), inst);
+                    const this_page = @intCast(i32, this_addr >> 12);
+                    const target_page = @intCast(i32, target_addr >> 12);
+                    const pages = @bitCast(u21, @intCast(i21, target_page - this_page));
+                    parsed.immhi = @truncate(u19, pages >> 2);
+                    parsed.immlo = @truncate(u2, pages);
+                }
+                {
+                    const inst = code_buffer.items[fixup.offset + 4 ..][0..4];
+                    var parsed = mem.bytesAsValue(meta.TagPayload(
+                        aarch64.Instruction,
+                        aarch64.Instruction.LoadStoreRegister,
+                    ), inst);
+                    const narrowed = @truncate(u12, target_addr);
+                    const offset = try math.divExact(u12, narrowed, 8);
+                    parsed.offset = offset;
+                }
             },
             else => unreachable, // unsupported target architecture
         }
     }
 
     // Resolve stubs (if any)
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const stubs = text_segment.sections.items[self.stubs_section_index.?];
     for (self.stub_fixups.items) |fixup| {
         const stub_addr = stubs.addr + fixup.symbol * stubs.reserved2;
@@ -1287,9 +1305,6 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
             try self.writeStubInStubHelper(fixup.symbol);
             try self.writeLazySymbolPointer(fixup.symbol);
 
-            const extern_sym = &self.extern_lazy_symbols.items()[fixup.symbol].value;
-            extern_sym.segment = self.data_segment_cmd_index.?;
-            extern_sym.offset = fixup.symbol * @sizeOf(u64);
             self.rebase_info_dirty = true;
             self.lazy_binding_info_dirty = true;
         }
@@ -1331,9 +1346,9 @@ pub fn updateDeclExports(
     const tracy = trace(@src());
     defer tracy.end();
 
-    try self.global_symbols.ensureCapacity(self.base.allocator, self.global_symbols.items.len + exports.len);
+    try self.globals.ensureCapacity(self.base.allocator, self.globals.items.len + exports.len);
     if (decl.link.macho.local_sym_index == 0) return;
-    const decl_sym = &self.local_symbols.items[decl.link.macho.local_sym_index];
+    const decl_sym = &self.locals.items[decl.link.macho.local_sym_index];
 
     for (exports) |exp| {
         if (exp.options.section) |section_name| {
@@ -1366,7 +1381,7 @@ pub fn updateDeclExports(
         };
         const n_type = decl_sym.n_type | macho.N_EXT;
         if (exp.link.macho.sym_index) |i| {
-            const sym = &self.global_symbols.items[i];
+            const sym = &self.globals.items[i];
             sym.* = .{
                 .n_strx = try self.updateString(sym.n_strx, exp.options.name),
                 .n_type = n_type,
@@ -1376,12 +1391,12 @@ pub fn updateDeclExports(
             };
         } else {
             const name_str_index = try self.makeString(exp.options.name);
-            const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: {
-                _ = self.global_symbols.addOneAssumeCapacity();
+            const i = if (self.globals_free_list.popOrNull()) |i| i else blk: {
+                _ = self.globals.addOneAssumeCapacity();
                 self.export_info_dirty = true;
-                break :blk self.global_symbols.items.len - 1;
+                break :blk self.globals.items.len - 1;
             };
-            self.global_symbols.items[i] = .{
+            self.globals.items[i] = .{
                 .n_strx = name_str_index,
                 .n_type = n_type,
                 .n_sect = @intCast(u8, self.text_section_index.?) + 1,
@@ -1396,18 +1411,18 @@ pub fn updateDeclExports(
 
 pub fn deleteExport(self: *MachO, exp: Export) void {
     const sym_index = exp.sym_index orelse return;
-    self.global_symbol_free_list.append(self.base.allocator, sym_index) catch {};
-    self.global_symbols.items[sym_index].n_type = 0;
+    self.globals_free_list.append(self.base.allocator, sym_index) catch {};
+    self.globals.items[sym_index].n_type = 0;
 }
 
 pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {
     // Appending to free lists is allowed to fail because the free lists are heuristics based anyway.
     self.freeTextBlock(&decl.link.macho);
     if (decl.link.macho.local_sym_index != 0) {
-        self.local_symbol_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {};
+        self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {};
         self.offset_table_free_list.append(self.base.allocator, decl.link.macho.offset_table_index) catch {};
 
-        self.local_symbols.items[decl.link.macho.local_sym_index].n_type = 0;
+        self.locals.items[decl.link.macho.local_sym_index].n_type = 0;
 
         decl.link.macho.local_sym_index = 0;
     }
@@ -1415,7 +1430,7 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {
 
 pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
     assert(decl.link.macho.local_sym_index != 0);
-    return self.local_symbols.items[decl.link.macho.local_sym_index].n_value;
+    return self.locals.items[decl.link.macho.local_sym_index].n_value;
 }
 
 pub fn populateMissingMetadata(self: *MachO) !void {
@@ -1555,39 +1570,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         self.header_dirty = true;
         self.load_commands_dirty = true;
     }
-    if (self.got_section_index == null) {
-        const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        self.got_section_index = @intCast(u16, text_segment.sections.items.len);
-
-        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
-            .x86_64 => 0,
-            .aarch64 => 2,
-            else => unreachable, // unhandled architecture type
-        };
-        const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
-        const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
-        const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad);
-        assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment.
-
-        log.debug("found __ziggot section free space 0x{x} to 0x{x}", .{ off, off + needed_size });
-
-        try text_segment.addSection(self.base.allocator, .{
-            .sectname = makeStaticString("__ziggot"),
-            .segname = makeStaticString("__TEXT"),
-            .addr = text_segment.inner.vmaddr + off,
-            .size = needed_size,
-            .offset = @intCast(u32, off),
-            .@"align" = alignment,
-            .reloff = 0,
-            .nreloc = 0,
-            .flags = flags,
-            .reserved1 = 0,
-            .reserved2 = 0,
-            .reserved3 = 0,
-        });
-        self.header_dirty = true;
-        self.load_commands_dirty = true;
-    }
     if (self.stubs_section_index == null) {
         const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
         self.stubs_section_index = @intCast(u16, text_segment.sections.items.len);
@@ -1599,7 +1581,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         };
         const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
             .x86_64 => 6,
-            .aarch64 => 2 * @sizeOf(u32),
+            .aarch64 => 3 * @sizeOf(u32),
             else => unreachable, // unhandled architecture type
         };
         const flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
@@ -1688,9 +1670,9 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         self.header_dirty = true;
         self.load_commands_dirty = true;
     }
-    if (self.data_got_section_index == null) {
+    if (self.got_section_index == null) {
         const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
-        self.data_got_section_index = @intCast(u16, dc_segment.sections.items.len);
+        self.got_section_index = @intCast(u16, dc_segment.sections.items.len);
 
         const flags = macho.S_NON_LAZY_SYMBOL_POINTERS;
         const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
@@ -2062,12 +2044,12 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         self.header_dirty = true;
         self.load_commands_dirty = true;
     }
-    if (!self.extern_nonlazy_symbols.contains("dyld_stub_binder")) {
-        const index = @intCast(u32, self.extern_nonlazy_symbols.items().len);
+    if (!self.nonlazy_imports.contains("dyld_stub_binder")) {
+        const index = @intCast(u32, self.nonlazy_imports.items().len);
         const name = try self.base.allocator.dupe(u8, "dyld_stub_binder");
         const offset = try self.makeString("dyld_stub_binder");
-        try self.extern_nonlazy_symbols.putNoClobber(self.base.allocator, name, .{
-            .inner = .{
+        try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{
+            .symbol = .{
                 .n_strx = offset,
                 .n_type = std.macho.N_UNDF | std.macho.N_EXT,
                 .n_sect = 0,
@@ -2075,68 +2057,19 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .n_value = 0,
             },
             .dylib_ordinal = 1, // TODO this is currently hardcoded.
-            .segment = self.data_const_segment_cmd_index.?,
-            .offset = index * @sizeOf(u64),
+            .index = index,
+        });
+        const off_index = @intCast(u32, self.offset_table.items.len);
+        try self.offset_table.append(self.base.allocator, .{
+            .kind = .Extern,
+            .symbol = index,
+            .index = off_index,
         });
+        try self.writeOffsetTableEntry(off_index);
         self.binding_info_dirty = true;
     }
     if (self.stub_helper_stubs_start_off == null) {
-        const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?];
-        const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-        const data = &data_segment.sections.items[self.data_section_index.?];
-        const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
-        const got = &data_const_segment.sections.items[self.data_got_section_index.?];
-        switch (self.base.options.target.cpu.arch) {
-            .x86_64 => {
-                const code_size = 15;
-                var code: [code_size]u8 = undefined;
-                // lea %r11, [rip + disp]
-                code[0] = 0x4c;
-                code[1] = 0x8d;
-                code[2] = 0x1d;
-                {
-                    const displacement = try math.cast(u32, data.addr - stub_helper.addr - 7);
-                    mem.writeIntLittle(u32, code[3..7], displacement);
-                }
-                // push %r11
-                code[7] = 0x41;
-                code[8] = 0x53;
-                // jmp [rip + disp]
-                code[9] = 0xff;
-                code[10] = 0x25;
-                {
-                    const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size);
-                    mem.writeIntLittle(u32, code[11..], displacement);
-                }
-                self.stub_helper_stubs_start_off = stub_helper.offset + code_size;
-                try self.base.file.?.pwriteAll(&code, stub_helper.offset);
-            },
-            .aarch64 => {
-                var code: [4 * @sizeOf(u32)]u8 = undefined;
-                {
-                    const displacement = try math.cast(i21, data.addr - stub_helper.addr);
-                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32());
-                }
-                mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.stp(
-                    .x16,
-                    .x17,
-                    aarch64.Register.sp,
-                    aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
-                ).toU32());
-                {
-                    const displacement = try math.divExact(u64, got.addr - stub_helper.addr - 2 * @sizeOf(u32), 4);
-                    const literal = try math.cast(u19, displacement);
-                    mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.ldr(.x16, .{
-                        .literal = literal,
-                    }).toU32());
-                }
-                mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.br(.x16).toU32());
-                self.stub_helper_stubs_start_off = stub_helper.offset + 4 * @sizeOf(u32);
-                try self.base.file.?.pwriteAll(&code, stub_helper.offset);
-            },
-            else => unreachable,
-        }
+        try self.writeStubHelperPreamble();
     }
 }
 
@@ -2161,7 +2094,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, 
             const big_block = self.text_block_free_list.items[i];
             // We now have a pointer to a live text block that has too much capacity.
             // Is it enough that we could fit this new text block?
-            const sym = self.local_symbols.items[big_block.local_sym_index];
+            const sym = self.locals.items[big_block.local_sym_index];
             const capacity = big_block.capacity(self.*);
             const ideal_capacity = padToIdeal(capacity);
             const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity;
@@ -2192,7 +2125,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, 
             }
             break :blk new_start_vaddr;
         } else if (self.last_text_block) |last| {
-            const last_symbol = self.local_symbols.items[last.local_sym_index];
+            const last_symbol = self.locals.items[last.local_sym_index];
             // TODO We should pad out the excess capacity with NOPs. For executables,
             // no padding seems to be OK, but it will probably not be for objects.
             const ideal_capacity = padToIdeal(last.size);
@@ -2290,12 +2223,12 @@ fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 {
 }
 
 pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 {
-    const index = @intCast(u32, self.extern_lazy_symbols.items().len);
+    const index = @intCast(u32, self.lazy_imports.items().len);
     const offset = try self.makeString(name);
     const sym_name = try self.base.allocator.dupe(u8, name);
     const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem.
-    try self.extern_lazy_symbols.putNoClobber(self.base.allocator, sym_name, .{
-        .inner = .{
+    try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{
+        .symbol = .{
             .n_strx = offset,
             .n_type = macho.N_UNDF | macho.N_EXT,
             .n_sect = 0,
@@ -2303,6 +2236,7 @@ pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 {
             .n_value = 0,
         },
         .dylib_ordinal = dylib_ordinal,
+        .index = index,
     });
     log.debug("adding new extern symbol '{s}' with dylib ordinal '{}'", .{ name, dylib_ordinal });
     return index;
@@ -2461,41 +2395,29 @@ fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, sta
 }
 
 fn writeOffsetTableEntry(self: *MachO, index: usize) !void {
-    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    const sect = &text_segment.sections.items[self.got_section_index.?];
+    const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const sect = &seg.sections.items[self.got_section_index.?];
     const off = sect.offset + @sizeOf(u64) * index;
-    const vmaddr = sect.addr + @sizeOf(u64) * index;
 
     if (self.offset_table_count_dirty) {
         // TODO relocate.
         self.offset_table_count_dirty = false;
     }
 
-    var code: [8]u8 = undefined;
-    switch (self.base.options.target.cpu.arch) {
-        .x86_64 => {
-            const pos_symbol_off = try math.cast(u31, vmaddr - self.offset_table.items[index] + 7);
-            const symbol_off = @bitCast(u32, @as(i32, pos_symbol_off) * -1);
-            // lea %rax, [rip - disp]
-            code[0] = 0x48;
-            code[1] = 0x8D;
-            code[2] = 0x5;
-            mem.writeIntLittle(u32, code[3..7], symbol_off);
-            // ret
-            code[7] = 0xC3;
-        },
-        .aarch64 => {
-            const pos_symbol_off = try math.cast(u20, vmaddr - self.offset_table.items[index]);
-            const symbol_off = @as(i21, pos_symbol_off) * -1;
-            // adr x0, #-disp
-            mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x0, symbol_off).toU32());
-            // ret x28
-            mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ret(.x28).toU32());
-        },
-        else => unreachable, // unsupported target architecture
-    }
-    log.debug("writing offset table entry 0x{x} at 0x{x}", .{ self.offset_table.items[index], off });
-    try self.base.file.?.pwriteAll(&code, off);
+    const got_entry = self.offset_table.items[index];
+    const sym = blk: {
+        switch (got_entry.kind) {
+            .Local => {
+                break :blk self.locals.items[got_entry.symbol];
+            },
+            .Extern => {
+                break :blk self.nonlazy_imports.items()[got_entry.symbol].value.symbol;
+            },
+        }
+    };
+    const sym_name = self.getString(sym.n_strx);
+    log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name });
+    try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off);
 }
 
 fn writeLazySymbolPointer(self: *MachO, index: u32) !void {
@@ -2518,6 +2440,133 @@ fn writeLazySymbolPointer(self: *MachO, index: u32) !void {
     try self.base.file.?.pwriteAll(&buf, off);
 }
 
+fn writeStubHelperPreamble(self: *MachO) !void {
+    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?];
+    const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const got = &data_const_segment.sections.items[self.got_section_index.?];
+    const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const data = &data_segment.sections.items[self.data_section_index.?];
+
+    switch (self.base.options.target.cpu.arch) {
+        .x86_64 => {
+            const code_size = 15;
+            var code: [code_size]u8 = undefined;
+            // lea %r11, [rip + disp]
+            code[0] = 0x4c;
+            code[1] = 0x8d;
+            code[2] = 0x1d;
+            {
+                const target_addr = data.addr;
+                const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7);
+                mem.writeIntLittle(u32, code[3..7], displacement);
+            }
+            // push %r11
+            code[7] = 0x41;
+            code[8] = 0x53;
+            // jmp [rip + disp]
+            code[9] = 0xff;
+            code[10] = 0x25;
+            {
+                const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size);
+                mem.writeIntLittle(u32, code[11..], displacement);
+            }
+            try self.base.file.?.pwriteAll(&code, stub_helper.offset);
+            self.stub_helper_stubs_start_off = stub_helper.offset + code_size;
+        },
+        .aarch64 => {
+            var code: [6 * @sizeOf(u32)]u8 = undefined;
+
+            data_blk_outer: {
+                const this_addr = stub_helper.addr;
+                const target_addr = data.addr;
+                data_blk: {
+                    const displacement = math.cast(i21, target_addr - this_addr) catch |_| break :data_blk;
+                    // adr x17, disp
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32());
+                    // nop
+                    mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32());
+                    break :data_blk_outer;
+                }
+                data_blk: {
+                    const new_this_addr = this_addr + @sizeOf(u32);
+                    const displacement = math.cast(i21, target_addr - new_this_addr) catch |_| break :data_blk;
+                    // nop
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32());
+                    // adr x17, disp
+                    mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32());
+                    break :data_blk_outer;
+                }
+                // Jump is too big, replace adr with adrp and add.
+                const this_page = @intCast(i32, this_addr >> 12);
+                const target_page = @intCast(i32, target_addr >> 12);
+                const pages = @intCast(i21, target_page - this_page);
+                // adrp x17, pages
+                mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32());
+                const narrowed = @truncate(u12, target_addr);
+                mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32());
+            }
+
+            // stp x16, x17, [sp, #-16]!
+            mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.stp(
+                .x16,
+                .x17,
+                aarch64.Register.sp,
+                aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
+            ).toU32());
+
+            binder_blk_outer: {
+                const this_addr = stub_helper.addr + 3 * @sizeOf(u32);
+                const target_addr = got.addr;
+                binder_blk: {
+                    const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :binder_blk;
+                    const literal = math.cast(u18, displacement) catch |_| break :binder_blk;
+                    // ldr x16, label
+                    mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{
+                        .literal = literal,
+                    }).toU32());
+                    // nop
+                    mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32());
+                    break :binder_blk_outer;
+                }
+                binder_blk: {
+                    const new_this_addr = this_addr + @sizeOf(u32);
+                    const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :binder_blk;
+                    const literal = math.cast(u18, displacement) catch |_| break :binder_blk;
+                    // nop
+                    mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32());
+                    // ldr x16, label
+                    mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{
+                        .literal = literal,
+                    }).toU32());
+                    break :binder_blk_outer;
+                }
+                // Jump is too big, replace ldr with adrp and ldr(register).
+                const this_page = @intCast(i32, this_addr >> 12);
+                const target_page = @intCast(i32, target_addr >> 12);
+                const pages = @intCast(i21, target_page - this_page);
+                // adrp x16, pages
+                mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32());
+                const narrowed = @truncate(u12, target_addr);
+                const offset = try math.divExact(u12, narrowed, 8);
+                // ldr x16, x16, offset
+                mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{
+                    .register = .{
+                        .rn = .x16,
+                        .offset = aarch64.Instruction.LoadStoreOffset.imm(offset),
+                    },
+                }).toU32());
+            }
+
+            // br x16
+            mem.writeIntLittle(u32, code[20..24], aarch64.Instruction.br(.x16).toU32());
+            try self.base.file.?.pwriteAll(&code, stub_helper.offset);
+            self.stub_helper_stubs_start_off = stub_helper.offset + code.len;
+        },
+        else => unreachable,
+    }
+}
+
 fn writeStub(self: *MachO, index: u32) !void {
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const stubs = text_segment.sections.items[self.stubs_section_index.?];
@@ -2527,9 +2576,12 @@ fn writeStub(self: *MachO, index: u32) !void {
     const stub_off = stubs.offset + index * stubs.reserved2;
     const stub_addr = stubs.addr + index * stubs.reserved2;
     const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64);
+
     log.debug("writing stub at 0x{x}", .{stub_off});
+
     var code = try self.base.allocator.alloc(u8, stubs.reserved2);
     defer self.base.allocator.free(code);
+
     switch (self.base.options.target.cpu.arch) {
         .x86_64 => {
             assert(la_ptr_addr >= stub_addr + stubs.reserved2);
@@ -2541,12 +2593,50 @@ fn writeStub(self: *MachO, index: u32) !void {
         },
         .aarch64 => {
             assert(la_ptr_addr >= stub_addr);
-            const displacement = try math.divExact(u64, la_ptr_addr - stub_addr, 4);
-            const literal = try math.cast(u19, displacement);
-            mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{
-                .literal = literal,
-            }).toU32());
-            mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.br(.x16).toU32());
+            outer: {
+                const this_addr = stub_addr;
+                const target_addr = la_ptr_addr;
+                inner: {
+                    const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :inner;
+                    const literal = math.cast(u18, displacement) catch |_| break :inner;
+                    // ldr x16, literal
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{
+                        .literal = literal,
+                    }).toU32());
+                    // nop
+                    mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32());
+                    break :outer;
+                }
+                inner: {
+                    const new_this_addr = this_addr + @sizeOf(u32);
+                    const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :inner;
+                    const literal = math.cast(u18, displacement) catch |_| break :inner;
+                    // nop
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32());
+                    // ldr x16, literal
+                    mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{
+                        .literal = literal,
+                    }).toU32());
+                    break :outer;
+                }
+                // Use adrp followed by ldr(register).
+                const this_page = @intCast(i32, this_addr >> 12);
+                const target_page = @intCast(i32, target_addr >> 12);
+                const pages = @intCast(i21, target_page - this_page);
+                // adrp x16, pages
+                mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32());
+                const narrowed = @truncate(u12, target_addr);
+                const offset = try math.divExact(u12, narrowed, 8);
+                // ldr x16, x16, offset
+                mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{
+                    .register = .{
+                        .rn = .x16,
+                        .offset = aarch64.Instruction.LoadStoreOffset.imm(offset),
+                    },
+                }).toU32());
+            }
+            // br x16
+            mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32());
         },
         else => unreachable,
     }
@@ -2563,8 +2653,10 @@ fn writeStubInStubHelper(self: *MachO, index: u32) !void {
         else => unreachable,
     };
     const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size;
+
     var code = try self.base.allocator.alloc(u8, stub_size);
     defer self.base.allocator.free(code);
+
     switch (self.base.options.target.cpu.arch) {
         .x86_64 => {
             const displacement = try math.cast(
@@ -2579,12 +2671,19 @@ fn writeStubInStubHelper(self: *MachO, index: u32) !void {
             mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement));
         },
         .aarch64 => {
-            const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4);
+            const literal = blk: {
+                const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4);
+                break :blk try math.cast(u18, div_res);
+            };
+            // ldr w16, literal
             mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{
-                .literal = @divExact(stub_size - @sizeOf(u32), 4),
+                .literal = literal,
             }).toU32());
+            const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4);
+            // b disp
             mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32());
-            mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
+            // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
+            mem.writeIntLittle(u32, code[8..12], 0x0);
         },
         else => unreachable,
     }
@@ -2593,9 +2692,9 @@ fn writeStubInStubHelper(self: *MachO, index: u32) !void {
 
 fn relocateSymbolTable(self: *MachO) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    const nlocals = self.local_symbols.items.len;
-    const nglobals = self.global_symbols.items.len;
-    const nundefs = self.extern_lazy_symbols.items().len + self.extern_nonlazy_symbols.items().len;
+    const nlocals = self.locals.items.len;
+    const nglobals = self.globals.items.len;
+    const nundefs = self.lazy_imports.items().len + self.nonlazy_imports.items().len;
     const nsyms = nlocals + nglobals + nundefs;
 
     if (symtab.nsyms < nsyms) {
@@ -2630,7 +2729,7 @@ fn writeLocalSymbol(self: *MachO, index: usize) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const off = symtab.symoff + @sizeOf(macho.nlist_64) * index;
     log.debug("writing local symbol {} at 0x{x}", .{ index, off });
-    try self.base.file.?.pwriteAll(mem.asBytes(&self.local_symbols.items[index]), off);
+    try self.base.file.?.pwriteAll(mem.asBytes(&self.locals.items[index]), off);
 }
 
 fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
@@ -2639,18 +2738,18 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
 
     try self.relocateSymbolTable();
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    const nlocals = self.local_symbols.items.len;
-    const nglobals = self.global_symbols.items.len;
+    const nlocals = self.locals.items.len;
+    const nglobals = self.globals.items.len;
 
-    const nundefs = self.extern_lazy_symbols.items().len + self.extern_nonlazy_symbols.items().len;
+    const nundefs = self.lazy_imports.items().len + self.nonlazy_imports.items().len;
     var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator);
     defer undefs.deinit();
     try undefs.ensureCapacity(nundefs);
-    for (self.extern_lazy_symbols.items()) |entry| {
-        undefs.appendAssumeCapacity(entry.value.inner);
+    for (self.lazy_imports.items()) |entry| {
+        undefs.appendAssumeCapacity(entry.value.symbol);
     }
-    for (self.extern_nonlazy_symbols.items()) |entry| {
-        undefs.appendAssumeCapacity(entry.value.inner);
+    for (self.nonlazy_imports.items()) |entry| {
+        undefs.appendAssumeCapacity(entry.value.symbol);
     }
 
     const locals_off = symtab.symoff;
@@ -2659,7 +2758,7 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
     const globals_off = locals_off + locals_size;
     const globals_size = nglobals * @sizeOf(macho.nlist_64);
     log.debug("writing global symbols from 0x{x} to 0x{x}", .{ globals_off, globals_size + globals_off });
-    try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.global_symbols.items), globals_off);
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), globals_off);
 
     const undefs_off = globals_off + globals_size;
     const undefs_size = nundefs * @sizeOf(macho.nlist_64);
@@ -2685,15 +2784,15 @@ fn writeIndirectSymbolTable(self: *MachO) !void {
     const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const stubs = &text_segment.sections.items[self.stubs_section_index.?];
     const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
-    const got = &data_const_seg.sections.items[self.data_got_section_index.?];
+    const got = &data_const_seg.sections.items[self.got_section_index.?];
     const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
     const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?];
     const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
 
-    const lazy = self.extern_lazy_symbols.items();
-    const nonlazy = self.extern_nonlazy_symbols.items();
+    const lazy = self.lazy_imports.items();
+    const got_entries = self.offset_table.items;
     const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff);
-    const nindirectsyms = @intCast(u32, lazy.len * 2 + nonlazy.len);
+    const nindirectsyms = @intCast(u32, lazy.len * 2 + got_entries.len);
     const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32));
 
     if (needed_size > allocated_size) {
@@ -2712,20 +2811,27 @@ fn writeIndirectSymbolTable(self: *MachO) !void {
     var writer = stream.writer();
 
     stubs.reserved1 = 0;
-    for (self.extern_lazy_symbols.items()) |_, i| {
+    for (lazy) |_, i| {
         const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
         try writer.writeIntLittle(u32, symtab_idx);
     }
 
     const base_id = @intCast(u32, lazy.len);
     got.reserved1 = base_id;
-    for (self.extern_nonlazy_symbols.items()) |_, i| {
-        const symtab_idx = @intCast(u32, dysymtab.iundefsym + i + base_id);
-        try writer.writeIntLittle(u32, symtab_idx);
+    for (got_entries) |entry| {
+        switch (entry.kind) {
+            .Local => {
+                try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL);
+            },
+            .Extern => {
+                const symtab_idx = @intCast(u32, dysymtab.iundefsym + entry.index + base_id);
+                try writer.writeIntLittle(u32, symtab_idx);
+            },
+        }
     }
 
-    la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, nonlazy.len);
-    for (self.extern_lazy_symbols.items()) |_, i| {
+    la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, got_entries.len);
+    for (lazy) |_, i| {
         const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
         try writer.writeIntLittle(u32, symtab_idx);
     }
@@ -2791,7 +2897,7 @@ fn writeCodeSignature(self: *MachO) !void {
 
 fn writeExportTrie(self: *MachO) !void {
     if (!self.export_info_dirty) return;
-    if (self.global_symbols.items.len == 0) return;
+    if (self.globals.items.len == 0) return;
 
     const tracy = trace(@src());
     defer tracy.end();
@@ -2800,7 +2906,7 @@ fn writeExportTrie(self: *MachO) !void {
     defer trie.deinit();
 
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    for (self.global_symbols.items) |symbol| {
+    for (self.globals.items) |symbol| {
         // TODO figure out if we should put all global symbols into the export trie
         const name = self.getString(symbol.n_strx);
         assert(symbol.n_value >= text_segment.inner.vmaddr);
@@ -2842,14 +2948,48 @@ fn writeRebaseInfoTable(self: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const size = try rebaseInfoSize(self.extern_lazy_symbols.items());
+    var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
+    defer pointers.deinit();
+
+    if (self.got_section_index) |idx| {
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = self.data_const_segment_cmd_index.?;
+
+        for (self.offset_table.items) |entry| {
+            if (entry.kind == .Extern) continue;
+            try pointers.append(.{
+                .offset = base_offset + entry.index * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    if (self.la_symbol_ptr_section_index) |idx| {
+        try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.items().len);
+        const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = self.data_segment_cmd_index.?;
+
+        for (self.lazy_imports.items()) |entry| {
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + entry.value.index * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp);
+
+    const size = try bind.rebaseInfoSize(pointers.items);
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try writeRebaseInfo(self.extern_lazy_symbols.items(), stream.writer());
+    try bind.writeRebaseInfo(pointers.items, stream.writer());
 
-    const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off);
     const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64));
@@ -2874,14 +3014,34 @@ fn writeBindingInfoTable(self: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const size = try bindInfoSize(self.extern_nonlazy_symbols.items());
+    var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
+    defer pointers.deinit();
+
+    if (self.got_section_index) |idx| {
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+
+        for (self.offset_table.items) |entry| {
+            if (entry.kind == .Local) continue;
+            const import = self.nonlazy_imports.items()[entry.symbol];
+            try pointers.append(.{
+                .offset = base_offset + entry.index * @sizeOf(u64),
+                .segment_id = segment_id,
+                .dylib_ordinal = import.value.dylib_ordinal,
+                .name = import.key,
+            });
+        }
+    }
+
+    const size = try bind.bindInfoSize(pointers.items);
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try writeBindInfo(self.extern_nonlazy_symbols.items(), stream.writer());
+    try bind.writeBindInfo(pointers.items, stream.writer());
 
-    const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     const allocated_size = self.allocatedSizeLinkedit(dyld_info.bind_off);
     const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64));
@@ -2903,14 +3063,36 @@ fn writeBindingInfoTable(self: *MachO) !void {
 fn writeLazyBindingInfoTable(self: *MachO) !void {
     if (!self.lazy_binding_info_dirty) return;
 
-    const size = try lazyBindInfoSize(self.extern_lazy_symbols.items());
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
+    defer pointers.deinit();
+
+    if (self.la_symbol_ptr_section_index) |idx| {
+        try pointers.ensureCapacity(self.lazy_imports.items().len);
+        const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_segment_cmd_index.?);
+
+        for (self.lazy_imports.items()) |entry| {
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + entry.value.index * @sizeOf(u64),
+                .segment_id = segment_id,
+                .dylib_ordinal = entry.value.dylib_ordinal,
+                .name = entry.key,
+            });
+        }
+    }
+
+    const size = try bind.lazyBindInfoSize(pointers.items);
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try writeLazyBindInfo(self.extern_lazy_symbols.items(), stream.writer());
+    try bind.writeLazyBindInfo(pointers.items, stream.writer());
 
-    const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     const allocated_size = self.allocatedSizeLinkedit(dyld_info.lazy_bind_off);
     const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64));
@@ -2931,7 +3113,7 @@ fn writeLazyBindingInfoTable(self: *MachO) !void {
 }
 
 fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
-    if (self.extern_lazy_symbols.items().len == 0) return;
+    if (self.lazy_imports.items().len == 0) return;
 
     var stream = std.io.fixedBufferStream(buffer);
     var reader = stream.reader();
@@ -2977,7 +3159,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
             else => {},
         }
     }
-    assert(self.extern_lazy_symbols.items().len <= offsets.items.len);
+    assert(self.lazy_imports.items().len <= offsets.items.len);
 
     const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
         .x86_64 => 10,
@@ -2990,7 +3172,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
         else => unreachable,
     };
     var buf: [@sizeOf(u32)]u8 = undefined;
-    for (self.extern_lazy_symbols.items()) |_, i| {
+    for (self.lazy_imports.items()) |_, i| {
         const placeholder_off = self.stub_helper_stubs_start_off.? + i * stub_size + off;
         mem.writeIntLittle(u32, &buf, offsets.items[i]);
         try self.base.file.?.pwriteAll(&buf, placeholder_off);
@@ -3104,177 +3286,6 @@ fn writeHeader(self: *MachO) !void {
     self.header_dirty = false;
 }
 
-/// Parse MachO contents from existing binary file.
-fn parseFromFile(self: *MachO, file: fs.File) !void {
-    self.base.file = file;
-    var reader = file.reader();
-    const header = try reader.readStruct(macho.mach_header_64);
-    try self.load_commands.ensureCapacity(self.base.allocator, header.ncmds);
-    var i: u16 = 0;
-    while (i < header.ncmds) : (i += 1) {
-        const cmd = try LoadCommand.read(self.base.allocator, reader);
-        switch (cmd.cmd()) {
-            macho.LC_SEGMENT_64 => {
-                const x = cmd.Segment;
-                if (parseAndCmpName(&x.inner.segname, "__PAGEZERO")) {
-                    self.pagezero_segment_cmd_index = i;
-                } else if (parseAndCmpName(&x.inner.segname, "__LINKEDIT")) {
-                    self.linkedit_segment_cmd_index = i;
-                } else if (parseAndCmpName(&x.inner.segname, "__TEXT")) {
-                    self.text_segment_cmd_index = i;
-                    for (x.sections.items) |sect, j| {
-                        if (parseAndCmpName(&sect.sectname, "__text")) {
-                            self.text_section_index = @intCast(u16, j);
-                        }
-                    }
-                } else if (parseAndCmpName(&x.inner.segname, "__DATA")) {
-                    self.data_segment_cmd_index = i;
-                } else if (parseAndCmpName(&x.inner.segname, "__DATA_CONST")) {
-                    self.data_const_segment_cmd_index = i;
-                }
-            },
-            macho.LC_DYLD_INFO_ONLY => {
-                self.dyld_info_cmd_index = i;
-            },
-            macho.LC_SYMTAB => {
-                self.symtab_cmd_index = i;
-            },
-            macho.LC_DYSYMTAB => {
-                self.dysymtab_cmd_index = i;
-            },
-            macho.LC_LOAD_DYLINKER => {
-                self.dylinker_cmd_index = i;
-            },
-            macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => {
-                self.version_min_cmd_index = i;
-            },
-            macho.LC_SOURCE_VERSION => {
-                self.source_version_cmd_index = i;
-            },
-            macho.LC_UUID => {
-                self.uuid_cmd_index = i;
-            },
-            macho.LC_MAIN => {
-                self.main_cmd_index = i;
-            },
-            macho.LC_LOAD_DYLIB => {
-                const x = cmd.Dylib;
-                if (parseAndCmpName(x.data, mem.spanZ(LIB_SYSTEM_PATH))) {
-                    self.libsystem_cmd_index = i;
-                }
-            },
-            macho.LC_FUNCTION_STARTS => {
-                self.function_starts_cmd_index = i;
-            },
-            macho.LC_DATA_IN_CODE => {
-                self.data_in_code_cmd_index = i;
-            },
-            macho.LC_CODE_SIGNATURE => {
-                self.code_signature_cmd_index = i;
-            },
-            else => {
-                log.warn("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
-            },
-        }
-        self.load_commands.appendAssumeCapacity(cmd);
-    }
-    self.header = header;
-}
-
-fn parseAndCmpName(name: []const u8, needle: []const u8) bool {
-    const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
-    return mem.eql(u8, name[0..len], needle);
-}
-
-fn parseSymbolTable(self: *MachO) !void {
-    const symtab = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
-
-    var buffer = try self.base.allocator.alloc(macho.nlist_64, symtab.nsyms);
-    defer self.base.allocator.free(buffer);
-    const nread = try self.base.file.?.preadAll(@ptrCast([*]u8, buffer)[0 .. symtab.nsyms * @sizeOf(macho.nlist_64)], symtab.symoff);
-    assert(@divExact(nread, @sizeOf(macho.nlist_64)) == buffer.len);
-
-    try self.local_symbols.ensureCapacity(self.base.allocator, dysymtab.nlocalsym);
-    try self.global_symbols.ensureCapacity(self.base.allocator, dysymtab.nextdefsym);
-    try self.undef_symbols.ensureCapacity(self.base.allocator, dysymtab.nundefsym);
-
-    self.local_symbols.appendSliceAssumeCapacity(buffer[dysymtab.ilocalsym .. dysymtab.ilocalsym + dysymtab.nlocalsym]);
-    self.global_symbols.appendSliceAssumeCapacity(buffer[dysymtab.iextdefsym .. dysymtab.iextdefsym + dysymtab.nextdefsym]);
-    self.undef_symbols.appendSliceAssumeCapacity(buffer[dysymtab.iundefsym .. dysymtab.iundefsym + dysymtab.nundefsym]);
-}
-
-fn parseStringTable(self: *MachO) !void {
-    const symtab = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-
-    var buffer = try self.base.allocator.alloc(u8, symtab.strsize);
-    defer self.base.allocator.free(buffer);
-    const nread = try self.base.file.?.preadAll(buffer, symtab.stroff);
-    assert(nread == buffer.len);
-
-    try self.string_table.ensureCapacity(self.base.allocator, symtab.strsize);
-    self.string_table.appendSliceAssumeCapacity(buffer);
-}
-
-fn fixupBindInfo(self: *MachO, dylib_ordinal: u32) !void {
-    const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
-    var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size);
-    defer self.base.allocator.free(buffer);
-    const nread = try self.base.file.?.preadAll(buffer, dyld_info.bind_off);
-    assert(nread == buffer.len);
-    try self.fixupInfoCommon(buffer, dylib_ordinal);
-    try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
-}
-
-fn fixupLazyBindInfo(self: *MachO, dylib_ordinal: u32) !void {
-    const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
-    var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size);
-    defer self.base.allocator.free(buffer);
-    const nread = try self.base.file.?.preadAll(buffer, dyld_info.lazy_bind_off);
-    assert(nread == buffer.len);
-    try self.fixupInfoCommon(buffer, dylib_ordinal);
-    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
-}
-
-fn fixupInfoCommon(self: *MachO, buffer: []u8, dylib_ordinal: u32) !void {
-    var stream = std.io.fixedBufferStream(buffer);
-    var reader = stream.reader();
-
-    while (true) {
-        const inst = reader.readByte() catch |err| switch (err) {
-            error.EndOfStream => break,
-            else => return err,
-        };
-        const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
-        const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
-
-        switch (opcode) {
-            macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
-                var next = try reader.readByte();
-                while (next != @as(u8, 0)) {
-                    next = try reader.readByte();
-                }
-            },
-            macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
-                _ = try std.leb.readULEB128(u64, reader);
-            },
-            macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
-                // Perform the fixup.
-                try stream.seekBy(-1);
-                var writer = stream.writer();
-                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, dylib_ordinal));
-            },
-            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
-                _ = try std.leb.readULEB128(u64, reader);
-            },
-            macho.BIND_OPCODE_SET_ADDEND_SLEB => {
-                _ = try std.leb.readILEB128(i64, reader);
-            },
-            else => {},
-        }
-    }
-}
-
 pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
     // TODO https://github.com/ziglang/zig/issues/1284
     return std.math.add(@TypeOf(actual_size), actual_size, actual_size / ideal_factor) catch
diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig
@@ -0,0 +1,278 @@
+const Archive = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const log = std.log.scoped(.archive);
+const macho = std.macho;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Object = @import("Object.zig");
+const parseName = @import("Zld.zig").parseName;
+
+usingnamespace @import("commands.zig");
+
+allocator: *Allocator,
+file: fs.File,
+header: ar_hdr,
+name: []u8,
+
+objects: std.ArrayListUnmanaged(Object) = .{},
+
+/// Parsed table of contents.
+/// Each symbol name points to a list of all definition
+/// sites within the current static archive.
+toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},
+
+// Archive files start with the ARMAG identifying string, followed by a
+// `struct ar_hdr` and as many bytes of member file data as its `ar_size`
+// member indicates, for each member file.
+/// String that begins an archive file.
+const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
+/// Size of that string.
+const SARMAG: u4 = 8;
+
+/// String in ar_fmag at the end of each header.
+const ARFMAG: *const [2:0]u8 = "`\n";
+
+/// Fixed-size, ASCII-encoded header that precedes every archive member.
+/// All fields are space-padded on the right.
+const ar_hdr = extern struct {
+    /// Member file name, sometimes / terminated.
+    ar_name: [16]u8,
+
+    /// File date, decimal seconds since Epoch.
+    ar_date: [12]u8,
+
+    /// User ID, in ASCII format.
+    ar_uid: [6]u8,
+
+    /// Group ID, in ASCII format.
+    ar_gid: [6]u8,
+
+    /// File mode, in ASCII octal.
+    ar_mode: [8]u8,
+
+    /// File size, in ASCII decimal.
+    ar_size: [10]u8,
+
+    /// Always contains ARFMAG.
+    ar_fmag: [2]u8,
+
+    /// Result of decoding `ar_name`: either the name itself, or the length
+    /// of a name that is stored right after the header.
+    const NameOrLength = union(enum) {
+        Name: []const u8,
+        Length: u64,
+    };
+
+    /// Decodes the `ar_name` field.
+    /// Returns `error.MalformedArchive` when the field contains no `/`.
+    /// The returned `Name` slice aliases the `ar_name` bytes of `self`.
+    pub fn nameOrLength(self: ar_hdr) !NameOrLength {
+        const trimmed = getValue(&self.ar_name);
+        const slash_pos = mem.indexOfScalar(u8, trimmed, '/') orelse return error.MalformedArchive;
+
+        // A slash in the very last position means the name is stored directly.
+        if (slash_pos + 1 == trimmed.len) return NameOrLength{ .Name = trimmed };
+
+        // Otherwise the name follows the header and the decimal digits after
+        // the slash encode its length.
+        const digits = trimmed[slash_pos + 1 ..];
+        return NameOrLength{ .Length = try std.fmt.parseInt(u64, digits, 10) };
+    }
+
+    /// Parses the decimal `ar_size` field.
+    pub fn size(self: ar_hdr) !u64 {
+        return std.fmt.parseInt(u64, getValue(&self.ar_size), 10);
+    }
+
+    /// Strips the trailing space padding from a raw header field.
+    fn getValue(raw: []const u8) []const u8 {
+        return mem.trimRight(u8, raw, " ");
+    }
+};
+
+/// Frees the archive name, every parsed object, and the table of contents
+/// (keys and per-symbol lists), then closes the underlying file.
+pub fn deinit(self: *Archive) void {
+    const gpa = self.allocator;
+
+    gpa.free(self.name);
+
+    for (self.objects.items) |*obj| obj.deinit();
+    self.objects.deinit(gpa);
+
+    // Each TOC key is an owned copy of the symbol name.
+    for (self.toc.items()) |*toc_entry| {
+        gpa.free(toc_entry.key);
+        toc_entry.value.deinit(gpa);
+    }
+    self.toc.deinit(gpa);
+
+    self.file.close();
+}
+
+/// Parses a static archive from `file` and every object it references.
+///
+/// Returns `error.NotArchive` (after rewinding the file cursor to 0) when the
+/// file does not start with the archive magic, and `error.MalformedArchive`
+/// when the first member header is not terminated by ARFMAG.
+///
+/// Caller owns the returned Archive instance and is responsible for calling
+/// `deinit` to free allocated memory. Note that `deinit` also closes `file`.
+/// On failure, all memory allocated here is released; this function does not
+/// close `file` itself in that case.
+pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, ar_name: []const u8, file: fs.File) !Archive {
+    var reader = file.reader();
+    var magic = try readMagic(allocator, reader);
+    defer allocator.free(magic);
+
+    if (!mem.eql(u8, magic, ARMAG)) {
+        // Reset file cursor.
+        try file.seekTo(0);
+        return error.NotArchive;
+    }
+
+    const header = try reader.readStruct(ar_hdr);
+
+    if (!mem.eql(u8, &header.ar_fmag, ARFMAG))
+        return error.MalformedArchive;
+
+    var embedded_name = try getName(allocator, header, reader);
+    defer allocator.free(embedded_name);
+    log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, ar_name });
+
+    var self = Archive{
+        .allocator = allocator,
+        .file = file,
+        .header = header,
+        .name = try allocator.dupe(u8, ar_name),
+    };
+    // If anything below fails, no Archive is handed back, so the caller has
+    // nothing to call `deinit` on. Release the partially built state here.
+    // This mirrors `deinit` except that the file handle is left open.
+    errdefer {
+        for (self.objects.items) |*object| {
+            object.deinit();
+        }
+        self.objects.deinit(allocator);
+        for (self.toc.items()) |*entry| {
+            allocator.free(entry.key);
+            entry.value.deinit(allocator);
+        }
+        self.toc.deinit(allocator);
+        allocator.free(self.name);
+    }
+
+    var object_offsets = try self.readTableOfContents(reader);
+    defer self.allocator.free(object_offsets);
+
+    // Index 0 is skipped: `readTableOfContents` always puts a placeholder
+    // offset of 0 in the first slot.
+    var i: usize = 1;
+    while (i < object_offsets.len) : (i += 1) {
+        const offset = object_offsets[i];
+        try reader.context.seekTo(offset);
+        try self.readObject(arch, ar_name, reader);
+    }
+
+    return self;
+}
+
+/// Reads the archive's symbol table and string table from `reader` and
+/// populates `self.toc`, mapping each symbol name to the list of object
+/// offsets recorded for it.
+///
+/// The expected layout is a little-endian u32 symbol table size, the symbol
+/// table bytes, a little-endian u32 string table size, and the string table
+/// bytes. The symbol table is consumed as pairs of u32 values: an index into
+/// the string table followed by an object offset.
+///
+/// Returns the sequence of object offsets in the order encountered, with
+/// consecutive duplicates collapsed and a placeholder `0` in the first slot.
+/// Caller owns the returned slice. Returns `error.MalformedArchive` when a
+/// string table index is out of range or the name is not NUL-terminated.
+fn readTableOfContents(self: *Archive, reader: anytype) ![]u32 {
+    const symtab_size = try reader.readIntLittle(u32);
+    var symtab = try self.allocator.alloc(u8, symtab_size);
+    defer self.allocator.free(symtab);
+    try reader.readNoEof(symtab);
+
+    const strtab_size = try reader.readIntLittle(u32);
+    var strtab = try self.allocator.alloc(u8, strtab_size);
+    defer self.allocator.free(strtab);
+    try reader.readNoEof(strtab);
+
+    var symtab_stream = std.io.fixedBufferStream(symtab);
+    var symtab_reader = symtab_stream.reader();
+
+    var object_offsets = std.ArrayList(u32).init(self.allocator);
+    // Do not leak the partially built list if any `try` below fails.
+    errdefer object_offsets.deinit();
+    try object_offsets.append(0);
+    var last: usize = 0;
+
+    while (true) {
+        const n_strx = symtab_reader.readIntLittle(u32) catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => |e| return e,
+        };
+        const object_offset = try symtab_reader.readIntLittle(u32);
+
+        // The index comes straight from the file, so validate it and look for
+        // the terminator only within the string table bounds.
+        if (n_strx >= strtab.len) return error.MalformedArchive;
+        const rest = strtab[n_strx..];
+        const name_len = mem.indexOfScalar(u8, rest, 0) orelse return error.MalformedArchive;
+        const sym_name = rest[0..name_len];
+
+        const owned_name = try self.allocator.dupe(u8, sym_name);
+        const res = self.toc.getOrPut(self.allocator, owned_name) catch |err| {
+            self.allocator.free(owned_name);
+            return err;
+        };
+
+        if (res.found_existing) {
+            // The map keeps its original key; our copy is not needed.
+            self.allocator.free(owned_name);
+        } else {
+            res.entry.value = .{};
+        }
+
+        try res.entry.value.append(self.allocator, object_offset);
+
+        // TODO This will go once we properly use archive's TOC to pick
+        // an object which defines a missing symbol rather than pasting in
+        // all of the objects always.
+        // Here, we assume that symbols are NOT sorted in any way, and
+        // they point to objects in sequence.
+        if (object_offsets.items[last] != object_offset) {
+            try object_offsets.append(object_offset);
+            last += 1;
+        }
+    }
+
+    return object_offsets.toOwnedSlice();
+}
+
+/// Parses the archive member at the reader's current position as a Mach-O
+/// object and appends it to `self.objects`.
+///
+/// `ar_name` is the archive's path; it is reopened so that the resulting
+/// Object has its own file handle. Fails with `error.MalformedArchive` on a
+/// bad member header, and with `error.UnsupportedCpuArchitecture` /
+/// `error.MismatchedCpuArchitecture` when the object does not match `arch`.
+/// On failure everything allocated or opened here is released again.
+fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, reader: anytype) !void {
+    const object_header = try reader.readStruct(ar_hdr);
+
+    if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG))
+        return error.MalformedArchive;
+
+    // Set once `object` has taken over the resources below; from then on
+    // `object.deinit()` is the only cleanup path.
+    var owned_by_object = false;
+
+    var object_name = try getName(self.allocator, object_header, reader);
+    errdefer if (!owned_by_object) self.allocator.free(object_name);
+    log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name });
+
+    // Position of the Mach-O header inside the archive; all file offsets stored
+    // in the object's load commands are shifted by this amount.
+    const offset = @intCast(u32, try reader.context.getPos());
+    const header = try reader.readStruct(macho.mach_header_64);
+
+    const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
+        macho.CPU_TYPE_ARM64 => .aarch64,
+        macho.CPU_TYPE_X86_64 => .x86_64,
+        else => |value| {
+            log.err("unsupported cpu architecture 0x{x}", .{value});
+            return error.UnsupportedCpuArchitecture;
+        },
+    };
+    if (this_arch != arch) {
+        log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
+        return error.MismatchedCpuArchitecture;
+    }
+
+    const owned_ar_name = try mem.dupe(self.allocator, u8, ar_name);
+    errdefer if (!owned_by_object) self.allocator.free(owned_ar_name);
+
+    // TODO Implement std.fs.File.clone() or similar.
+    var new_file = try fs.cwd().openFile(ar_name, .{});
+    var object = Object{
+        .allocator = self.allocator,
+        .name = object_name,
+        .ar_name = owned_ar_name,
+        .file = new_file,
+        .header = header,
+    };
+    owned_by_object = true;
+    errdefer object.deinit();
+
+    try object.readLoadCommands(reader, .{ .offset = offset });
+
+    if (object.symtab_cmd_index != null) {
+        try object.readSymtab();
+        try object.readStrtab();
+    }
+
+    if (object.data_in_code_cmd_index != null) try object.readDataInCode();
+
+    log.debug("\n\n", .{});
+    log.debug("{s} defines symbols", .{object.name});
+    for (object.symtab.items) |sym| {
+        const symname = object.getString(sym.n_strx);
+        log.debug("'{s}': {}", .{ symname, sym });
+    }
+
+    try self.objects.append(self.allocator, object);
+}
+
+/// Reads the first `SARMAG` bytes from `reader`.
+/// Caller owns the returned slice. Fails with `error.EndOfStream` when the
+/// stream is shorter than `SARMAG` bytes.
+fn readMagic(allocator: *Allocator, reader: anytype) ![]u8 {
+    const magic = try allocator.alloc(u8, SARMAG);
+    // Release the buffer if the stream turns out to be too short.
+    errdefer allocator.free(magic);
+    try reader.readNoEof(magic);
+    return magic;
+}
+
+/// Resolves the name of an archive member.
+///
+/// When `header.nameOrLength()` yields `.Name`, that name is duplicated. When
+/// it yields `.Length`, that many bytes are read from `reader` and the name is
+/// everything up to the first NUL byte (or all of it if there is none).
+/// Caller owns the returned slice.
+fn getName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
+    return switch (try header.nameOrLength()) {
+        .Name => |inline_name| try allocator.dupe(u8, inline_name),
+        .Length => |len| blk: {
+            const raw = try allocator.alloc(u8, len);
+            defer allocator.free(raw);
+            try reader.readNoEof(raw);
+            const end = mem.indexOfScalar(u8, raw, @as(u8, 0)) orelse raw.len;
+            break :blk try allocator.dupe(u8, raw[0..end]);
+        },
+    };
+}
diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig
@@ -839,8 +839,8 @@ fn findFreeSpaceLinkedit(self: *DebugSymbols, object_size: u64, min_alignment: u
 
 fn relocateSymbolTable(self: *DebugSymbols) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    const nlocals = self.base.local_symbols.items.len;
-    const nglobals = self.base.global_symbols.items.len;
+    const nlocals = self.base.locals.items.len;
+    const nglobals = self.base.globals.items.len;
     const nsyms = nlocals + nglobals;
 
     if (symtab.nsyms < nsyms) {
@@ -875,7 +875,7 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const off = symtab.symoff + @sizeOf(macho.nlist_64) * index;
     log.debug("writing dSym local symbol {} at 0x{x}", .{ index, off });
-    try self.file.pwriteAll(mem.asBytes(&self.base.local_symbols.items[index]), off);
+    try self.file.pwriteAll(mem.asBytes(&self.base.locals.items[index]), off);
 }
 
 fn writeStringTable(self: *DebugSymbols) !void {
@@ -1057,7 +1057,7 @@ pub fn commitDeclDebugInfo(
     var dbg_info_buffer = &debug_buffers.dbg_info_buffer;
     var dbg_info_type_relocs = &debug_buffers.dbg_info_type_relocs;
 
-    const symbol = self.base.local_symbols.items[decl.link.macho.local_sym_index];
+    const symbol = self.base.locals.items[decl.link.macho.local_sym_index];
     const text_block = &decl.link.macho;
     // If the Decl is a function, we need to update the __debug_line program.
     const typed_value = decl.typed_value.most_recent.typed_value;
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
@@ -0,0 +1,229 @@
+const Object = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const io = std.io;
+const log = std.log.scoped(.object);
+const macho = std.macho;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const parseName = @import("Zld.zig").parseName;
+
+usingnamespace @import("commands.zig");
+
+/// Allocator used for everything this Object owns.
+allocator: *Allocator,
+/// Handle the object's contents are read from; closed by `deinit`.
+file: fs.File,
+/// Owned name of the object; freed by `deinit`.
+name: []u8,
+/// Owned copy of the archive path, set when the object was extracted from an
+/// archive; freed by `deinit` when non-null.
+ar_name: ?[]u8 = null,
+
+/// Mach-O header as read from the file.
+header: macho.mach_header_64,
+
+/// Load commands in file order; filled by `readLoadCommands`.
+load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
+
+// Indices into `load_commands`; each is recorded by `readLoadCommands` when
+// the matching command is seen, except `text_section_index`, which is an
+// index into the segment command's section list.
+segment_cmd_index: ?u16 = null,
+symtab_cmd_index: ?u16 = null,
+dysymtab_cmd_index: ?u16 = null,
+build_version_cmd_index: ?u16 = null,
+data_in_code_cmd_index: ?u16 = null,
+text_section_index: ?u16 = null,
+
+// __DWARF segment sections
+// (indices into the segment command's section list)
+dwarf_debug_info_index: ?u16 = null,
+dwarf_debug_abbrev_index: ?u16 = null,
+dwarf_debug_str_index: ?u16 = null,
+dwarf_debug_line_index: ?u16 = null,
+dwarf_debug_ranges_index: ?u16 = null,
+
+/// Symbol table entries; filled by `readSymtab`.
+symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+/// Raw string table bytes; filled by `readStrtab` and indexed by `getString`.
+strtab: std.ArrayListUnmanaged(u8) = .{},
+
+/// Entries of the LC_DATA_IN_CODE payload; filled by `readDataInCode`.
+data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
+
+/// Frees the load commands, the symbol/string/data-in-code tables and the
+/// owned names, then closes the underlying file.
+pub fn deinit(self: *Object) void {
+    const gpa = self.allocator;
+
+    for (self.load_commands.items) |*cmd| cmd.deinit(gpa);
+    self.load_commands.deinit(gpa);
+
+    self.symtab.deinit(gpa);
+    self.strtab.deinit(gpa);
+    self.data_in_code_entries.deinit(gpa);
+
+    gpa.free(self.name);
+    if (self.ar_name) |archive_name| gpa.free(archive_name);
+
+    self.file.close();
+}
+
+/// Parses `file` as a relocatable Mach-O object (`MH_OBJECT`).
+///
+/// Returns `error.NotObject` (after rewinding `file` to offset 0) when the
+/// header's file type is not `MH_OBJECT`, and
+/// `error.UnsupportedCpuArchitecture` / `error.MismatchedCpuArchitecture` when
+/// the object's CPU type is unknown or differs from `arch`.
+///
+/// On failure all memory allocated here is released, but `file` is left open:
+/// it remains the caller's to close.
+///
+/// Caller owns the returned Object instance and is responsible for calling
+/// `deinit` to free allocated memory.
+pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []const u8, file: fs.File) !Object {
+    var reader = file.reader();
+    const header = try reader.readStruct(macho.mach_header_64);
+
+    if (header.filetype != macho.MH_OBJECT) {
+        // Reset file cursor.
+        try file.seekTo(0);
+        return error.NotObject;
+    }
+
+    const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
+        macho.CPU_TYPE_ARM64 => .aarch64,
+        macho.CPU_TYPE_X86_64 => .x86_64,
+        else => |value| {
+            log.err("unsupported cpu architecture 0x{x}", .{value});
+            return error.UnsupportedCpuArchitecture;
+        },
+    };
+    if (this_arch != arch) {
+        log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
+        return error.MismatchedCpuArchitecture;
+    }
+
+    var self = Object{
+        .allocator = allocator,
+        .name = try allocator.dupe(u8, name),
+        .file = file,
+        .header = header,
+    };
+    // A half-built Object is never handed to the caller, so nobody else can
+    // free what it has allocated so far. The file is deliberately not closed.
+    errdefer {
+        for (self.load_commands.items) |*lc| {
+            lc.deinit(self.allocator);
+        }
+        self.load_commands.deinit(self.allocator);
+        self.symtab.deinit(self.allocator);
+        self.strtab.deinit(self.allocator);
+        self.data_in_code_entries.deinit(self.allocator);
+        self.allocator.free(self.name);
+    }
+
+    try self.readLoadCommands(reader, .{});
+
+    if (self.symtab_cmd_index != null) {
+        try self.readSymtab();
+        try self.readStrtab();
+    }
+
+    if (self.data_in_code_cmd_index != null) try self.readDataInCode();
+
+    log.debug("\n\n", .{});
+    log.debug("{s} defines symbols", .{self.name});
+    for (self.symtab.items) |sym| {
+        const symname = self.getString(sym.n_strx);
+        log.debug("'{s}': {}", .{ symname, sym });
+    }
+
+    return self;
+}
+
+/// Options for `readLoadCommands`.
+pub const ReadOffset = struct {
+    /// Amount added to the file offsets stored in the load commands;
+    /// `null` is treated as 0 by `readLoadCommands`.
+    offset: ?u32 = null,
+};
+
+/// Reads `self.header.ncmds` load commands from `reader`, appends them to
+/// `self.load_commands`, and records the indices of the commands and sections
+/// this linker cares about.
+///
+/// `offset.offset` (0 when null) is added to every file offset stored in the
+/// commands: section `offset`/`reloff`, segment `fileoff`, symtab
+/// `symoff`/`stroff` and data-in-code `dataoff`. Callers pass the object's
+/// position inside an archive so later reads can use the offsets directly.
+pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !void {
+    const offset_mod = offset.offset orelse 0;
+    try self.load_commands.ensureCapacity(self.allocator, self.header.ncmds);
+
+    var i: u16 = 0;
+    while (i < self.header.ncmds) : (i += 1) {
+        var cmd = try LoadCommand.read(self.allocator, reader);
+        switch (cmd.cmd()) {
+            macho.LC_SEGMENT_64 => {
+                self.segment_cmd_index = i;
+                // Take a pointer: working on a copy would silently drop the
+                // `fileoff` adjustment below when `cmd` is stored.
+                const seg = &cmd.Segment;
+                for (seg.sections.items) |*sect, j| {
+                    const index = @intCast(u16, j);
+                    const segname = parseName(&sect.segname);
+                    const sectname = parseName(&sect.sectname);
+                    if (mem.eql(u8, segname, "__DWARF")) {
+                        if (mem.eql(u8, sectname, "__debug_info")) {
+                            self.dwarf_debug_info_index = index;
+                        } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
+                            self.dwarf_debug_abbrev_index = index;
+                        } else if (mem.eql(u8, sectname, "__debug_str")) {
+                            self.dwarf_debug_str_index = index;
+                        } else if (mem.eql(u8, sectname, "__debug_line")) {
+                            self.dwarf_debug_line_index = index;
+                        } else if (mem.eql(u8, sectname, "__debug_ranges")) {
+                            self.dwarf_debug_ranges_index = index;
+                        }
+                    } else if (mem.eql(u8, segname, "__TEXT")) {
+                        if (mem.eql(u8, sectname, "__text")) {
+                            self.text_section_index = index;
+                        }
+                    }
+
+                    sect.offset += offset_mod;
+                    // A zero `reloff` is left untouched.
+                    if (sect.reloff > 0)
+                        sect.reloff += offset_mod;
+                }
+
+                seg.inner.fileoff += offset_mod;
+            },
+            macho.LC_SYMTAB => {
+                self.symtab_cmd_index = i;
+                cmd.Symtab.symoff += offset_mod;
+                cmd.Symtab.stroff += offset_mod;
+            },
+            macho.LC_DYSYMTAB => {
+                self.dysymtab_cmd_index = i;
+            },
+            macho.LC_BUILD_VERSION => {
+                self.build_version_cmd_index = i;
+            },
+            macho.LC_DATA_IN_CODE => {
+                self.data_in_code_cmd_index = i;
+                cmd.LinkeditData.dataoff += offset_mod;
+            },
+            else => {
+                log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
+            },
+        }
+        self.load_commands.appendAssumeCapacity(cmd);
+    }
+}
+
+/// Loads the symbol table described by the LC_SYMTAB command into
+/// `self.symtab`. Requires `self.symtab_cmd_index` to be set.
+/// Fails with `error.EndOfStream` when the file ends before the whole table
+/// could be read.
+pub fn readSymtab(self: *Object) !void {
+    const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+    // Widen before multiplying so a large `nsyms` cannot overflow u32.
+    const size = @as(usize, symtab_cmd.nsyms) * @sizeOf(macho.nlist_64);
+    var buffer = try self.allocator.alloc(u8, size);
+    defer self.allocator.free(buffer);
+    // A short read would leave the tail of `buffer` undefined and end up as
+    // garbage symbols, so treat it as a truncated file.
+    const amt = try self.file.preadAll(buffer, symtab_cmd.symoff);
+    if (amt != buffer.len) return error.EndOfStream;
+    try self.symtab.ensureCapacity(self.allocator, symtab_cmd.nsyms);
+    // TODO this align cast should not be needed.
+    // Probably a bug in stage1.
+    const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, buffer));
+    self.symtab.appendSliceAssumeCapacity(slice);
+}
+
+/// Loads the string table described by the LC_SYMTAB command into
+/// `self.strtab`. Requires `self.symtab_cmd_index` to be set.
+/// Fails with `error.EndOfStream` when the file ends before the whole table
+/// could be read.
+pub fn readStrtab(self: *Object) !void {
+    const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+    var buffer = try self.allocator.alloc(u8, symtab_cmd.strsize);
+    defer self.allocator.free(buffer);
+    // A short read would leave undefined bytes in the string table.
+    const amt = try self.file.preadAll(buffer, symtab_cmd.stroff);
+    if (amt != buffer.len) return error.EndOfStream;
+    try self.strtab.ensureCapacity(self.allocator, symtab_cmd.strsize);
+    self.strtab.appendSliceAssumeCapacity(buffer);
+}
+
+/// Returns the NUL-terminated string that starts at byte `str_off` of the
+/// string table. Asserts that `str_off` lies inside the table. The result
+/// points into `self.strtab`.
+pub fn getString(self: *const Object, str_off: u32) []const u8 {
+    const table = self.strtab.items;
+    assert(str_off < table.len);
+    const start = @ptrCast([*:0]const u8, table.ptr + str_off);
+    return mem.spanZ(start);
+}
+
+/// Reads the contents of section `index` of the object's segment command into
+/// a freshly allocated buffer of `sect.size` bytes.
+/// Caller owns the returned slice and frees it with `allocator`.
+pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
+    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
+    const sect = seg.sections.items[index];
+    var buffer = try allocator.alloc(u8, sect.size);
+    // Do not leak the buffer when the read fails.
+    errdefer allocator.free(buffer);
+    _ = try self.file.preadAll(buffer, sect.offset);
+    return buffer;
+}
+
+/// Loads the entries of the LC_DATA_IN_CODE payload into
+/// `self.data_in_code_entries`. Does nothing when the object has no such
+/// command. Fails with `error.EndOfStream` when the file ends before the whole
+/// payload could be read.
+pub fn readDataInCode(self: *Object) !void {
+    const index = self.data_in_code_cmd_index orelse return;
+    const data_in_code = self.load_commands.items[index].LinkeditData;
+
+    var buffer = try self.allocator.alloc(u8, data_in_code.datasize);
+    defer self.allocator.free(buffer);
+
+    // A short read would leave undefined bytes that would be decoded as
+    // entries below.
+    const amt = try self.file.preadAll(buffer, data_in_code.dataoff);
+    if (amt != buffer.len) return error.EndOfStream;
+
+    var stream = io.fixedBufferStream(buffer);
+    var reader = stream.reader();
+    // Decode entries until the buffer is exhausted.
+    while (true) {
+        const dice = reader.readStruct(macho.data_in_code_entry) catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => |e| return e,
+        };
+        try self.data_in_code_entries.append(self.allocator, dice);
+    }
+}
diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig
@@ -0,0 +1,3294 @@
+const Zld = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const dwarf = std.dwarf;
+const leb = std.leb;
+const mem = std.mem;
+const meta = std.meta;
+const fs = std.fs;
+const macho = std.macho;
+const math = std.math;
+const log = std.log.scoped(.zld);
+const aarch64 = @import("../../codegen/aarch64.zig");
+
+const Allocator = mem.Allocator;
+const CodeSignature = @import("CodeSignature.zig");
+const Archive = @import("Archive.zig");
+const Object = @import("Object.zig");
+const Trie = @import("Trie.zig");
+
+usingnamespace @import("commands.zig");
+usingnamespace @import("bind.zig");
+
+/// Allocator used for everything the linker owns.
+allocator: *Allocator,
+
+/// Target architecture; when left null, `link` infers it from the first input.
+arch: ?std.Target.Cpu.Arch = null,
+/// Set by `link` from `arch` (0x4000 for aarch64, 0x1000 for x86_64).
+page_size: ?u16 = null,
+/// Output file created by `link`; closed by `deinit`.
+file: ?fs.File = null,
+/// Output path as passed to `link`; stored as-is, not freed by `deinit`.
+out_path: ?[]const u8 = null,
+
+// TODO Eventually, we will want to keep track of the archives themselves to be able to exclude objects
+// contained within from landing in the final artifact. For now however, since we don't optimise the binary
+// at all, we just move all objects from the archives into the final artifact.
+objects: std.ArrayListUnmanaged(Object) = .{},
+
+load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
+
+// Indices into `load_commands`.
+pagezero_segment_cmd_index: ?u16 = null,
+text_segment_cmd_index: ?u16 = null,
+data_const_segment_cmd_index: ?u16 = null,
+data_segment_cmd_index: ?u16 = null,
+linkedit_segment_cmd_index: ?u16 = null,
+dyld_info_cmd_index: ?u16 = null,
+symtab_cmd_index: ?u16 = null,
+dysymtab_cmd_index: ?u16 = null,
+dylinker_cmd_index: ?u16 = null,
+libsystem_cmd_index: ?u16 = null,
+data_in_code_cmd_index: ?u16 = null,
+function_starts_cmd_index: ?u16 = null,
+main_cmd_index: ?u16 = null,
+version_min_cmd_index: ?u16 = null,
+source_version_cmd_index: ?u16 = null,
+uuid_cmd_index: ?u16 = null,
+code_signature_cmd_index: ?u16 = null,
+
+// __TEXT segment sections
+text_section_index: ?u16 = null,
+stubs_section_index: ?u16 = null,
+stub_helper_section_index: ?u16 = null,
+text_const_section_index: ?u16 = null,
+cstring_section_index: ?u16 = null,
+
+// __DATA_CONST segment sections
+got_section_index: ?u16 = null,
+mod_init_func_section_index: ?u16 = null,
+mod_term_func_section_index: ?u16 = null,
+data_const_section_index: ?u16 = null,
+
+// __DATA segment sections
+tlv_section_index: ?u16 = null,
+tlv_data_section_index: ?u16 = null,
+tlv_bss_section_index: ?u16 = null,
+la_symbol_ptr_section_index: ?u16 = null,
+data_section_index: ?u16 = null,
+bss_section_index: ?u16 = null,
+
+// Symbol bookkeeping. The keys of the string-keyed maps below are owned by
+// the linker and freed in `deinit`.
+locals: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(Symbol)) = .{},
+exports: std.StringArrayHashMapUnmanaged(macho.nlist_64) = .{},
+nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
+lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
+tlv_bootstrap: ?Import = null,
+threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
+local_rebases: std.ArrayListUnmanaged(Pointer) = .{},
+nonlazy_pointers: std.StringArrayHashMapUnmanaged(GotEntry) = .{},
+
+strtab: std.ArrayListUnmanaged(u8) = .{},
+
+stub_helper_stubs_start_off: ?u64 = null,
+
+/// Where each (object, input section) pair was placed in the output; filled
+/// by `mapAndUpdateSections`.
+mappings: std.AutoHashMapUnmanaged(MappingKey, SectionMapping) = .{},
+unhandled_sections: std.AutoHashMapUnmanaged(MappingKey, u0) = .{},
+
+// TODO this will require scanning the relocations at least once to work out
+// the exact amount of local GOT indirections. For the time being, set some
+// default value.
+const max_local_got_indirections: u16 = 1000;
+
+/// Value type of the `nonlazy_pointers` map.
+// NOTE(review): presumably `index` is the slot in the GOT and `target_addr`
+// the address that slot should hold; confirm against the code that fills it.
+const GotEntry = struct {
+    index: u32,
+    target_addr: u64,
+};
+
+/// Key of the `mappings` and `unhandled_sections` maps: identifies one input
+/// section of one input object.
+const MappingKey = struct {
+    /// Index into `Zld.objects`.
+    object_id: u16,
+    /// Index into that object's segment section list.
+    source_sect_id: u16,
+};
+
+/// Records where an input section was placed in the output
+/// (see `mapAndUpdateSections`).
+const SectionMapping = struct {
+    /// Index of the input section within its object's segment.
+    source_sect_id: u16,
+    /// Index of the output segment in `Zld.load_commands`.
+    target_seg_id: u16,
+    /// Index of the output section within that segment.
+    target_sect_id: u16,
+    /// Aligned offset of the input section from the start of the output section.
+    offset: u32,
+};
+
+/// Element type of the lists stored in `Zld.locals`.
+const Symbol = struct {
+    /// Mach-O symbol table entry.
+    inner: macho.nlist_64,
+    /// Binding class of the symbol.
+    tt: Type,
+    // NOTE(review): presumably an index into `Zld.objects`; confirm.
+    object_id: u16,
+
+    const Type = enum {
+        Local,
+        WeakGlobal,
+        Global,
+    };
+};
+
+/// DWARF debug information of a single object, together with the section
+/// buffers that `inner` refers to.
+const DebugInfo = struct {
+    inner: dwarf.DwarfInfo,
+    debug_info: []u8,
+    debug_abbrev: []u8,
+    debug_str: []u8,
+    debug_line: []u8,
+    debug_ranges: []u8,
+
+    /// Reads the `__DWARF` sections of `object` and opens them as DWARF info.
+    ///
+    /// Returns null when any of `__debug_info`, `__debug_abbrev`,
+    /// `__debug_str` or `__debug_line` is missing; a missing `__debug_ranges`
+    /// is replaced by an empty buffer. On success the caller owns the result
+    /// and must call `deinit` with the same allocator.
+    pub fn parseFromObject(allocator: *Allocator, object: Object) !?DebugInfo {
+        // Resolve every mandatory index before reading anything, so that a
+        // missing section cannot leak buffers that were already read.
+        const info_index = object.dwarf_debug_info_index orelse return null;
+        const abbrev_index = object.dwarf_debug_abbrev_index orelse return null;
+        const str_index = object.dwarf_debug_str_index orelse return null;
+        const line_index = object.dwarf_debug_line_index orelse return null;
+
+        var debug_info = try object.readSection(allocator, info_index);
+        errdefer allocator.free(debug_info);
+        var debug_abbrev = try object.readSection(allocator, abbrev_index);
+        errdefer allocator.free(debug_abbrev);
+        var debug_str = try object.readSection(allocator, str_index);
+        errdefer allocator.free(debug_str);
+        var debug_line = try object.readSection(allocator, line_index);
+        errdefer allocator.free(debug_line);
+        var debug_ranges = blk: {
+            if (object.dwarf_debug_ranges_index) |ind| {
+                break :blk try object.readSection(allocator, ind);
+            }
+            break :blk try allocator.alloc(u8, 0);
+        };
+        errdefer allocator.free(debug_ranges);
+
+        var inner: dwarf.DwarfInfo = .{
+            .endian = .Little,
+            .debug_info = debug_info,
+            .debug_abbrev = debug_abbrev,
+            .debug_str = debug_str,
+            .debug_line = debug_line,
+            .debug_ranges = debug_ranges,
+        };
+        // NOTE(review): if this fails part-way, whatever it already stored in
+        // `inner` is not released here.
+        try dwarf.openDwarfDebugInfo(&inner, allocator);
+
+        return DebugInfo{
+            .inner = inner,
+            .debug_info = debug_info,
+            .debug_abbrev = debug_abbrev,
+            .debug_str = debug_str,
+            .debug_line = debug_line,
+            .debug_ranges = debug_ranges,
+        };
+    }
+
+    /// Frees the section buffers and the lists held by `inner`.
+    /// `allocator` must be the one passed to `parseFromObject`.
+    pub fn deinit(self: *DebugInfo, allocator: *Allocator) void {
+        allocator.free(self.debug_info);
+        allocator.free(self.debug_abbrev);
+        allocator.free(self.debug_str);
+        allocator.free(self.debug_line);
+        allocator.free(self.debug_ranges);
+        self.inner.abbrev_table_list.deinit();
+        self.inner.compile_unit_list.deinit();
+        self.inner.func_list.deinit();
+    }
+};
+
+/// An imported symbol; value type of the `lazy_imports` and
+/// `nonlazy_imports` maps and of `tlv_bootstrap`.
+pub const Import = struct {
+    /// MachO symbol table entry.
+    symbol: macho.nlist_64,
+
+    /// Id of the dynamic library where the specified entries can be found.
+    dylib_ordinal: i64,
+
+    /// Index of this import within the import list.
+    index: u32,
+};
+
+/// Default path to dyld.
+/// TODO instead of hardcoding it, we should probably look through some env vars and search paths,
+/// but this will do for now.
+const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
+
+/// Default lib search path.
+/// TODO instead of hardcoding it, we should probably look through some env vars and search paths,
+/// but this will do for now.
+const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib";
+
+/// Name of the system library.
+const LIB_SYSTEM_NAME: [*:0]const u8 = "System";
+/// Path of the system library, built from `DEFAULT_LIB_SEARCH_PATH`.
+/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it
+const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib";
+
+/// Creates an empty linker that allocates through `allocator`.
+/// Every other field starts at its declared default.
+pub fn init(allocator: *Allocator) Zld {
+    return Zld{
+        .allocator = allocator,
+    };
+}
+
+/// Releases everything the linker owns: tables, owned map keys, parsed
+/// objects, load commands and, if one was created, the output file.
+pub fn deinit(self: *Zld) void {
+    const gpa = self.allocator;
+
+    self.threadlocal_offsets.deinit(gpa);
+    self.strtab.deinit(gpa);
+    self.local_rebases.deinit(gpa);
+
+    // The string-keyed maps own their keys, so free those before the map.
+    for (self.lazy_imports.items()) |*kv| gpa.free(kv.key);
+    self.lazy_imports.deinit(gpa);
+
+    for (self.nonlazy_imports.items()) |*kv| gpa.free(kv.key);
+    self.nonlazy_imports.deinit(gpa);
+
+    for (self.nonlazy_pointers.items()) |*kv| gpa.free(kv.key);
+    self.nonlazy_pointers.deinit(gpa);
+
+    for (self.exports.items()) |*kv| gpa.free(kv.key);
+    self.exports.deinit(gpa);
+
+    // `locals` additionally owns a list of symbols per name.
+    for (self.locals.items()) |*kv| {
+        gpa.free(kv.key);
+        kv.value.deinit(gpa);
+    }
+    self.locals.deinit(gpa);
+
+    for (self.objects.items) |*obj| obj.deinit();
+    self.objects.deinit(gpa);
+
+    for (self.load_commands.items) |*cmd| cmd.deinit(gpa);
+    self.load_commands.deinit(gpa);
+
+    self.mappings.deinit(gpa);
+    self.unhandled_sections.deinit(gpa);
+
+    if (self.file) |*out| out.close();
+}
+
+/// Links the input `files` (objects and/or archives) into an executable
+/// written to `out_path`.
+///
+/// When `self.arch` is null it is inferred from the Mach-O header of the
+/// first input file. Returns `error.NoInputFiles` / `error.EmptyOutputPath`
+/// for empty arguments and `error.UnsupportedCpuArchitecture` when the
+/// architecture is neither aarch64 nor x86_64.
+pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
+    if (files.len == 0) return error.NoInputFiles;
+    if (out_path.len == 0) return error.EmptyOutputPath;
+
+    if (self.arch == null) {
+        // Try inferring the arch from the object files.
+        self.arch = blk: {
+            const file = try fs.cwd().openFile(files[0], .{});
+            defer file.close();
+            var reader = file.reader();
+            const header = try reader.readStruct(macho.mach_header_64);
+            const arch: std.Target.Cpu.Arch = switch (header.cputype) {
+                macho.CPU_TYPE_X86_64 => .x86_64,
+                macho.CPU_TYPE_ARM64 => .aarch64,
+                else => |value| {
+                    log.err("unsupported cpu architecture 0x{x}", .{value});
+                    return error.UnsupportedCpuArchitecture;
+                },
+            };
+            break :blk arch;
+        };
+    }
+
+    // `arch` is a plain field that callers may set to anything, so report an
+    // unsupported value as an error instead of treating it as unreachable.
+    self.page_size = switch (self.arch.?) {
+        .aarch64 => 0x4000,
+        .x86_64 => 0x1000,
+        else => |other| {
+            log.err("unsupported cpu architecture {s}", .{@tagName(other)});
+            return error.UnsupportedCpuArchitecture;
+        },
+    };
+    self.out_path = out_path;
+    self.file = try fs.cwd().createFile(out_path, .{
+        .truncate = true,
+        .read = true,
+        .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777,
+    });
+
+    try self.populateMetadata();
+    try self.parseInputFiles(files);
+    try self.sortSections();
+    try self.resolveImports();
+    try self.allocateTextSegment();
+    try self.allocateDataConstSegment();
+    try self.allocateDataSegment();
+    self.allocateLinkeditSegment();
+    try self.writeStubHelperCommon();
+    try self.resolveSymbols();
+    try self.doRelocs();
+    try self.flush();
+}
+
+/// Opens every input file, parses it first as an object and then as an
+/// archive, and appends the resulting objects to `self.objects`, calling
+/// `updateMetadata` for each.
+///
+/// Returns `error.UnexpectedInputFileType` when a file is neither. File
+/// handles and objects that were not handed over to `self.objects` are
+/// released before an error is returned.
+fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
+    for (files) |file_name| {
+        const file = try fs.cwd().openFile(file_name, .{});
+
+        try_object: {
+            var object = Object.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) {
+                error.NotObject => break :try_object,
+                else => |e| {
+                    // No Object took ownership of the handle.
+                    file.close();
+                    return e;
+                },
+            };
+            const index = @intCast(u16, self.objects.items.len);
+            self.objects.append(self.allocator, object) catch |e| {
+                // Not stored anywhere, so `Zld.deinit` would never see it.
+                object.deinit();
+                return e;
+            };
+            try self.updateMetadata(index);
+            continue;
+        }
+
+        try_archive: {
+            var archive = Archive.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) {
+                error.NotArchive => break :try_archive,
+                else => |e| {
+                    // No Archive took ownership of the handle.
+                    file.close();
+                    return e;
+                },
+            };
+            defer archive.deinit();
+            // Move the extracted objects out of the archive one by one.
+            while (archive.objects.popOrNull()) |object| {
+                const index = @intCast(u16, self.objects.items.len);
+                self.objects.append(self.allocator, object) catch |e| {
+                    // Already popped from the archive, so release it here.
+                    var orphan = object;
+                    orphan.deinit();
+                    return e;
+                };
+                try self.updateMetadata(index);
+            }
+            continue;
+        }
+
+        // Neither parser accepted the file, so nothing owns the handle.
+        file.close();
+        log.err("unexpected file type: expected object '.o' or archive '.a': {s}", .{file_name});
+        return error.UnexpectedInputFileType;
+    }
+}
+
+/// Appends input section `source_sect_id` of object `object_id` to the output
+/// section `target_sect_id` of segment `target_seg_id`.
+///
+/// The input section starts at the output section's current size rounded up
+/// to the output section's alignment. The placement is recorded in
+/// `self.mappings`, and the output section grows by the input section's
+/// aligned size.
+fn mapAndUpdateSections(
+    self: *Zld,
+    object_id: u16,
+    source_sect_id: u16,
+    target_seg_id: u16,
+    target_sect_id: u16,
+) !void {
+    const object = self.objects.items[object_id];
+    const source_sect = object.load_commands.items[object.segment_cmd_index.?].Segment.sections.items[source_sect_id];
+    const target_sect = &self.load_commands.items[target_seg_id].Segment.sections.items[target_sect_id];
+
+    // `align` is stored as a power-of-two exponent.
+    const alignment = try math.powi(u32, 2, target_sect.@"align");
+    const start = mem.alignForwardGeneric(u64, target_sect.size, alignment);
+    const padded_size = mem.alignForwardGeneric(u64, source_sect.size, alignment);
+
+    try self.mappings.putNoClobber(
+        self.allocator,
+        MappingKey{
+            .object_id = object_id,
+            .source_sect_id = source_sect_id,
+        },
+        .{
+            .source_sect_id = source_sect_id,
+            .target_seg_id = target_seg_id,
+            .target_sect_id = target_sect_id,
+            .offset = @intCast(u32, start),
+        },
+    );
+
+    const end = start + padded_size;
+    log.debug("{s}: {s},{s} mapped to {s},{s} from 0x{x} to 0x{x}", .{
+        object.name,
+        parseName(&source_sect.segname),
+        parseName(&source_sect.sectname),
+        parseName(&target_sect.segname),
+        parseName(&target_sect.sectname),
+        start,
+        end,
+    });
+
+    target_sect.size = end;
+}
+
+fn updateMetadata(self: *Zld, object_id: u16) !void {
+    const object = self.objects.items[object_id];
+    const object_seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+    const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+
+    // Create missing metadata
+    for (object_seg.sections.items) |source_sect, id| {
+        if (id == object.text_section_index.?) continue;
+        const segname = parseName(&source_sect.segname);
+        const sectname = parseName(&source_sect.sectname);
+        const flags = source_sect.flags;
+
+        switch (flags) {
+            macho.S_REGULAR, macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
+                if (mem.eql(u8, segname, "__TEXT")) {
+                    if (self.text_const_section_index != null) continue;
+
+                    self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
+                    try text_seg.addSection(self.allocator, .{
+                        .sectname = makeStaticString("__const"),
+                        .segname = makeStaticString("__TEXT"),
+                        .addr = 0,
+                        .size = 0,
+                        .offset = 0,
+                        .@"align" = 0,
+                        .reloff = 0,
+                        .nreloc = 0,
+                        .flags = macho.S_REGULAR,
+                        .reserved1 = 0,
+                        .reserved2 = 0,
+                        .reserved3 = 0,
+                    });
+                } else if (mem.eql(u8, segname, "__DATA")) {
+                    if (!mem.eql(u8, sectname, "__const")) continue;
+                    if (self.data_const_section_index != null) continue;
+
+                    self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
+                    try data_const_seg.addSection(self.allocator, .{
+                        .sectname = makeStaticString("__const"),
+                        .segname = makeStaticString("__DATA_CONST"),
+                        .addr = 0,
+                        .size = 0,
+                        .offset = 0,
+                        .@"align" = 0,
+                        .reloff = 0,
+                        .nreloc = 0,
+                        .flags = macho.S_REGULAR,
+                        .reserved1 = 0,
+                        .reserved2 = 0,
+                        .reserved3 = 0,
+                    });
+                }
+            },
+            macho.S_CSTRING_LITERALS => {
+                if (!mem.eql(u8, segname, "__TEXT")) continue;
+                if (self.cstring_section_index != null) continue;
+
+                self.cstring_section_index = @intCast(u16, text_seg.sections.items.len);
+                try text_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__cstring"),
+                    .segname = makeStaticString("__TEXT"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_CSTRING_LITERALS,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            macho.S_MOD_INIT_FUNC_POINTERS => {
+                if (!mem.eql(u8, segname, "__DATA")) continue;
+                if (self.mod_init_func_section_index != null) continue;
+
+                self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len);
+                try data_const_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__mod_init_func"),
+                    .segname = makeStaticString("__DATA_CONST"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_MOD_INIT_FUNC_POINTERS,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            macho.S_MOD_TERM_FUNC_POINTERS => {
+                if (!mem.eql(u8, segname, "__DATA")) continue;
+                if (self.mod_term_func_section_index != null) continue;
+
+                self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len);
+                try data_const_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__mod_term_func"),
+                    .segname = makeStaticString("__DATA_CONST"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_MOD_TERM_FUNC_POINTERS,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            macho.S_ZEROFILL => {
+                if (!mem.eql(u8, segname, "__DATA")) continue;
+                if (self.bss_section_index != null) continue;
+
+                self.bss_section_index = @intCast(u16, data_seg.sections.items.len);
+                try data_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__bss"),
+                    .segname = makeStaticString("__DATA"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_ZEROFILL,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            macho.S_THREAD_LOCAL_VARIABLES => {
+                if (!mem.eql(u8, segname, "__DATA")) continue;
+                if (self.tlv_section_index != null) continue;
+
+                self.tlv_section_index = @intCast(u16, data_seg.sections.items.len);
+                try data_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__thread_vars"),
+                    .segname = makeStaticString("__DATA"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_THREAD_LOCAL_VARIABLES,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            macho.S_THREAD_LOCAL_REGULAR => {
+                if (!mem.eql(u8, segname, "__DATA")) continue;
+                if (self.tlv_data_section_index != null) continue;
+
+                self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len);
+                try data_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__thread_data"),
+                    .segname = makeStaticString("__DATA"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_THREAD_LOCAL_REGULAR,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            macho.S_THREAD_LOCAL_ZEROFILL => {
+                if (!mem.eql(u8, segname, "__DATA")) continue;
+                if (self.tlv_bss_section_index != null) continue;
+
+                self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len);
+                try data_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__thread_bss"),
+                    .segname = makeStaticString("__DATA"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_THREAD_LOCAL_ZEROFILL,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            else => {
+                log.debug("unhandled section type 0x{x} for '{s}/{s}'", .{ flags, segname, sectname });
+            },
+        }
+    }
+
+    // Find ideal section alignment.
+    for (object_seg.sections.items) |source_sect, id| {
+        if (self.getMatchingSection(source_sect)) |res| {
+            const target_seg = &self.load_commands.items[res.seg].Segment;
+            const target_sect = &target_seg.sections.items[res.sect];
+            target_sect.@"align" = math.max(target_sect.@"align", source_sect.@"align");
+        }
+    }
+
+    // Update section mappings
+    for (object_seg.sections.items) |source_sect, id| {
+        const source_sect_id = @intCast(u16, id);
+        if (self.getMatchingSection(source_sect)) |res| {
+            try self.mapAndUpdateSections(object_id, source_sect_id, res.seg, res.sect);
+            continue;
+        }
+
+        const segname = parseName(&source_sect.segname);
+        const sectname = parseName(&source_sect.sectname);
+        log.debug("section '{s}/{s}' will be unmapped", .{ segname, sectname });
+        try self.unhandled_sections.putNoClobber(self.allocator, .{
+            .object_id = object_id,
+            .source_sect_id = source_sect_id,
+        }, 0);
+    }
+}
+
+/// Identifies an output section by position: `seg` selects a segment load
+/// command and `sect` selects a section inside that segment.
+const MatchingSection = struct {
+    /// Index into `load_commands.items` of the target segment.
+    seg: u16,
+    /// Index into the target segment's `sections.items`.
+    sect: u16,
+};
+
+/// Picks the output segment/section pair that an input section is merged into.
+///
+/// The decision is driven by `section.flags`; plain `S_REGULAR` sections are
+/// additionally told apart by their segment and section names.
+/// Returns null when the input section has no known output section.
+/// The segment and section indices that are unwrapped here must already be
+/// populated, otherwise the `.?` unwrap fails.
+fn getMatchingSection(self: *Zld, section: macho.section_64) ?MatchingSection {
+    const segname = parseName(&section.segname);
+    const sectname = parseName(&section.sectname);
+
+    switch (section.flags) {
+        // Fixed-size literal pools all end up in the text constants section.
+        macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => return MatchingSection{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.text_const_section_index.?,
+        },
+        macho.S_CSTRING_LITERALS => return MatchingSection{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.cstring_section_index.?,
+        },
+        macho.S_MOD_INIT_FUNC_POINTERS => return MatchingSection{
+            .seg = self.data_const_segment_cmd_index.?,
+            .sect = self.mod_init_func_section_index.?,
+        },
+        macho.S_MOD_TERM_FUNC_POINTERS => return MatchingSection{
+            .seg = self.data_const_segment_cmd_index.?,
+            .sect = self.mod_term_func_section_index.?,
+        },
+        macho.S_ZEROFILL => return MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.bss_section_index.?,
+        },
+        macho.S_THREAD_LOCAL_VARIABLES => return MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.tlv_section_index.?,
+        },
+        macho.S_THREAD_LOCAL_REGULAR => return MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.tlv_data_section_index.?,
+        },
+        macho.S_THREAD_LOCAL_ZEROFILL => return MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.tlv_bss_section_index.?,
+        },
+        // Regular section flagged as containing instructions: machine code.
+        macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS => return MatchingSection{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.text_section_index.?,
+        },
+        // Attribute-less regular sections are resolved by name below.
+        macho.S_REGULAR => {},
+        else => return null,
+    }
+
+    if (mem.eql(u8, segname, "__TEXT")) {
+        return MatchingSection{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.text_const_section_index.?,
+        };
+    }
+    if (!mem.eql(u8, segname, "__DATA")) return null;
+
+    if (mem.eql(u8, sectname, "__data")) {
+        return MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.data_section_index.?,
+        };
+    }
+    if (mem.eql(u8, sectname, "__const")) {
+        return MatchingSection{
+            .seg = self.data_const_segment_cmd_index.?,
+            .sect = self.data_const_section_index.?,
+        };
+    }
+    return null;
+}
+
+/// Rebuilds the section list of the segment at `seg_index` so that its sections
+/// appear in the order given by `indices`, skipping entries that are null.
+///
+/// Each non-null entry of `indices` is overwritten with the section's new
+/// position, and the old -> new position pair is recorded in `index_mapping`.
+/// Sections of the segment that are not referenced by `indices` are not carried
+/// over into the rebuilt list.
+fn reorderSegmentSections(
+    self: *Zld,
+    seg_index: u16,
+    indices: []const *?u16,
+    index_mapping: *std.AutoHashMap(u16, u16),
+) !void {
+    const seg = &self.load_commands.items[seg_index].Segment;
+    // Take the current list out of the segment; it is refilled below.
+    var sections = seg.sections.toOwnedSlice(self.allocator);
+    defer self.allocator.free(sections);
+    try seg.sections.ensureCapacity(self.allocator, sections.len);
+
+    for (indices) |maybe_index| {
+        const old_index = maybe_index.* orelse continue;
+        const new_index = @intCast(u16, seg.sections.items.len);
+        seg.sections.appendAssumeCapacity(sections[old_index]);
+        try index_mapping.putNoClobber(old_index, new_index);
+        maybe_index.* = new_index;
+    }
+}
+
+/// Puts the sections of the `__TEXT`, `__DATA_CONST` and `__DATA` segments into
+/// a fixed order, updates the cached section indices on `self` accordingly, and
+/// rewrites `target_sect_id` of every entry in `self.mappings` to the new
+/// position.
+fn sortSections(self: *Zld) !void {
+    var text_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator);
+    defer text_index_mapping.deinit();
+    var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator);
+    defer data_const_index_mapping.deinit();
+    var data_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator);
+    defer data_index_mapping.deinit();
+
+    {
+        // __TEXT segment
+        const indices = [_]*?u16{
+            &self.text_section_index,
+            &self.stubs_section_index,
+            &self.stub_helper_section_index,
+            &self.text_const_section_index,
+            &self.cstring_section_index,
+        };
+        try self.reorderSegmentSections(self.text_segment_cmd_index.?, &indices, &text_index_mapping);
+    }
+
+    {
+        // __DATA_CONST segment
+        const indices = [_]*?u16{
+            &self.got_section_index,
+            &self.mod_init_func_section_index,
+            &self.mod_term_func_section_index,
+            &self.data_const_section_index,
+        };
+        try self.reorderSegmentSections(self.data_const_segment_cmd_index.?, &indices, &data_const_index_mapping);
+    }
+
+    {
+        // __DATA segment
+        const indices = [_]*?u16{
+            &self.la_symbol_ptr_section_index,
+            &self.tlv_section_index,
+            &self.data_section_index,
+            &self.tlv_data_section_index,
+            &self.tlv_bss_section_index,
+            &self.bss_section_index,
+        };
+        try self.reorderSegmentSections(self.data_segment_cmd_index.?, &indices, &data_index_mapping);
+    }
+
+    // Redirect every recorded mapping to the section's new position. A mapping
+    // must target one of the three segments sorted above and a section that was
+    // carried over; anything else is treated as unreachable.
+    var it = self.mappings.iterator();
+    while (it.next()) |entry| {
+        const mapping = &entry.value;
+        if (self.text_segment_cmd_index.? == mapping.target_seg_id) {
+            const new_index = text_index_mapping.get(mapping.target_sect_id) orelse unreachable;
+            mapping.target_sect_id = new_index;
+        } else if (self.data_const_segment_cmd_index.? == mapping.target_seg_id) {
+            const new_index = data_const_index_mapping.get(mapping.target_sect_id) orelse unreachable;
+            mapping.target_sect_id = new_index;
+        } else if (self.data_segment_cmd_index.? == mapping.target_seg_id) {
+            const new_index = data_index_mapping.get(mapping.target_sect_id) orelse unreachable;
+            mapping.target_sect_id = new_index;
+        } else unreachable;
+    }
+}
+
+/// Determines which global symbols are referenced by the input objects but not
+/// exported by any of them, and registers each one as an import.
+///
+/// A fixed list of names is registered in `self.nonlazy_imports`,
+/// `__tlv_bootstrap` is stored in `self.tlv_bootstrap`, and everything else goes
+/// into `self.lazy_imports`. Finally `dyld_stub_binder` is always appended to
+/// `self.nonlazy_imports`.
+///
+/// The import tables take ownership of the duplicated name used as key. The key
+/// is only allocated for symbols that are actually inserted into a table, and
+/// it is released again if the insertion fails.
+fn resolveImports(self: *Zld) !void {
+    // name -> true while no object exports the symbol, false once one does.
+    var imports = std.StringArrayHashMap(bool).init(self.allocator);
+    defer imports.deinit();
+
+    for (self.objects.items) |object| {
+        for (object.symtab.items) |sym| {
+            if (isLocal(&sym)) continue;
+
+            const name = object.getString(sym.n_strx);
+            const res = try imports.getOrPut(name);
+            if (isExport(&sym)) {
+                res.entry.value = false;
+                continue;
+            }
+            // An export seen earlier wins over a later undefined reference.
+            if (res.found_existing and !res.entry.value)
+                continue;
+            res.entry.value = true;
+        }
+    }
+
+    for (imports.items()) |entry| {
+        if (!entry.value) continue;
+
+        const sym_name = entry.key;
+        const n_strx = try self.makeString(sym_name);
+        const new_sym: macho.nlist_64 = .{
+            .n_strx = n_strx,
+            .n_type = macho.N_UNDF | macho.N_EXT,
+            .n_value = 0,
+            .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER,
+            .n_sect = 0,
+        };
+        // TODO handle symbol resolution from non-libc dylibs.
+        const dylib_ordinal = 1;
+
+        // TODO need to rework this. Perhaps should create a set of all possible libc
+        // symbols which are expected to be nonlazy?
+        if (mem.eql(u8, sym_name, "___stdoutp") or
+            mem.eql(u8, sym_name, "___stderrp") or
+            mem.eql(u8, sym_name, "___stdinp") or
+            mem.eql(u8, sym_name, "___stack_chk_guard") or
+            mem.eql(u8, sym_name, "_environ") or
+            mem.eql(u8, sym_name, "__DefaultRuneLocale") or
+            mem.eql(u8, sym_name, "_mach_task_self_"))
+        {
+            const key = try self.allocator.dupe(u8, sym_name);
+            errdefer self.allocator.free(key);
+            log.debug("writing nonlazy symbol '{s}'", .{sym_name});
+            const index = @intCast(u32, self.nonlazy_imports.items().len);
+            try self.nonlazy_imports.putNoClobber(self.allocator, key, .{
+                .symbol = new_sym,
+                .dylib_ordinal = dylib_ordinal,
+                .index = index,
+            });
+        } else if (mem.eql(u8, sym_name, "__tlv_bootstrap")) {
+            // Stored directly rather than in a table, so no owned key is needed.
+            log.debug("writing threadlocal symbol '{s}'", .{sym_name});
+            self.tlv_bootstrap = .{
+                .symbol = new_sym,
+                .dylib_ordinal = dylib_ordinal,
+                .index = 0,
+            };
+        } else {
+            const key = try self.allocator.dupe(u8, sym_name);
+            errdefer self.allocator.free(key);
+            log.debug("writing lazy symbol '{s}'", .{sym_name});
+            const index = @intCast(u32, self.lazy_imports.items().len);
+            try self.lazy_imports.putNoClobber(self.allocator, key, .{
+                .symbol = new_sym,
+                .dylib_ordinal = dylib_ordinal,
+                .index = index,
+            });
+        }
+    }
+
+    // dyld_stub_binder is always registered as a nonlazy import.
+    const n_strx = try self.makeString("dyld_stub_binder");
+    const name = try self.allocator.dupe(u8, "dyld_stub_binder");
+    errdefer self.allocator.free(name);
+    log.debug("writing nonlazy symbol 'dyld_stub_binder'", .{});
+    const index = @intCast(u32, self.nonlazy_imports.items().len);
+    try self.nonlazy_imports.putNoClobber(self.allocator, name, .{
+        .symbol = .{
+            .n_strx = n_strx,
+            .n_type = macho.N_UNDF | macho.N_EXT,
+            .n_sect = 0,
+            .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER,
+            .n_value = 0,
+        },
+        .dylib_ordinal = 1,
+        .index = index,
+    });
+}
+
+/// Lays out the `__TEXT` segment.
+///
+/// Grows the stubs and stub helper sections by one entry per lazy import,
+/// places all sections after the Mach-O header and load commands, and then
+/// shifts them towards the end of the segment by a multiple of the largest
+/// section alignment.
+fn allocateTextSegment(self: *Zld) !void {
+    const text_index = self.text_segment_cmd_index.?;
+    const seg = &self.load_commands.items[text_index].Segment;
+    const lazy_count = @intCast(u32, self.lazy_imports.items().len);
+
+    // The segment starts at file offset 0 and, in memory, right after page zero.
+    const pagezero = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment;
+    seg.inner.fileoff = 0;
+    seg.inner.vmaddr = pagezero.inner.vmsize;
+
+    // Set stubs and stub_helper sizes
+    const stubs = &seg.sections.items[self.stubs_section_index.?];
+    const stub_helper = &seg.sections.items[self.stub_helper_section_index.?];
+    stubs.size += lazy_count * stubs.reserved2;
+
+    // Size in bytes of one per-symbol entry in the stub helper section.
+    const helper_entry_size: u4 = switch (self.arch.?) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+    stub_helper.size += lazy_count * helper_entry_size;
+
+    // Sections begin after the header and all load commands.
+    var cmds_size: u64 = 0;
+    for (self.load_commands.items) |cmd| {
+        cmds_size += cmd.cmdsize();
+    }
+    try self.allocateSegment(text_index, @sizeOf(macho.mach_header_64) + cmds_size);
+
+    // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments.
+    // The shift is a multiple of the largest alignment found in the segment.
+    var largest_alignment: u32 = 0;
+    for (seg.sections.items) |sect| {
+        largest_alignment = math.max(largest_alignment, try math.powi(u32, 2, sect.@"align"));
+    }
+    assert(largest_alignment > 0);
+
+    const tail = seg.sections.items[seg.sections.items.len - 1];
+    // Unused bytes between the end of the last section and the end of the segment.
+    const slack = seg.inner.filesize - tail.offset - tail.size;
+    const shift: u32 = @intCast(u32, slack - (slack % largest_alignment));
+    if (shift == 0) return;
+
+    for (seg.sections.items) |*sect| {
+        sect.offset += shift;
+        sect.addr += shift;
+    }
+}
+
+/// Lays out the `__DATA_CONST` segment directly after the `__TEXT` segment, both
+/// in the file and in memory, after growing the GOT section to hold the nonlazy
+/// imports plus a fixed budget of local GOT entries.
+fn allocateDataConstSegment(self: *Zld) !void {
+    const seg_index = self.data_const_segment_cmd_index.?;
+    const seg = &self.load_commands.items[seg_index].Segment;
+    const nonlazy_count = @intCast(u32, self.nonlazy_imports.items().len);
+
+    // Start where the preceding __TEXT segment ends.
+    const text_inner = self.load_commands.items[self.text_segment_cmd_index.?].Segment.inner;
+    seg.inner.fileoff = text_inner.fileoff + text_inner.filesize;
+    seg.inner.vmaddr = text_inner.vmaddr + text_inner.vmsize;
+
+    // Set got size
+    // TODO this will require scanning the relocations at least one to work out
+    // the exact amount of local GOT indirections. For the time being, set some
+    // default value.
+    const got = &seg.sections.items[self.got_section_index.?];
+    got.size += (max_local_got_indirections + nonlazy_count) * @sizeOf(u64);
+
+    try self.allocateSegment(seg_index, 0);
+}
+
+/// Lays out the `__DATA` segment directly after the `__DATA_CONST` segment, both
+/// in the file and in memory, after reserving one 8-byte lazy symbol pointer per
+/// lazy import and 8 extra bytes in the data section.
+fn allocateDataSegment(self: *Zld) !void {
+    const seg_index = self.data_segment_cmd_index.?;
+    const seg = &self.load_commands.items[seg_index].Segment;
+    const lazy_count = @intCast(u32, self.lazy_imports.items().len);
+
+    // Start where the preceding __DATA_CONST segment ends.
+    const prev_inner = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment.inner;
+    seg.inner.fileoff = prev_inner.fileoff + prev_inner.filesize;
+    seg.inner.vmaddr = prev_inner.vmaddr + prev_inner.vmsize;
+
+    // Set la_symbol_ptr and data size
+    const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?];
+    const data = &seg.sections.items[self.data_section_index.?];
+    la_symbol_ptr.size += lazy_count * @sizeOf(u64);
+    data.size += @sizeOf(u64); // TODO when do we need more?
+
+    try self.allocateSegment(seg_index, 0);
+}
+
+/// Positions the `__LINKEDIT` segment immediately after the `__DATA` segment,
+/// both in the file and in memory. Only the start offset and address are set.
+fn allocateLinkeditSegment(self: *Zld) void {
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const prev_inner = self.load_commands.items[self.data_segment_cmd_index.?].Segment.inner;
+    linkedit.inner.fileoff = prev_inner.fileoff + prev_inner.filesize;
+    linkedit.inner.vmaddr = prev_inner.vmaddr + prev_inner.vmsize;
+}
+
+/// Assigns file offsets and virtual addresses to every section of the segment
+/// at `index`, then sets the segment's file and memory size.
+///
+/// `offset` is the number of bytes to leave free at the start of the segment
+/// before the first section. The segment's `fileoff` and `vmaddr` must already
+/// be set by the caller. Each section is placed at the next position that
+/// satisfies its own alignment (`2^align`), and the resulting segment size is
+/// rounded up to the page size.
+///
+/// Returns an error if a section's alignment does not fit in a `u32`.
+fn allocateSegment(self: *Zld, index: u16, offset: u64) !void {
+    const seg = &self.load_commands.items[index].Segment;
+
+    // Allocate the sections according to their alignment at the beginning of the segment.
+    var start: u64 = offset;
+    for (seg.sections.items) |*sect| {
+        const alignment = try math.powi(u32, 2, sect.@"align");
+        const start_aligned = mem.alignForwardGeneric(u64, start, alignment);
+        // The end is padded as well, so the next section starts on this section's alignment.
+        const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment);
+        sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned);
+        sect.addr = seg.inner.vmaddr + start_aligned;
+        start = end_aligned;
+    }
+
+    const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size.?);
+    seg.inner.filesize = seg_size_aligned;
+    seg.inner.vmsize = seg_size_aligned;
+}
+
+/// Writes the common preamble at the start of the stub helper section and then,
+/// for every lazy import, its lazy symbol pointer, its stub and its stub helper
+/// entry.
+///
+/// The preamble is hand-assembled for x86_64 and aarch64. In both cases it
+/// loads the address of the last 8 bytes of the data section, pushes it, and
+/// jumps through the GOT slot of `dyld_stub_binder`. The file offset just past
+/// the preamble is stored in `self.stub_helper_stubs_start_off`.
+///
+/// Returns an error if a displacement does not fit its encoding or if writing
+/// to the output file fails.
+fn writeStubHelperCommon(self: *Zld) !void {
+    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?];
+    const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const got = &data_const_segment.sections.items[self.got_section_index.?];
+    const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const data = &data_segment.sections.items[self.data_section_index.?];
+    // NOTE(review): `la_symbol_ptr` is not referenced anywhere below in this function.
+    const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?];
+
+    self.stub_helper_stubs_start_off = blk: {
+        switch (self.arch.?) {
+            .x86_64 => {
+                // 7 bytes lea + 2 bytes push + 6 bytes jmp.
+                const code_size = 15;
+                var code: [code_size]u8 = undefined;
+                // lea %r11, [rip + disp]
+                code[0] = 0x4c;
+                code[1] = 0x8d;
+                code[2] = 0x1d;
+                {
+                    // Target is the last 8 bytes of the data section; the displacement is
+                    // relative to the end of the 7-byte lea instruction.
+                    const target_addr = data.addr + data.size - @sizeOf(u64);
+                    const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7);
+                    mem.writeIntLittle(u32, code[3..7], displacement);
+                }
+                // push %r11
+                code[7] = 0x41;
+                code[8] = 0x53;
+                // jmp [rip + disp]
+                code[9] = 0xff;
+                code[10] = 0x25;
+                {
+                    // Target is the GOT slot of dyld_stub_binder; the displacement is
+                    // relative to the end of the preamble, where the jmp instruction ends.
+                    const dyld_stub_binder = self.nonlazy_imports.get("dyld_stub_binder").?;
+                    const addr = (got.addr + dyld_stub_binder.index * @sizeOf(u64));
+                    const displacement = try math.cast(u32, addr - stub_helper.addr - code_size);
+                    mem.writeIntLittle(u32, code[11..], displacement);
+                }
+                try self.file.?.pwriteAll(&code, stub_helper.offset);
+                break :blk stub_helper.offset + code_size;
+            },
+            .aarch64 => {
+                // Six 4-byte instructions: two to form the data address in x17, one stp,
+                // two to load the binder address into x16, one br.
+                var code: [6 * @sizeOf(u32)]u8 = undefined;
+                data_blk_outer: {
+                    const this_addr = stub_helper.addr;
+                    const target_addr = data.addr + data.size - @sizeOf(u64);
+                    // First attempt: `adr` in the first slot, padded with a trailing nop.
+                    data_blk: {
+                        const displacement = math.cast(i21, target_addr - this_addr) catch |_| break :data_blk;
+                        // adr x17, disp
+                        mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32());
+                        // nop
+                        mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32());
+                        break :data_blk_outer;
+                    }
+                    // Second attempt: nop first, so `adr` sits 4 bytes closer to the target.
+                    data_blk: {
+                        const new_this_addr = this_addr + @sizeOf(u32);
+                        const displacement = math.cast(i21, target_addr - new_this_addr) catch |_| break :data_blk;
+                        // nop
+                        mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32());
+                        // adr x17, disp
+                        mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32());
+                        break :data_blk_outer;
+                    }
+                    // Jump is too big, replace adr with adrp and add.
+                    // adrp takes the distance in 4 KiB pages; add supplies the low 12 bits.
+                    const this_page = @intCast(i32, this_addr >> 12);
+                    const target_page = @intCast(i32, target_addr >> 12);
+                    const pages = @intCast(i21, target_page - this_page);
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32());
+                    const narrowed = @truncate(u12, target_addr);
+                    mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32());
+                }
+                // stp x16, x17, [sp, #-16]!
+                code[8] = 0xf0;
+                code[9] = 0x47;
+                code[10] = 0xbf;
+                code[11] = 0xa9;
+                binder_blk_outer: {
+                    const dyld_stub_binder = self.nonlazy_imports.get("dyld_stub_binder").?;
+                    // Address of the fourth instruction slot, where the load sequence starts.
+                    const this_addr = stub_helper.addr + 3 * @sizeOf(u32);
+                    const target_addr = (got.addr + dyld_stub_binder.index * @sizeOf(u64));
+                    // First attempt: `ldr` (literal) in the first slot; the distance must be
+                    // a multiple of 4 and the word offset must fit in 18 bits.
+                    binder_blk: {
+                        const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :binder_blk;
+                        const literal = math.cast(u18, displacement) catch |_| break :binder_blk;
+                        // ldr x16, label
+                        mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{
+                            .literal = literal,
+                        }).toU32());
+                        // nop
+                        mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32());
+                        break :binder_blk_outer;
+                    }
+                    // Second attempt: same encoding, issued from the following slot.
+                    binder_blk: {
+                        const new_this_addr = this_addr + @sizeOf(u32);
+                        const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :binder_blk;
+                        const literal = math.cast(u18, displacement) catch |_| break :binder_blk;
+                        log.debug("2: disp=0x{x}, literal=0x{x}", .{ displacement, literal });
+                        // Pad with nop to please division.
+                        // nop
+                        mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32());
+                        // ldr x16, label
+                        mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{
+                            .literal = literal,
+                        }).toU32());
+                        break :binder_blk_outer;
+                    }
+                    // Use adrp followed by ldr(immediate).
+                    const this_page = @intCast(i32, this_addr >> 12);
+                    const target_page = @intCast(i32, target_addr >> 12);
+                    const pages = @intCast(i21, target_page - this_page);
+                    mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32());
+                    const narrowed = @truncate(u12, target_addr);
+                    // The in-page offset is passed in units of 8 bytes and must divide exactly.
+                    const offset = try math.divExact(u12, narrowed, 8);
+                    mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{
+                        .register = .{
+                            .rn = .x16,
+                            .offset = aarch64.Instruction.LoadStoreOffset.imm(offset),
+                        },
+                    }).toU32());
+                }
+                // br x16
+                code[20] = 0x00;
+                code[21] = 0x02;
+                code[22] = 0x1f;
+                code[23] = 0xd6;
+                try self.file.?.pwriteAll(&code, stub_helper.offset);
+                break :blk stub_helper.offset + 6 * @sizeOf(u32);
+            },
+            else => unreachable,
+        }
+    };
+
+    // Emit the per-symbol pieces for every lazy import, in import order.
+    for (self.lazy_imports.items()) |_, i| {
+        const index = @intCast(u32, i);
+        try self.writeLazySymbolPointer(index);
+        try self.writeStub(index);
+        try self.writeStubInStubHelper(index);
+    }
+}
+
+/// Writes the lazy symbol pointer for the lazy import at `index`.
+///
+/// The pointer is an 8-byte little-endian value holding the virtual address of
+/// the import's entry in the stub helper section, and it is written to slot
+/// `index` of the lazy symbol pointer section in the output file.
+fn writeLazySymbolPointer(self: *Zld, index: u32) !void {
+    const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const helper_sect = text_seg.sections.items[self.stub_helper_section_index.?];
+    const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const la_ptr_sect = data_seg.sections.items[self.la_symbol_ptr_section_index.?];
+
+    // Size in bytes of one per-symbol entry in the stub helper section.
+    const entry_size: u4 = switch (self.arch.?) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+
+    // File offset of the entry, converted to a virtual address via the section base.
+    const entry_off = self.stub_helper_stubs_start_off.? + index * entry_size;
+    const entry_addr = helper_sect.addr + entry_off - helper_sect.offset;
+    const slot_off = la_ptr_sect.offset + index * @sizeOf(u64);
+
+    var raw: [@sizeOf(u64)]u8 = undefined;
+    mem.writeIntLittle(u64, &raw, entry_addr);
+    log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ entry_addr, slot_off });
+    try self.file.?.pwriteAll(&raw, slot_off);
+}
+
+/// Encodes the `index`-th entry of the stubs section and writes it to the output file.
+///
+/// Every stub is `reserved2` bytes long (as recorded in the stubs section header) and
+/// transfers control through the matching 8-byte slot of the lazy symbol pointer section:
+/// * x86_64: the bytes `0xff 0x25` (jmp) followed by a 32-bit displacement measured from
+///   the end of the stub to the slot.
+/// * aarch64: the slot is loaded into `x16`, then `br x16`. Three encodings are tried in
+///   order: `ldr` literal + `nop`, `nop` + `ldr` literal, and finally `adrp` + `ldr` immediate.
+///
+/// Returns an error if allocation fails, if a displacement does not fit its encoding,
+/// or if writing to the output file fails.
+fn writeStub(self: *Zld, index: u32) !void {
+    const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stubs_sect = text_seg.sections.items[self.stubs_section_index.?];
+    const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const la_ptr_sect = data_seg.sections.items[self.la_symbol_ptr_section_index.?];
+
+    const stub_len = stubs_sect.reserved2;
+    const file_off = stubs_sect.offset + index * stub_len;
+    const stub_vmaddr = stubs_sect.addr + index * stub_len;
+    const slot_vmaddr = la_ptr_sect.addr + index * @sizeOf(u64);
+    log.debug("writing stub at 0x{x}", .{file_off});
+
+    var bytes = try self.allocator.alloc(u8, stub_len);
+    defer self.allocator.free(bytes);
+
+    switch (self.arch.?) {
+        .x86_64 => {
+            // The displacement is taken from the first byte past this stub.
+            const stub_end = stub_vmaddr + stub_len;
+            assert(slot_vmaddr >= stub_end);
+            const disp = try math.cast(u32, slot_vmaddr - stub_end);
+            // jmp
+            bytes[0] = 0xff;
+            bytes[1] = 0x25;
+            mem.writeIntLittle(u32, bytes[2..6], disp);
+        },
+        .aarch64 => {
+            assert(slot_vmaddr >= stub_vmaddr);
+            load_slot: {
+                // First choice: `ldr x16, literal` in word 0, padded with a nop in word 1.
+                if (math.divExact(u64, slot_vmaddr - stub_vmaddr, 4)) |words| {
+                    if (math.cast(u18, words)) |literal| {
+                        // ldr x16, literal
+                        mem.writeIntLittle(u32, bytes[0..4], aarch64.Instruction.ldr(.x16, .{
+                            .literal = literal,
+                        }).toU32());
+                        // nop
+                        mem.writeIntLittle(u32, bytes[4..8], aarch64.Instruction.nop().toU32());
+                        break :load_slot;
+                    } else |_| {}
+                } else |_| {}
+
+                // Second choice: nop in word 0 and the literal load in word 1, which is
+                // one instruction closer to the slot.
+                const second_word_vmaddr = stub_vmaddr + @sizeOf(u32);
+                if (math.divExact(u64, slot_vmaddr - second_word_vmaddr, 4)) |words| {
+                    if (math.cast(u18, words)) |literal| {
+                        // nop
+                        mem.writeIntLittle(u32, bytes[0..4], aarch64.Instruction.nop().toU32());
+                        // ldr x16, literal
+                        mem.writeIntLittle(u32, bytes[4..8], aarch64.Instruction.ldr(.x16, .{
+                            .literal = literal,
+                        }).toU32());
+                        break :load_slot;
+                    } else |_| {}
+                } else |_| {}
+
+                // Fallback: adrp to the slot's 4 KiB page, then ldr (immediate) using the
+                // in-page offset scaled by 8.
+                const stub_page = @intCast(i32, stub_vmaddr >> 12);
+                const slot_page = @intCast(i32, slot_vmaddr >> 12);
+                const page_delta = @intCast(i21, slot_page - stub_page);
+                mem.writeIntLittle(u32, bytes[0..4], aarch64.Instruction.adrp(.x16, page_delta).toU32());
+                const page_off = @truncate(u12, slot_vmaddr);
+                const scaled_off = try math.divExact(u12, page_off, 8);
+                mem.writeIntLittle(u32, bytes[4..8], aarch64.Instruction.ldr(.x16, .{
+                    .register = .{
+                        .rn = .x16,
+                        .offset = aarch64.Instruction.LoadStoreOffset.imm(scaled_off),
+                    },
+                }).toU32());
+            }
+            // br x16
+            mem.writeIntLittle(u32, bytes[8..12], aarch64.Instruction.br(.x16).toU32());
+        },
+        else => unreachable,
+    }
+    try self.file.?.pwriteAll(bytes, file_off);
+}
+
+/// Writes the `index`-th per-symbol entry of the stub helper section to the output file.
+///
+/// Entries are packed back to back starting at `stub_helper_stubs_start_off`
+/// (10 bytes each on x86_64, three 32-bit words on aarch64). Each entry holds a zeroed
+/// 32-bit placeholder, later populated in `populateLazyBindOffsetsInStubHelper`, and a
+/// jump/branch whose displacement leads back to the start of the stub helper section.
+///
+/// Returns an error if allocation fails, if the displacement does not fit its encoding,
+/// or if writing to the output file fails.
+fn writeStubInStubHelper(self: *Zld, index: u32) !void {
+    const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const helper_sect = text_seg.sections.items[self.stub_helper_section_index.?];
+
+    const entry_len: u4 = switch (self.arch.?) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+    const entry_off = self.stub_helper_stubs_start_off.? + index * entry_len;
+
+    var bytes = try self.allocator.alloc(u8, entry_len);
+    defer self.allocator.free(bytes);
+
+    // Signed views of the two file offsets; the branch goes backwards, so the
+    // difference between them is negative.
+    const section_start = @intCast(i64, helper_sect.offset);
+    const entry_start = @intCast(i64, entry_off);
+
+    switch (self.arch.?) {
+        .x86_64 => {
+            // Relative to the end of the entry, i.e. the end of the jmpq instruction.
+            const disp = try math.cast(i32, section_start - entry_start - entry_len);
+            // pushq
+            bytes[0] = 0x68;
+            mem.writeIntLittle(u32, bytes[1..5], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
+            // jmpq
+            bytes[5] = 0xe9;
+            mem.writeIntLittle(u32, bytes[6..10], @bitCast(u32, disp));
+        },
+        .aarch64 => {
+            // The branch is the second word of the entry, hence the extra 4 bytes.
+            const disp = try math.cast(i28, section_start - entry_start - 4);
+            // Distance in words from the ldr to the trailing placeholder word.
+            const placeholder_words = @divExact(entry_len - @sizeOf(u32), 4);
+            // ldr w16, literal
+            mem.writeIntLittle(u32, bytes[0..4], aarch64.Instruction.ldr(.w16, .{
+                .literal = placeholder_words,
+            }).toU32());
+            // b disp
+            mem.writeIntLittle(u32, bytes[4..8], aarch64.Instruction.b(disp).toU32());
+            mem.writeIntLittle(u32, bytes[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
+        },
+        else => unreachable,
+    }
+    try self.file.?.pwriteAll(bytes, entry_off);
+}
+
+/// Records every non-import symbol of every input object in `self.locals`, keyed by name.
+///
+/// For each symbol this:
+/// * classifies it as `.Local`, `.WeakGlobal` or `.Global`;
+/// * looks up where its source section was mapped in the output and rebases `n_value`
+///   from the source section's address to the mapped target address;
+/// * computes the 1-based ordinal of the target section, counting sections across the
+///   leading segment load commands;
+/// * appends the resulting entry to the list stored under the symbol's name.
+///
+/// Symbols whose section is listed in `self.unhandled_sections` are skipped. A symbol whose
+/// section is neither mapped nor unhandled yields `error.SectionNotMappedForSymbol`.
+/// Allocation failures and errors from `makeString` are propagated.
+fn resolveSymbols(self: *Zld) !void {
+    for (self.objects.items) |object, object_id| {
+        const seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+        log.debug("\n\n", .{});
+        log.debug("resolving symbols in {s}", .{object.name});
+
+        for (object.symtab.items) |sym| {
+            if (isImport(&sym)) continue;
+
+            const sym_name = object.getString(sym.n_strx);
+            const out_name = try self.allocator.dupe(u8, sym_name);
+            // Until `getOrPut` succeeds nothing else owns `out_name`, so it has to be
+            // released here if the insertion itself fails.
+            const locs = self.locals.getOrPut(self.allocator, out_name) catch |err| {
+                self.allocator.free(out_name);
+                return err;
+            };
+            // An existing entry keeps its own key; our duplicate is then redundant.
+            defer {
+                if (locs.found_existing) self.allocator.free(out_name);
+            }
+
+            if (!locs.found_existing) {
+                locs.entry.value = .{};
+            }
+
+            const tt: Symbol.Type = blk: {
+                if (isLocal(&sym)) {
+                    break :blk .Local;
+                } else if (isWeakDef(&sym)) {
+                    break :blk .WeakGlobal;
+                } else {
+                    break :blk .Global;
+                }
+            };
+            if (tt == .Global) {
+                for (locs.entry.value.items) |ss| {
+                    if (ss.tt == .Global) {
+                        log.debug("symbol already defined '{s}'", .{sym_name});
+                        // NOTE(review): this `continue` only advances the scan over the
+                        // existing entries; the repeated definition is still recorded below.
+                        continue;
+                        // log.err("symbol '{s}' defined multiple times: {}", .{ sym_name, sym });
+                        // return error.MultipleSymbolDefinitions;
+                    }
+                }
+            }
+
+            // `n_sect` is 1-based, section lists are 0-based.
+            const source_sect_id = sym.n_sect - 1;
+            const target_mapping = self.mappings.get(.{
+                .object_id = @intCast(u16, object_id),
+                .source_sect_id = source_sect_id,
+            }) orelse {
+                if (self.unhandled_sections.get(.{
+                    .object_id = @intCast(u16, object_id),
+                    .source_sect_id = source_sect_id,
+                }) != null) continue;
+
+                log.err("section not mapped for symbol '{s}': {}", .{ sym_name, sym });
+                return error.SectionNotMappedForSymbol;
+            };
+            const source_sect = seg.sections.items[source_sect_id];
+            const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment;
+            const target_sect = target_seg.sections.items[target_mapping.target_sect_id];
+            const target_addr = target_sect.addr + target_mapping.offset;
+            // Keep the symbol's offset within its section, but relative to the new location.
+            const n_value = sym.n_value - source_sect.addr + target_addr;
+
+            log.debug("resolving '{s}':{} as {s} symbol at 0x{x}", .{ sym_name, sym, tt, n_value });
+
+            // TODO there might be a more generic way of doing this.
+            // Sum the section counts of the segments preceding the target segment, then
+            // add the 1-based position of the target section within its own segment.
+            var n_sect: u16 = 0;
+            for (self.load_commands.items) |cmd, cmd_id| {
+                if (cmd != .Segment) break;
+                if (cmd_id == target_mapping.target_seg_id) {
+                    n_sect += target_mapping.target_sect_id + 1;
+                    break;
+                }
+                n_sect += @intCast(u16, cmd.Segment.sections.items.len);
+            }
+
+            const n_strx = try self.makeString(sym_name);
+            try locs.entry.value.append(self.allocator, .{
+                .inner = .{
+                    .n_strx = n_strx,
+                    .n_value = n_value,
+                    .n_type = macho.N_SECT,
+                    .n_desc = sym.n_desc,
+                    .n_sect = @intCast(u8, n_sect),
+                },
+                .tt = tt,
+                .object_id = @intCast(u16, object_id),
+            });
+        }
+    }
+}
+
+fn doRelocs(self: *Zld) !void {
+    for (self.objects.items) |object, object_id| {
+        log.debug("\n\n", .{});
+        log.debug("relocating object {s}", .{object.name});
+
+        const seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+
+        for (seg.sections.items) |sect, source_sect_id| {
+            const segname = parseName(&sect.segname);
+            const sectname = parseName(&sect.sectname);
+
+            var code = try self.allocator.alloc(u8, sect.size);
+            _ = try object.file.preadAll(code, sect.offset);
+            defer self.allocator.free(code);
+
+            // Parse relocs (if any)
+            var raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc);
+            defer self.allocator.free(raw_relocs);
+            _ = try object.file.preadAll(raw_relocs, sect.reloff);
+            const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
+
+            // Get mapping
+            const target_mapping = self.mappings.get(.{
+                .object_id = @intCast(u16, object_id),
+                .source_sect_id = @intCast(u16, source_sect_id),
+            }) orelse {
+                log.debug("no mapping for {s},{s}; skipping", .{ segname, sectname });
+                continue;
+            };
+            const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment;
+            const target_sect = target_seg.sections.items[target_mapping.target_sect_id];
+            const target_sect_addr = target_sect.addr + target_mapping.offset;
+            const target_sect_off = target_sect.offset + target_mapping.offset;
+
+            var addend: ?u64 = null;
+            var sub: ?i64 = null;
+
+            for (relocs) |rel| {
+                const off = @intCast(u32, rel.r_address);
+                const this_addr = target_sect_addr + off;
+
+                switch (self.arch.?) {
+                    .aarch64 => {
+                        const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
+                        log.debug("{s}", .{rel_type});
+                        log.debug("    | source address 0x{x}", .{this_addr});
+                        log.debug("    | offset 0x{x}", .{off});
+
+                        if (rel_type == .ARM64_RELOC_ADDEND) {
+                            addend = rel.r_symbolnum;
+                            log.debug("    | calculated addend = 0x{x}", .{addend});
+                            // TODO followed by either PAGE21 or PAGEOFF12 only.
+                            continue;
+                        }
+                    },
+                    .x86_64 => {
+                        const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
+                        log.debug("{s}", .{rel_type});
+                        log.debug("    | source address 0x{x}", .{this_addr});
+                        log.debug("    | offset 0x{x}", .{off});
+                    },
+                    else => {},
+                }
+
+                const target_addr = try self.relocTargetAddr(@intCast(u16, object_id), rel);
+                log.debug("    | target address 0x{x}", .{target_addr});
+                if (rel.r_extern == 1) {
+                    const target_symname = object.getString(object.symtab.items[rel.r_symbolnum].n_strx);
+                    log.debug("    | target symbol '{s}'", .{target_symname});
+                } else {
+                    const target_sectname = seg.sections.items[rel.r_symbolnum - 1].sectname;
+                    log.debug("    | target section '{s}'", .{parseName(&target_sectname)});
+                }
+
+                switch (self.arch.?) {
+                    .x86_64 => {
+                        const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
+
+                        switch (rel_type) {
+                            .X86_64_RELOC_BRANCH => {
+                                assert(rel.r_length == 2);
+                                const inst = code[off..][0..4];
+                                const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4));
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_GOT_LOAD => {
+                                assert(rel.r_length == 2);
+                                const inst = code[off..][0..4];
+                                const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4));
+
+                                blk: {
+                                    const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+                                    const got = data_const_seg.sections.items[self.got_section_index.?];
+                                    if (got.addr <= target_addr and target_addr < got.addr + got.size) break :blk;
+                                    log.debug("    | rewriting to leaq", .{});
+                                    code[off - 2] = 0x8d;
+                                }
+
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_GOT => {
+                                assert(rel.r_length == 2);
+                                // TODO Instead of referring to the target symbol directly, we refer to it
+                                // indirectly via GOT. Getting actual target address should be done in the
+                                // helper relocTargetAddr function rather than here.
+                                const sym = object.symtab.items[rel.r_symbolnum];
+                                const sym_name = try self.allocator.dupe(u8, object.getString(sym.n_strx));
+                                const res = try self.nonlazy_pointers.getOrPut(self.allocator, sym_name);
+                                defer if (res.found_existing) self.allocator.free(sym_name);
+
+                                const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+                                const got = data_const_seg.sections.items[self.got_section_index.?];
+
+                                if (!res.found_existing) {
+                                    const index = @intCast(u32, self.nonlazy_pointers.items().len) - 1;
+                                    assert(index < max_local_got_indirections); // TODO This is just a temp solution.
+                                    res.entry.value = .{
+                                        .index = index,
+                                        .target_addr = target_addr,
+                                    };
+                                    var buf: [@sizeOf(u64)]u8 = undefined;
+                                    mem.writeIntLittle(u64, &buf, target_addr);
+                                    const got_offset = got.offset + (index + self.nonlazy_imports.items().len) * @sizeOf(u64);
+
+                                    log.debug("    | GOT off 0x{x}", .{got.offset});
+                                    log.debug("    | writing GOT entry 0x{x} at 0x{x}", .{ target_addr, got_offset });
+
+                                    try self.file.?.pwriteAll(&buf, got_offset);
+                                }
+
+                                const index = res.entry.value.index + self.nonlazy_imports.items().len;
+                                const actual_target_addr = got.addr + index * @sizeOf(u64);
+
+                                log.debug("    | GOT addr 0x{x}", .{got.addr});
+                                log.debug("    | actual target address in GOT 0x{x}", .{actual_target_addr});
+
+                                const inst = code[off..][0..4];
+                                const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, actual_target_addr) - @intCast(i64, this_addr) - 4));
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_TLV => {
+                                assert(rel.r_length == 2);
+                                // We need to rewrite the opcode from movq to leaq.
+                                code[off - 2] = 0x8d;
+                                // Add displacement.
+                                const inst = code[off..][0..4];
+                                const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4));
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_SIGNED,
+                            .X86_64_RELOC_SIGNED_1,
+                            .X86_64_RELOC_SIGNED_2,
+                            .X86_64_RELOC_SIGNED_4,
+                            => {
+                                assert(rel.r_length == 2);
+                                const inst = code[off..][0..4];
+                                const offset = @intCast(i64, mem.readIntLittle(i32, inst));
+                                log.debug("    | calculated addend 0x{x}", .{offset});
+                                const actual_target_addr = blk: {
+                                    if (rel.r_extern == 1) {
+                                        break :blk @intCast(i64, target_addr) + offset;
+                                    } else {
+                                        const correction: i4 = switch (rel_type) {
+                                            .X86_64_RELOC_SIGNED => 0,
+                                            .X86_64_RELOC_SIGNED_1 => 1,
+                                            .X86_64_RELOC_SIGNED_2 => 2,
+                                            .X86_64_RELOC_SIGNED_4 => 4,
+                                            else => unreachable,
+                                        };
+                                        log.debug("    | calculated correction 0x{x}", .{correction});
+
+                                        // The value encoded in the instruction is a displacement - 4 - correction.
+                                        // To obtain the adjusted target address in the final binary, we need
+                                        // calculate the original target address within the object file, establish
+                                        // what the offset from the original target section was, and apply this
+                                        // offset to the resultant target section with this relocated binary.
+                                        const orig_sect_id = @intCast(u16, rel.r_symbolnum - 1);
+                                        const target_map = self.mappings.get(.{
+                                            .object_id = @intCast(u16, object_id),
+                                            .source_sect_id = orig_sect_id,
+                                        }) orelse unreachable;
+                                        const orig_seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+                                        const orig_sect = orig_seg.sections.items[orig_sect_id];
+                                        const orig_offset = off + offset + 4 + correction - @intCast(i64, orig_sect.addr);
+                                        log.debug("    | original offset 0x{x}", .{orig_offset});
+                                        const adjusted = @intCast(i64, target_addr) + orig_offset;
+                                        log.debug("    | adjusted target address 0x{x}", .{adjusted});
+                                        break :blk adjusted - correction;
+                                    }
+                                };
+                                const result = actual_target_addr - @intCast(i64, this_addr) - 4;
+                                const displacement = @bitCast(u32, @intCast(i32, result));
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_SUBTRACTOR => {
+                                sub = @intCast(i64, target_addr);
+                            },
+                            .X86_64_RELOC_UNSIGNED => {
+                                switch (rel.r_length) {
+                                    3 => {
+                                        const inst = code[off..][0..8];
+                                        const offset = mem.readIntLittle(i64, inst);
+
+                                        const result = outer: {
+                                            if (rel.r_extern == 1) {
+                                                log.debug("    | calculated addend 0x{x}", .{offset});
+                                                if (sub) |s| {
+                                                    break :outer @intCast(i64, target_addr) - s + offset;
+                                                } else {
+                                                    break :outer @intCast(i64, target_addr) + offset;
+                                                }
+                                            } else {
+                                                // The value encoded in the instruction is an absolute offset
+                                                // from the start of MachO header to the target address in the
+                                                // object file. To extract the address, we calculate the offset from
+                                                // the beginning of the source section to the address, and apply it to
+                                                // the target address value.
+                                                const orig_sect_id = @intCast(u16, rel.r_symbolnum - 1);
+                                                const target_map = self.mappings.get(.{
+                                                    .object_id = @intCast(u16, object_id),
+                                                    .source_sect_id = orig_sect_id,
+                                                }) orelse unreachable;
+                                                const orig_seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+                                                const orig_sect = orig_seg.sections.items[orig_sect_id];
+                                                const orig_offset = offset - @intCast(i64, orig_sect.addr);
+                                                const actual_target_addr = inner: {
+                                                    if (sub) |s| {
+                                                        break :inner @intCast(i64, target_addr) - s + orig_offset;
+                                                    } else {
+                                                        break :inner @intCast(i64, target_addr) + orig_offset;
+                                                    }
+                                                };
+                                                log.debug("    | adjusted target address 0x{x}", .{actual_target_addr});
+                                                break :outer actual_target_addr;
+                                            }
+                                        };
+                                        mem.writeIntLittle(u64, inst, @bitCast(u64, result));
+                                        sub = null;
+
+                                        rebases: {
+                                            var hit: bool = false;
+                                            if (target_mapping.target_seg_id == self.data_segment_cmd_index.?) {
+                                                if (self.data_section_index) |index| {
+                                                    if (index == target_mapping.target_sect_id) hit = true;
+                                                }
+                                            }
+                                            if (target_mapping.target_seg_id == self.data_const_segment_cmd_index.?) {
+                                                if (self.data_const_section_index) |index| {
+                                                    if (index == target_mapping.target_sect_id) hit = true;
+                                                }
+                                            }
+
+                                            if (!hit) break :rebases;
+
+                                            try self.local_rebases.append(self.allocator, .{
+                                                .offset = this_addr - target_seg.inner.vmaddr,
+                                                .segment_id = target_mapping.target_seg_id,
+                                            });
+                                        }
+                                        // TLV is handled via a separate offset mechanism.
+                                        // Calculate the offset to the initializer.
+                                        if (target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
+                                            assert(rel.r_extern == 1);
+                                            const sym = object.symtab.items[rel.r_symbolnum];
+                                            if (isImport(&sym)) break :tlv;
+
+                                            const base_addr = blk: {
+                                                if (self.tlv_data_section_index) |index| {
+                                                    const tlv_data = target_seg.sections.items[index];
+                                                    break :blk tlv_data.addr;
+                                                } else {
+                                                    const tlv_bss = target_seg.sections.items[self.tlv_bss_section_index.?];
+                                                    break :blk tlv_bss.addr;
+                                                }
+                                            };
+                                            // Since we require TLV data to always preceed TLV bss section, we calculate
+                                            // offsets wrt to the former if it is defined; otherwise, wrt to the latter.
+                                            try self.threadlocal_offsets.append(self.allocator, target_addr - base_addr);
+                                        }
+                                    },
+                                    2 => {
+                                        const inst = code[off..][0..4];
+                                        const offset = mem.readIntLittle(i32, inst);
+                                        log.debug("    | calculated addend 0x{x}", .{offset});
+                                        const result = if (sub) |s|
+                                            @intCast(i64, target_addr) - s + offset
+                                        else
+                                            @intCast(i64, target_addr) + offset;
+                                        mem.writeIntLittle(u32, inst, @truncate(u32, @bitCast(u64, result)));
+                                        sub = null;
+                                    },
+                                    else => |len| {
+                                        log.err("unexpected relocation length 0x{x}", .{len});
+                                        return error.UnexpectedRelocationLength;
+                                    },
+                                }
+                            },
+                        }
+                    },
+                    .aarch64 => {
+                        const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
+
+                        switch (rel_type) {
+                            .ARM64_RELOC_BRANCH26 => {
+                                assert(rel.r_length == 2);
+                                const inst = code[off..][0..4];
+                                const displacement = @intCast(
+                                    i28,
+                                    @intCast(i64, target_addr) - @intCast(i64, this_addr),
+                                );
+                                var parsed = mem.bytesAsValue(
+                                    meta.TagPayload(
+                                        aarch64.Instruction,
+                                        aarch64.Instruction.UnconditionalBranchImmediate,
+                                    ),
+                                    inst,
+                                );
+                                parsed.imm26 = @truncate(u26, @bitCast(u28, displacement) >> 2);
+                            },
+                            .ARM64_RELOC_PAGE21,
+                            .ARM64_RELOC_GOT_LOAD_PAGE21,
+                            .ARM64_RELOC_TLVP_LOAD_PAGE21,
+                            => {
+                                assert(rel.r_length == 2);
+                                const inst = code[off..][0..4];
+                                const ta = if (addend) |a| target_addr + a else target_addr;
+                                const this_page = @intCast(i32, this_addr >> 12);
+                                const target_page = @intCast(i32, ta >> 12);
+                                const pages = @bitCast(u21, @intCast(i21, target_page - this_page));
+                                log.debug("    | moving by {} pages", .{pages});
+                                var parsed = mem.bytesAsValue(
+                                    meta.TagPayload(
+                                        aarch64.Instruction,
+                                        aarch64.Instruction.PCRelativeAddress,
+                                    ),
+                                    inst,
+                                );
+                                parsed.immhi = @truncate(u19, pages >> 2);
+                                parsed.immlo = @truncate(u2, pages);
+                                addend = null;
+                            },
+                            .ARM64_RELOC_PAGEOFF12,
+                            .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
+                            => {
+                                const inst = code[off..][0..4];
+                                if (aarch64IsArithmetic(inst)) {
+                                    log.debug("    | detected ADD opcode", .{});
+                                    // add
+                                    var parsed = mem.bytesAsValue(
+                                        meta.TagPayload(
+                                            aarch64.Instruction,
+                                            aarch64.Instruction.AddSubtractImmediate,
+                                        ),
+                                        inst,
+                                    );
+                                    const ta = if (addend) |a| target_addr + a else target_addr;
+                                    const narrowed = @truncate(u12, ta);
+                                    parsed.imm12 = narrowed;
+                                } else {
+                                    log.debug("    | detected LDR/STR opcode", .{});
+                                    // ldr/str
+                                    var parsed = mem.bytesAsValue(
+                                        meta.TagPayload(
+                                            aarch64.Instruction,
+                                            aarch64.Instruction.LoadStoreRegister,
+                                        ),
+                                        inst,
+                                    );
+
+                                    const ta = if (addend) |a| target_addr + a else target_addr;
+                                    const narrowed = @truncate(u12, ta);
+                                    log.debug("    | narrowed 0x{x}", .{narrowed});
+                                    log.debug("    | parsed.size 0x{x}", .{parsed.size});
+
+                                    if (rel_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12) blk: {
+                                        const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+                                        const got = data_const_seg.sections.items[self.got_section_index.?];
+                                        if (got.addr <= target_addr and target_addr < got.addr + got.size) break :blk;
+
+                                        log.debug("    | rewriting to add", .{});
+                                        mem.writeIntLittle(u32, inst, aarch64.Instruction.add(
+                                            @intToEnum(aarch64.Register, parsed.rt),
+                                            @intToEnum(aarch64.Register, parsed.rn),
+                                            narrowed,
+                                            false,
+                                        ).toU32());
+                                        addend = null;
+                                        continue;
+                                    }
+
+                                    const offset: u12 = blk: {
+                                        if (parsed.size == 0) {
+                                            if (parsed.v == 1) {
+                                                // 128-bit SIMD is scaled by 16.
+                                                break :blk try math.divExact(u12, narrowed, 16);
+                                            }
+                                            // Otherwise, 8-bit SIMD or ldrb.
+                                            break :blk narrowed;
+                                        } else {
+                                            const denom: u4 = try math.powi(u4, 2, parsed.size);
+                                            break :blk try math.divExact(u12, narrowed, denom);
+                                        }
+                                    };
+                                    parsed.offset = offset;
+                                }
+                                addend = null;
+                            },
+                            .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => {
+                                const RegInfo = struct {
+                                    rd: u5,
+                                    rn: u5,
+                                    size: u1,
+                                };
+                                const inst = code[off..][0..4];
+                                const parsed: RegInfo = blk: {
+                                    if (aarch64IsArithmetic(inst)) {
+                                        const curr = mem.bytesAsValue(
+                                            meta.TagPayload(
+                                                aarch64.Instruction,
+                                                aarch64.Instruction.AddSubtractImmediate,
+                                            ),
+                                            inst,
+                                        );
+                                        break :blk .{ .rd = curr.rd, .rn = curr.rn, .size = curr.sf };
+                                    } else {
+                                        const curr = mem.bytesAsValue(
+                                            meta.TagPayload(
+                                                aarch64.Instruction,
+                                                aarch64.Instruction.LoadStoreRegister,
+                                            ),
+                                            inst,
+                                        );
+                                        break :blk .{ .rd = curr.rt, .rn = curr.rn, .size = @truncate(u1, curr.size) };
+                                    }
+                                };
+                                const ta = if (addend) |a| target_addr + a else target_addr;
+                                const narrowed = @truncate(u12, ta);
+                                log.debug("    | rewriting TLV access to ADD opcode", .{});
+                                // For TLV, we always generate an add instruction.
+                                mem.writeIntLittle(u32, inst, aarch64.Instruction.add(
+                                    @intToEnum(aarch64.Register, parsed.rd),
+                                    @intToEnum(aarch64.Register, parsed.rn),
+                                    narrowed,
+                                    false,
+                                ).toU32());
+                            },
+                            .ARM64_RELOC_SUBTRACTOR => {
+                                sub = @intCast(i64, target_addr);
+                            },
+                            .ARM64_RELOC_UNSIGNED => {
+                                switch (rel.r_length) {
+                                    3 => {
+                                        const inst = code[off..][0..8];
+                                        const offset = mem.readIntLittle(i64, inst);
+                                        log.debug("    | calculated addend 0x{x}", .{offset});
+                                        const result = if (sub) |s|
+                                            @intCast(i64, target_addr) - s + offset
+                                        else
+                                            @intCast(i64, target_addr) + offset;
+                                        mem.writeIntLittle(u64, inst, @bitCast(u64, result));
+                                        sub = null;
+
+                                        rebases: {
+                                            var hit: bool = false;
+                                            if (target_mapping.target_seg_id == self.data_segment_cmd_index.?) {
+                                                if (self.data_section_index) |index| {
+                                                    if (index == target_mapping.target_sect_id) hit = true;
+                                                }
+                                            }
+                                            if (target_mapping.target_seg_id == self.data_const_segment_cmd_index.?) {
+                                                if (self.data_const_section_index) |index| {
+                                                    if (index == target_mapping.target_sect_id) hit = true;
+                                                }
+                                            }
+
+                                            if (!hit) break :rebases;
+
+                                            try self.local_rebases.append(self.allocator, .{
+                                                .offset = this_addr - target_seg.inner.vmaddr,
+                                                .segment_id = target_mapping.target_seg_id,
+                                            });
+                                        }
+                                        // TLV is handled via a separate offset mechanism.
+                                        // Calculate the offset to the initializer.
+                                        if (target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
+                                            assert(rel.r_extern == 1);
+                                            const sym = object.symtab.items[rel.r_symbolnum];
+                                            if (isImport(&sym)) break :tlv;
+
+                                            const base_addr = blk: {
+                                                if (self.tlv_data_section_index) |index| {
+                                                    const tlv_data = target_seg.sections.items[index];
+                                                    break :blk tlv_data.addr;
+                                                } else {
+                                                    const tlv_bss = target_seg.sections.items[self.tlv_bss_section_index.?];
+                                                    break :blk tlv_bss.addr;
+                                                }
+                                            };
+                                            // Since we require TLV data to always preceed TLV bss section, we calculate
+                                            // offsets wrt to the former if it is defined; otherwise, wrt to the latter.
+                                            try self.threadlocal_offsets.append(self.allocator, target_addr - base_addr);
+                                        }
+                                    },
+                                    2 => {
+                                        const inst = code[off..][0..4];
+                                        const offset = mem.readIntLittle(i32, inst);
+                                        log.debug("    | calculated addend 0x{x}", .{offset});
+                                        const result = if (sub) |s|
+                                            @intCast(i64, target_addr) - s + offset
+                                        else
+                                            @intCast(i64, target_addr) + offset;
+                                        mem.writeIntLittle(u32, inst, @truncate(u32, @bitCast(u64, result)));
+                                        sub = null;
+                                    },
+                                    else => |len| {
+                                        log.err("unexpected relocation length 0x{x}", .{len});
+                                        return error.UnexpectedRelocationLength;
+                                    },
+                                }
+                            },
+                            .ARM64_RELOC_POINTER_TO_GOT => return error.TODOArm64RelocPointerToGot,
+                            else => unreachable,
+                        }
+                    },
+                    else => unreachable,
+                }
+            }
+
+            log.debug("writing contents of '{s},{s}' section from '{s}' from 0x{x} to 0x{x}", .{
+                segname,
+                sectname,
+                object.name,
+                target_sect_off,
+                target_sect_off + code.len,
+            });
+
+            if (target_sect.flags == macho.S_ZEROFILL or
+                target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or
+                target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES)
+            {
+                log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{
+                    parseName(&target_sect.segname),
+                    parseName(&target_sect.sectname),
+                    target_sect_off,
+                    target_sect_off + code.len,
+                });
+                // Zero-out the space
+                var zeroes = try self.allocator.alloc(u8, code.len);
+                defer self.allocator.free(zeroes);
+                mem.set(u8, zeroes, 0);
+                try self.file.?.pwriteAll(zeroes, target_sect_off);
+            } else {
+                try self.file.?.pwriteAll(code, target_sect_off);
+            }
+        }
+    }
+}
+
+/// Computes the address in the output image that relocation `rel`, found in the
+/// object identified by `object_id`, points at.
+///
+/// * Non-extern relocations treat `r_symbolnum` as a 1-based section ordinal and
+///   resolve to the mapped location of that section.
+/// * Extern relocations against a symbol that is local to, or exported by, the
+///   object keep the symbol's offset within its source section and rebase it
+///   onto the mapped target section.
+/// * Extern relocations against an import are looked up, in order, in
+///   `self.locals` (a `.Global` entry wins over any `.WeakGlobal` one),
+///   `self.lazy_imports` (stub address), `self.nonlazy_imports` (GOT slot
+///   address) and finally the `__tlv_bootstrap` symbol.
+///
+/// Returns `error.LocalSymbolExportNotFound`, `error.FailedToResolveRelocationTarget`
+/// or `error.UnexpectedSymbolWhenRelocating` when the target cannot be resolved.
+fn relocTargetAddr(self: *Zld, object_id: u16, rel: macho.relocation_info) !u64 {
+    const object = self.objects.items[object_id];
+    const seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+
+    if (rel.r_extern != 1) {
+        // TODO I think we need to reparse the relocation_info as scattered_relocation_info
+        // here to get the actual section plus offset into that section of the relocated
+        // symbol. Unless the fine-grained location is encoded within the cell in the code
+        // buffer?
+        const mapping = self.mappings.get(.{
+            .object_id = object_id,
+            .source_sect_id = @intCast(u16, rel.r_symbolnum - 1),
+        }) orelse unreachable;
+        const out_seg = self.load_commands.items[mapping.target_seg_id].Segment;
+        const out_sect = out_seg.sections.items[mapping.target_sect_id];
+        return out_sect.addr + mapping.offset;
+    }
+
+    const sym = object.symtab.items[rel.r_symbolnum];
+
+    if (isLocal(&sym) or isExport(&sym)) {
+        // The symbol is defined by this object: relocate using section offsets only.
+        const mapping = self.mappings.get(.{
+            .object_id = object_id,
+            .source_sect_id = sym.n_sect - 1,
+        }) orelse unreachable;
+        const in_sect = seg.sections.items[mapping.source_sect_id];
+        const out_seg = self.load_commands.items[mapping.target_seg_id].Segment;
+        const out_sect = out_seg.sections.items[mapping.target_sect_id];
+        const out_base = out_sect.addr + mapping.offset;
+        log.debug("    | symbol local to object", .{});
+        return out_base + sym.n_value - in_sect.addr;
+    }
+
+    if (!isImport(&sym)) {
+        log.err("unexpected symbol {}, {s}", .{ sym, object.getString(sym.n_strx) });
+        return error.UnexpectedSymbolWhenRelocating;
+    }
+
+    // The symbol is an import: it resolves either to a symbol defined elsewhere in
+    // the artifact, or to an import from a shared library.
+    const sym_name = object.getString(sym.n_strx);
+
+    if (self.locals.get(sym_name)) |candidates| {
+        var resolved: ?u64 = null;
+        for (candidates.items) |candidate| {
+            switch (candidate.tt) {
+                .Local => {},
+                .WeakGlobal => {
+                    // Remember it, but keep scanning in case a strong definition follows.
+                    resolved = candidate.inner.n_value;
+                },
+                .Global => {
+                    // A strong definition settles the lookup.
+                    resolved = candidate.inner.n_value;
+                    break;
+                },
+            }
+        }
+        if (resolved) |addr| return addr;
+
+        log.err("local symbol export '{s}' not found", .{sym_name});
+        return error.LocalSymbolExportNotFound;
+    }
+
+    if (self.lazy_imports.get(sym_name)) |ext| {
+        const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        const stubs = text_seg.sections.items[self.stubs_section_index.?];
+        return stubs.addr + ext.index * stubs.reserved2;
+    }
+
+    if (self.nonlazy_imports.get(sym_name)) |ext| {
+        const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const got = data_const_seg.sections.items[self.got_section_index.?];
+        return got.addr + ext.index * @sizeOf(u64);
+    }
+
+    if (mem.eql(u8, sym_name, "__tlv_bootstrap")) {
+        const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const tlv = data_seg.sections.items[self.tlv_section_index.?];
+        return tlv.addr + self.tlv_bootstrap.?.index * @sizeOf(u64);
+    }
+
+    log.err("failed to resolve symbol '{s}' as a relocation target", .{sym_name});
+    return error.FailedToResolveRelocationTarget;
+}
+
+/// Makes sure the skeleton of the output Mach-O file exists: every segment,
+/// section and load command handled below is created only when its index field
+/// on `self` is still `null`, with address/size/offset fields initialised to
+/// placeholder values.
+///
+/// Each index is taken from the current length of the list that the new entry
+/// is about to be appended to, so the order of the blocks below determines the
+/// order of load commands (and of sections within each segment).
+///
+/// Architecture-dependent choices (section alignment, stub sizes, whether to
+/// emit LC_CODE_SIGNATURE or LC_DATA_IN_CODE) are made from `self.arch`, which
+/// must be set to `.x86_64` or `.aarch64`.
+///
+/// Returns any allocation error from growing the underlying lists. The heap
+/// buffers holding the dylinker and libSystem path payloads are released again
+/// if appending their load command fails.
+fn populateMetadata(self: *Zld) !void {
+    // __PAGEZERO segment.
+    if (self.pagezero_segment_cmd_index == null) {
+        self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Segment = SegmentCommand.empty(.{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = @sizeOf(macho.segment_command_64),
+                .segname = makeStaticString("__PAGEZERO"),
+                .vmaddr = 0,
+                .vmsize = 0x100000000, // size always set to 4GB
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = 0,
+                .initprot = 0,
+                .nsects = 0,
+                .flags = 0,
+            }),
+        });
+    }
+
+    // __TEXT segment (read + execute).
+    if (self.text_segment_cmd_index == null) {
+        self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Segment = SegmentCommand.empty(.{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = @sizeOf(macho.segment_command_64),
+                .segname = makeStaticString("__TEXT"),
+                .vmaddr = 0x100000000, // always starts at 4GB
+                .vmsize = 0,
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
+                .nsects = 0,
+                .flags = 0,
+            }),
+        });
+    }
+
+    // __TEXT,__text section.
+    if (self.text_section_index == null) {
+        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.text_section_index = @intCast(u16, text_seg.sections.items.len);
+        // Alignment is stored as a power-of-two exponent: 2^2 = 4 bytes on aarch64.
+        const alignment: u2 = switch (self.arch.?) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        try text_seg.addSection(self.allocator, .{
+            .sectname = makeStaticString("__text"),
+            .segname = makeStaticString("__TEXT"),
+            .addr = 0,
+            .size = 0,
+            .offset = 0,
+            .@"align" = alignment,
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+            .reserved1 = 0,
+            .reserved2 = 0,
+            .reserved3 = 0,
+        });
+    }
+
+    // __TEXT,__stubs section; `reserved2` carries the size of a single stub.
+    if (self.stubs_section_index == null) {
+        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.stubs_section_index = @intCast(u16, text_seg.sections.items.len);
+        const alignment: u2 = switch (self.arch.?) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        const stub_size: u4 = switch (self.arch.?) {
+            .x86_64 => 6,
+            .aarch64 => 3 * @sizeOf(u32),
+            else => unreachable, // unhandled architecture type
+        };
+        try text_seg.addSection(self.allocator, .{
+            .sectname = makeStaticString("__stubs"),
+            .segname = makeStaticString("__TEXT"),
+            .addr = 0,
+            .size = 0,
+            .offset = 0,
+            .@"align" = alignment,
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+            .reserved1 = 0,
+            .reserved2 = stub_size,
+            .reserved3 = 0,
+        });
+    }
+
+    // __TEXT,__stub_helper section; unlike the other sections it starts out with
+    // a non-zero size (15 bytes on x86_64, six 4-byte words on aarch64).
+    if (self.stub_helper_section_index == null) {
+        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len);
+        const alignment: u2 = switch (self.arch.?) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        const stub_helper_size: u6 = switch (self.arch.?) {
+            .x86_64 => 15,
+            .aarch64 => 6 * @sizeOf(u32),
+            else => unreachable,
+        };
+        try text_seg.addSection(self.allocator, .{
+            .sectname = makeStaticString("__stub_helper"),
+            .segname = makeStaticString("__TEXT"),
+            .addr = 0,
+            .size = stub_helper_size,
+            .offset = 0,
+            .@"align" = alignment,
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+            .reserved1 = 0,
+            .reserved2 = 0,
+            .reserved3 = 0,
+        });
+    }
+
+    // __DATA_CONST segment (read + write).
+    if (self.data_const_segment_cmd_index == null) {
+        self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Segment = SegmentCommand.empty(.{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = @sizeOf(macho.segment_command_64),
+                .segname = makeStaticString("__DATA_CONST"),
+                .vmaddr = 0,
+                .vmsize = 0,
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .nsects = 0,
+                .flags = 0,
+            }),
+        });
+    }
+
+    // __DATA_CONST,__got section (non-lazy symbol pointers).
+    if (self.got_section_index == null) {
+        const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        self.got_section_index = @intCast(u16, data_const_seg.sections.items.len);
+        try data_const_seg.addSection(self.allocator, .{
+            .sectname = makeStaticString("__got"),
+            .segname = makeStaticString("__DATA_CONST"),
+            .addr = 0,
+            .size = 0,
+            .offset = 0,
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = macho.S_NON_LAZY_SYMBOL_POINTERS,
+            .reserved1 = 0,
+            .reserved2 = 0,
+            .reserved3 = 0,
+        });
+    }
+
+    // __DATA segment (read + write).
+    if (self.data_segment_cmd_index == null) {
+        self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Segment = SegmentCommand.empty(.{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = @sizeOf(macho.segment_command_64),
+                .segname = makeStaticString("__DATA"),
+                .vmaddr = 0,
+                .vmsize = 0,
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .nsects = 0,
+                .flags = 0,
+            }),
+        });
+    }
+
+    // __DATA,__la_symbol_ptr section (lazy symbol pointers).
+    if (self.la_symbol_ptr_section_index == null) {
+        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len);
+        try data_seg.addSection(self.allocator, .{
+            .sectname = makeStaticString("__la_symbol_ptr"),
+            .segname = makeStaticString("__DATA"),
+            .addr = 0,
+            .size = 0,
+            .offset = 0,
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = macho.S_LAZY_SYMBOL_POINTERS,
+            .reserved1 = 0,
+            .reserved2 = 0,
+            .reserved3 = 0,
+        });
+    }
+
+    // __DATA,__data section.
+    if (self.data_section_index == null) {
+        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        self.data_section_index = @intCast(u16, data_seg.sections.items.len);
+        try data_seg.addSection(self.allocator, .{
+            .sectname = makeStaticString("__data"),
+            .segname = makeStaticString("__DATA"),
+            .addr = 0,
+            .size = 0,
+            .offset = 0,
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = macho.S_REGULAR,
+            .reserved1 = 0,
+            .reserved2 = 0,
+            .reserved3 = 0,
+        });
+    }
+
+    // __LINKEDIT segment (read-only).
+    if (self.linkedit_segment_cmd_index == null) {
+        self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Segment = SegmentCommand.empty(.{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = @sizeOf(macho.segment_command_64),
+                .segname = makeStaticString("__LINKEDIT"),
+                .vmaddr = 0,
+                .vmsize = 0,
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = macho.VM_PROT_READ,
+                .initprot = macho.VM_PROT_READ,
+                .nsects = 0,
+                .flags = 0,
+            }),
+        });
+    }
+
+    // LC_DYLD_INFO_ONLY; all table offsets and sizes start out as zero.
+    if (self.dyld_info_cmd_index == null) {
+        self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .DyldInfoOnly = .{
+                .cmd = macho.LC_DYLD_INFO_ONLY,
+                .cmdsize = @sizeOf(macho.dyld_info_command),
+                .rebase_off = 0,
+                .rebase_size = 0,
+                .bind_off = 0,
+                .bind_size = 0,
+                .weak_bind_off = 0,
+                .weak_bind_size = 0,
+                .lazy_bind_off = 0,
+                .lazy_bind_size = 0,
+                .export_off = 0,
+                .export_size = 0,
+            },
+        });
+    }
+
+    // LC_SYMTAB; creating it also seeds the string table with a single zero byte.
+    if (self.symtab_cmd_index == null) {
+        self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Symtab = .{
+                .cmd = macho.LC_SYMTAB,
+                .cmdsize = @sizeOf(macho.symtab_command),
+                .symoff = 0,
+                .nsyms = 0,
+                .stroff = 0,
+                .strsize = 0,
+            },
+        });
+        try self.strtab.append(self.allocator, 0);
+    }
+
+    // LC_DYSYMTAB.
+    if (self.dysymtab_cmd_index == null) {
+        self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Dysymtab = .{
+                .cmd = macho.LC_DYSYMTAB,
+                .cmdsize = @sizeOf(macho.dysymtab_command),
+                .ilocalsym = 0,
+                .nlocalsym = 0,
+                .iextdefsym = 0,
+                .nextdefsym = 0,
+                .iundefsym = 0,
+                .nundefsym = 0,
+                .tocoff = 0,
+                .ntoc = 0,
+                .modtaboff = 0,
+                .nmodtab = 0,
+                .extrefsymoff = 0,
+                .nextrefsyms = 0,
+                .indirectsymoff = 0,
+                .nindirectsyms = 0,
+                .extreloff = 0,
+                .nextrel = 0,
+                .locreloff = 0,
+                .nlocrel = 0,
+            },
+        });
+    }
+
+    // LC_LOAD_DYLINKER; the command is followed by the dyld path, zero-padded so
+    // that the total command size is a multiple of 8 bytes.
+    if (self.dylinker_cmd_index == null) {
+        self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const cmdsize = @intCast(u32, mem.alignForwardGeneric(
+            u64,
+            @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH),
+            @sizeOf(u64),
+        ));
+        var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{
+            .cmd = macho.LC_LOAD_DYLINKER,
+            .cmdsize = cmdsize,
+            .name = @sizeOf(macho.dylinker_command),
+        });
+        dylinker_cmd.data = try self.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name);
+        // Nothing owns the payload until the append below succeeds; release it on failure.
+        errdefer self.allocator.free(dylinker_cmd.data);
+        mem.set(u8, dylinker_cmd.data, 0);
+        mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH));
+        try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd });
+    }
+
+    // LC_LOAD_DYLIB for libSystem; laid out like the dylinker command above.
+    if (self.libsystem_cmd_index == null) {
+        self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const cmdsize = @intCast(u32, mem.alignForwardGeneric(
+            u64,
+            @sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH),
+            @sizeOf(u64),
+        ));
+        // TODO Find a way to work out runtime version from the OS version triple stored in std.Target.
+        // In the meantime, we're gonna hardcode to the minimum compatibility version of 0.0.0.
+        const min_version = 0x0;
+        var dylib_cmd = emptyGenericCommandWithData(macho.dylib_command{
+            .cmd = macho.LC_LOAD_DYLIB,
+            .cmdsize = cmdsize,
+            .dylib = .{
+                .name = @sizeOf(macho.dylib_command),
+                .timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files
+                .current_version = min_version,
+                .compatibility_version = min_version,
+            },
+        });
+        dylib_cmd.data = try self.allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name);
+        // Nothing owns the payload until the append below succeeds; release it on failure.
+        errdefer self.allocator.free(dylib_cmd.data);
+        mem.set(u8, dylib_cmd.data, 0);
+        mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH));
+        try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd });
+    }
+
+    // LC_MAIN; the entry offset starts out as zero.
+    if (self.main_cmd_index == null) {
+        self.main_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .Main = .{
+                .cmd = macho.LC_MAIN,
+                .cmdsize = @sizeOf(macho.entry_point_command),
+                .entryoff = 0x0,
+                .stacksize = 0,
+            },
+        });
+    }
+
+    // LC_SOURCE_VERSION, hardcoded to version 0.
+    if (self.source_version_cmd_index == null) {
+        self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .SourceVersion = .{
+                .cmd = macho.LC_SOURCE_VERSION,
+                .cmdsize = @sizeOf(macho.source_version_command),
+                .version = 0x0,
+            },
+        });
+    }
+
+    // LC_UUID, filled with random bytes.
+    if (self.uuid_cmd_index == null) {
+        self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len);
+        var uuid_cmd: macho.uuid_command = .{
+            .cmd = macho.LC_UUID,
+            .cmdsize = @sizeOf(macho.uuid_command),
+            .uuid = undefined,
+        };
+        std.crypto.random.bytes(&uuid_cmd.uuid);
+        try self.load_commands.append(self.allocator, .{ .Uuid = uuid_cmd });
+    }
+
+    // LC_CODE_SIGNATURE is only emitted when targeting aarch64.
+    if (self.code_signature_cmd_index == null and self.arch.? == .aarch64) {
+        self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .LinkeditData = .{
+                .cmd = macho.LC_CODE_SIGNATURE,
+                .cmdsize = @sizeOf(macho.linkedit_data_command),
+                .dataoff = 0,
+                .datasize = 0,
+            },
+        });
+    }
+
+    // LC_DATA_IN_CODE is only emitted when targeting x86_64.
+    if (self.data_in_code_cmd_index == null and self.arch.? == .x86_64) {
+        self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.allocator, .{
+            .LinkeditData = .{
+                .cmd = macho.LC_DATA_IN_CODE,
+                .cmdsize = @sizeOf(macho.linkedit_data_command),
+                .dataoff = 0,
+                .datasize = 0,
+            },
+        });
+    }
+}
+
+/// Finalizes the output file: patches the bss / thread-local sections, emits
+/// the __LINKEDIT tables, then writes the load commands, the header and, on
+/// aarch64, the code signature.
+///
+/// The calls below run in a fixed order: most `write*` helpers place their
+/// data at `fileoff + filesize` of the linkedit segment and then grow
+/// `filesize`, so each one depends on the ones before it.
+fn flush(self: *Zld) !void {
+    // Clear the recorded file offset of the bss section, if present.
+    if (self.bss_section_index) |index| {
+        const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = &seg.sections.items[index];
+        sect.offset = 0;
+    }
+
+    // Same for the thread-local bss section.
+    if (self.tlv_bss_section_index) |index| {
+        const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = &seg.sections.items[index];
+        sect.offset = 0;
+    }
+
+    // Patch the thread-local variable section in place: read it back from the
+    // file, fill in the recorded offsets, and write it out again.
+    if (self.tlv_section_index) |index| {
+        const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = &seg.sections.items[index];
+
+        var buffer = try self.allocator.alloc(u8, sect.size);
+        defer self.allocator.free(buffer);
+        _ = try self.file.?.preadAll(buffer, sect.offset);
+
+        var stream = std.io.fixedBufferStream(buffer);
+        var writer = stream.writer();
+
+        // Each iteration skips two u64 fields and overwrites the following
+        // u64 with a popped offset (little-endian).
+        // NOTE(review): offsets are popped from the back of the list —
+        // confirm this matches the order of descriptors in the section.
+        const seek_amt = 2 * @sizeOf(u64);
+        while (self.threadlocal_offsets.popOrNull()) |offset| {
+            try writer.context.seekBy(seek_amt);
+            try writer.writeIntLittle(u64, offset);
+        }
+
+        try self.file.?.pwriteAll(buffer, sect.offset);
+    }
+
+    try self.setEntryPoint();
+    try self.writeRebaseInfoTable();
+    try self.writeBindInfoTable();
+    try self.writeLazyBindInfoTable();
+    try self.writeExportInfo();
+    // The data-in-code load command is only created for x86_64.
+    if (self.arch.? == .x86_64) {
+        try self.writeDataInCode();
+    }
+
+    {
+        // The symbol table starts at the current end of the linkedit segment.
+        const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+        const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+        symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
+    }
+
+    try self.writeDebugInfo();
+    try self.writeSymbolTable();
+    try self.writeDynamicSymbolTable();
+    try self.writeStringTable();
+
+    {
+        // Seal __LINKEDIT size
+        const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+        seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?);
+    }
+
+    // Reserve room for the signature before the load commands are written,
+    // because the padding step updates the code-signature load command and
+    // the linkedit segment sizes.
+    if (self.arch.? == .aarch64) {
+        try self.writeCodeSignaturePadding();
+    }
+
+    try self.writeLoadCommands();
+    try self.writeHeader();
+
+    // The signature itself is computed last, after load commands and header
+    // are in the file.
+    if (self.arch.? == .aarch64) {
+        try self.writeCodeSignature();
+    }
+
+    // When the linker itself runs on aarch64 Darwin, copy the output file
+    // onto itself.
+    // NOTE(review): presumably a workaround for OS-level caching of code
+    // signatures for an existing path — confirm and document the reason.
+    if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) {
+        try fs.cwd().copyFile(self.out_path.?, fs.cwd(), self.out_path.?, .{});
+    }
+}
+
+/// Resolves the `_main` symbol, registers it in `self.exports`, and stores its
+/// offset from the start of the text segment in the `Main` load command.
+///
+/// Returns `error.MissingMainEntrypoint` when `_main` is unknown or has no
+/// global / weak global definition. Allocation and map errors are propagated.
+fn setEntryPoint(self: *Zld) !void {
+    // TODO we should respect the -entry flag passed in by the user to set a custom
+    // entrypoint. For now, assume default of `_main`.
+    const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const entry_syms = self.locals.get("_main") orelse return error.MissingMainEntrypoint;
+
+    // A global definition wins immediately; a weak global is only a fallback
+    // that a later global may still replace. Local definitions are ignored.
+    var maybe_entry_sym: ?macho.nlist_64 = null;
+    for (entry_syms.items) |es| {
+        switch (es.tt) {
+            .Global => {
+                maybe_entry_sym = es.inner;
+                break;
+            },
+            .WeakGlobal => {
+                maybe_entry_sym = es.inner;
+            },
+            .Local => {},
+        }
+    }
+    const entry_sym = maybe_entry_sym orelse {
+        log.err("no (weak) global definition of _main found", .{});
+        return error.MissingMainEntrypoint;
+    };
+
+    const name = try self.allocator.dupe(u8, "_main");
+    // If inserting into the exports map fails, nothing else references
+    // `name`, so release it instead of leaking it.
+    errdefer self.allocator.free(name);
+    try self.exports.putNoClobber(self.allocator, name, .{
+        .n_strx = entry_sym.n_strx,
+        .n_value = entry_sym.n_value,
+        .n_type = macho.N_SECT | macho.N_EXT,
+        .n_desc = entry_sym.n_desc,
+        .n_sect = entry_sym.n_sect,
+    });
+
+    const ec = &self.load_commands.items[self.main_cmd_index.?].Main;
+    ec.entryoff = @intCast(u32, entry_sym.n_value - seg.inner.vmaddr);
+}
+
+/// Collects every location that needs rebasing, encodes the rebase info and
+/// writes it into the linkedit segment, recording offset and size in the
+/// dyld info load command.
+///
+/// Sources of rebase entries, in order: `self.local_rebases`, GOT slots of
+/// `self.nonlazy_pointers`, every pointer-sized slot of the mod-init and
+/// mod-term function sections, and the lazy symbol pointer slots.
+fn writeRebaseInfoTable(self: *Zld) !void {
+    var pointers = std.ArrayList(Pointer).init(self.allocator);
+    defer pointers.deinit();
+
+    try pointers.ensureCapacity(pointers.items.len + self.local_rebases.items.len);
+    pointers.appendSliceAssumeCapacity(self.local_rebases.items);
+
+    if (self.got_section_index) |idx| {
+        // TODO this should be cleaned up!
+        try pointers.ensureCapacity(pointers.items.len + self.nonlazy_pointers.items().len);
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+        // Slot indices of local GOT pointers are shifted by the number of
+        // non-lazy imports.
+        const index_offset = @intCast(u32, self.nonlazy_imports.items().len);
+        for (self.nonlazy_pointers.items()) |entry| {
+            const index = index_offset + entry.value.index;
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + index * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    if (self.mod_init_func_section_index) |idx| {
+        // TODO audit and investigate this.
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        // `sect.size` is a byte count; each entry is one 8-byte pointer, so
+        // the number of slots is size / 8 (not size * 8).
+        const npointers = sect.size / @sizeOf(u64);
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+
+        try pointers.ensureCapacity(pointers.items.len + npointers);
+        var i: usize = 0;
+        while (i < npointers) : (i += 1) {
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + i * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    if (self.mod_term_func_section_index) |idx| {
+        // TODO audit and investigate this.
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        // Byte size divided by pointer size, as for the mod-init section.
+        const npointers = sect.size / @sizeOf(u64);
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+
+        try pointers.ensureCapacity(pointers.items.len + npointers);
+        var i: usize = 0;
+        while (i < npointers) : (i += 1) {
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + i * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    if (self.la_symbol_ptr_section_index) |idx| {
+        try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.items().len);
+        const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_segment_cmd_index.?);
+        for (self.lazy_imports.items()) |entry| {
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + entry.value.index * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    std.sort.sort(Pointer, pointers.items, {}, pointerCmp);
+
+    const size = try rebaseInfoSize(pointers.items);
+    var buffer = try self.allocator.alloc(u8, @intCast(usize, size));
+    defer self.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try writeRebaseInfo(pointers.items, stream.writer());
+
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    // Unlike the other tables, the offset is the segment start itself, not
+    // `fileoff + filesize`; `flush` calls this function before any other
+    // linkedit writer.
+    dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff);
+    dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64)));
+    seg.inner.filesize += dyld_info.rebase_size;
+
+    log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size });
+
+    try self.file.?.pwriteAll(buffer, dyld_info.rebase_off);
+}
+
+/// Encodes the binding info for non-lazy imports (and `__tlv_bootstrap`, when
+/// a thread-local variable section exists) and appends it to the linkedit
+/// segment, recording offset and size in the dyld info load command.
+fn writeBindInfoTable(self: *Zld) !void {
+    var bindings = std.ArrayList(Pointer).init(self.allocator);
+    defer bindings.deinit();
+
+    // One binding per non-lazy import, addressed relative to its segment.
+    if (self.got_section_index) |got_index| {
+        const imports = self.nonlazy_imports.items();
+        try bindings.ensureCapacity(bindings.items.len + imports.len);
+
+        const seg_index = self.data_const_segment_cmd_index.?;
+        const data_const = self.load_commands.items[seg_index].Segment;
+        const got = data_const.sections.items[got_index];
+        const got_base = got.addr - data_const.inner.vmaddr;
+        const seg_id = @intCast(u16, seg_index);
+        for (imports) |import_entry| {
+            bindings.appendAssumeCapacity(.{
+                .offset = got_base + import_entry.value.index * @sizeOf(u64),
+                .segment_id = seg_id,
+                .dylib_ordinal = import_entry.value.dylib_ordinal,
+                .name = import_entry.key,
+            });
+        }
+    }
+
+    // A single extra binding for the thread-local bootstrap symbol.
+    if (self.tlv_section_index) |tlv_index| {
+        const seg_index = self.data_segment_cmd_index.?;
+        const data = self.load_commands.items[seg_index].Segment;
+        const tlv = data.sections.items[tlv_index];
+        const tlv_base = tlv.addr - data.inner.vmaddr;
+        const seg_id = @intCast(u16, seg_index);
+        try bindings.append(.{
+            .offset = tlv_base + self.tlv_bootstrap.?.index * @sizeOf(u64),
+            .segment_id = seg_id,
+            .dylib_ordinal = self.tlv_bootstrap.?.dylib_ordinal,
+            .name = "__tlv_bootstrap",
+        });
+    }
+
+    const encoded_len = try bindInfoSize(bindings.items);
+    var encoded = try self.allocator.alloc(u8, @intCast(usize, encoded_len));
+    defer self.allocator.free(encoded);
+
+    var fbs = std.io.fixedBufferStream(encoded);
+    try writeBindInfo(bindings.items, fbs.writer());
+
+    // Place the table at the current end of the linkedit segment; the
+    // recorded size is rounded up to 8 bytes.
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.bind_off = @intCast(u32, linkedit.inner.fileoff + linkedit.inner.filesize);
+    dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, encoded.len, @alignOf(u64)));
+    linkedit.inner.filesize += dyld_info.bind_size;
+
+    const bind_end = dyld_info.bind_off + dyld_info.bind_size;
+    log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, bind_end });
+
+    try self.file.?.pwriteAll(encoded, dyld_info.bind_off);
+}
+
+/// Encodes the lazy binding info for every lazy import, appends it to the
+/// linkedit segment, and then patches the stub helper with the offsets of
+/// the individual entries inside the encoded stream.
+fn writeLazyBindInfoTable(self: *Zld) !void {
+    var bindings = std.ArrayList(Pointer).init(self.allocator);
+    defer bindings.deinit();
+    try bindings.ensureCapacity(self.lazy_imports.items().len);
+
+    if (self.la_symbol_ptr_section_index) |la_index| {
+        const seg_index = self.data_segment_cmd_index.?;
+        const data = self.load_commands.items[seg_index].Segment;
+        const la_symbol_ptr = data.sections.items[la_index];
+        const la_base = la_symbol_ptr.addr - data.inner.vmaddr;
+        const seg_id = @intCast(u16, seg_index);
+        for (self.lazy_imports.items()) |import_entry| {
+            bindings.appendAssumeCapacity(.{
+                .offset = la_base + import_entry.value.index * @sizeOf(u64),
+                .segment_id = seg_id,
+                .dylib_ordinal = import_entry.value.dylib_ordinal,
+                .name = import_entry.key,
+            });
+        }
+    }
+
+    const encoded_len = try lazyBindInfoSize(bindings.items);
+    var encoded = try self.allocator.alloc(u8, @intCast(usize, encoded_len));
+    defer self.allocator.free(encoded);
+
+    var fbs = std.io.fixedBufferStream(encoded);
+    try writeLazyBindInfo(bindings.items, fbs.writer());
+
+    // Place the table at the current end of the linkedit segment; the
+    // recorded size is rounded up to 8 bytes.
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.lazy_bind_off = @intCast(u32, linkedit.inner.fileoff + linkedit.inner.filesize);
+    dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, encoded.len, @alignOf(u64)));
+    linkedit.inner.filesize += dyld_info.lazy_bind_size;
+
+    const lazy_bind_end = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size;
+    log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, lazy_bind_end });
+
+    try self.file.?.pwriteAll(encoded, dyld_info.lazy_bind_off);
+    try self.populateLazyBindOffsetsInStubHelper(encoded);
+}
+
+/// Scans the encoded lazy binding stream in `buffer`, records the stream
+/// offset at which each lazy binding entry begins, and writes that offset as
+/// a little-endian u32 into the matching stub in the stub helper section of
+/// the output file.
+fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void {
+    var stream = std.io.fixedBufferStream(buffer);
+    var reader = stream.reader();
+
+    // The first entry always starts at offset 0.
+    var bind_offsets = std.ArrayList(u32).init(self.allocator);
+    defer bind_offsets.deinit();
+    try bind_offsets.append(0);
+
+    // Set once a DO_BIND has been seen since the last DONE; only then does a
+    // DONE opcode mark the start of another entry.
+    var saw_bind = false;
+
+    while (true) {
+        const inst = reader.readByte() catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => return err,
+        };
+        const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
+
+        switch (opcode) {
+            macho.BIND_OPCODE_DO_BIND => {
+                saw_bind = true;
+            },
+            macho.BIND_OPCODE_DONE => {
+                if (saw_bind) {
+                    const next_entry = try stream.getPos();
+                    try bind_offsets.append(@intCast(u32, next_entry));
+                }
+                saw_bind = false;
+            },
+            macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
+                // Skip the NUL-terminated symbol name that follows.
+                while ((try reader.readByte()) != @as(u8, 0)) {}
+            },
+            // Both opcodes carry a single ULEB128 operand that is not needed here.
+            macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB,
+            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB,
+            => {
+                _ = try leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_ADDEND_SLEB => {
+                _ = try leb.readILEB128(i64, reader);
+            },
+            else => {},
+        }
+    }
+    assert(self.lazy_imports.items().len <= bind_offsets.items.len);
+
+    // Size of one stub, and the position of the u32 placeholder within it.
+    const stub_size: u4 = switch (self.arch.?) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+    const placeholder_delta: u4 = switch (self.arch.?) {
+        .x86_64 => 1,
+        .aarch64 => 2 * @sizeOf(u32),
+        else => unreachable,
+    };
+
+    var word: [@sizeOf(u32)]u8 = undefined;
+    for (self.lazy_imports.items()) |import_entry| {
+        const lazy_sym = import_entry.value;
+        const placeholder_off = self.stub_helper_stubs_start_off.? + lazy_sym.index * stub_size + placeholder_delta;
+        mem.writeIntLittle(u32, &word, bind_offsets.items[lazy_sym.index]);
+        try self.file.?.pwriteAll(&word, placeholder_off);
+    }
+}
+
+/// Builds the export trie from `self.exports`, serializes it, and appends it
+/// to the linkedit segment, recording offset and size in the dyld info load
+/// command.
+fn writeExportInfo(self: *Zld) !void {
+    var export_trie = Trie.init(self.allocator);
+    defer export_trie.deinit();
+
+    // Export addresses are stored relative to the start of the text segment.
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const text_vmaddr = text_segment.inner.vmaddr;
+    for (self.exports.items()) |exported| {
+        const sym = exported.value;
+        // TODO figure out if we should put all exports into the export trie
+        assert(sym.n_value >= text_vmaddr);
+        try export_trie.put(.{
+            .name = exported.key,
+            .vmaddr_offset = sym.n_value - text_vmaddr,
+            .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
+        });
+    }
+    try export_trie.finalize();
+
+    var encoded = try self.allocator.alloc(u8, @intCast(usize, export_trie.size));
+    defer self.allocator.free(encoded);
+    var fbs = std.io.fixedBufferStream(encoded);
+    const nwritten = try export_trie.write(fbs.writer());
+    assert(nwritten == export_trie.size);
+
+    // Place the trie at the current end of the linkedit segment; the recorded
+    // size is rounded up to 8 bytes.
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.export_off = @intCast(u32, linkedit.inner.fileoff + linkedit.inner.filesize);
+    dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, encoded.len, @alignOf(u64)));
+    linkedit.inner.filesize += dyld_info.export_size;
+
+    const export_end = dyld_info.export_off + dyld_info.export_size;
+    log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, export_end });
+
+    try self.file.?.pwriteAll(encoded, dyld_info.export_off);
+}
+
+/// Generates debug symbol table entries (stabs) for every object that carries
+/// usable debug info and writes them at `symtab.symoff`.
+///
+/// When at least one stab is produced, this sets `symtab.nsyms` and
+/// `dysymtab.nlocalsym` to the stab count and grows the linkedit segment by
+/// the bytes written. When none is produced it returns without touching the
+/// load commands.
+fn writeDebugInfo(self: *Zld) !void {
+    var stabs = std.ArrayList(macho.nlist_64).init(self.allocator);
+    defer stabs.deinit();
+
+    for (self.objects.items) |object, object_id| {
+        // Objects for which no debug info could be parsed are skipped.
+        var debug_info = blk: {
+            var di = try DebugInfo.parseFromObject(self.allocator, object);
+            break :blk di orelse continue;
+        };
+        defer debug_info.deinit(self.allocator);
+
+        // We assume there is only one CU.
+        const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
+            error.MissingDebugInfo => {
+                // TODO audit cases with missing debug info and audit our dwarf.zig module.
+                log.debug("invalid or missing debug info in {s}; skipping", .{object.name});
+                continue;
+            },
+            else => |e| return e,
+        };
+        const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name);
+        const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir);
+
+        {
+            // Join compilation directory and unit name, then split the result
+            // back into a directory part and a file part for the two N_SO stabs.
+            const tu_path = try std.fs.path.join(self.allocator, &[_][]const u8{ comp_dir, name });
+            defer self.allocator.free(tu_path);
+            // NOTE(review): when `dirname` returns null the fallback "./" is
+            // not a prefix of `tu_path`, yet its length is still used to slice
+            // `tu_path` below — confirm that case cannot occur.
+            const dirname = std.fs.path.dirname(tu_path) orelse "./";
+            // Current dir
+            try stabs.append(.{
+                .n_strx = try self.makeString(tu_path[0 .. dirname.len + 1]),
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            // Artifact name
+            try stabs.append(.{
+                .n_strx = try self.makeString(tu_path[dirname.len + 1 ..]),
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            // Path to object file with debug info
+            var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+            // Archive members are recorded as "<archive path>(<member name>)";
+            // standalone objects as their resolved path.
+            const full_path = blk: {
+                if (object.ar_name) |prefix| {
+                    const path = try std.os.realpath(prefix, &buffer);
+                    break :blk try std.fmt.allocPrint(self.allocator, "{s}({s})", .{ path, object.name });
+                } else {
+                    const path = try std.os.realpath(object.name, &buffer);
+                    break :blk try mem.dupe(self.allocator, u8, path);
+                }
+            };
+            defer self.allocator.free(full_path);
+            const stat = try object.file.stat();
+            // The N_OSO value is the object's modification time divided by 1e9
+            // (presumably nanoseconds to seconds — confirm the unit of `stat.mtime`).
+            const mtime = @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
+            try stabs.append(.{
+                .n_strx = try self.makeString(full_path),
+                .n_type = macho.N_OSO,
+                .n_sect = 0,
+                .n_desc = 1,
+                .n_value = mtime,
+            });
+        }
+        log.debug("analyzing debug info in '{s}'", .{object.name});
+
+        for (object.symtab.items) |source_sym| {
+            const symname = object.getString(source_sym.n_strx);
+            const source_addr = source_sym.n_value;
+            // Only symbols that made it into the linker's locals table, and
+            // that were contributed by this very object, get a stab.
+            const target_syms = self.locals.get(symname) orelse continue;
+            const target_sym: Symbol = blk: {
+                for (target_syms.items) |ts| {
+                    if (ts.object_id == @intCast(u16, object_id)) break :blk ts;
+                } else continue;
+            };
+
+            // If the symbol's address in the source object falls inside a
+            // function's pc range, the symbol is a function of that size.
+            const maybe_size = blk: for (debug_info.inner.func_list.items) |func| {
+                if (func.pc_range) |range| {
+                    if (source_addr >= range.start and source_addr < range.end) {
+                        break :blk range.end - range.start;
+                    }
+                }
+            } else null;
+
+            if (maybe_size) |size| {
+                // Function: four stabs — N_BNSYM, N_FUN (name + address),
+                // N_FUN (size), N_ENSYM.
+                try stabs.append(.{
+                    .n_strx = 0,
+                    .n_type = macho.N_BNSYM,
+                    .n_sect = target_sym.inner.n_sect,
+                    .n_desc = 0,
+                    .n_value = target_sym.inner.n_value,
+                });
+                try stabs.append(.{
+                    .n_strx = target_sym.inner.n_strx,
+                    .n_type = macho.N_FUN,
+                    .n_sect = target_sym.inner.n_sect,
+                    .n_desc = 0,
+                    .n_value = target_sym.inner.n_value,
+                });
+                try stabs.append(.{
+                    .n_strx = 0,
+                    .n_type = macho.N_FUN,
+                    .n_sect = 0,
+                    .n_desc = 0,
+                    .n_value = size,
+                });
+                try stabs.append(.{
+                    .n_strx = 0,
+                    .n_type = macho.N_ENSYM,
+                    .n_sect = target_sym.inner.n_sect,
+                    .n_desc = 0,
+                    .n_value = size,
+                });
+            } else {
+                // Everything else is emitted as a single N_STSYM stab.
+                // TODO need a way to differentiate symbols: global, static, local, etc.
+                try stabs.append(.{
+                    .n_strx = target_sym.inner.n_strx,
+                    .n_type = macho.N_STSYM,
+                    .n_sect = target_sym.inner.n_sect,
+                    .n_desc = 0,
+                    .n_value = target_sym.inner.n_value,
+                });
+            }
+        }
+
+        // Close the source file!
+        try stabs.append(.{
+            .n_strx = 0,
+            .n_type = macho.N_SO,
+            .n_sect = 0,
+            .n_desc = 0,
+            .n_value = 0,
+        });
+    }
+
+    if (stabs.items.len == 0) return;
+
+    // Write stabs into the symbol table
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+
+    // The stabs are the first entries of the symbol table, so the count is
+    // assigned here rather than added.
+    symtab.nsyms = @intCast(u32, stabs.items.len);
+
+    const stabs_off = symtab.symoff;
+    const stabs_size = symtab.nsyms * @sizeOf(macho.nlist_64);
+    log.debug("writing symbol stabs from 0x{x} to 0x{x}", .{ stabs_off, stabs_size + stabs_off });
+    try self.file.?.pwriteAll(mem.sliceAsBytes(stabs.items), stabs_off);
+
+    linkedit.inner.filesize += stabs_size;
+
+    // Update dynamic symbol table.
+    const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+    dysymtab.nlocalsym = symtab.nsyms;
+}
+
+/// Writes local, exported and undefined symbols, in that order, right after
+/// any entries already counted in `symtab.nsyms`, then updates the symtab
+/// and dysymtab load commands and grows the linkedit segment accordingly.
+fn writeSymbolTable(self: *Zld) !void {
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+
+    // Locals: every definition recorded under every known name.
+    var local_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
+    defer local_syms.deinit();
+
+    for (self.locals.items()) |named| {
+        log.debug("'{s}': {} entries", .{ named.key, named.value.items.len });
+        for (named.value.items) |definition| {
+            log.debug("    | {}", .{definition.inner});
+            log.debug("    | {}", .{definition.tt});
+            log.debug("    | {s}", .{self.objects.items[definition.object_id].name});
+            try local_syms.append(definition.inner);
+        }
+    }
+    const nlocals = local_syms.items.len;
+
+    // Exports.
+    const nexports = self.exports.items().len;
+    var export_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
+    defer export_syms.deinit();
+
+    try export_syms.ensureCapacity(nexports);
+    for (self.exports.items()) |exported| {
+        export_syms.appendAssumeCapacity(exported.value);
+    }
+
+    // Undefined symbols: lazy imports, non-lazy imports, then the
+    // thread-local bootstrap symbol when there is one.
+    const has_tlv: bool = self.tlv_bootstrap != null;
+
+    var nundefs = self.lazy_imports.items().len + self.nonlazy_imports.items().len;
+    if (has_tlv) nundefs += 1;
+
+    var undef_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
+    defer undef_syms.deinit();
+
+    try undef_syms.ensureCapacity(nundefs);
+    for (self.lazy_imports.items()) |lazy_import| {
+        undef_syms.appendAssumeCapacity(lazy_import.value.symbol);
+    }
+    for (self.nonlazy_imports.items()) |nonlazy_import| {
+        undef_syms.appendAssumeCapacity(nonlazy_import.value.symbol);
+    }
+    if (has_tlv) {
+        undef_syms.appendAssumeCapacity(self.tlv_bootstrap.?.symbol);
+    }
+
+    // The three groups are laid out back to back, starting after the
+    // entries that `symtab.nsyms` already accounts for.
+    const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64);
+    const locals_size = nlocals * @sizeOf(macho.nlist_64);
+    log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off });
+    try self.file.?.pwriteAll(mem.sliceAsBytes(local_syms.items), locals_off);
+
+    const exports_off = locals_off + locals_size;
+    const exports_size = nexports * @sizeOf(macho.nlist_64);
+    log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off });
+    try self.file.?.pwriteAll(mem.sliceAsBytes(export_syms.items), exports_off);
+
+    const undefs_off = exports_off + exports_size;
+    const undefs_size = nundefs * @sizeOf(macho.nlist_64);
+    log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
+    try self.file.?.pwriteAll(mem.sliceAsBytes(undef_syms.items), undefs_off);
+
+    symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs);
+    linkedit.inner.filesize += locals_size + exports_size + undefs_size;
+
+    // Update dynamic symbol table.
+    const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+    dysymtab.nlocalsym += @intCast(u32, nlocals);
+    dysymtab.iextdefsym = dysymtab.nlocalsym;
+    dysymtab.nextdefsym = @intCast(u32, nexports);
+    dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym;
+    dysymtab.nundefsym = @intCast(u32, nundefs);
+}
+
+/// Writes the indirect symbol table at the current end of the linkedit
+/// segment and stores, in `reserved1` of the stubs, GOT and lazy symbol
+/// pointer sections, the index at which each section's entries begin.
+///
+/// Table layout: one entry per lazy import (stubs), one per non-lazy import
+/// followed by one `INDIRECT_SYMBOL_LOCAL` per local GOT pointer (GOT), then
+/// one per lazy import again (lazy symbol pointers).
+fn writeDynamicSymbolTable(self: *Zld) !void {
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stubs_sect = &text_segment.sections.items[self.stubs_section_index.?];
+    const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const got_sect = &data_const_segment.sections.items[self.got_section_index.?];
+    const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const la_symbol_ptr_sect = &data_segment.sections.items[self.la_symbol_ptr_section_index.?];
+    const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+
+    const lazy_imports = self.lazy_imports.items();
+    const nonlazy_imports = self.nonlazy_imports.items();
+    const local_got_entries = self.nonlazy_pointers.items();
+
+    dysymtab.indirectsymoff = @intCast(u32, linkedit.inner.fileoff + linkedit.inner.filesize);
+    dysymtab.nindirectsyms = @intCast(u32, lazy_imports.len * 2 + nonlazy_imports.len + local_got_entries.len);
+    const table_size = dysymtab.nindirectsyms * @sizeOf(u32);
+    linkedit.inner.filesize += table_size;
+
+    log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{
+        dysymtab.indirectsymoff,
+        dysymtab.indirectsymoff + table_size,
+    });
+
+    var table = try self.allocator.alloc(u8, table_size);
+    defer self.allocator.free(table);
+    var fbs = std.io.fixedBufferStream(table);
+    var out = fbs.writer();
+
+    // Stubs: lazy imports occupy the first undefined-symbol slots.
+    stubs_sect.reserved1 = 0;
+    for (lazy_imports) |_, lazy_i| {
+        try out.writeIntLittle(u32, @intCast(u32, dysymtab.iundefsym + lazy_i));
+    }
+
+    // GOT: non-lazy imports follow the lazy ones among the undefined symbols.
+    const nlazy = @intCast(u32, lazy_imports.len);
+    got_sect.reserved1 = nlazy;
+    for (nonlazy_imports) |_, nonlazy_i| {
+        try out.writeIntLittle(u32, @intCast(u32, dysymtab.iundefsym + nonlazy_i + nlazy));
+    }
+    // TODO there should be one common set of GOT entries.
+    for (local_got_entries) |_| {
+        try out.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL);
+    }
+
+    // Lazy symbol pointers: the same symbols as the stubs, listed once more.
+    la_symbol_ptr_sect.reserved1 = got_sect.reserved1 + @intCast(u32, nonlazy_imports.len) + @intCast(u32, local_got_entries.len);
+    for (lazy_imports) |_, lazy_i| {
+        try out.writeIntLittle(u32, @intCast(u32, dysymtab.iundefsym + lazy_i));
+    }
+
+    try self.file.?.pwriteAll(table, dysymtab.indirectsymoff);
+}
+
+/// Appends the string table to the linkedit segment and records its offset
+/// and 8-byte-aligned size in the symtab load command.
+fn writeStringTable(self: *Zld) !void {
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+    const strtab_bytes = self.strtab.items;
+
+    symtab.stroff = @intCast(u32, linkedit.inner.fileoff + linkedit.inner.filesize);
+    symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, strtab_bytes.len, @alignOf(u64)));
+    linkedit.inner.filesize += symtab.strsize;
+
+    const strtab_end = symtab.stroff + symtab.strsize;
+    log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, strtab_end });
+
+    try self.file.?.pwriteAll(strtab_bytes, symtab.stroff);
+
+    // Only the unpadded bytes were written above. When alignment added
+    // padding, write a single zero byte at the last padded position so the
+    // file extends to the recorded size.
+    if (symtab.strsize > strtab_bytes.len) {
+        if (self.arch.? == .x86_64) {
+            // This is the last section, so we need to pad it out.
+            const last_byte_off = linkedit.inner.fileoff + linkedit.inner.filesize - 1;
+            try self.file.?.pwriteAll(&[_]u8{0}, last_byte_off);
+        }
+    }
+}
+
+/// Collects the data-in-code entries of every object whose text section was
+/// mapped into the output, rebases their offsets onto the output text
+/// section, and appends them to the linkedit segment. Offset and size are
+/// recorded in the data-in-code load command.
+fn writeDataInCode(self: *Zld) !void {
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData;
+    const fileoff = seg.inner.fileoff + seg.inner.filesize;
+
+    var buf = std.ArrayList(u8).init(self.allocator);
+    defer buf.deinit();
+
+    const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const text_sect = text_seg.sections.items[self.text_section_index.?];
+    for (self.objects.items) |object, object_id| {
+        // Objects whose text section has no mapping contribute nothing.
+        const target_mapping = self.mappings.get(.{
+            .object_id = @intCast(u16, object_id),
+            .source_sect_id = object.text_section_index.?,
+        }) orelse continue;
+
+        try buf.ensureCapacity(
+            buf.items.len + object.data_in_code_entries.items.len * @sizeOf(macho.data_in_code_entry),
+        );
+        for (object.data_in_code_entries.items) |dice| {
+            // New offset = output text section file offset + mapping offset
+            // of this object's text + the entry's original offset.
+            const new_dice: macho.data_in_code_entry = .{
+                .offset = text_sect.offset + target_mapping.offset + dice.offset,
+                .length = dice.length,
+                .kind = dice.kind,
+            };
+            buf.appendSliceAssumeCapacity(mem.asBytes(&new_dice));
+        }
+    }
+    const datasize = @intCast(u32, buf.items.len);
+
+    dice_cmd.dataoff = @intCast(u32, fileoff);
+    dice_cmd.datasize = datasize;
+    seg.inner.filesize += datasize;
+
+    log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize });
+
+    try self.file.?.pwriteAll(buf.items, fileoff);
+}
+
+/// Reserves space for the code signature at the end of the linkedit segment:
+/// records offset and size in the code-signature load command, grows the
+/// segment, and extends the file to cover the reserved range.
+fn writeCodeSignaturePadding(self: *Zld) !void {
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData;
+
+    const sig_off = linkedit.inner.fileoff + linkedit.inner.filesize;
+    const sig_size = CodeSignature.calcCodeSignaturePaddingSize(
+        self.out_path.?,
+        sig_off,
+        self.page_size.?,
+    );
+    code_sig_cmd.dataoff = @intCast(u32, sig_off);
+    code_sig_cmd.datasize = sig_size;
+
+    // Advance size of __LINKEDIT segment
+    linkedit.inner.filesize += sig_size;
+    linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size.?);
+
+    const sig_end = sig_off + sig_size;
+    log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ sig_off, sig_end });
+
+    // Pad out the space. We need to do this to calculate valid hashes for everything in the file
+    // except for code signature data.
+    // Writing one zero byte at the last reserved position is enough to extend the file.
+    try self.file.?.pwriteAll(&[_]u8{0}, sig_end - 1);
+}
+
+/// Computes an ad-hoc code signature over the output file and writes it at
+/// the offset recorded in the code-signature load command.
+fn writeCodeSignature(self: *Zld) !void {
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData;
+
+    var signature = CodeSignature.init(self.allocator, self.page_size.?);
+    defer signature.deinit();
+    try signature.calcAdhocSignature(
+        self.file.?,
+        self.out_path.?,
+        text_segment.inner,
+        sig_cmd,
+        .Exe,
+    );
+
+    // Serialize into memory first so the signature goes out in one write.
+    var encoded = try self.allocator.alloc(u8, signature.size());
+    defer self.allocator.free(encoded);
+    var fbs = std.io.fixedBufferStream(encoded);
+    try signature.write(fbs.writer());
+
+    const sig_end = sig_cmd.dataoff + encoded.len;
+    log.debug("writing code signature from 0x{x} to 0x{x}", .{ sig_cmd.dataoff, sig_end });
+
+    try self.file.?.pwriteAll(encoded, sig_cmd.dataoff);
+}
+
+/// Serializes every load command into one buffer and writes that buffer
+/// immediately after the Mach-O header.
+fn writeLoadCommands(self: *Zld) !void {
+    var sizeofcmds: u32 = 0;
+    for (self.load_commands.items) |lc| {
+        sizeofcmds += lc.cmdsize();
+    }
+
+    var buffer = try self.allocator.alloc(u8, sizeofcmds);
+    defer self.allocator.free(buffer);
+    // Keep the stream in a named variable: the writer refers back to the stream, so the
+    // stream has to stay alive for every `lc.write` call (same pattern as writeCodeSignature).
+    var stream = std.io.fixedBufferStream(buffer);
+    const writer = stream.writer();
+    for (self.load_commands.items) |lc| {
+        try lc.write(writer);
+    }
+
+    const off = @sizeOf(macho.mach_header_64);
+    log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds });
+    try self.file.?.pwriteAll(buffer, off);
+}
+
+/// Fills in a 64-bit Mach-O header describing an executable for the selected
+/// architecture and writes it at file offset 0.
+/// Returns error.UnsupportedCpuArchitecture for anything but aarch64 and x86_64.
+fn writeHeader(self: *Zld) !void {
+    const CpuInfo = struct {
+        cpu_type: macho.cpu_type_t,
+        cpu_subtype: macho.cpu_subtype_t,
+    };
+    const cpu: CpuInfo = switch (self.arch.?) {
+        .x86_64 => .{
+            .cpu_type = macho.CPU_TYPE_X86_64,
+            .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL,
+        },
+        .aarch64 => .{
+            .cpu_type = macho.CPU_TYPE_ARM64,
+            .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL,
+        },
+        else => return error.UnsupportedCpuArchitecture,
+    };
+
+    var header: macho.mach_header_64 = undefined;
+    header.magic = macho.MH_MAGIC_64;
+    header.cputype = cpu.cpu_type;
+    header.cpusubtype = cpu.cpu_subtype;
+    header.filetype = macho.MH_EXECUTE;
+    header.reserved = 0;
+
+    header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL;
+    if (self.tlv_section_index != null) {
+        header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;
+    }
+
+    // Command count and total command size are derived from the current load command list.
+    const cmds = self.load_commands.items;
+    header.ncmds = @intCast(u32, cmds.len);
+    header.sizeofcmds = 0;
+    for (cmds) |cmd| {
+        header.sizeofcmds += cmd.cmdsize();
+    }
+
+    log.debug("writing Mach-O header {}", .{header});
+    try self.file.?.pwriteAll(mem.asBytes(&header), 0);
+}
+
+/// Returns `bytes` copied into a 16-byte array, with the unused tail set to zero.
+/// Asserts that `bytes` is at most 16 bytes long.
+pub fn makeStaticString(bytes: []const u8) [16]u8 {
+    var padded: [16]u8 = undefined;
+    assert(bytes.len <= padded.len);
+    for (padded) |*slot, i| {
+        slot.* = if (i < bytes.len) bytes[i] else 0;
+    }
+    return padded;
+}
+
+/// Appends `bytes` plus a terminating zero byte to the string table and
+/// returns the offset at which the new string starts.
+fn makeString(self: *Zld, bytes: []const u8) !u32 {
+    const old_len = self.strtab.items.len;
+    // Reserve room for the string and its terminator up front so the appends cannot fail.
+    try self.strtab.ensureCapacity(self.allocator, old_len + bytes.len + 1);
+    const str_off = @intCast(u32, old_len);
+    log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, str_off });
+    self.strtab.appendSliceAssumeCapacity(bytes);
+    self.strtab.appendAssumeCapacity(0);
+    return str_off;
+}
+
+/// Returns the zero-terminated string that starts at `str_off` in the string table.
+/// Asserts that `str_off` lies inside the table.
+fn getString(self: *const Zld, str_off: u32) []const u8 {
+    const table = self.strtab.items;
+    assert(str_off < table.len);
+    const start = @ptrCast([*:0]const u8, table.ptr + str_off);
+    return mem.spanZ(start);
+}
+
+/// Returns the part of a fixed 16-byte name that precedes the first zero byte,
+/// or all 16 bytes when the name contains no zero byte.
+pub fn parseName(name: *const [16]u8) []const u8 {
+    if (mem.indexOfScalar(u8, name, @as(u8, 0))) |nul_pos| {
+        return name[0..nul_pos];
+    }
+    return name[0..name.len];
+}
+
+/// True when the symbol is not extern (per `isExtern`) and its type bits equal N_SECT.
+fn isLocal(sym: *const macho.nlist_64) callconv(.Inline) bool {
+    return !isExtern(sym) and (sym.n_type & macho.N_TYPE) == macho.N_SECT;
+}
+
+/// True when the symbol is extern (per `isExtern`) and its type bits equal N_SECT.
+fn isExport(sym: *const macho.nlist_64) callconv(.Inline) bool {
+    return isExtern(sym) and (sym.n_type & macho.N_TYPE) == macho.N_SECT;
+}
+
+/// True when the symbol is extern (per `isExtern`) and its type bits equal N_UNDF.
+fn isImport(sym: *const macho.nlist_64) callconv(.Inline) bool {
+    return isExtern(sym) and (sym.n_type & macho.N_TYPE) == macho.N_UNDF;
+}
+
+/// True when the N_EXT bit is set in `n_type` and the N_PEXT bit is clear.
+fn isExtern(sym: *const macho.nlist_64) callconv(.Inline) bool {
+    const has_ext = (sym.n_type & macho.N_EXT) != 0;
+    const has_pext = (sym.n_type & macho.N_PEXT) != 0;
+    return has_ext and !has_pext;
+}
+
+/// True when the N_WEAK_DEF bit is set in the symbol's `n_desc`.
+fn isWeakDef(sym: *const macho.nlist_64) callconv(.Inline) bool {
+    const weak_bits = sym.n_desc & macho.N_WEAK_DEF;
+    return weak_bits != 0;
+}
+
+/// Inspects the last byte of a 4-byte instruction: returns true when bits 4..2
+/// of that byte are 0b100.
+fn aarch64IsArithmetic(inst: *const [4]u8) callconv(.Inline) bool {
+    const low_five = @truncate(u5, inst[3]);
+    const group = low_five >> 2;
+    return group == 0b100;
+}
diff --git a/src/link/MachO/bind.zig b/src/link/MachO/bind.zig
@@ -0,0 +1,145 @@
+const std = @import("std");
+const leb = std.leb;
+const macho = std.macho;
+
+/// One entry consumed by the rebase, bind and lazy-bind writers in this file.
+pub const Pointer = struct {
+    /// Offset emitted (LEB128-encoded) after the SET_SEGMENT_AND_OFFSET opcode.
+    offset: u64,
+    /// Segment identifier; the writers truncate it to 4 bits to fit the opcode immediate.
+    segment_id: u16,
+    /// Must be non-null for bind and lazy-bind entries (they unwrap it with `.?`);
+    /// the rebase functions never read it.
+    dylib_ordinal: ?i64 = null,
+    /// Symbol name; must be non-null for bind and lazy-bind entries, never read by
+    /// the rebase functions.
+    name: ?[]const u8 = null,
+};
+
+/// Less-than comparison for `Pointer`: orders by `segment_id` first and by
+/// `offset` within the same segment. `context` is unused.
+pub fn pointerCmp(context: void, a: Pointer, b: Pointer) bool {
+    if (a.segment_id != b.segment_id) {
+        return a.segment_id < b.segment_id;
+    }
+    return a.offset < b.offset;
+}
+
+/// Returns the number of bytes `writeRebaseInfo` emits for `pointers`.
+pub fn rebaseInfoSize(pointers: []const Pointer) !u64 {
+    // LEB128 payloads are measured by encoding them into a counting null writer.
+    var counter = std.io.countingWriter(std.io.null_writer);
+    var leb_sink = counter.writer();
+
+    var opcode_bytes: u64 = 0;
+    for (pointers) |pointer| {
+        // Three single-byte opcodes per pointer: set type, set segment, do rebase.
+        opcode_bytes += 3;
+        try leb.writeILEB128(leb_sink, pointer.offset);
+    }
+
+    // One extra byte for the terminating REBASE_OPCODE_DONE.
+    return opcode_bytes + 1 + counter.bytes_written;
+}
+
+/// Emits one rebase record per pointer (set type, set segment and offset,
+/// rebase once) followed by a single REBASE_OPCODE_DONE.
+pub fn writeRebaseInfo(pointers: []const Pointer, writer: anytype) !void {
+    // These two opcodes are identical for every pointer.
+    const set_type = macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER);
+    const rebase_once = macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1);
+
+    for (pointers) |pointer| {
+        const set_segment = macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id);
+
+        try writer.writeByte(set_type);
+        try writer.writeByte(set_segment);
+        try leb.writeILEB128(writer, pointer.offset);
+        try writer.writeByte(rebase_once);
+    }
+    try writer.writeByte(macho.REBASE_OPCODE_DONE);
+}
+
+/// Returns the number of bytes `writeBindInfo` emits for `pointers`.
+/// Every pointer must have `dylib_ordinal` and `name` set.
+pub fn bindInfoSize(pointers: []const Pointer) !u64 {
+    // LEB128 payloads are measured by encoding them into a counting null writer.
+    var counter = std.io.countingWriter(std.io.null_writer);
+    var leb_sink = counter.writer();
+    var fixed_bytes: u64 = 0;
+
+    for (pointers) |pointer| {
+        const ordinal = pointer.dylib_ordinal.?;
+        if (ordinal > 15) {
+            try leb.writeULEB128(leb_sink, @bitCast(u64, ordinal));
+        }
+        const name = pointer.name.?;
+        try leb.writeILEB128(leb_sink, pointer.offset);
+
+        // Dylib opcode, set type, set symbol, name plus its zero terminator,
+        // set segment, do bind.
+        fixed_bytes += 6 + name.len;
+    }
+
+    // One extra byte for the terminating BIND_OPCODE_DONE.
+    return fixed_bytes + counter.bytes_written + 1;
+}
+
+/// Emits one bind record per pointer followed by a single BIND_OPCODE_DONE.
+/// Every pointer must have `dylib_ordinal` and `name` set.
+pub fn writeBindInfo(pointers: []const Pointer, writer: anytype) !void {
+    const set_type = macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER);
+
+    for (pointers) |pointer| {
+        const ordinal = pointer.dylib_ordinal.?;
+        const ordinal_bits = @bitCast(u64, ordinal);
+
+        // Ordinals above 15 do not fit the 4-bit immediate and go out as ULEB128.
+        if (ordinal > 15) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+            try leb.writeULEB128(writer, ordinal_bits);
+        } else {
+            const opcode = if (ordinal > 0)
+                macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
+            else
+                macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM;
+            try writer.writeByte(opcode | @truncate(u4, ordinal_bits));
+        }
+        try writer.writeByte(set_type);
+
+        try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+        try writer.writeAll(pointer.name.?);
+        try writer.writeByte(0);
+
+        const set_segment = macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id);
+        try writer.writeByte(set_segment);
+        try leb.writeILEB128(writer, pointer.offset);
+
+        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+    }
+
+    try writer.writeByte(macho.BIND_OPCODE_DONE);
+}
+
+/// Returns the number of bytes `writeLazyBindInfo` emits for `pointers`.
+/// Every pointer must have `dylib_ordinal` and `name` set.
+pub fn lazyBindInfoSize(pointers: []const Pointer) !u64 {
+    // LEB128 payloads are measured by encoding them into a counting null writer.
+    var counter = std.io.countingWriter(std.io.null_writer);
+    var leb_sink = counter.writer();
+    var fixed_bytes: u64 = 0;
+
+    for (pointers) |pointer| {
+        try leb.writeILEB128(leb_sink, pointer.offset);
+
+        const ordinal = pointer.dylib_ordinal.?;
+        if (ordinal > 15) {
+            try leb.writeULEB128(leb_sink, @bitCast(u64, ordinal));
+        }
+
+        // Set segment, dylib opcode, set symbol, name plus its zero terminator,
+        // do bind, done.
+        fixed_bytes += 6 + pointer.name.?.len;
+    }
+
+    return fixed_bytes + counter.bytes_written;
+}
+
+/// Emits one lazy-bind record per pointer; each record ends with its own
+/// BIND_OPCODE_DO_BIND and BIND_OPCODE_DONE.
+/// Every pointer must have `dylib_ordinal` and `name` set.
+pub fn writeLazyBindInfo(pointers: []const Pointer, writer: anytype) !void {
+    for (pointers) |pointer| {
+        const set_segment = macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id);
+        try writer.writeByte(set_segment);
+        try leb.writeILEB128(writer, pointer.offset);
+
+        const ordinal = pointer.dylib_ordinal.?;
+        const ordinal_bits = @bitCast(u64, ordinal);
+
+        // Ordinals above 15 do not fit the 4-bit immediate and go out as ULEB128.
+        if (ordinal > 15) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+            try leb.writeULEB128(writer, ordinal_bits);
+        } else {
+            const opcode = if (ordinal > 0)
+                macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
+            else
+                macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM;
+            try writer.writeByte(opcode | @truncate(u4, ordinal_bits));
+        }
+
+        try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+        try writer.writeAll(pointer.name.?);
+        try writer.writeByte(0);
+
+        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+        try writer.writeByte(macho.BIND_OPCODE_DONE);
+    }
+}
diff --git a/src/link/MachO/imports.zig b/src/link/MachO/imports.zig
@@ -1,152 +0,0 @@
-const std = @import("std");
-const leb = std.leb;
-const macho = std.macho;
-const mem = std.mem;
-
-const assert = std.debug.assert;
-const Allocator = mem.Allocator;
-
-pub const ExternSymbol = struct {
-    /// MachO symbol table entry.
-    inner: macho.nlist_64,
-
-    /// Id of the dynamic library where the specified entries can be found.
-    /// Id of 0 means self.
-    /// TODO this should really be an id into the table of all defined
-    /// dylibs.
-    dylib_ordinal: i64 = 0,
-
-    /// Id of the segment where this symbol is defined (will have its address
-    /// resolved).
-    segment: u16 = 0,
-
-    /// Offset relative to the start address of the `segment`.
-    offset: u32 = 0,
-};
-
-pub fn rebaseInfoSize(symbols: anytype) !u64 {
-    var stream = std.io.countingWriter(std.io.null_writer);
-    var writer = stream.writer();
-    var size: u64 = 0;
-
-    for (symbols) |entry| {
-        size += 2;
-        try leb.writeILEB128(writer, entry.value.offset);
-        size += 1;
-    }
-
-    size += 1 + stream.bytes_written;
-    return size;
-}
-
-pub fn writeRebaseInfo(symbols: anytype, writer: anytype) !void {
-    for (symbols) |entry| {
-        const symbol = entry.value;
-        try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER));
-        try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
-        try leb.writeILEB128(writer, symbol.offset);
-        try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1));
-    }
-    try writer.writeByte(macho.REBASE_OPCODE_DONE);
-}
-
-pub fn bindInfoSize(symbols: anytype) !u64 {
-    var stream = std.io.countingWriter(std.io.null_writer);
-    var writer = stream.writer();
-    var size: u64 = 0;
-
-    for (symbols) |entry| {
-        const symbol = entry.value;
-
-        size += 1;
-        if (symbol.dylib_ordinal > 15) {
-            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
-        }
-        size += 1;
-
-        size += 1;
-        size += entry.key.len;
-        size += 1;
-
-        size += 1;
-        try leb.writeILEB128(writer, symbol.offset);
-        size += 2;
-    }
-
-    size += stream.bytes_written;
-    return size;
-}
-
-pub fn writeBindInfo(symbols: anytype, writer: anytype) !void {
-    for (symbols) |entry| {
-        const symbol = entry.value;
-
-        if (symbol.dylib_ordinal > 15) {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
-        } else if (symbol.dylib_ordinal > 0) {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
-        } else {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
-        }
-        try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER));
-
-        try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
-        try writer.writeAll(entry.key);
-        try writer.writeByte(0);
-
-        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
-        try leb.writeILEB128(writer, symbol.offset);
-        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
-        try writer.writeByte(macho.BIND_OPCODE_DONE);
-    }
-}
-
-pub fn lazyBindInfoSize(symbols: anytype) !u64 {
-    var stream = std.io.countingWriter(std.io.null_writer);
-    var writer = stream.writer();
-    var size: u64 = 0;
-
-    for (symbols) |entry| {
-        const symbol = entry.value;
-        size += 1;
-        try leb.writeILEB128(writer, symbol.offset);
-        size += 1;
-        if (symbol.dylib_ordinal > 15) {
-            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
-        }
-
-        size += 1;
-        size += entry.key.len;
-        size += 1;
-
-        size += 2;
-    }
-
-    size += stream.bytes_written;
-    return size;
-}
-
-pub fn writeLazyBindInfo(symbols: anytype, writer: anytype) !void {
-    for (symbols) |entry| {
-        const symbol = entry.value;
-        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
-        try leb.writeILEB128(writer, symbol.offset);
-
-        if (symbol.dylib_ordinal > 15) {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
-        } else if (symbol.dylib_ordinal > 0) {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
-        } else {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
-        }
-
-        try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
-        try writer.writeAll(entry.key);
-        try writer.writeByte(0);
-
-        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
-        try writer.writeByte(macho.BIND_OPCODE_DONE);
-    }
-}
diff --git a/src/main.zig b/src/main.zig
@@ -557,7 +557,7 @@ fn buildOutputType(
     var test_filter: ?[]const u8 = null;
     var test_name_prefix: ?[]const u8 = null;
     var override_local_cache_dir: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_LOCAL_CACHE_DIR");
-    var override_global_cache_dir: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_GLOBAL_CACHE_DIR");
+    var override_global_cache_dir: ?[]const u8 = null;
     var override_lib_dir: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_LIB_DIR");
     var main_pkg_path: ?[]const u8 = null;
     var clang_preprocessor_mode: Compilation.ClangPreprocessorMode = .no;
@@ -841,7 +841,11 @@ fn buildOutputType(
                     } else if (mem.eql(u8, arg, "--debug-log")) {
                         if (i + 1 >= args.len) fatal("expected parameter after {s}", .{arg});
                         i += 1;
-                        try log_scopes.append(gpa, args[i]);
+                        if (!build_options.enable_logging) {
+                            std.log.warn("Zig was compiled without logging enabled (-Dlog). --debug-log has no effect.", .{});
+                        } else {
+                            try log_scopes.append(gpa, args[i]);
+                        }
                     } else if (mem.eql(u8, arg, "-fcompiler-rt")) {
                         want_compiler_rt = true;
                     } else if (mem.eql(u8, arg, "-fno-compiler-rt")) {
@@ -2633,6 +2637,50 @@ fn argvCmd(allocator: *Allocator, argv: []const []const u8) ![]u8 {
     return cmd.toOwnedSlice();
 }
 
+/// Reads `input` to the end (passing `max_src_size` as the read limit) and returns
+/// the contents as UTF-8. The caller owns the returned memory.
+///
+/// Input starting with a UTF-16 little endian BOM is transcoded to UTF-8.
+/// Returns error.UnsupportedEncoding when the input starts with a UTF-32 or
+/// UTF-16 big endian BOM, or when UTF-16 little endian input has an odd byte
+/// count or invalid surrogates.
+fn readSourceFileToEndAlloc(allocator: *mem.Allocator, input: *const fs.File, size_hint: ?usize) ![]const u8 {
+    // Request u16 alignment so the bytes can be viewed as UTF-16 code units below.
+    const source_code = input.readToEndAllocOptions(
+        allocator,
+        max_src_size,
+        size_hint,
+        @alignOf(u16),
+        null,
+    ) catch |err| switch (err) {
+        error.ConnectionResetByPeer => unreachable,
+        error.ConnectionTimedOut => unreachable,
+        error.NotOpenForReading => unreachable,
+        else => |e| return e,
+    };
+    errdefer allocator.free(source_code);
+
+    // Detect unsupported file types with their Byte Order Mark
+    const unsupported_boms = [_][]const u8{
+        "\xff\xfe\x00\x00", // UTF-32 little endian
+        "\xfe\xff\x00\x00", // UTF-32 big endian
+        "\xfe\xff", // UTF-16 big endian
+    };
+    for (unsupported_boms) |bom| {
+        if (mem.startsWith(u8, source_code, bom)) {
+            return error.UnsupportedEncoding;
+        }
+    }
+
+    // If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
+    if (mem.startsWith(u8, source_code, "\xff\xfe")) {
+        // An odd byte count cannot be a whole number of UTF-16 code units, and
+        // mem.bytesAsSlice requires the length to divide evenly.
+        if (source_code.len % 2 != 0) return error.UnsupportedEncoding;
+
+        const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
+        const source_code_utf8 = std.unicode.utf16leToUtf8Alloc(allocator, source_code_utf16_le) catch |err| switch (err) {
+            // Return here so the errdefer above is the only place that frees
+            // `source_code` on failure; falling through would free it twice.
+            error.DanglingSurrogateHalf,
+            error.ExpectedSecondSurrogateHalf,
+            error.UnexpectedSecondSurrogateHalf,
+            => return error.UnsupportedEncoding,
+            else => |e| return e,
+        };
+
+        // Success: the UTF-16 buffer is no longer needed.
+        allocator.free(source_code);
+        return source_code_utf8;
+    }
+
+    return source_code;
+}
+
 pub const usage_fmt =
     \\Usage: zig fmt [file]...
     \\
@@ -2704,9 +2752,10 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
             fatal("cannot use --stdin with positional arguments", .{});
         }
 
-        const stdin = io.getStdIn().reader();
-
-        const source_code = try stdin.readAllAlloc(gpa, max_src_size);
+        const stdin = io.getStdIn();
+        const source_code = readSourceFileToEndAlloc(gpa, &stdin, null) catch |err| {
+            fatal("unable to read stdin: {s}", .{err});
+        };
         defer gpa.free(source_code);
 
         var tree = std.zig.parse(gpa, source_code) catch |err| {
@@ -2781,6 +2830,7 @@ const FmtError = error{
     EndOfStream,
     Unseekable,
     NotOpenForWriting,
+    UnsupportedEncoding,
 } || fs.File.OpenError;
 
 fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool, dir: fs.Dir, sub_path: []const u8) FmtError!void {
@@ -2846,21 +2896,15 @@ fn fmtPathFile(
     if (stat.kind == .Directory)
         return error.IsDir;
 
-    const source_code = source_file.readToEndAllocOptions(
+    const source_code = try readSourceFileToEndAlloc(
         fmt.gpa,
-        max_src_size,
+        &source_file,
         std.math.cast(usize, stat.size) catch return error.FileTooBig,
-        @alignOf(u8),
-        null,
-    ) catch |err| switch (err) {
-        error.ConnectionResetByPeer => unreachable,
-        error.ConnectionTimedOut => unreachable,
-        error.NotOpenForReading => unreachable,
-        else => |e| return e,
-    };
+    );
+    defer fmt.gpa.free(source_code);
+
     source_file.close();
     file_closed = true;
-    defer fmt.gpa.free(source_code);
 
     // Add to set after no longer possible to get error.IsDir.
     if (try fmt.seen.fetchPut(stat.inode, {})) |_| return;
@@ -3237,7 +3281,8 @@ pub const ClangArgIterator = struct {
                 self.zig_equivalent = clang_arg.zig_equivalent;
                 break :find_clang_arg;
             },
-        } else {
+        }
+        else {
             fatal("Unknown Clang option: '{s}'", .{arg});
         }
     }
diff --git a/src/stage1/all_types.hpp b/src/stage1/all_types.hpp
@@ -391,6 +391,8 @@ enum LazyValueId {
     LazyValueIdAlignOf,
     LazyValueIdSizeOf,
     LazyValueIdPtrType,
+    LazyValueIdPtrTypeSimple,
+    LazyValueIdPtrTypeSimpleConst,
     LazyValueIdOptType,
     LazyValueIdSliceType,
     LazyValueIdFnType,
@@ -467,6 +469,13 @@ struct LazyValuePtrType {
     bool is_allowzero;
 };
 
+// Lazy value payload used for both LazyValueIdPtrTypeSimple and
+// LazyValueIdPtrTypeSimpleConst.
+struct LazyValuePtrTypeSimple {
+    LazyValue base;
+
+    IrAnalyze *ira;
+    // Element type of the pointer; analysis reads it through elem_type->value.
+    IrInstGen *elem_type;
+};
+
 struct LazyValueOptType {
     LazyValue base;
 
@@ -2130,10 +2139,6 @@ struct CodeGen {
     Buf llvm_ir_file_output_path;
     Buf analysis_json_output_path;
     Buf docs_output_path;
-    Buf *cache_dir;
-    Buf *c_artifact_dir;
-    const char **libc_include_dir_list;
-    size_t libc_include_dir_len;
 
     Buf *builtin_zig_path;
     Buf *zig_std_special_dir; // Cannot be overridden; derived from zig_lib_dir.
@@ -2610,7 +2615,8 @@ enum IrInstSrcId {
     IrInstSrcIdEnumToInt,
     IrInstSrcIdIntToErr,
     IrInstSrcIdErrToInt,
-    IrInstSrcIdCheckSwitchProngs,
+    IrInstSrcIdCheckSwitchProngsUnderYes,
+    IrInstSrcIdCheckSwitchProngsUnderNo,
     IrInstSrcIdCheckStatementIsVoid,
     IrInstSrcIdTypeName,
     IrInstSrcIdDeclRef,
@@ -2624,12 +2630,15 @@ enum IrInstSrcId {
     IrInstSrcIdHasField,
     IrInstSrcIdSetEvalBranchQuota,
     IrInstSrcIdPtrType,
+    IrInstSrcIdPtrTypeSimple,
+    IrInstSrcIdPtrTypeSimpleConst,
     IrInstSrcIdAlignCast,
     IrInstSrcIdImplicitCast,
     IrInstSrcIdResolveResult,
     IrInstSrcIdResetResult,
     IrInstSrcIdSetAlignStack,
-    IrInstSrcIdArgType,
+    IrInstSrcIdArgTypeAllowVarFalse,
+    IrInstSrcIdArgTypeAllowVarTrue,
     IrInstSrcIdExport,
     IrInstSrcIdExtern,
     IrInstSrcIdErrorReturnTrace,
@@ -3294,6 +3303,12 @@ struct IrInstSrcArrayType {
     IrInstSrc *child_type;
 };
 
+// Source instruction shared by IrInstSrcIdPtrTypeSimple and
+// IrInstSrcIdPtrTypeSimpleConst; constness is carried by the instruction id.
+struct IrInstSrcPtrTypeSimple {
+    IrInstSrc base;
+
+    // The pointee type expression.
+    IrInstSrc *child_type;
+};
+
 struct IrInstSrcPtrType {
     IrInstSrc base;
 
@@ -4020,7 +4035,6 @@ struct IrInstSrcCheckSwitchProngs {
     IrInstSrcCheckSwitchProngsRange *ranges;
     size_t range_count;
     AstNode* else_prong;
-    bool have_underscore_prong;
 };
 
 struct IrInstSrcCheckStatementIsVoid {
@@ -4144,7 +4158,6 @@ struct IrInstSrcArgType {
 
     IrInstSrc *fn_type;
     IrInstSrc *arg_index;
-    bool allow_var;
 };
 
 struct IrInstSrcExport {
diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp
@@ -1237,6 +1237,22 @@ Error type_val_resolve_zero_bits(CodeGen *g, ZigValue *type_val, ZigType *parent
                         parent_type_val, is_zero_bits);
             }
         }
+        case LazyValueIdPtrTypeSimple:
+        case LazyValueIdPtrTypeSimpleConst: {
+            LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast<LazyValuePtrTypeSimple *>(type_val->data.x_lazy);
+
+            if (parent_type_val == lazy_ptr_type->elem_type->value) {
+                // Does a struct which contains a pointer field to itself have bits? Yes.
+                *is_zero_bits = false;
+                return ErrorNone;
+            } else {
+                if (parent_type_val == nullptr) {
+                    parent_type_val = type_val;
+                }
+                return type_val_resolve_zero_bits(g, lazy_ptr_type->elem_type->value, parent_type,
+                        parent_type_val, is_zero_bits);
+            }
+        }
         case LazyValueIdArrayType: {
             LazyValueArrayType *lazy_array_type =
                 reinterpret_cast<LazyValueArrayType *>(type_val->data.x_lazy);
@@ -1285,6 +1301,8 @@ Error type_val_resolve_is_opaque_type(CodeGen *g, ZigValue *type_val, bool *is_o
             zig_unreachable();
         case LazyValueIdSliceType:
         case LazyValueIdPtrType:
+        case LazyValueIdPtrTypeSimple:
+        case LazyValueIdPtrTypeSimpleConst:
         case LazyValueIdFnType:
         case LazyValueIdOptType:
         case LazyValueIdErrUnionType:
@@ -1313,6 +1331,11 @@ static ReqCompTime type_val_resolve_requires_comptime(CodeGen *g, ZigValue *type
             LazyValuePtrType *lazy_ptr_type = reinterpret_cast<LazyValuePtrType *>(type_val->data.x_lazy);
             return type_val_resolve_requires_comptime(g, lazy_ptr_type->elem_type->value);
         }
+        case LazyValueIdPtrTypeSimple:
+        case LazyValueIdPtrTypeSimpleConst: {
+            LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast<LazyValuePtrTypeSimple *>(type_val->data.x_lazy);
+            return type_val_resolve_requires_comptime(g, lazy_ptr_type->elem_type->value);
+        }
         case LazyValueIdOptType: {
             LazyValueOptType *lazy_opt_type = reinterpret_cast<LazyValueOptType *>(type_val->data.x_lazy);
             return type_val_resolve_requires_comptime(g, lazy_opt_type->payload_type->value);
@@ -1413,6 +1436,24 @@ start_over:
             }
             return ErrorNone;
         }
+        case LazyValueIdPtrTypeSimple:
+        case LazyValueIdPtrTypeSimpleConst: {
+            LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast<LazyValuePtrTypeSimple *>(type_val->data.x_lazy);
+            bool is_zero_bits;
+            if ((err = type_val_resolve_zero_bits(g, lazy_ptr_type->elem_type->value, nullptr,
+                nullptr, &is_zero_bits)))
+            {
+                return err;
+            }
+            if (is_zero_bits) {
+                *abi_size = 0;
+                *size_in_bits = 0;
+            } else {
+                *abi_size = g->builtin_types.entry_usize->abi_size;
+                *size_in_bits = g->builtin_types.entry_usize->size_in_bits;
+            }
+            return ErrorNone;
+        }
         case LazyValueIdFnType:
             *abi_size = g->builtin_types.entry_usize->abi_size;
             *size_in_bits = g->builtin_types.entry_usize->size_in_bits;
@@ -1449,6 +1490,8 @@ Error type_val_resolve_abi_align(CodeGen *g, AstNode *source_node, ZigValue *typ
             zig_unreachable();
         case LazyValueIdSliceType:
         case LazyValueIdPtrType:
+        case LazyValueIdPtrTypeSimple:
+        case LazyValueIdPtrTypeSimpleConst:
         case LazyValueIdFnType:
             *abi_align = g->builtin_types.entry_usize->abi_align;
             return ErrorNone;
@@ -1506,7 +1549,9 @@ static OnePossibleValue type_val_resolve_has_one_possible_value(CodeGen *g, ZigV
                 return OnePossibleValueYes;
             return type_val_resolve_has_one_possible_value(g, lazy_array_type->elem_type->value);
         }
-        case LazyValueIdPtrType: {
+        case LazyValueIdPtrType:
+        case LazyValueIdPtrTypeSimple:
+        case LazyValueIdPtrTypeSimpleConst: {
             Error err;
             bool zero_bits;
             if ((err = type_val_resolve_zero_bits(g, type_val, nullptr, nullptr, &zero_bits))) {
@@ -5758,6 +5803,8 @@ static bool can_mutate_comptime_var_state(ZigValue *value) {
             case LazyValueIdAlignOf:
             case LazyValueIdSizeOf:
             case LazyValueIdPtrType:
+            case LazyValueIdPtrTypeSimple:
+            case LazyValueIdPtrTypeSimpleConst:
             case LazyValueIdOptType:
             case LazyValueIdSliceType:
             case LazyValueIdFnType:
diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp
@@ -476,7 +476,8 @@ static void destroy_instruction_src(IrInstSrc *inst) {
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcIntToErr *>(inst));
         case IrInstSrcIdErrToInt:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcErrToInt *>(inst));
-        case IrInstSrcIdCheckSwitchProngs:
+        case IrInstSrcIdCheckSwitchProngsUnderNo:
+        case IrInstSrcIdCheckSwitchProngsUnderYes:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcCheckSwitchProngs *>(inst));
         case IrInstSrcIdCheckStatementIsVoid:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcCheckStatementIsVoid *>(inst));
@@ -486,6 +487,9 @@ static void destroy_instruction_src(IrInstSrc *inst) {
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcTagName *>(inst));
         case IrInstSrcIdPtrType:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcPtrType *>(inst));
+        case IrInstSrcIdPtrTypeSimple:
+        case IrInstSrcIdPtrTypeSimpleConst:
+            return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcPtrTypeSimple *>(inst));
         case IrInstSrcIdDeclRef:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcDeclRef *>(inst));
         case IrInstSrcIdPanic:
@@ -514,7 +518,8 @@ static void destroy_instruction_src(IrInstSrc *inst) {
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcResetResult *>(inst));
         case IrInstSrcIdSetAlignStack:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcSetAlignStack *>(inst));
-        case IrInstSrcIdArgType:
+        case IrInstSrcIdArgTypeAllowVarFalse:
+        case IrInstSrcIdArgTypeAllowVarTrue:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcArgType *>(inst));
         case IrInstSrcIdExport:
             return heap::c_allocator.destroy(reinterpret_cast<IrInstSrcExport *>(inst));
@@ -1470,10 +1475,6 @@ static constexpr IrInstSrcId ir_inst_id(IrInstSrcErrToInt *) {
     return IrInstSrcIdErrToInt;
 }
 
-static constexpr IrInstSrcId ir_inst_id(IrInstSrcCheckSwitchProngs *) {
-    return IrInstSrcIdCheckSwitchProngs;
-}
-
 static constexpr IrInstSrcId ir_inst_id(IrInstSrcCheckStatementIsVoid *) {
     return IrInstSrcIdCheckStatementIsVoid;
 }
@@ -1546,10 +1547,6 @@ static constexpr IrInstSrcId ir_inst_id(IrInstSrcSetAlignStack *) {
     return IrInstSrcIdSetAlignStack;
 }
 
-static constexpr IrInstSrcId ir_inst_id(IrInstSrcArgType *) {
-    return IrInstSrcIdArgType;
-}
-
 static constexpr IrInstSrcId ir_inst_id(IrInstSrcExport *) {
     return IrInstSrcIdExport;
 }
@@ -2615,11 +2612,35 @@ static IrInstGen *ir_build_br_gen(IrAnalyze *ira, IrInst *source_instr, IrBasicB
     return &inst->base;
 }
 
+// Builds a pointer type instruction that carries only the child type.
+// Constness is encoded in the instruction id (PtrTypeSimpleConst vs PtrTypeSimple).
+static IrInstSrc *ir_build_ptr_type_simple(IrBuilderSrc *irb, Scope *scope, AstNode *source_node,
+        IrInstSrc *child_type, bool is_const)
+{
+    IrInstSrcPtrTypeSimple *instruction = heap::c_allocator.create<IrInstSrcPtrTypeSimple>();
+    IrInstSrc *result = &instruction->base;
+
+    if (is_const) {
+        result->id = IrInstSrcIdPtrTypeSimpleConst;
+    } else {
+        result->id = IrInstSrcIdPtrTypeSimple;
+    }
+    result->base.scope = scope;
+    result->base.source_node = source_node;
+    result->base.debug_id = exec_next_debug_id(irb->exec);
+    result->owner_bb = irb->current_basic_block;
+    ir_instruction_append(irb->current_basic_block, result);
+
+    instruction->child_type = child_type;
+
+    ir_ref_instruction(child_type, irb->current_basic_block);
+
+    return result;
+}
+
 static IrInstSrc *ir_build_ptr_type(IrBuilderSrc *irb, Scope *scope, AstNode *source_node,
         IrInstSrc *child_type, bool is_const, bool is_volatile, PtrLen ptr_len,
         IrInstSrc *sentinel, IrInstSrc *align_value,
         uint32_t bit_offset_start, uint32_t host_int_bytes, bool is_allow_zero)
 {
+    if (!is_volatile && ptr_len == PtrLenSingle && sentinel == nullptr && align_value == nullptr &&
+            bit_offset_start == 0 && host_int_bytes == 0 && is_allow_zero == 0)
+    {
+        return ir_build_ptr_type_simple(irb, scope, source_node, child_type, is_const);
+    }
+
     IrInstSrcPtrType *inst = ir_build_instruction<IrInstSrcPtrType>(irb, scope, source_node);
     inst->sentinel = sentinel;
     inst->align_value = align_value;
@@ -4354,13 +4375,19 @@ static IrInstSrc *ir_build_check_switch_prongs(IrBuilderSrc *irb, Scope *scope, 
         IrInstSrc *target_value, IrInstSrcCheckSwitchProngsRange *ranges, size_t range_count,
         AstNode* else_prong, bool have_underscore_prong)
 {
-    IrInstSrcCheckSwitchProngs *instruction = ir_build_instruction<IrInstSrcCheckSwitchProngs>(
-            irb, scope, source_node);
+    IrInstSrcCheckSwitchProngs *instruction = heap::c_allocator.create<IrInstSrcCheckSwitchProngs>();
+    instruction->base.id = have_underscore_prong ?
+        IrInstSrcIdCheckSwitchProngsUnderYes : IrInstSrcIdCheckSwitchProngsUnderNo;
+    instruction->base.base.scope = scope;
+    instruction->base.base.source_node = source_node;
+    instruction->base.base.debug_id = exec_next_debug_id(irb->exec);
+    instruction->base.owner_bb = irb->current_basic_block;
+    ir_instruction_append(irb->current_basic_block, &instruction->base);
+
     instruction->target_value = target_value;
     instruction->ranges = ranges;
     instruction->range_count = range_count;
     instruction->else_prong = else_prong;
-    instruction->have_underscore_prong = have_underscore_prong;
 
     ir_ref_instruction(target_value, irb->current_basic_block);
     for (size_t i = 0; i < range_count; i += 1) {
@@ -4590,10 +4617,17 @@ static IrInstSrc *ir_build_set_align_stack(IrBuilderSrc *irb, Scope *scope, AstN
 static IrInstSrc *ir_build_arg_type(IrBuilderSrc *irb, Scope *scope, AstNode *source_node,
         IrInstSrc *fn_type, IrInstSrc *arg_index, bool allow_var)
 {
-    IrInstSrcArgType *instruction = ir_build_instruction<IrInstSrcArgType>(irb, scope, source_node);
+    IrInstSrcArgType *instruction = heap::c_allocator.create<IrInstSrcArgType>();
+    instruction->base.id = allow_var ?
+        IrInstSrcIdArgTypeAllowVarTrue : IrInstSrcIdArgTypeAllowVarFalse;
+    instruction->base.base.scope = scope;
+    instruction->base.base.source_node = source_node;
+    instruction->base.base.debug_id = exec_next_debug_id(irb->exec);
+    instruction->base.owner_bb = irb->current_basic_block;
+    ir_instruction_append(irb->current_basic_block, &instruction->base);
+
     instruction->fn_type = fn_type;
     instruction->arg_index = arg_index;
-    instruction->allow_var = allow_var;
 
     ir_ref_instruction(fn_type, irb->current_basic_block);
     ir_ref_instruction(arg_index, irb->current_basic_block);
@@ -29702,7 +29736,7 @@ static IrInstGen *ir_analyze_instruction_test_comptime(IrAnalyze *ira, IrInstSrc
 }
 
 static IrInstGen *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira,
-        IrInstSrcCheckSwitchProngs *instruction)
+        IrInstSrcCheckSwitchProngs *instruction, bool have_underscore_prong)
 {
     IrInstGen *target_value = instruction->target_value->child;
     ZigType *switch_type = target_value->value->type;
@@ -29767,7 +29801,7 @@ static IrInstGen *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira,
                 bigint_incr(&field_index);
             }
         }
-        if (instruction->have_underscore_prong) {
+        if (have_underscore_prong) {
             if (!switch_type->data.enumeration.non_exhaustive) {
                 ir_add_error(ira, &instruction->base.base,
                     buf_sprintf("switch on exhaustive enum has `_` prong"));
@@ -30871,6 +30905,24 @@ static IrInstGen *ir_analyze_instruction_ptr_to_int(IrAnalyze *ira, IrInstSrcPtr
     return ir_build_ptr_to_int_gen(ira, &instruction->base.base, target);
 }
 
+static IrInstGen *ir_analyze_instruction_ptr_type_simple(IrAnalyze *ira,
+        IrInstSrcPtrTypeSimple *instruction, bool is_const)
+{
+    IrInstGen *result = ir_const(ira, &instruction->base.base, ira->codegen->builtin_types.entry_type);
+    result->value->special = ConstValSpecialLazy;
+
+    LazyValuePtrTypeSimple *lazy_ptr_type = heap::c_allocator.create<LazyValuePtrTypeSimple>();
+    lazy_ptr_type->ira = ira; ira_ref(ira);
+    result->value->data.x_lazy = &lazy_ptr_type->base;
+    lazy_ptr_type->base.id = is_const ? LazyValueIdPtrTypeSimpleConst : LazyValueIdPtrTypeSimple;
+
+    lazy_ptr_type->elem_type = instruction->child_type->child;
+    if (ir_resolve_type_lazy(ira, lazy_ptr_type->elem_type) == nullptr)
+        return ira->codegen->invalid_inst_gen;
+
+    return result;
+}
+
 static IrInstGen *ir_analyze_instruction_ptr_type(IrAnalyze *ira, IrInstSrcPtrType *instruction) {
     IrInstGen *result = ir_const(ira, &instruction->base.base, ira->codegen->builtin_types.entry_type);
     result->value->special = ConstValSpecialLazy;
@@ -30976,7 +31028,9 @@ static IrInstGen *ir_analyze_instruction_set_align_stack(IrAnalyze *ira, IrInstS
     return ir_const_void(ira, &instruction->base.base);
 }
 
-static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgType *instruction) {
+static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgType *instruction,
+        bool allow_var)
+{
     IrInstGen *fn_type_inst = instruction->fn_type->child;
     ZigType *fn_type = ir_resolve_type(ira, fn_type_inst);
     if (type_is_invalid(fn_type))
@@ -30998,7 +31052,7 @@ static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgTy
 
     FnTypeId *fn_type_id = &fn_type->data.fn.fn_type_id;
     if (arg_index >= fn_type_id->param_count) {
-        if (instruction->allow_var) {
+        if (allow_var) {
             // TODO remove this with var args
             return ir_const_type(ira, &instruction->base.base, ira->codegen->builtin_types.entry_anytype);
         }
@@ -31013,7 +31067,7 @@ static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgTy
         // Args are only unresolved if our function is generic.
         ir_assert(fn_type->data.fn.is_generic, &instruction->base.base);
 
-        if (instruction->allow_var) {
+        if (allow_var) {
             return ir_const_type(ira, &instruction->base.base, ira->codegen->builtin_types.entry_anytype);
         } else {
             ir_add_error(ira, &arg_index_inst->base,
@@ -32341,8 +32395,10 @@ static IrInstGen *ir_analyze_instruction_base(IrAnalyze *ira, IrInstSrc *instruc
             return ir_analyze_instruction_fn_proto(ira, (IrInstSrcFnProto *)instruction);
         case IrInstSrcIdTestComptime:
             return ir_analyze_instruction_test_comptime(ira, (IrInstSrcTestComptime *)instruction);
-        case IrInstSrcIdCheckSwitchProngs:
-            return ir_analyze_instruction_check_switch_prongs(ira, (IrInstSrcCheckSwitchProngs *)instruction);
+        case IrInstSrcIdCheckSwitchProngsUnderNo:
+            return ir_analyze_instruction_check_switch_prongs(ira, (IrInstSrcCheckSwitchProngs *)instruction, false);
+        case IrInstSrcIdCheckSwitchProngsUnderYes:
+            return ir_analyze_instruction_check_switch_prongs(ira, (IrInstSrcCheckSwitchProngs *)instruction, true);
         case IrInstSrcIdCheckStatementIsVoid:
             return ir_analyze_instruction_check_statement_is_void(ira, (IrInstSrcCheckStatementIsVoid *)instruction);
         case IrInstSrcIdDeclRef:
@@ -32373,6 +32429,10 @@ static IrInstGen *ir_analyze_instruction_base(IrAnalyze *ira, IrInstSrc *instruc
             return ir_analyze_instruction_set_eval_branch_quota(ira, (IrInstSrcSetEvalBranchQuota *)instruction);
         case IrInstSrcIdPtrType:
             return ir_analyze_instruction_ptr_type(ira, (IrInstSrcPtrType *)instruction);
+        case IrInstSrcIdPtrTypeSimple:
+            return ir_analyze_instruction_ptr_type_simple(ira, (IrInstSrcPtrTypeSimple *)instruction, false);
+        case IrInstSrcIdPtrTypeSimpleConst:
+            return ir_analyze_instruction_ptr_type_simple(ira, (IrInstSrcPtrTypeSimple *)instruction, true);
         case IrInstSrcIdAlignCast:
             return ir_analyze_instruction_align_cast(ira, (IrInstSrcAlignCast *)instruction);
         case IrInstSrcIdImplicitCast:
@@ -32383,8 +32443,10 @@ static IrInstGen *ir_analyze_instruction_base(IrAnalyze *ira, IrInstSrc *instruc
             return ir_analyze_instruction_reset_result(ira, (IrInstSrcResetResult *)instruction);
         case IrInstSrcIdSetAlignStack:
             return ir_analyze_instruction_set_align_stack(ira, (IrInstSrcSetAlignStack *)instruction);
-        case IrInstSrcIdArgType:
-            return ir_analyze_instruction_arg_type(ira, (IrInstSrcArgType *)instruction);
+        case IrInstSrcIdArgTypeAllowVarFalse:
+            return ir_analyze_instruction_arg_type(ira, (IrInstSrcArgType *)instruction, false);
+        case IrInstSrcIdArgTypeAllowVarTrue:
+            return ir_analyze_instruction_arg_type(ira, (IrInstSrcArgType *)instruction, true);
         case IrInstSrcIdExport:
             return ir_analyze_instruction_export(ira, (IrInstSrcExport *)instruction);
         case IrInstSrcIdExtern:
@@ -32737,12 +32799,15 @@ bool ir_inst_src_has_side_effects(IrInstSrc *instruction) {
         case IrInstSrcIdMemcpy:
         case IrInstSrcIdBreakpoint:
         case IrInstSrcIdOverflowOp: // TODO when we support multiple returns this can be side effect free
-        case IrInstSrcIdCheckSwitchProngs:
+        case IrInstSrcIdCheckSwitchProngsUnderNo:
+        case IrInstSrcIdCheckSwitchProngsUnderYes:
         case IrInstSrcIdCheckStatementIsVoid:
         case IrInstSrcIdCheckRuntimeScope:
         case IrInstSrcIdPanic:
         case IrInstSrcIdSetEvalBranchQuota:
         case IrInstSrcIdPtrType:
+        case IrInstSrcIdPtrTypeSimple:
+        case IrInstSrcIdPtrTypeSimpleConst:
         case IrInstSrcIdSetAlignStack:
         case IrInstSrcIdExport:
         case IrInstSrcIdExtern:
@@ -32826,7 +32891,8 @@ bool ir_inst_src_has_side_effects(IrInstSrc *instruction) {
         case IrInstSrcIdAlignCast:
         case IrInstSrcIdImplicitCast:
         case IrInstSrcIdResolveResult:
-        case IrInstSrcIdArgType:
+        case IrInstSrcIdArgTypeAllowVarFalse:
+        case IrInstSrcIdArgTypeAllowVarTrue:
         case IrInstSrcIdErrorReturnTrace:
         case IrInstSrcIdErrorUnion:
         case IrInstSrcIdFloatOp:
@@ -33249,6 +33315,54 @@ static Error ir_resolve_lazy_raw(AstNode *source_node, ZigValue *val) {
             // We can't free the lazy value here, because multiple other ZigValues might be pointing to it.
             return ErrorNone;
         }
+        case LazyValueIdPtrTypeSimple: {
+            LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast<LazyValuePtrTypeSimple *>(val->data.x_lazy);
+            IrAnalyze *ira = lazy_ptr_type->ira;
+
+            ZigType *elem_type = ir_resolve_type(ira, lazy_ptr_type->elem_type);
+            if (type_is_invalid(elem_type))
+                return ErrorSemanticAnalyzeFail;
+
+            if (elem_type->id == ZigTypeIdUnreachable) {
+                ir_add_error(ira, &lazy_ptr_type->elem_type->base,
+                        buf_create_from_str("pointer to noreturn not allowed"));
+                return ErrorSemanticAnalyzeFail;
+            }
+
+            assert(val->type->id == ZigTypeIdMetaType);
+            val->data.x_type = get_pointer_to_type_extra2(ira->codegen, elem_type,
+                    false, false, PtrLenSingle, 0,
+                    0, 0,
+                    false, VECTOR_INDEX_NONE, nullptr, nullptr);
+            val->special = ConstValSpecialStatic;
+
+            // We can't free the lazy value here, because multiple other ZigValues might be pointing to it.
+            return ErrorNone;
+        }
+        case LazyValueIdPtrTypeSimpleConst: {
+            LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast<LazyValuePtrTypeSimple *>(val->data.x_lazy);
+            IrAnalyze *ira = lazy_ptr_type->ira;
+
+            ZigType *elem_type = ir_resolve_type(ira, lazy_ptr_type->elem_type);
+            if (type_is_invalid(elem_type))
+                return ErrorSemanticAnalyzeFail;
+
+            if (elem_type->id == ZigTypeIdUnreachable) {
+                ir_add_error(ira, &lazy_ptr_type->elem_type->base,
+                        buf_create_from_str("pointer to noreturn not allowed"));
+                return ErrorSemanticAnalyzeFail;
+            }
+
+            assert(val->type->id == ZigTypeIdMetaType);
+            val->data.x_type = get_pointer_to_type_extra2(ira->codegen, elem_type,
+                    true, false, PtrLenSingle, 0,
+                    0, 0,
+                    false, VECTOR_INDEX_NONE, nullptr, nullptr);
+            val->special = ConstValSpecialStatic;
+
+            // We can't free the lazy value here, because multiple other ZigValues might be pointing to it.
+            return ErrorNone;
+        }
         case LazyValueIdArrayType: {
             LazyValueArrayType *lazy_array_type = reinterpret_cast<LazyValueArrayType *>(val->data.x_lazy);
             IrAnalyze *ira = lazy_array_type->ira;
diff --git a/src/stage1/ir_print.cpp b/src/stage1/ir_print.cpp
@@ -270,8 +270,10 @@ const char* ir_inst_src_type_str(IrInstSrcId id) {
             return "SrcIntToErr";
         case IrInstSrcIdErrToInt:
             return "SrcErrToInt";
-        case IrInstSrcIdCheckSwitchProngs:
-            return "SrcCheckSwitchProngs";
+        case IrInstSrcIdCheckSwitchProngsUnderNo:
+            return "SrcCheckSwitchProngsUnderNo";
+        case IrInstSrcIdCheckSwitchProngsUnderYes:
+            return "SrcCheckSwitchProngsUnderYes";
         case IrInstSrcIdCheckStatementIsVoid:
             return "SrcCheckStatementIsVoid";
         case IrInstSrcIdTypeName:
@@ -298,6 +300,10 @@ const char* ir_inst_src_type_str(IrInstSrcId id) {
             return "SrcSetEvalBranchQuota";
         case IrInstSrcIdPtrType:
             return "SrcPtrType";
+        case IrInstSrcIdPtrTypeSimple:
+            return "SrcPtrTypeSimple";
+        case IrInstSrcIdPtrTypeSimpleConst:
+            return "SrcPtrTypeSimpleConst";
         case IrInstSrcIdAlignCast:
             return "SrcAlignCast";
         case IrInstSrcIdImplicitCast:
@@ -308,8 +314,10 @@ const char* ir_inst_src_type_str(IrInstSrcId id) {
             return "SrcResetResult";
         case IrInstSrcIdSetAlignStack:
             return "SrcSetAlignStack";
-        case IrInstSrcIdArgType:
-            return "SrcArgType";
+        case IrInstSrcIdArgTypeAllowVarFalse:
+            return "SrcArgTypeAllowVarFalse";
+        case IrInstSrcIdArgTypeAllowVarTrue:
+            return "SrcArgTypeAllowVarTrue";
         case IrInstSrcIdExport:
             return "SrcExport";
         case IrInstSrcIdExtern:
@@ -2187,7 +2195,9 @@ static void ir_print_err_to_int(IrPrintGen *irp, IrInstGenErrToInt *instruction)
     ir_print_other_inst_gen(irp, instruction->target);
 }
 
-static void ir_print_check_switch_prongs(IrPrintSrc *irp, IrInstSrcCheckSwitchProngs *instruction) {
+static void ir_print_check_switch_prongs(IrPrintSrc *irp, IrInstSrcCheckSwitchProngs *instruction,
+        bool have_underscore_prong)
+{
     fprintf(irp->f, "@checkSwitchProngs(");
     ir_print_other_inst_src(irp, instruction->target_value);
     fprintf(irp->f, ",");
@@ -2200,6 +2210,8 @@ static void ir_print_check_switch_prongs(IrPrintSrc *irp, IrInstSrcCheckSwitchPr
     }
     const char *have_else_str = instruction->else_prong != nullptr ? "yes" : "no";
     fprintf(irp->f, ")else:%s", have_else_str);
+    const char *have_under_str = have_underscore_prong ? "yes" : "no";
+    fprintf(irp->f, " _:%s", have_under_str);
 }
 
 static void ir_print_check_statement_is_void(IrPrintSrc *irp, IrInstSrcCheckStatementIsVoid *instruction) {
@@ -2237,6 +2249,15 @@ static void ir_print_ptr_type(IrPrintSrc *irp, IrInstSrcPtrType *instruction) {
     ir_print_other_inst_src(irp, instruction->child_type);
 }
 
+static void ir_print_ptr_type_simple(IrPrintSrc *irp, IrInstSrcPtrTypeSimple *instruction,
+        bool is_const)
+{
+    fprintf(irp->f, "&");
+    const char *const_str = is_const ? "const " : "";
+    fprintf(irp->f, "*%s", const_str);
+    ir_print_other_inst_src(irp, instruction->child_type);
+}
+
 static void ir_print_decl_ref(IrPrintSrc *irp, IrInstSrcDeclRef *instruction) {
     const char *ptr_str = (instruction->lval != LValNone) ? "ptr " : "";
     fprintf(irp->f, "declref %s%s", ptr_str, buf_ptr(instruction->tld->name));
@@ -2344,11 +2365,17 @@ static void ir_print_set_align_stack(IrPrintSrc *irp, IrInstSrcSetAlignStack *in
     fprintf(irp->f, ")");
 }
 
-static void ir_print_arg_type(IrPrintSrc *irp, IrInstSrcArgType *instruction) {
+static void ir_print_arg_type(IrPrintSrc *irp, IrInstSrcArgType *instruction, bool allow_var) {
     fprintf(irp->f, "@ArgType(");
     ir_print_other_inst_src(irp, instruction->fn_type);
     fprintf(irp->f, ",");
     ir_print_other_inst_src(irp, instruction->arg_index);
+    fprintf(irp->f, ",");
+    if (allow_var) {
+        fprintf(irp->f, "allow_var=true");
+    } else {
+        fprintf(irp->f, "allow_var=false");
+    }
     fprintf(irp->f, ")");
 }
 
@@ -2885,8 +2912,11 @@ static void ir_print_inst_src(IrPrintSrc *irp, IrInstSrc *instruction, bool trai
         case IrInstSrcIdErrToInt:
             ir_print_err_to_int(irp, (IrInstSrcErrToInt *)instruction);
             break;
-        case IrInstSrcIdCheckSwitchProngs:
-            ir_print_check_switch_prongs(irp, (IrInstSrcCheckSwitchProngs *)instruction);
+        case IrInstSrcIdCheckSwitchProngsUnderNo:
+            ir_print_check_switch_prongs(irp, (IrInstSrcCheckSwitchProngs *)instruction, false);
+            break;
+        case IrInstSrcIdCheckSwitchProngsUnderYes:
+            ir_print_check_switch_prongs(irp, (IrInstSrcCheckSwitchProngs *)instruction, true);
             break;
         case IrInstSrcIdCheckStatementIsVoid:
             ir_print_check_statement_is_void(irp, (IrInstSrcCheckStatementIsVoid *)instruction);
@@ -2900,6 +2930,12 @@ static void ir_print_inst_src(IrPrintSrc *irp, IrInstSrc *instruction, bool trai
         case IrInstSrcIdPtrType:
             ir_print_ptr_type(irp, (IrInstSrcPtrType *)instruction);
             break;
+        case IrInstSrcIdPtrTypeSimple:
+            ir_print_ptr_type_simple(irp, (IrInstSrcPtrTypeSimple *)instruction, false);
+            break;
+        case IrInstSrcIdPtrTypeSimpleConst:
+            ir_print_ptr_type_simple(irp, (IrInstSrcPtrTypeSimple *)instruction, true);
+            break;
         case IrInstSrcIdDeclRef:
             ir_print_decl_ref(irp, (IrInstSrcDeclRef *)instruction);
             break;
@@ -2942,8 +2978,11 @@ static void ir_print_inst_src(IrPrintSrc *irp, IrInstSrc *instruction, bool trai
         case IrInstSrcIdSetAlignStack:
             ir_print_set_align_stack(irp, (IrInstSrcSetAlignStack *)instruction);
             break;
-        case IrInstSrcIdArgType:
-            ir_print_arg_type(irp, (IrInstSrcArgType *)instruction);
+        case IrInstSrcIdArgTypeAllowVarFalse:
+            ir_print_arg_type(irp, (IrInstSrcArgType *)instruction, false);
+            break;
+        case IrInstSrcIdArgTypeAllowVarTrue:
+            ir_print_arg_type(irp, (IrInstSrcArgType *)instruction, true);
             break;
         case IrInstSrcIdExport:
             ir_print_export(irp, (IrInstSrcExport *)instruction);
diff --git a/src/translate_c.zig b/src/translate_c.zig
@@ -11,6 +11,7 @@ const math = std.math;
 const ast = @import("translate_c/ast.zig");
 const Node = ast.Node;
 const Tag = Node.Tag;
+const c_builtins = std.c.builtins;
 
 const CallingConvention = std.builtin.CallingConvention;
 
@@ -269,7 +270,10 @@ pub const Context = struct {
     global_scope: *Scope.Root,
     clang_context: *clang.ASTContext,
     mangle_count: u32 = 0,
+    /// Table of record decls that have been demoted to opaques.
     opaque_demotes: std.AutoHashMapUnmanaged(usize, void) = .{},
+    /// Table of unnamed enums and records that are child types of typedefs.
+    unnamed_typedefs: std.AutoHashMapUnmanaged(usize, []const u8) = .{},
 
     /// This one is different than the root scope's name table. This contains
     /// a list of names that we found by visiting all the top level decls without
@@ -337,6 +341,7 @@ pub fn translate(
         context.alias_list.deinit();
         context.global_names.deinit(gpa);
         context.opaque_demotes.deinit(gpa);
+        context.unnamed_typedefs.deinit(gpa);
         context.global_scope.deinit();
     }
 
@@ -400,6 +405,51 @@ fn declVisitorNamesOnly(c: *Context, decl: *const clang.Decl) Error!void {
     if (decl.castToNamedDecl()) |named_decl| {
         const decl_name = try c.str(named_decl.getName_bytes_begin());
         try c.global_names.put(c.gpa, decl_name, {});
+
+        // Check for typedefs with unnamed enum/record child types.
+        if (decl.getKind() == .Typedef) {
+            const typedef_decl = @ptrCast(*const clang.TypedefNameDecl, decl);
+            var child_ty = typedef_decl.getUnderlyingType().getTypePtr();
+            const addr: usize = while (true) switch (child_ty.getTypeClass()) {
+                .Enum => {
+                    const enum_ty = @ptrCast(*const clang.EnumType, child_ty);
+                    const enum_decl = enum_ty.getDecl();
+                    // check if this decl is unnamed
+                    if (@ptrCast(*const clang.NamedDecl, enum_decl).getName_bytes_begin()[0] != 0) return;
+                    break @ptrToInt(enum_decl.getCanonicalDecl());
+                },
+                .Record => {
+                    const record_ty = @ptrCast(*const clang.RecordType, child_ty);
+                    const record_decl = record_ty.getDecl();
+                    // check if this decl is unnamed
+                    if (@ptrCast(*const clang.NamedDecl, record_decl).getName_bytes_begin()[0] != 0) return;
+                    break @ptrToInt(record_decl.getCanonicalDecl());
+                },
+                .Elaborated => {
+                    const elaborated_ty = @ptrCast(*const clang.ElaboratedType, child_ty);
+                    child_ty = elaborated_ty.getNamedType().getTypePtr();
+                },
+                .Decayed => {
+                    const decayed_ty = @ptrCast(*const clang.DecayedType, child_ty);
+                    child_ty = decayed_ty.getDecayedType().getTypePtr();
+                },
+                .Attributed => {
+                    const attributed_ty = @ptrCast(*const clang.AttributedType, child_ty);
+                    child_ty = attributed_ty.getEquivalentType().getTypePtr();
+                },
+                .MacroQualified => {
+                    const macroqualified_ty = @ptrCast(*const clang.MacroQualifiedType, child_ty);
+                    child_ty = macroqualified_ty.getModifiedType().getTypePtr();
+                },
+                else => return,
+            } else unreachable;
+            // TODO https://github.com/ziglang/zig/issues/3756
+            // TODO https://github.com/ziglang/zig/issues/1802
+            const name = if (isZigPrimitiveType(decl_name)) try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ decl_name, c.getMangle() }) else decl_name;
+            try c.unnamed_typedefs.putNoClobber(c.gpa, addr, name);
+            // Put this typedef in the decl_table to avoid redefinitions.
+            try c.decl_table.putNoClobber(c.gpa, @ptrToInt(typedef_decl.getCanonicalDecl()), name);
+        }
     }
 }
 
@@ -635,7 +685,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
     if (has_init) trans_init: {
         if (decl_init) |expr| {
             const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
-                transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0)
+                transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
             else
                 transExprCoercing(c, scope, expr, .used);
             init_node = node_or_error catch |err| switch (err) {
@@ -751,17 +801,10 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD
     const toplevel = scope.id == .root;
     const bs: *Scope.Block = if (!toplevel) try scope.findBlockScope(c) else undefined;
 
-    var bare_name = try c.str(@ptrCast(*const clang.NamedDecl, record_decl).getName_bytes_begin());
-    var is_unnamed = false;
-    // Record declarations such as `struct {...} x` have no name but they're not
-    // anonymous hence here isAnonymousStructOrUnion is not needed
-    if (bare_name.len == 0) {
-        bare_name = try std.fmt.allocPrint(c.arena, "unnamed_{d}", .{c.getMangle()});
-        is_unnamed = true;
-    }
-
-    var container_kind_name: []const u8 = undefined;
     var is_union = false;
+    var container_kind_name: []const u8 = undefined;
+    var bare_name: []const u8 = try c.str(@ptrCast(*const clang.NamedDecl, record_decl).getName_bytes_begin());
+
     if (record_decl.isUnion()) {
         container_kind_name = "union";
         is_union = true;
@@ -772,7 +815,20 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD
         return failDecl(c, record_loc, bare_name, "record {s} is not a struct or union", .{bare_name});
     }
 
-    var name: []const u8 = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_kind_name, bare_name });
+    var is_unnamed = false;
+    var name = bare_name;
+    if (c.unnamed_typedefs.get(@ptrToInt(record_decl.getCanonicalDecl()))) |typedef_name| {
+        bare_name = typedef_name;
+        name = typedef_name;
+    } else {
+        // Record declarations such as `struct {...} x` have no name, but they are not
+        // anonymous, so isAnonymousStructOrUnion is not needed here.
+        if (bare_name.len == 0) {
+            bare_name = try std.fmt.allocPrint(c.arena, "unnamed_{d}", .{c.getMangle()});
+            is_unnamed = true;
+        }
+        name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_kind_name, bare_name });
+    }
     if (!toplevel) name = try bs.makeMangledName(c, name);
     try c.decl_table.putNoClobber(c.gpa, @ptrToInt(record_decl.getCanonicalDecl()), name);
 
@@ -873,14 +929,19 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E
     const toplevel = scope.id == .root;
     const bs: *Scope.Block = if (!toplevel) try scope.findBlockScope(c) else undefined;
 
-    var bare_name = try c.str(@ptrCast(*const clang.NamedDecl, enum_decl).getName_bytes_begin());
     var is_unnamed = false;
-    if (bare_name.len == 0) {
-        bare_name = try std.fmt.allocPrint(c.arena, "unnamed_{d}", .{c.getMangle()});
-        is_unnamed = true;
+    var bare_name: []const u8 = try c.str(@ptrCast(*const clang.NamedDecl, enum_decl).getName_bytes_begin());
+    var name = bare_name;
+    if (c.unnamed_typedefs.get(@ptrToInt(enum_decl.getCanonicalDecl()))) |typedef_name| {
+        bare_name = typedef_name;
+        name = typedef_name;
+    } else {
+        if (bare_name.len == 0) {
+            bare_name = try std.fmt.allocPrint(c.arena, "unnamed_{d}", .{c.getMangle()});
+            is_unnamed = true;
+        }
+        name = try std.fmt.allocPrint(c.arena, "enum_{s}", .{bare_name});
     }
-
-    var name: []const u8 = try std.fmt.allocPrint(c.arena, "enum_{s}", .{bare_name});
     if (!toplevel) _ = try bs.makeMangledName(c, name);
     try c.decl_table.putNoClobber(c.gpa, @ptrToInt(enum_decl.getCanonicalDecl()), name);
 
@@ -1058,6 +1119,11 @@ fn transStmt(
             const compound_literal = @ptrCast(*const clang.CompoundLiteralExpr, stmt);
             return transExpr(c, scope, compound_literal.getInitializer(), result_used);
         },
+        .GenericSelectionExprClass => {
+            const gen_sel = @ptrCast(*const clang.GenericSelectionExpr, stmt);
+            return transExpr(c, scope, gen_sel.getResultExpr(), result_used);
+        },
+        // When adding new cases here, see comment for maybeBlockify()
         else => {
             return fail(c, error.UnsupportedTranslation, stmt.getBeginLoc(), "TODO implement translation of stmt class {s}", .{@tagName(sc)});
         },
@@ -1407,7 +1473,7 @@ fn transDeclStmtOne(
 
             var init_node = if (decl_init) |expr|
                 if (expr.getStmtClass() == .StringLiteralClass)
-                    try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node))
+                    try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
                 else
                     try transExprCoercing(c, scope, expr, .used)
             else
@@ -1522,7 +1588,7 @@ fn transImplicitCastExpr(
             return maybeSuppressResult(c, scope, result_used, ne);
         },
         .BuiltinFnToFnPtr => {
-            return transExpr(c, scope, sub_expr, result_used);
+            return transBuiltinFnExpr(c, scope, sub_expr, result_used);
         },
         .ToVoid => {
             // Should only appear in the rhs and lhs of a ConditionalOperator
@@ -1538,6 +1604,22 @@ fn transImplicitCastExpr(
     }
 }
 
+fn isBuiltinDefined(name: []const u8) bool {
+    inline for (std.meta.declarations(c_builtins)) |decl| {
+        if (std.mem.eql(u8, name, decl.name)) return true;
+    }
+    return false;
+}
+
+fn transBuiltinFnExpr(c: *Context, scope: *Scope, expr: *const clang.Expr, used: ResultUsed) TransError!Node {
+    const node = try transExpr(c, scope, expr, used);
+    if (node.castTag(.identifier)) |ident| {
+        const name = ident.data;
+        if (!isBuiltinDefined(name)) return fail(c, error.UnsupportedTranslation, expr.getBeginLoc(), "TODO implement function '{s}' in std.c.builtins", .{name});
+    }
+    return node;
+}
+
 fn transBoolExpr(
     c: *Context,
     scope: *Scope,
@@ -1582,6 +1664,10 @@ fn exprIsNarrowStringLiteral(expr: *const clang.Expr) bool {
             const op_expr = @ptrCast(*const clang.ParenExpr, expr).getSubExpr();
             return exprIsNarrowStringLiteral(op_expr);
         },
+        .GenericSelectionExprClass => {
+            const gen_sel = @ptrCast(*const clang.GenericSelectionExpr, expr);
+            return exprIsNarrowStringLiteral(gen_sel.getResultExpr());
+        },
         else => return false,
     }
 }
@@ -1733,6 +1819,20 @@ fn transReturnStmt(
     return Tag.@"return".create(c.arena, rhs);
 }
 
+fn transNarrowStringLiteral(
+    c: *Context,
+    scope: *Scope,
+    stmt: *const clang.StringLiteral,
+    result_used: ResultUsed,
+) TransError!Node {
+    var len: usize = undefined;
+    const bytes_ptr = stmt.getString_bytes_begin_size(&len);
+
+    const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])});
+    const node = try Tag.string_literal.create(c.arena, str);
+    return maybeSuppressResult(c, scope, result_used, node);
+}
+
 fn transStringLiteral(
     c: *Context,
     scope: *Scope,
@@ -1741,19 +1841,14 @@ fn transStringLiteral(
 ) TransError!Node {
     const kind = stmt.getKind();
     switch (kind) {
-        .Ascii, .UTF8 => {
-            var len: usize = undefined;
-            const bytes_ptr = stmt.getString_bytes_begin_size(&len);
-
-            const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])});
-            const node = try Tag.string_literal.create(c.arena, str);
-            return maybeSuppressResult(c, scope, result_used, node);
-        },
+        .Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used),
         .UTF16, .UTF32, .Wide => {
             const str_type = @tagName(stmt.getKind());
             const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() });
-            const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1);
 
+            const expr_base = @ptrCast(*const clang.Expr, stmt);
+            const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc());
+            const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type);
             const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array });
             try scope.appendNode(decl);
             const node = try Tag.identifier.create(c.arena, name);
@@ -1762,52 +1857,67 @@ fn transStringLiteral(
     }
 }
 
-/// Parse the size of an array back out from an ast Node.
-fn zigArraySize(c: *Context, node: Node) TransError!usize {
-    if (node.castTag(.array_type)) |array| {
-        return array.data.len;
-    }
-    return error.UnsupportedTranslation;
+fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo {
+    return (array_type.castTag(.array_type) orelse array_type.castTag(.null_sentinel_array_type).?).data;
 }
 
-/// Translate a string literal to an array of integers. Used when an
-/// array is initialized from a string literal. `array_size` is the
-/// size of the array being initialized. If the string literal is larger
-/// than the array, truncate the string. If the array is larger than the
-/// string literal, pad the array with 0's
-fn transStringLiteralAsArray(
+/// Translate a string literal that is initializing an array. In general narrow string
+/// literals become `"<string>".*` or `"<string>"[0..<size>].*` if they need truncation.
+/// Wide string literals become an array of integers. Zero-fillers pad out the array to
+/// the appropriate length, if necessary.
+fn transStringLiteralInitializer(
     c: *Context,
     scope: *Scope,
     stmt: *const clang.StringLiteral,
-    array_size: usize,
+    array_type: Node,
 ) TransError!Node {
-    if (array_size == 0) return error.UnsupportedType;
+    assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type);
+
+    const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8;
 
     const str_length = stmt.getLength();
+    const payload = getArrayPayload(array_type);
+    const array_size = payload.len;
+    const elem_type = payload.elem_type;
+
+    if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type);
+
+    const num_inits = math.min(str_length, array_size);
+    const init_node = if (num_inits > 0) blk: {
+        if (is_narrow) {
+            // "string literal".* or "string literal"[0..num_inits].*
+            var str = try transNarrowStringLiteral(c, scope, stmt, .used);
+            if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits });
+            break :blk try Tag.deref.create(c.arena, str);
+        } else {
+            const init_list = try c.arena.alloc(Node, num_inits);
+            var i: c_uint = 0;
+            while (i < num_inits) : (i += 1) {
+                init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i));
+            }
+            const init_args = .{ .len = num_inits, .elem_type = elem_type };
+            const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args);
+            break :blk try Tag.array_init.create(c.arena, .{
+                .cond = init_array_type,
+                .cases = init_list,
+            });
+        }
+    } else null;
 
-    const expr_base = @ptrCast(*const clang.Expr, stmt);
-    const ty = expr_base.getType().getTypePtr();
-    const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty);
+    if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned
+    assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned.
 
-    const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc());
-    const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type });
-    const init_list = try c.arena.alloc(Node, array_size);
+    const filler_node = try Tag.array_filler.create(c.arena, .{
+        .type = elem_type,
+        .filler = Tag.zero_literal.init(),
+        .count = array_size - str_length,
+    });
 
-    var i: c_uint = 0;
-    const kind = stmt.getKind();
-    const narrow = kind == .Ascii or kind == .UTF8;
-    while (i < str_length and i < array_size) : (i += 1) {
-        const code_unit = stmt.getCodeUnit(i);
-        init_list[i] = try transCreateCharLitNode(c, narrow, code_unit);
-    }
-    while (i < array_size) : (i += 1) {
-        init_list[i] = try transCreateNodeNumber(c, 0, .int);
+    if (init_node) |some| {
+        return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node });
+    } else {
+        return filler_node;
     }
-
-    return Tag.array_init.create(c.arena, .{
-        .cond = arr_type,
-        .cases = init_list,
-    });
 }
 
 /// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where
@@ -1836,6 +1946,7 @@ fn cIntTypeForEnum(enum_qt: clang.QualType) clang.QualType {
     return enum_decl.getIntegerType();
 }
 
+// when modifying this function, make sure to also update std.meta.cast
 fn transCCast(
     c: *Context,
     scope: *Scope,
@@ -2192,6 +2303,35 @@ fn transImplicitValueInitExpr(
     return transZeroInitExpr(c, scope, source_loc, ty);
 }
 
+/// If a statement can possibly translate to a Zig assignment (either directly because it's
+/// an assignment in C or indirectly via result assignment to `_`) AND it's the sole statement
+/// in the body of an if statement or loop, then we need to put the statement into its own block.
+/// The `else` case here corresponds to statements that could result in an assignment. If a statement
+/// class never needs a block, add its enum to the top prong.
+fn maybeBlockify(c: *Context, scope: *Scope, stmt: *const clang.Stmt) TransError!Node {
+    switch (stmt.getStmtClass()) {
+        .BreakStmtClass,
+        .CompoundStmtClass,
+        .ContinueStmtClass,
+        .DeclRefExprClass,
+        .DeclStmtClass,
+        .DoStmtClass,
+        .ForStmtClass,
+        .IfStmtClass,
+        .ReturnStmtClass,
+        .NullStmtClass,
+        .WhileStmtClass,
+        => return transStmt(c, scope, stmt, .unused),
+        else => {
+            var block_scope = try Scope.Block.init(c, scope, false);
+            defer block_scope.deinit();
+            const result = try transStmt(c, &block_scope.base, stmt, .unused);
+            try block_scope.statements.append(result);
+            return block_scope.complete(c);
+        },
+    }
+}
+
 fn transIfStmt(
     c: *Context,
     scope: *Scope,
@@ -2209,9 +2349,10 @@ fn transIfStmt(
     const cond_expr = @ptrCast(*const clang.Expr, stmt.getCond());
     const cond = try transBoolExpr(c, &cond_scope.base, cond_expr, .used);
 
-    const then_body = try transStmt(c, scope, stmt.getThen(), .unused);
+    const then_body = try maybeBlockify(c, scope, stmt.getThen());
+
     const else_body = if (stmt.getElse()) |expr|
-        try transStmt(c, scope, expr, .unused)
+        try maybeBlockify(c, scope, expr)
     else
         null;
     return Tag.@"if".create(c.arena, .{ .cond = cond, .then = then_body, .@"else" = else_body });
@@ -2236,7 +2377,7 @@ fn transWhileLoop(
         .parent = scope,
         .id = .loop,
     };
-    const body = try transStmt(c, &loop_scope, stmt.getBody(), .unused);
+    const body = try maybeBlockify(c, &loop_scope, stmt.getBody());
     return Tag.@"while".create(c.arena, .{ .cond = cond, .body = body, .cont_expr = null });
 }
 
@@ -2262,7 +2403,7 @@ fn transDoWhileLoop(
     const if_not_break = switch (cond.tag()) {
         .false_literal => return transStmt(c, scope, stmt.getBody(), .unused),
         .true_literal => {
-            const body_node = try transStmt(c, scope, stmt.getBody(), .unused);
+            const body_node = try maybeBlockify(c, scope, stmt.getBody());
             return Tag.while_true.create(c.arena, body_node);
         },
         else => try Tag.if_not_break.create(c.arena, cond),
@@ -2338,7 +2479,7 @@ fn transForLoop(
     else
         null;
 
-    const body = try transStmt(c, &loop_scope, stmt.getBody(), .unused);
+    const body = try maybeBlockify(c, &loop_scope, stmt.getBody());
     const while_node = try Tag.@"while".create(c.arena, .{ .cond = cond, .body = body, .cont_expr = cont_expr });
     if (block_scope) |*bs| {
         try bs.statements.append(while_node);
@@ -2725,6 +2866,10 @@ fn cIsFunctionDeclRef(expr: *const clang.Expr) bool {
             const opcode = un_op.getOpcode();
             return (opcode == .AddrOf or opcode == .Deref) and cIsFunctionDeclRef(un_op.getSubExpr());
         },
+        .GenericSelectionExprClass => {
+            const gen_sel = @ptrCast(*const clang.GenericSelectionExpr, expr);
+            return cIsFunctionDeclRef(gen_sel.getResultExpr());
+        },
         else => return false,
     }
 }
@@ -3052,43 +3197,34 @@ fn transCreateCompoundAssign(
     const requires_int_cast = blk: {
         const are_integers = cIsInteger(lhs_qt) and cIsInteger(rhs_qt);
         const are_same_sign = cIsSignedInteger(lhs_qt) == cIsSignedInteger(rhs_qt);
-        break :blk are_integers and !are_same_sign;
+        break :blk are_integers and !(are_same_sign and cIntTypeCmp(lhs_qt, rhs_qt) == .eq);
     };
+
     if (used == .unused) {
         // common case
         // c: lhs += rhs
         // zig: lhs += rhs
+        const lhs_node = try transExpr(c, scope, lhs, .used);
+        var rhs_node = try transExpr(c, scope, rhs, .used);
+        if (is_ptr_op_signed) rhs_node = try usizeCastForWrappingPtrArithmetic(c.arena, rhs_node);
+
         if ((is_mod or is_div) and is_signed) {
-            const lhs_node = try transExpr(c, scope, lhs, .used);
-            const rhs_node = try transExpr(c, scope, rhs, .used);
+            if (requires_int_cast) rhs_node = try transCCast(c, scope, loc, lhs_qt, rhs_qt, rhs_node);
+            const operands = .{ .lhs = lhs_node, .rhs = rhs_node };
             const builtin = if (is_mod)
-                try Tag.rem.create(c.arena, .{ .lhs = lhs_node, .rhs = rhs_node })
+                try Tag.rem.create(c.arena, operands)
             else
-                try Tag.div_trunc.create(c.arena, .{ .lhs = lhs_node, .rhs = rhs_node });
+                try Tag.div_trunc.create(c.arena, operands);
 
             return transCreateNodeInfixOp(c, scope, .assign, lhs_node, builtin, .used);
         }
 
-        const lhs_node = try transExpr(c, scope, lhs, .used);
-        var rhs_node = if (is_shift or requires_int_cast)
-            try transExprCoercing(c, scope, rhs, .used)
-        else
-            try transExpr(c, scope, rhs, .used);
-
-        if (is_ptr_op_signed) {
-            rhs_node = try usizeCastForWrappingPtrArithmetic(c.arena, rhs_node);
-        }
-
-        if (is_shift or requires_int_cast) {
-            // @intCast(rhs)
-            const cast_to_type = if (is_shift)
-                try qualTypeToLog2IntRef(c, scope, getExprQualType(c, rhs), loc)
-            else
-                try transQualType(c, scope, getExprQualType(c, lhs), loc);
-
+        if (is_shift) {
+            const cast_to_type = try qualTypeToLog2IntRef(c, scope, rhs_qt, loc);
             rhs_node = try Tag.int_cast.create(c.arena, .{ .lhs = cast_to_type, .rhs = rhs_node });
+        } else if (requires_int_cast) {
+            rhs_node = try transCCast(c, scope, loc, lhs_qt, rhs_qt, rhs_node);
         }
-
         return transCreateNodeInfixOp(c, scope, op, lhs_node, rhs_node, .used);
     }
     // worst case
@@ -3110,29 +3246,24 @@ fn transCreateCompoundAssign(
     const lhs_node = try Tag.identifier.create(c.arena, ref);
     const ref_node = try Tag.deref.create(c.arena, lhs_node);
 
+    var rhs_node = try transExpr(c, &block_scope.base, rhs, .used);
+    if (is_ptr_op_signed) rhs_node = try usizeCastForWrappingPtrArithmetic(c.arena, rhs_node);
     if ((is_mod or is_div) and is_signed) {
-        const rhs_node = try transExpr(c, &block_scope.base, rhs, .used);
+        if (requires_int_cast) rhs_node = try transCCast(c, scope, loc, lhs_qt, rhs_qt, rhs_node);
+        const operands = .{ .lhs = ref_node, .rhs = rhs_node };
         const builtin = if (is_mod)
-            try Tag.rem.create(c.arena, .{ .lhs = ref_node, .rhs = rhs_node })
+            try Tag.rem.create(c.arena, operands)
         else
-            try Tag.div_trunc.create(c.arena, .{ .lhs = ref_node, .rhs = rhs_node });
+            try Tag.div_trunc.create(c.arena, operands);
 
         const assign = try transCreateNodeInfixOp(c, &block_scope.base, .assign, ref_node, builtin, .used);
         try block_scope.statements.append(assign);
     } else {
-        var rhs_node = try transExpr(c, &block_scope.base, rhs, .used);
-
-        if (is_shift or requires_int_cast) {
-            // @intCast(rhs)
-            const cast_to_type = if (is_shift)
-                try qualTypeToLog2IntRef(c, scope, getExprQualType(c, rhs), loc)
-            else
-                try transQualType(c, scope, getExprQualType(c, lhs), loc);
-
+        if (is_shift) {
+            const cast_to_type = try qualTypeToLog2IntRef(c, &block_scope.base, rhs_qt, loc);
             rhs_node = try Tag.int_cast.create(c.arena, .{ .lhs = cast_to_type, .rhs = rhs_node });
-        }
-        if (is_ptr_op_signed) {
-            rhs_node = try usizeCastForWrappingPtrArithmetic(c.arena, rhs_node);
+        } else if (requires_int_cast) {
+            rhs_node = try transCCast(c, &block_scope.base, loc, lhs_qt, rhs_qt, rhs_node);
         }
 
         const assign = try transCreateNodeInfixOp(c, &block_scope.base, op, ref_node, rhs_node, .used);
@@ -3194,11 +3325,11 @@ fn transFloatingLiteral(c: *Context, scope: *Scope, stmt: *const clang.FloatingL
     var dbl = stmt.getValueAsApproximateDouble();
     const is_negative = dbl < 0;
     if (is_negative) dbl = -dbl;
-    const str = try std.fmt.allocPrint(c.arena, "{d}", .{dbl});
-    var node = if (dbl == std.math.floor(dbl))
-        try Tag.integer_literal.create(c.arena, str)
+    const str = if (dbl == std.math.floor(dbl))
+        try std.fmt.allocPrint(c.arena, "{d}.0", .{dbl})
     else
-        try Tag.float_literal.create(c.arena, str);
+        try std.fmt.allocPrint(c.arena, "{d}", .{dbl});
+    var node = try Tag.float_literal.create(c.arena, str);
     if (is_negative) node = try Tag.negate.create(c.arena, node);
     return maybeSuppressResult(c, scope, used, node);
 }
@@ -3312,9 +3443,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void {
     try c.global_scope.nodes.append(decl_node);
 }
 
-/// Translate a qual type for a variable with an initializer. The initializer
-/// only matters for incomplete arrays, since the size of the array is determined
-/// by the size of the initializer
+/// Translate a qualtype for a variable with an initializer. This only matters
+/// for incomplete arrays, since the initializer determines the size of the array.
 fn transQualTypeInitialized(
     c: *Context,
     scope: *Scope,
@@ -3330,9 +3460,14 @@ fn transQualTypeInitialized(
         switch (decl_init.getStmtClass()) {
             .StringLiteralClass => {
                 const string_lit = @ptrCast(*const clang.StringLiteral, decl_init);
-                const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator
+                const string_lit_size = string_lit.getLength();
                 const array_size = @intCast(usize, string_lit_size);
-                return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
+
+                // incomplete array initialized with empty string, will be translated as [1]T{0}
+                // see https://github.com/ziglang/zig/issues/8256
+                if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty });
+
+                return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
             },
             .InitListExprClass => {
                 const init_expr = @ptrCast(*const clang.InitListExpr, decl_init);
@@ -4746,6 +4881,10 @@ fn parseCPrimaryExprInner(c: *Context, m: *MacroCtx, scope: *Scope) ParseError!N
         },
         .Identifier => {
             const mangled_name = scope.getAlias(slice);
+            if (mem.startsWith(u8, mangled_name, "__builtin_") and !isBuiltinDefined(mangled_name)) {
+                try m.fail(c, "TODO implement function '{s}' in std.c.builtins", .{mangled_name});
+                return error.ParseError;
+            }
             return Tag.identifier.create(c.arena, builtin_typedef_map.get(mangled_name) orelse mangled_name);
         },
         .LParen => {
diff --git a/src/translate_c/ast.zig b/src/translate_c/ast.zig
@@ -40,6 +40,8 @@ pub const Node = extern union {
         string_literal,
         char_literal,
         enum_literal,
+        /// "string"[0..end]
+        string_slice,
         identifier,
         @"if",
         /// if (!operand) break;
@@ -176,6 +178,7 @@ pub const Node = extern union {
         c_pointer,
         single_pointer,
         array_type,
+        null_sentinel_array_type,
 
         /// @import("std").meta.sizeof(operand)
         std_meta_sizeof,
@@ -334,7 +337,7 @@ pub const Node = extern union {
                 .std_meta_promoteIntLiteral => Payload.PromoteIntLiteral,
                 .block => Payload.Block,
                 .c_pointer, .single_pointer => Payload.Pointer,
-                .array_type => Payload.Array,
+                .array_type, .null_sentinel_array_type => Payload.Array,
                 .arg_redecl, .alias, .fail_decl => Payload.ArgRedecl,
                 .log2_int_type => Payload.Log2IntType,
                 .var_simple, .pub_var_simple => Payload.SimpleVarDecl,
@@ -342,6 +345,7 @@ pub const Node = extern union {
                 .array_filler => Payload.ArrayFiller,
                 .pub_inline_fn => Payload.PubInlineFn,
                 .field_access => Payload.FieldAccess,
+                .string_slice => Payload.StringSlice,
             };
         }
 
@@ -584,10 +588,12 @@ pub const Payload = struct {
 
     pub const Array = struct {
         base: Payload,
-        data: struct {
+        data: ArrayTypeInfo,
+
+        pub const ArrayTypeInfo = struct {
             elem_type: Node,
             len: usize,
-        },
+        };
     };
 
     pub const Pointer = struct {
@@ -664,6 +670,14 @@ pub const Payload = struct {
             radix: Node,
         },
     };
+
+    pub const StringSlice = struct {
+        base: Payload,
+        data: struct {
+            string: Node,
+            end: usize,
+        },
+    };
 };
 
 /// Converts the nodes into a Zig ast.
@@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
                 .data = undefined,
             });
         },
+        .string_slice => {
+            const payload = node.castTag(.string_slice).?.data;
+
+            const string = try renderNode(c, payload.string);
+            const l_bracket = try c.addToken(.l_bracket, "[");
+            const start = try c.addNode(.{
+                .tag = .integer_literal,
+                .main_token = try c.addToken(.integer_literal, "0"),
+                .data = undefined,
+            });
+            _ = try c.addToken(.ellipsis2, "..");
+            const end = try c.addNode(.{
+                .tag = .integer_literal,
+                .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}),
+                .data = undefined,
+            });
+            _ = try c.addToken(.r_bracket, "]");
+
+            return c.addNode(.{
+                .tag = .slice,
+                .main_token = l_bracket,
+                .data = .{
+                    .lhs = string,
+                    .rhs = try c.addExtra(std.zig.ast.Node.Slice{
+                        .start = start,
+                        .end = end,
+                    }),
+                },
+            });
+        },
         .fail_decl => {
             const payload = node.castTag(.fail_decl).?.data;
             // pub const name = @compileError(msg);
@@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
             const payload = node.castTag(.array_type).?.data;
             return renderArrayType(c, payload.len, payload.elem_type);
         },
+        .null_sentinel_array_type => {
+            const payload = node.castTag(.null_sentinel_array_type).?.data;
+            return renderNullSentinelArrayType(c, payload.len, payload.elem_type);
+        },
         .array_filler => {
             const payload = node.castTag(.array_filler).?.data;
 
@@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
     });
 }
 
+fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
+    const l_bracket = try c.addToken(.l_bracket, "[");
+    const len_expr = try c.addNode(.{
+        .tag = .integer_literal,
+        .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{len}),
+        .data = undefined,
+    });
+    _ = try c.addToken(.colon, ":");
+
+    const sentinel_expr = try c.addNode(.{
+        .tag = .integer_literal,
+        .main_token = try c.addToken(.integer_literal, "0"),
+        .data = undefined,
+    });
+
+    _ = try c.addToken(.r_bracket, "]");
+    const elem_type_expr = try renderNode(c, elem_type);
+    return c.addNode(.{
+        .tag = .array_type_sentinel,
+        .main_token = l_bracket,
+        .data = .{
+            .lhs = len_expr,
+            .rhs = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel {
+                .sentinel = sentinel_expr,
+                .elem_type = elem_type_expr,
+            }),
+        },
+    });
+}
+
 fn addSemicolonIfNeeded(c: *Context, node: Node) !void {
     switch (node.tag()) {
         .warning => unreachable,
@@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
         .integer_literal,
         .float_literal,
         .string_literal,
+        .string_slice,
         .char_literal,
         .enum_literal,
         .identifier,
@@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
         .func,
         .call,
         .array_type,
+        .null_sentinel_array_type,
         .bool_to_int,
         .div_exact,
         .byte_offset_of,
diff --git a/src/zig_clang.cpp b/src/zig_clang.cpp
@@ -2459,6 +2459,11 @@ struct ZigClangQualType ZigClangFunctionType_getReturnType(const struct ZigClang
     return bitcast(casted->getReturnType());
 }
 
+const struct ZigClangExpr *ZigClangGenericSelectionExpr_getResultExpr(const struct ZigClangGenericSelectionExpr *self) {
+    auto casted = reinterpret_cast<const clang::GenericSelectionExpr *>(self);
+    return reinterpret_cast<const struct ZigClangExpr *>(casted->getResultExpr());
+}
+
 bool ZigClangFunctionProtoType_isVariadic(const struct ZigClangFunctionProtoType *self) {
     auto casted = reinterpret_cast<const clang::FunctionProtoType *>(self);
     return casted->isVariadic();
diff --git a/src/zig_clang.h b/src/zig_clang.h
@@ -1123,6 +1123,8 @@ ZIG_EXTERN_C bool ZigClangFunctionType_getNoReturnAttr(const struct ZigClangFunc
 ZIG_EXTERN_C enum ZigClangCallingConv ZigClangFunctionType_getCallConv(const struct ZigClangFunctionType *self);
 ZIG_EXTERN_C struct ZigClangQualType ZigClangFunctionType_getReturnType(const struct ZigClangFunctionType *self);
 
+ZIG_EXTERN_C const struct ZigClangExpr *ZigClangGenericSelectionExpr_getResultExpr(const struct ZigClangGenericSelectionExpr *self);
+
 ZIG_EXTERN_C bool ZigClangFunctionProtoType_isVariadic(const struct ZigClangFunctionProtoType *self);
 ZIG_EXTERN_C unsigned ZigClangFunctionProtoType_getNumParams(const struct ZigClangFunctionProtoType *self);
 ZIG_EXTERN_C struct ZigClangQualType ZigClangFunctionProtoType_getParamType(const struct ZigClangFunctionProtoType *self, unsigned i);
diff --git a/test/cli.zig b/test/cli.zig
@@ -28,6 +28,8 @@ pub fn main() !void {
     const zig_exe = try fs.path.resolve(a, &[_][]const u8{zig_exe_rel});
 
     const dir_path = try fs.path.join(a, &[_][]const u8{ cache_root, "clitest" });
+    defer fs.cwd().deleteTree(dir_path) catch {};
+    
     const TestFn = fn ([]const u8, []const u8) anyerror!void;
     const test_fns = [_]TestFn{
         testZigInitLib,
@@ -174,4 +176,13 @@ fn testZigFmt(zig_exe: []const u8, dir_path: []const u8) !void {
     const run_result3 = try exec(dir_path, true, &[_][]const u8{ zig_exe, "fmt", dir_path });
     // both files have been formatted, nothing should change now
     testing.expect(run_result3.stdout.len == 0);
+
+    // Check UTF-16 decoding
+    const fmt4_zig_path = try fs.path.join(a, &[_][]const u8{ dir_path, "fmt4.zig" });
+    var unformatted_code_utf16 = "\xff\xfe \x00 \x00 \x00 \x00/\x00/\x00 \x00n\x00o\x00 \x00r\x00e\x00a\x00s\x00o\x00n\x00";
+    try fs.cwd().writeFile(fmt4_zig_path, unformatted_code_utf16);
+
+    const run_result4 = try exec(dir_path, true, &[_][]const u8{ zig_exe, "fmt", dir_path });
+    testing.expect(std.mem.startsWith(u8, run_result4.stdout, fmt4_zig_path));
+    testing.expect(run_result4.stdout.len == fmt4_zig_path.len + 1 and run_result4.stdout[run_result4.stdout.len - 1] == '\n');
 }
diff --git a/test/run_translated_c.zig b/test/run_translated_c.zig
@@ -3,6 +3,17 @@ const tests = @import("tests.zig");
 const nl = std.cstr.line_sep;
 
 pub fn addCases(cases: *tests.RunTranslatedCContext) void {
+    cases.add("division of floating literals",
+        \\#define _NO_CRT_STDIO_INLINE 1
+        \\#include <stdio.h>
+        \\#define PI 3.14159265358979323846f
+        \\#define DEG2RAD (PI/180.0f)
+        \\int main(void) {
+        \\    printf("DEG2RAD is: %f\n", DEG2RAD);
+        \\    return 0;
+        \\}
+    , "DEG2RAD is: 0.017453" ++ nl);
+
     cases.add("use global scope for record/enum/typedef type transalation if needed",
         \\void bar(void);
         \\void baz(void);
@@ -1187,4 +1198,114 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void {
         \\    return 0;
         \\}
     , "");
+
+    cases.add("Generic selections",
+        \\#include <stdlib.h>
+        \\#include <string.h>
+        \\#include <stdint.h>
+        \\#define my_generic_fn(X) _Generic((X),    \
+        \\              int: abs,                   \
+        \\              char *: strlen,             \
+        \\              size_t: malloc,             \
+        \\              default: free               \
+        \\)(X)
+        \\#define my_generic_val(X) _Generic((X),   \
+        \\              int: 1,                     \
+        \\              const char *: "bar"         \
+        \\)
+        \\int main(void) {
+        \\    if (my_generic_val(100) != 1) abort();
+        \\
+        \\    const char *foo = "foo";
+        \\    const char *bar = my_generic_val(foo);
+        \\    if (strcmp(bar, "bar") != 0) abort();
+        \\
+        \\    if (my_generic_fn(-42) != 42) abort();
+        \\    if (my_generic_fn("hello") != 5) abort();
+        \\
+        \\    size_t size = 8192;
+        \\    uint8_t *mem = my_generic_fn(size);
+        \\    memset(mem, 42, size);
+        \\    if (mem[size - 1] != 42) abort();
+        \\    my_generic_fn(mem);
+        \\
+        \\    return 0;
+        \\}
+    , "");
+
+    // See __builtin_alloca_with_align comment in std.c.builtins
+    cases.add("use of unimplemented builtin in unused function does not prevent compilation",
+        \\#include <stdlib.h>
+        \\void unused() {
+        \\    __builtin_alloca_with_align(1, 8);
+        \\}
+        \\int main(void) {
+        \\    if (__builtin_sqrt(1.0) != 1.0) abort();
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("convert single-statement bodies into blocks for if/else/for/while. issue #8159",
+        \\#include <stdlib.h>
+        \\int foo() { return 1; }
+        \\int main(void) {
+        \\    int i = 0;
+        \\    if (i == 0) if (i == 0) if (i != 0) i = 1;
+        \\    if (i != 0) i = 1; else if (i == 0) if (i == 0) i += 1;
+        \\    for (; i < 10;) for (; i < 10;) i++;
+        \\    while (i == 100) while (i == 100) foo();
+        \\    if (0) do do "string"; while(1); while(1);
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("cast RHS of compound assignment if necessary, unused result",
+        \\#include <stdlib.h>
+        \\int main(void) {
+        \\   signed short val = -1;
+        \\   val += 1; if (val != 0) abort();
+        \\   val -= 1; if (val != -1) abort();
+        \\   val *= 2; if (val != -2) abort();
+        \\   val /= 2; if (val != -1) abort();
+        \\   val %= 2; if (val != -1) abort();
+        \\   val <<= 1; if (val != -2) abort();
+        \\   val >>= 1; if (val != -1) abort();
+        \\   val += 100000000;       // compile error if @truncate() not inserted
+        \\   unsigned short uval = 1;
+        \\   uval += 1; if (uval != 2) abort();
+        \\   uval -= 1; if (uval != 1) abort();
+        \\   uval *= 2; if (uval != 2) abort();
+        \\   uval /= 2; if (uval != 1) abort();
+        \\   uval %= 2; if (uval != 1) abort();
+        \\   uval <<= 1; if (uval != 2) abort();
+        \\   uval >>= 1; if (uval != 1) abort();
+        \\   uval += 100000000;      // compile error if @truncate() not inserted
+        \\}
+    , "");
+
+    cases.add("cast RHS of compound assignment if necessary, used result",
+        \\#include <stdlib.h>
+        \\int main(void) {
+        \\   signed short foo;
+        \\   signed short val = -1;
+        \\   foo = (val += 1); if (foo != 0) abort();
+        \\   foo = (val -= 1); if (foo != -1) abort();
+        \\   foo = (val *= 2); if (foo != -2) abort();
+        \\   foo = (val /= 2); if (foo != -1) abort();
+        \\   foo = (val %= 2); if (foo != -1) abort();
+        \\   foo = (val <<= 1); if (foo != -2) abort();
+        \\   foo = (val >>= 1); if (foo != -1) abort();
+        \\   foo = (val += 100000000);    // compile error if @truncate() not inserted
+        \\   unsigned short ufoo;
+        \\   unsigned short uval = 1;
+        \\   ufoo = (uval += 1); if (ufoo != 2) abort();
+        \\   ufoo = (uval -= 1); if (ufoo != 1) abort();
+        \\   ufoo = (uval *= 2); if (ufoo != 2) abort();
+        \\   ufoo = (uval /= 2); if (ufoo != 1) abort();
+        \\   ufoo = (uval %= 2); if (ufoo != 1) abort();
+        \\   ufoo = (uval <<= 1); if (ufoo != 2) abort();
+        \\   ufoo = (uval >>= 1); if (ufoo != 1) abort();
+        \\   ufoo = (uval += 100000000);  // compile error if @truncate() not inserted
+        \\}
+    , "");
 }
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
@@ -4,7 +4,7 @@ const mem = std.mem;
 const math = std.math;
 const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
-const expectWithinEpsilon = std.testing.expectWithinEpsilon;
+const expectApproxEqRel = std.testing.expectApproxEqRel;
 const Vector = std.meta.Vector;
 
 test "implicit cast vector to array - bool" {
@@ -527,10 +527,14 @@ test "vector reduce operation" {
             switch (@typeInfo(TX)) {
                 .Int, .Bool => expectEqual(expected, r),
                 .Float => {
-                    if (math.isNan(expected) != math.isNan(r)) {
-                        std.debug.panic("unexpected NaN value!\n", .{});
+                    const expected_nan = math.isNan(expected);
+                    const got_nan = math.isNan(r);
+
+                    if (expected_nan and got_nan) {
+                        // Do this check explicitly as two NaN values are never
+                        // equal.
                     } else {
-                        expectWithinEpsilon(expected, r, 0.001);
+                        expectApproxEqRel(expected, r, math.sqrt(math.epsilon(TX)));
                     }
                 },
                 else => unreachable,
diff --git a/test/stage2/cbe.zig b/test/stage2/cbe.zig
@@ -51,7 +51,7 @@ pub fn addCases(ctx: *TestContext) !void {
             \\    _ = printf("Hello, %s!\n", "world");
             \\    return 0;
             \\}
-        , "Hello, world!\n");
+        , "Hello, world!" ++ std.cstr.line_sep);
     }
 
     {
diff --git a/test/stage2/wasm.zig b/test/stage2/wasm.zig
@@ -175,6 +175,41 @@ pub fn addCases(ctx: *TestContext) !void {
             \\    return i;
             \\}
         , "31\n");
+
+        case.addCompareOutput(
+            \\export fn _start() void {
+            \\    assert(foo(true) != @as(i32, 30));
+            \\}
+            \\
+            \\fn assert(ok: bool) void {
+            \\    if (!ok) unreachable;
+            \\}
+            \\
+            \\fn foo(ok: bool) i32 {
+            \\    const x = if(ok) @as(i32, 20) else @as(i32, 10);
+            \\    return x;
+            \\}
+        , "");
+
+        case.addCompareOutput(
+            \\export fn _start() void {
+            \\    assert(foo(false) == @as(i32, 20));
+            \\    assert(foo(true) == @as(i32, 30));
+            \\}
+            \\
+            \\fn assert(ok: bool) void {
+            \\    if (!ok) unreachable;
+            \\}
+            \\
+            \\fn foo(ok: bool) i32 {
+            \\    const val: i32 = blk: {
+            \\        var x: i32 = 1;
+            \\        if (!ok) break :blk x + @as(i32, 9);
+            \\        break :blk x + @as(i32, 19);
+            \\    };
+            \\    return val + 10;
+            \\}
+        , "");
     }
 
     {
diff --git a/test/standalone.zig b/test/standalone.zig
@@ -9,7 +9,10 @@ pub fn addCases(cases: *tests.StandaloneContext) void {
     cases.add("test/standalone/main_return_error/error_u8.zig");
     cases.add("test/standalone/main_return_error/error_u8_non_zero.zig");
     cases.addBuildFile("test/standalone/main_pkg_path/build.zig");
-    cases.addBuildFile("test/standalone/shared_library/build.zig");
+    if (std.Target.current.os.tag != .macos) {
+        // TODO zld cannot link shared libraries yet.
+        cases.addBuildFile("test/standalone/shared_library/build.zig");
+    }
     cases.addBuildFile("test/standalone/mix_o_files/build.zig");
     cases.addBuildFile("test/standalone/global_linkage/build.zig");
     cases.addBuildFile("test/standalone/static_c_lib/build.zig");
diff --git a/test/standalone/mix_o_files/base64.zig b/test/standalone/mix_o_files/base64.zig
@@ -3,9 +3,9 @@ const base64 = @import("std").base64;
 export fn decode_base_64(dest_ptr: [*]u8, dest_len: usize, source_ptr: [*]const u8, source_len: usize) usize {
     const src = source_ptr[0..source_len];
     const dest = dest_ptr[0..dest_len];
-    const base64_decoder = base64.standard_decoder_unsafe;
-    const decoded_size = base64_decoder.calcSize(src);
-    base64_decoder.decode(dest[0..decoded_size], src);
+    const base64_decoder = base64.standard.Decoder;
+    const decoded_size = base64_decoder.calcSizeForSlice(src) catch unreachable;
+    base64_decoder.decode(dest[0..decoded_size], src) catch unreachable;
     return decoded_size;
 }
 
diff --git a/test/translate_c.zig b/test/translate_c.zig
@@ -3,6 +3,28 @@ const std = @import("std");
 const CrossTarget = std.zig.CrossTarget;
 
 pub fn addCases(cases: *tests.TranslateCContext) void {
+    cases.add("unnamed child types of typedef receive typedef's name",
+        \\typedef enum {
+        \\    FooA,
+        \\    FooB,
+        \\} Foo;
+        \\typedef struct {
+        \\    int a, b;
+        \\} Bar;
+    , &[_][]const u8{
+        \\pub const Foo = extern enum(c_int) {
+        \\    A,
+        \\    B,
+        \\    _,
+        \\};
+        \\pub const FooA = @enumToInt(Foo.A);
+        \\pub const FooB = @enumToInt(Foo.B);
+        \\pub const Bar = extern struct {
+        \\    a: c_int,
+        \\    b: c_int,
+        \\};
+    });
+
     cases.add("if as while stmt has semicolon",
         \\void foo() {
         \\    while (1) if (1) {
@@ -218,9 +240,8 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\} Bar;
     , &[_][]const u8{
         \\source.h:1:9: warning: struct demoted to opaque type - unable to translate type of field foo
-        \\const struct_unnamed_1 = opaque {};
-        \\pub const Foo = struct_unnamed_1;
-        \\const struct_unnamed_2 = extern struct {
+        \\pub const Foo = opaque {};
+        \\pub const Bar = extern struct {
         \\    bar: ?*Foo,
         \\};
     });
@@ -519,17 +540,16 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\} outer;
         \\void foo(outer *x) { x->y = x->x; }
     , &[_][]const u8{
-        \\const struct_unnamed_3 = extern struct {
+        \\const struct_unnamed_2 = extern struct {
         \\    y: c_int,
         \\};
-        \\const union_unnamed_2 = extern union {
+        \\const union_unnamed_1 = extern union {
         \\    x: u8,
-        \\    unnamed_0: struct_unnamed_3,
+        \\    unnamed_0: struct_unnamed_2,
         \\};
-        \\const struct_unnamed_1 = extern struct {
-        \\    unnamed_0: union_unnamed_2,
+        \\pub const outer = extern struct {
+        \\    unnamed_0: union_unnamed_1,
         \\};
-        \\pub const outer = struct_unnamed_1;
         \\pub export fn foo(arg_x: [*c]outer) void {
         \\    var x = arg_x;
         \\    x.*.unnamed_0.unnamed_0.y = @bitCast(c_int, @as(c_uint, x.*.unnamed_0.x));
@@ -565,21 +585,20 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\struct {int x,y;} s2 = {.y = 2, .x=1};
         \\foo s3 = { 123 };
     , &[_][]const u8{
-        \\const struct_unnamed_1 = extern struct {
+        \\pub const foo = extern struct {
         \\    x: c_int,
         \\};
-        \\pub const foo = struct_unnamed_1;
-        \\const struct_unnamed_2 = extern struct {
+        \\const struct_unnamed_1 = extern struct {
         \\    x: f64,
         \\    y: f64,
         \\    z: f64,
         \\};
-        \\pub export var s0: struct_unnamed_2 = struct_unnamed_2{
+        \\pub export var s0: struct_unnamed_1 = struct_unnamed_1{
         \\    .x = 1.2,
         \\    .y = 1.3,
         \\    .z = 0,
         \\};
-        \\const struct_unnamed_3 = extern struct {
+        \\const struct_unnamed_2 = extern struct {
         \\    sec: c_int,
         \\    min: c_int,
         \\    hour: c_int,
@@ -587,7 +606,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    mon: c_int,
         \\    year: c_int,
         \\};
-        \\pub export var s1: struct_unnamed_3 = struct_unnamed_3{
+        \\pub export var s1: struct_unnamed_2 = struct_unnamed_2{
         \\    .sec = @as(c_int, 30),
         \\    .min = @as(c_int, 15),
         \\    .hour = @as(c_int, 17),
@@ -595,11 +614,11 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    .mon = @as(c_int, 12),
         \\    .year = @as(c_int, 2014),
         \\};
-        \\const struct_unnamed_4 = extern struct {
+        \\const struct_unnamed_3 = extern struct {
         \\    x: c_int,
         \\    y: c_int,
         \\};
-        \\pub export var s2: struct_unnamed_4 = struct_unnamed_4{
+        \\pub export var s2: struct_unnamed_3 = struct_unnamed_3{
         \\    .x = @as(c_int, 1),
         \\    .y = @as(c_int, 2),
         \\};
@@ -745,14 +764,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    static const char v2[] = "2.2.2";
         \\}
     , &[_][]const u8{
-        \\const v2: [6]u8 = [6]u8{
-        \\    '2',
-        \\    '.',
-        \\    '2',
-        \\    '.',
-        \\    '2',
-        \\    0,
-        \\};
+        \\const v2: [5:0]u8 = "2.2.2".*;
         \\pub export fn foo() void {}
     });
 
@@ -1600,30 +1612,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\static char arr1[] = "hello";
         \\char arr2[] = "hello";
     , &[_][]const u8{
-        \\pub export var arr0: [6]u8 = [6]u8{
-        \\    'h',
-        \\    'e',
-        \\    'l',
-        \\    'l',
-        \\    'o',
-        \\    0,
-        \\};
-        \\pub var arr1: [6]u8 = [6]u8{
-        \\    'h',
-        \\    'e',
-        \\    'l',
-        \\    'l',
-        \\    'o',
-        \\    0,
-        \\};
-        \\pub export var arr2: [6]u8 = [6]u8{
-        \\    'h',
-        \\    'e',
-        \\    'l',
-        \\    'l',
-        \\    'o',
-        \\    0,
-        \\};
+        \\pub export var arr0: [5:0]u8 = "hello".*;
+        \\pub var arr1: [5:0]u8 = "hello".*;
+        \\pub export var arr2: [5:0]u8 = "hello".*;
     });
 
     cases.add("array initializer expr",
@@ -1667,37 +1658,36 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    p,
         \\};
     , &[_][]const u8{
-        \\const enum_unnamed_1 = extern enum(c_int) {
+        \\pub const d = extern enum(c_int) {
         \\    a,
         \\    b,
         \\    c,
         \\    _,
         \\};
-        \\pub const a = @enumToInt(enum_unnamed_1.a);
-        \\pub const b = @enumToInt(enum_unnamed_1.b);
-        \\pub const c = @enumToInt(enum_unnamed_1.c);
-        \\pub const d = enum_unnamed_1;
-        \\const enum_unnamed_2 = extern enum(c_int) {
+        \\pub const a = @enumToInt(d.a);
+        \\pub const b = @enumToInt(d.b);
+        \\pub const c = @enumToInt(d.c);
+        \\const enum_unnamed_1 = extern enum(c_int) {
         \\    e = 0,
         \\    f = 4,
         \\    g = 5,
         \\    _,
         \\};
-        \\pub const e = @enumToInt(enum_unnamed_2.e);
-        \\pub const f = @enumToInt(enum_unnamed_2.f);
-        \\pub const g = @enumToInt(enum_unnamed_2.g);
-        \\pub export var h: enum_unnamed_2 = @intToEnum(enum_unnamed_2, e);
-        \\const enum_unnamed_3 = extern enum(c_int) {
+        \\pub const e = @enumToInt(enum_unnamed_1.e);
+        \\pub const f = @enumToInt(enum_unnamed_1.f);
+        \\pub const g = @enumToInt(enum_unnamed_1.g);
+        \\pub export var h: enum_unnamed_1 = @intToEnum(enum_unnamed_1, e);
+        \\const enum_unnamed_2 = extern enum(c_int) {
         \\    i,
         \\    j,
         \\    k,
         \\    _,
         \\};
-        \\pub const i = @enumToInt(enum_unnamed_3.i);
-        \\pub const j = @enumToInt(enum_unnamed_3.j);
-        \\pub const k = @enumToInt(enum_unnamed_3.k);
+        \\pub const i = @enumToInt(enum_unnamed_2.i);
+        \\pub const j = @enumToInt(enum_unnamed_2.j);
+        \\pub const k = @enumToInt(enum_unnamed_2.k);
         \\pub const struct_Baz = extern struct {
-        \\    l: enum_unnamed_3,
+        \\    l: enum_unnamed_2,
         \\    m: d,
         \\};
         \\pub const enum_i = extern enum(c_int) {
@@ -1962,7 +1952,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
     , &[_][]const u8{
         \\pub export fn foo() c_int {
         \\    var a: c_int = 5;
-        \\    while (true) a = 2;
+        \\    while (true) {
+        \\        a = 2;
+        \\    }
         \\    while (true) {
         \\        var a_1: c_int = 4;
         \\        a_1 = 9;
@@ -1975,7 +1967,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\        var a_1: c_int = 2;
         \\        a_1 = 12;
         \\    }
-        \\    while (true) a = 7;
+        \\    while (true) {
+        \\        a = 7;
+        \\    }
         \\    return 0;
         \\}
     });
@@ -2036,7 +2030,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\}
     , &[_][]const u8{
         \\pub export fn bar() c_int {
-        \\    if ((if (true) @as(c_int, 5) else if (true) @as(c_int, 4) else @as(c_int, 6)) != 0) _ = @as(c_int, 2);
+        \\    if ((if (true) @as(c_int, 5) else if (true) @as(c_int, 4) else @as(c_int, 6)) != 0) {
+        \\        _ = @as(c_int, 2);
+        \\    }
         \\    return if (true) @as(c_int, 5) else if (true) @as(c_int, 4) else @as(c_int, 6);
         \\}
     });
@@ -2417,7 +2413,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\pub const yes = [*c]u8;
         \\pub export fn foo() void {
         \\    var a: yes = undefined;
-        \\    if (a != null) _ = @as(c_int, 2);
+        \\    if (a != null) {
+        \\        _ = @as(c_int, 2);
+        \\    }
         \\}
     });
 
@@ -2456,7 +2454,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    b: c_int,
         \\};
         \\pub extern var a: struct_Foo;
-        \\pub export var b: f32 = 2;
+        \\pub export var b: f32 = 2.0;
         \\pub export fn foo() void {
         \\    var c: [*c]struct_Foo = undefined;
         \\    _ = a.b;
@@ -2768,7 +2766,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    var a = arg_a;
         \\    var i: c_int = 0;
         \\    while (a > @bitCast(c_uint, @as(c_int, 0))) {
-        \\        a >>= @intCast(@import("std").math.Log2Int(c_int), 1);
+        \\        a >>= @intCast(@import("std").math.Log2Int(c_int), @as(c_int, 1));
         \\    }
         \\    return i;
         \\}
@@ -2788,7 +2786,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    var a = arg_a;
         \\    var i: c_int = 0;
         \\    while (a > @bitCast(c_uint, @as(c_int, 0))) {
-        \\        a >>= @intCast(@import("std").math.Log2Int(c_int), 1);
+        \\        a >>= @intCast(@import("std").math.Log2Int(c_int), @as(c_int, 1));
         \\    }
         \\    return i;
         \\}
@@ -3020,17 +3018,17 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\pub extern fn fn_bool(x: bool) void;
         \\pub extern fn fn_ptr(x: ?*c_void) void;
         \\pub export fn call() void {
-        \\    fn_int(@floatToInt(c_int, 3));
-        \\    fn_int(@floatToInt(c_int, 3));
-        \\    fn_int(@floatToInt(c_int, 3));
+        \\    fn_int(@floatToInt(c_int, 3.0));
+        \\    fn_int(@floatToInt(c_int, 3.0));
+        \\    fn_int(@floatToInt(c_int, 3.0));
         \\    fn_int(@as(c_int, 1094861636));
         \\    fn_f32(@intToFloat(f32, @as(c_int, 3)));
         \\    fn_f64(@intToFloat(f64, @as(c_int, 3)));
         \\    fn_char(@bitCast(u8, @truncate(i8, @as(c_int, '3'))));
         \\    fn_char(@bitCast(u8, @truncate(i8, @as(c_int, '\x01'))));
         \\    fn_char(@bitCast(u8, @truncate(i8, @as(c_int, 0))));
-        \\    fn_f32(3);
-        \\    fn_f64(3);
+        \\    fn_f32(3.0);
+        \\    fn_f64(3.0);
         \\    fn_bool(@as(c_int, 123) != 0);
         \\    fn_bool(@as(c_int, 0) != 0);
         \\    fn_bool(@ptrToInt(fn_int) != 0);
@@ -3418,4 +3416,56 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\pub const MAY_NEED_PROMOTION_HEX = @import("std").meta.promoteIntLiteral(c_int, 0x80000000, .hexadecimal);
         \\pub const MAY_NEED_PROMOTION_OCT = @import("std").meta.promoteIntLiteral(c_int, 0o20000000000, .octal);
     });
+
+    // See __builtin_alloca_with_align comment in std.c.builtins
+    cases.add("demote un-implemented builtins",
+        \\#define FOO(X) __builtin_alloca_with_align((X), 8)
+    , &[_][]const u8{
+        \\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins");
+    });
+
+    cases.add("null sentinel arrays when initialized from string literal. Issue #8256",
+        \\#include <stdint.h>
+        \\char zero[0] = "abc";
+        \\uint32_t zero_w[0] = U"💯💯💯";
+        \\char empty_incomplete[] = "";
+        \\uint32_t empty_incomplete_w[] = U"";
+        \\char empty_constant[100] = "";
+        \\uint32_t empty_constant_w[100] = U"";
+        \\char incomplete[] = "abc";
+        \\uint32_t incomplete_w[] = U"💯💯💯";
+        \\char truncated[1] = "abc";
+        \\uint32_t truncated_w[1] = U"💯💯💯";
+        \\char extend[5] = "a";
+        \\uint32_t extend_w[5] = U"💯";
+        \\char no_null[3] = "abc";
+        \\uint32_t no_null_w[3] = U"💯💯💯";
+    , &[_][]const u8{
+        \\pub export var zero: [0]u8 = [0]u8{};
+        \\pub export var zero_w: [0]u32 = [0]u32{};
+        \\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1;
+        \\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1;
+        \\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100;
+        \\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100;
+        \\pub export var incomplete: [3:0]u8 = "abc".*;
+        \\pub export var incomplete_w: [3:0]u32 = [3:0]u32{
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\};
+        \\pub export var truncated: [1]u8 = "abc"[0..1].*;
+        \\pub export var truncated_w: [1]u32 = [1]u32{
+        \\    '\u{1f4af}',
+        \\};
+        \\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4;
+        \\pub export var extend_w: [5]u32 = [1]u32{
+        \\    '\u{1f4af}',
+        \\} ++ [1]u32{0} ** 4;
+        \\pub export var no_null: [3]u8 = "abc".*;
+        \\pub export var no_null_w: [3]u32 = [3]u32{
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\};
+    });
 }
diff --git a/tools/update_clang_options.zig b/tools/update_clang_options.zig
@@ -332,6 +332,10 @@ const known_options = [_]KnownOpt{
         .name = "s",
         .ident = "strip",
     },
+    .{
+        .name = "dynamiclib",
+        .ident = "shared",
+    },
 };
 
 const blacklisted_options = [_][]const u8{};

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	CMakeLists.txt	\|	29	++++++++++++++++++++++-------
A	ci/azure/macos_arm64_script	\|	132	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	ci/azure/pipelines.yml	\|	15	++++++++++++++-
M	ci/azure/windows_msvc_install	\|	2	+-
M	doc/docgen.zig	\|	23	++++++++++++++++++-----
M	doc/langref.html.in	\|	6	+++---
M	lib/std/array_hash_map.zig	\|	26	+++++++++++++-------------
M	lib/std/base64.zig	\|	646	+++++++++++++++++++++++++++++++++++++++----------------------------------------
M	lib/std/bit_set.zig	\|	28	+++++++++++++++++++++-------
M	lib/std/build.zig	\|	23	+++++++----------------
M	lib/std/c.zig	\|	6	+++---
M	lib/std/c/builtins.zig	\|	8	+++++++-
M	lib/std/crypto.zig	\|	20	+++++++++++++++++---
M	lib/std/crypto/25519/curve25519.zig	\|	13	+++++++------
M	lib/std/crypto/25519/ed25519.zig	\|	23	++++++++++++-----------
M	lib/std/crypto/25519/edwards25519.zig	\|	21	+++++++++++----------
M	lib/std/crypto/25519/field.zig	\|	5	+++--
M	lib/std/crypto/25519/ristretto255.zig	\|	9	+++++----
M	lib/std/crypto/25519/scalar.zig	\|	3	++-
M	lib/std/crypto/25519/x25519.zig	\|	11	++++++-----
M	lib/std/crypto/aegis.zig	\|	5	+++--
M	lib/std/crypto/aes_gcm.zig	\|	3	++-
M	lib/std/crypto/aes_ocb.zig	\|	3	++-
M	lib/std/crypto/bcrypt.zig	\|	22	++++++++--------------
M	lib/std/crypto/benchmark.zig	\|	1	+
M	lib/std/crypto/chacha20.zig	\|	1170	++++++++++++++++++++++++++++++++++++++++---------------------------------------
A	lib/std/crypto/error.zig	\|	34	++++++++++++++++++++++++++++++++++
M	lib/std/crypto/gimli.zig	\|	5	+++--
M	lib/std/crypto/isap.zig	\|	3	++-
M	lib/std/crypto/pbkdf2.zig	\|	150	+++++++++++++++++++++++++++++++++++++------------------------------------------
M	lib/std/crypto/salsa20.zig	\|	15	++++++++-------
M	lib/std/debug.zig	\|	18	------------------
A	lib/std/enums.zig	\|	1281	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	lib/std/fmt.zig	\|	9	+++++++--
M	lib/std/fs.zig	\|	10	+++++-----
M	lib/std/fs/path.zig	\|	12	+++++++++++-
M	lib/std/hash/auto_hash.zig	\|	2	+-
M	lib/std/macho.zig	\|	40	++++++++++++++++++++++++++++++++++++++++
M	lib/std/mem.zig	\|	19	+++++++++++++++++++
M	lib/std/meta.zig	\|	69	+++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M	lib/std/meta/trait.zig	\|	78	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	lib/std/os.zig	\|	5	+++--
M	lib/std/os/linux/io_uring.zig	\|	2	+-
M	lib/std/os/linux/mips.zig	\|	37	+++++++++++++++++++++++++++++++++++++
M	lib/std/os/uefi/tables/boot_services.zig	\|	3	++-
M	lib/std/os/windows/user32.zig	\|	2	+-
M	lib/std/special/build_runner.zig	\|	8	++++----
M	lib/std/std.zig	\|	4	++++
M	lib/std/testing.zig	\|	76	+++++++++++++++++++++++++++++++++++++++-------------------------------------
M	lib/std/zig/parser_test.zig	\|	319	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	lib/std/zig/render.zig	\|	259	++++++++++++++++++++++++++++++-------------------------------------------------
M	src/BuiltinFn.zig	\|	2	+-
M	src/Compilation.zig	\|	23	+++++++++++++++++------
M	src/clang.zig	\|	5	+++++
M	src/clang_options_data.zig	\|	9	++++++++-
M	src/codegen.zig	\|	195	+++++++++++++++++++++++++------------------------------------------------------
M	src/codegen/aarch64.zig	\|	5	++++-
M	src/codegen/llvm.zig	\|	22	+++++++++++-----------
M	src/codegen/llvm/bindings.zig	\|	33	+++++++++++++++++++++++----------
M	src/codegen/wasm.zig	\|	44	+++++++++++++++++++++++++++++++++++++++++---
M	src/config.zig.in	\|	2	+-
M	src/introspect.zig	\|	8	++++++++
M	src/link/MachO.zig	\|	1091	++++++++++++++++++++++++++++++++++++++++---------------------------------------
A	src/link/MachO/Archive.zig	\|	278	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/link/MachO/DebugSymbols.zig	\|	8	++++----
A	src/link/MachO/Object.zig	\|	229	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/link/MachO/Zld.zig	\|	3294	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/link/MachO/bind.zig	\|	145	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	src/link/MachO/imports.zig	\|	152	-------------------------------------------------------------------------------
M	src/main.zig	\|	79	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M	src/stage1/all_types.hpp	\|	29	+++++++++++++++++++++--------
M	src/stage1/analyze.cpp	\|	49	++++++++++++++++++++++++++++++++++++++++++++++++-
M	src/stage1/ir.cpp	\|	166	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
M	src/stage1/ir_print.cpp	\|	59	+++++++++++++++++++++++++++++++++++++++++++++++++----------
M	src/translate_c.zig	\|	369	++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M	src/translate_c/ast.zig	\|	86	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M	src/zig_clang.cpp	\|	5	+++++
M	src/zig_clang.h	\|	2	++
M	test/cli.zig	\|	11	+++++++++++
M	test/run_translated_c.zig	\|	121	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	test/stage1/behavior/vector.zig	\|	12	++++++++----
M	test/stage2/cbe.zig	\|	2	+-
M	test/stage2/wasm.zig	\|	35	+++++++++++++++++++++++++++++++++++
M	test/standalone.zig	\|	5	++++-
M	test/standalone/mix_o_files/base64.zig	\|	6	+++---
M	test/translate_c.zig	\|	202	+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
M	tools/update_clang_options.zig	\|	4	++++