diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fb6d00d05..963938ca78 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,13 +434,12 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/lib/compiler_rt/log10.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/log2.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/modti3.zig" + "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulXi3.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/muldf3.zig" - "${CMAKE_SOURCE_DIR}/lib/compiler_rt/muldi3.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulf3.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulo.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulsf3.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/multf3.zig" - "${CMAKE_SOURCE_DIR}/lib/compiler_rt/multi3.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/mulxf3.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/negXi2.zig" "${CMAKE_SOURCE_DIR}/lib/compiler_rt/negv.zig" @@ -569,6 +568,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/clang_options_data.zig" "${CMAKE_SOURCE_DIR}/src/codegen.zig" "${CMAKE_SOURCE_DIR}/src/codegen/c.zig" + "${CMAKE_SOURCE_DIR}/src/codegen/c/type.zig" "${CMAKE_SOURCE_DIR}/src/codegen/llvm.zig" "${CMAKE_SOURCE_DIR}/src/codegen/llvm/bindings.zig" "${CMAKE_SOURCE_DIR}/src/glibc.zig" @@ -612,7 +612,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" - "${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig" "${CMAKE_SOURCE_DIR}/src/main.zig" "${CMAKE_SOURCE_DIR}/src/mingw.zig" @@ -748,10 +747,11 @@ set(BUILD_ZIG2_ARGS build-exe src/main.zig -ofmt=c -lc -OReleaseSmall --name zig2 -femit-bin="${ZIG2_C_SOURCE}" - --pkg-begin build_options "${ZIG_CONFIG_ZIG_OUT}" --pkg-end + --mod "build_options::${ZIG_CONFIG_ZIG_OUT}" + --deps build_options -target "${HOST_TARGET_TRIPLE}" ) - + add_custom_command( OUTPUT "${ZIG2_C_SOURCE}" COMMAND zig1 ${BUILD_ZIG2_ARGS} @@ -765,10 +765,11 @@ set(BUILD_COMPILER_RT_ARGS build-obj lib/compiler_rt.zig -ofmt=c -OReleaseSmall --name compiler_rt -femit-bin="${ZIG_COMPILER_RT_C_SOURCE}" - --pkg-begin build_options "${ZIG_CONFIG_ZIG_OUT}" --pkg-end + --mod "build_options::${ZIG_CONFIG_ZIG_OUT}" + --deps build_options -target "${HOST_TARGET_TRIPLE}" ) - + add_custom_command( OUTPUT "${ZIG_COMPILER_RT_C_SOURCE}" COMMAND zig1 ${BUILD_COMPILER_RT_ARGS} @@ -782,7 +783,7 @@ set_target_properties(zig2 PROPERTIES COMPILE_FLAGS ${ZIG2_COMPILE_FLAGS} LINK_FLAGS ${ZIG2_LINK_FLAGS} ) -target_include_directories(zig2 PUBLIC "${CMAKE_SOURCE_DIR}/lib") +target_include_directories(zig2 PUBLIC "${CMAKE_SOURCE_DIR}/stage1") target_link_libraries(zig2 LINK_PUBLIC zigcpp) if(MSVC) diff --git a/build.zig b/build.zig index a4747139ec..87b1b797ca 100644 --- a/build.zig +++ b/build.zig @@ -118,8 +118,11 @@ pub fn build(b: *std.Build) !void { ".gz", ".z.0", ".z.9", + ".zstd.3", + ".zstd.19", "rfc1951.txt", "rfc1952.txt", + "rfc8478.txt", // exclude files from lib/std/compress/deflate/testdata ".expect", ".expect-noinput", @@ -513,8 +516,12 @@ fn addWasiUpdateStep(b: *std.Build, version: [:0]const u8) !void { run_opt.addArg("-o"); run_opt.addFileSourceArg(.{ .path = "stage1/zig1.wasm" }); + const copy_zig_h = b.addWriteFiles(); + copy_zig_h.addCopyFileToSource(.{ .path = "lib/zig.h" }, "stage1/zig.h"); + const update_zig1_step = b.step("update-zig1", "Update stage1/zig1.wasm"); update_zig1_step.dependOn(&run_opt.step); + update_zig1_step.dependOn(©_zig_h.step); } fn addCompilerStep( diff --git a/ci/x86_64-windows-debug.ps1 
b/ci/x86_64-windows-debug.ps1 index c8e58a717a..c6f0c529e9 100644 --- a/ci/x86_64-windows-debug.ps1 +++ b/ci/x86_64-windows-debug.ps1 @@ -87,7 +87,8 @@ CheckLastExitCode -OReleaseSmall ` --name compiler_rt ` -femit-bin="compiler_rt-x86_64-windows-msvc.c" ` - --pkg-begin build_options config.zig --pkg-end ` + --mod build_options::config.zig ` + --deps build_options ` -target x86_64-windows-msvc CheckLastExitCode diff --git a/ci/x86_64-windows-release.ps1 b/ci/x86_64-windows-release.ps1 index 4af2389a0e..adf59522b9 100644 --- a/ci/x86_64-windows-release.ps1 +++ b/ci/x86_64-windows-release.ps1 @@ -87,7 +87,8 @@ CheckLastExitCode -OReleaseSmall ` --name compiler_rt ` -femit-bin="compiler_rt-x86_64-windows-msvc.c" ` - --pkg-begin build_options config.zig --pkg-end ` + --mod build_options::config.zig ` + --deps build_options ` -target x86_64-windows-msvc CheckLastExitCode diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index feeb8dfb38..1cae2a710e 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -3,64 +3,34 @@ const builtin = @import("builtin"); pub const panic = @import("compiler_rt/common.zig").panic; comptime { - _ = @import("compiler_rt/addf3.zig"); - _ = @import("compiler_rt/addhf3.zig"); - _ = @import("compiler_rt/addsf3.zig"); - _ = @import("compiler_rt/adddf3.zig"); - _ = @import("compiler_rt/addtf3.zig"); - _ = @import("compiler_rt/addxf3.zig"); + // Integer routines + _ = @import("compiler_rt/count0bits.zig"); + _ = @import("compiler_rt/parity.zig"); + _ = @import("compiler_rt/popcount.zig"); + _ = @import("compiler_rt/bswap.zig"); + _ = @import("compiler_rt/cmp.zig"); - _ = @import("compiler_rt/subhf3.zig"); - _ = @import("compiler_rt/subsf3.zig"); - _ = @import("compiler_rt/subdf3.zig"); - _ = @import("compiler_rt/subtf3.zig"); - _ = @import("compiler_rt/subxf3.zig"); + _ = @import("compiler_rt/shift.zig"); + _ = @import("compiler_rt/negXi2.zig"); + _ = @import("compiler_rt/int.zig"); + _ = @import("compiler_rt/mulXi3.zig"); + _ = @import("compiler_rt/divti3.zig"); + _ = @import("compiler_rt/udivti3.zig"); + _ = @import("compiler_rt/modti3.zig"); + _ = @import("compiler_rt/umodti3.zig"); - _ = @import("compiler_rt/mulf3.zig"); - _ = @import("compiler_rt/mulhf3.zig"); - _ = @import("compiler_rt/mulsf3.zig"); - _ = @import("compiler_rt/muldf3.zig"); - _ = @import("compiler_rt/multf3.zig"); - _ = @import("compiler_rt/mulxf3.zig"); + _ = @import("compiler_rt/absv.zig"); + _ = @import("compiler_rt/absvsi2.zig"); + _ = @import("compiler_rt/absvdi2.zig"); + _ = @import("compiler_rt/absvti2.zig"); + _ = @import("compiler_rt/negv.zig"); - _ = @import("compiler_rt/powiXf2.zig"); - _ = @import("compiler_rt/mulc3.zig"); - _ = @import("compiler_rt/mulhc3.zig"); - _ = @import("compiler_rt/mulsc3.zig"); - _ = @import("compiler_rt/muldc3.zig"); - _ = @import("compiler_rt/mulxc3.zig"); - _ = @import("compiler_rt/multc3.zig"); - - _ = @import("compiler_rt/divc3.zig"); - _ = @import("compiler_rt/divhc3.zig"); - _ = @import("compiler_rt/divsc3.zig"); - _ = @import("compiler_rt/divdc3.zig"); - _ = @import("compiler_rt/divxc3.zig"); - _ = @import("compiler_rt/divtc3.zig"); - - _ = @import("compiler_rt/neghf2.zig"); - _ = @import("compiler_rt/negsf2.zig"); - _ = @import("compiler_rt/negdf2.zig"); - _ = @import("compiler_rt/negtf2.zig"); - _ = @import("compiler_rt/negxf2.zig"); - - _ = @import("compiler_rt/comparef.zig"); - _ = @import("compiler_rt/cmphf2.zig"); - _ = @import("compiler_rt/cmpsf2.zig"); - _ = @import("compiler_rt/cmpdf2.zig"); - _ = @import("compiler_rt/cmptf2.zig"); - _ 
= @import("compiler_rt/cmpxf2.zig"); - _ = @import("compiler_rt/gehf2.zig"); - _ = @import("compiler_rt/gesf2.zig"); - _ = @import("compiler_rt/gedf2.zig"); - _ = @import("compiler_rt/gexf2.zig"); - _ = @import("compiler_rt/getf2.zig"); - _ = @import("compiler_rt/unordhf2.zig"); - _ = @import("compiler_rt/unordsf2.zig"); - _ = @import("compiler_rt/unorddf2.zig"); - _ = @import("compiler_rt/unordxf2.zig"); - _ = @import("compiler_rt/unordtf2.zig"); + _ = @import("compiler_rt/addo.zig"); + _ = @import("compiler_rt/subo.zig"); + _ = @import("compiler_rt/mulo.zig"); + // Float routines + // conversion _ = @import("compiler_rt/extendf.zig"); _ = @import("compiler_rt/extendhfsf2.zig"); _ = @import("compiler_rt/extendhfdf2.zig"); @@ -85,70 +55,6 @@ comptime { _ = @import("compiler_rt/trunctfdf2.zig"); _ = @import("compiler_rt/trunctfxf2.zig"); - _ = @import("compiler_rt/divhf3.zig"); - _ = @import("compiler_rt/divsf3.zig"); - _ = @import("compiler_rt/divdf3.zig"); - _ = @import("compiler_rt/divxf3.zig"); - _ = @import("compiler_rt/divtf3.zig"); - _ = @import("compiler_rt/sin.zig"); - _ = @import("compiler_rt/cos.zig"); - _ = @import("compiler_rt/sincos.zig"); - _ = @import("compiler_rt/ceil.zig"); - _ = @import("compiler_rt/exp.zig"); - _ = @import("compiler_rt/exp2.zig"); - _ = @import("compiler_rt/fabs.zig"); - _ = @import("compiler_rt/floor.zig"); - _ = @import("compiler_rt/fma.zig"); - _ = @import("compiler_rt/fmax.zig"); - _ = @import("compiler_rt/fmin.zig"); - _ = @import("compiler_rt/fmod.zig"); - _ = @import("compiler_rt/log.zig"); - _ = @import("compiler_rt/log10.zig"); - _ = @import("compiler_rt/log2.zig"); - _ = @import("compiler_rt/round.zig"); - _ = @import("compiler_rt/sqrt.zig"); - _ = @import("compiler_rt/tan.zig"); - _ = @import("compiler_rt/trunc.zig"); - _ = @import("compiler_rt/divti3.zig"); - _ = @import("compiler_rt/modti3.zig"); - _ = @import("compiler_rt/multi3.zig"); - _ = @import("compiler_rt/udivti3.zig"); - _ = @import("compiler_rt/udivmodei4.zig"); - _ = @import("compiler_rt/udivmodti4.zig"); - _ = @import("compiler_rt/umodti3.zig"); - - _ = @import("compiler_rt/int_to_float.zig"); - _ = @import("compiler_rt/floatsihf.zig"); - _ = @import("compiler_rt/floatsisf.zig"); - _ = @import("compiler_rt/floatsidf.zig"); - _ = @import("compiler_rt/floatsitf.zig"); - _ = @import("compiler_rt/floatsixf.zig"); - _ = @import("compiler_rt/floatdihf.zig"); - _ = @import("compiler_rt/floatdisf.zig"); - _ = @import("compiler_rt/floatdidf.zig"); - _ = @import("compiler_rt/floatditf.zig"); - _ = @import("compiler_rt/floatdixf.zig"); - _ = @import("compiler_rt/floattihf.zig"); - _ = @import("compiler_rt/floattisf.zig"); - _ = @import("compiler_rt/floattidf.zig"); - _ = @import("compiler_rt/floattitf.zig"); - _ = @import("compiler_rt/floattixf.zig"); - _ = @import("compiler_rt/floatundihf.zig"); - _ = @import("compiler_rt/floatundisf.zig"); - _ = @import("compiler_rt/floatundidf.zig"); - _ = @import("compiler_rt/floatunditf.zig"); - _ = @import("compiler_rt/floatundixf.zig"); - _ = @import("compiler_rt/floatunsihf.zig"); - _ = @import("compiler_rt/floatunsisf.zig"); - _ = @import("compiler_rt/floatunsidf.zig"); - _ = @import("compiler_rt/floatunsitf.zig"); - _ = @import("compiler_rt/floatunsixf.zig"); - _ = @import("compiler_rt/floatuntihf.zig"); - _ = @import("compiler_rt/floatuntisf.zig"); - _ = @import("compiler_rt/floatuntidf.zig"); - _ = @import("compiler_rt/floatuntitf.zig"); - _ = @import("compiler_rt/floatuntixf.zig"); - _ = @import("compiler_rt/float_to_int.zig"); _ = 
@import("compiler_rt/fixhfsi.zig"); _ = @import("compiler_rt/fixhfdi.zig"); @@ -181,28 +87,131 @@ comptime { _ = @import("compiler_rt/fixunsxfdi.zig"); _ = @import("compiler_rt/fixunsxfti.zig"); - _ = @import("compiler_rt/count0bits.zig"); - _ = @import("compiler_rt/parity.zig"); - _ = @import("compiler_rt/popcount.zig"); - _ = @import("compiler_rt/bswap.zig"); - _ = @import("compiler_rt/int.zig"); - _ = @import("compiler_rt/shift.zig"); + _ = @import("compiler_rt/int_to_float.zig"); + _ = @import("compiler_rt/floatsihf.zig"); + _ = @import("compiler_rt/floatsisf.zig"); + _ = @import("compiler_rt/floatsidf.zig"); + _ = @import("compiler_rt/floatsitf.zig"); + _ = @import("compiler_rt/floatsixf.zig"); + _ = @import("compiler_rt/floatdihf.zig"); + _ = @import("compiler_rt/floatdisf.zig"); + _ = @import("compiler_rt/floatdidf.zig"); + _ = @import("compiler_rt/floatditf.zig"); + _ = @import("compiler_rt/floatdixf.zig"); + _ = @import("compiler_rt/floattihf.zig"); + _ = @import("compiler_rt/floattisf.zig"); + _ = @import("compiler_rt/floattidf.zig"); + _ = @import("compiler_rt/floattitf.zig"); + _ = @import("compiler_rt/floattixf.zig"); + _ = @import("compiler_rt/floatundihf.zig"); + _ = @import("compiler_rt/floatundisf.zig"); + _ = @import("compiler_rt/floatundidf.zig"); + _ = @import("compiler_rt/floatunditf.zig"); + _ = @import("compiler_rt/floatundixf.zig"); + _ = @import("compiler_rt/floatunsihf.zig"); + _ = @import("compiler_rt/floatunsisf.zig"); + _ = @import("compiler_rt/floatunsidf.zig"); + _ = @import("compiler_rt/floatunsitf.zig"); + _ = @import("compiler_rt/floatunsixf.zig"); + _ = @import("compiler_rt/floatuntihf.zig"); + _ = @import("compiler_rt/floatuntisf.zig"); + _ = @import("compiler_rt/floatuntidf.zig"); + _ = @import("compiler_rt/floatuntitf.zig"); + _ = @import("compiler_rt/floatuntixf.zig"); - _ = @import("compiler_rt/negXi2.zig"); + // comparison + _ = @import("compiler_rt/comparef.zig"); + _ = @import("compiler_rt/cmphf2.zig"); + _ = @import("compiler_rt/cmpsf2.zig"); + _ = @import("compiler_rt/cmpdf2.zig"); + _ = @import("compiler_rt/cmptf2.zig"); + _ = @import("compiler_rt/cmpxf2.zig"); + _ = @import("compiler_rt/unordhf2.zig"); + _ = @import("compiler_rt/unordsf2.zig"); + _ = @import("compiler_rt/unorddf2.zig"); + _ = @import("compiler_rt/unordxf2.zig"); + _ = @import("compiler_rt/unordtf2.zig"); + _ = @import("compiler_rt/gehf2.zig"); + _ = @import("compiler_rt/gesf2.zig"); + _ = @import("compiler_rt/gedf2.zig"); + _ = @import("compiler_rt/gexf2.zig"); + _ = @import("compiler_rt/getf2.zig"); - _ = @import("compiler_rt/muldi3.zig"); + // arithmetic + _ = @import("compiler_rt/addf3.zig"); + _ = @import("compiler_rt/addhf3.zig"); + _ = @import("compiler_rt/addsf3.zig"); + _ = @import("compiler_rt/adddf3.zig"); + _ = @import("compiler_rt/addtf3.zig"); + _ = @import("compiler_rt/addxf3.zig"); - _ = @import("compiler_rt/absv.zig"); - _ = @import("compiler_rt/absvsi2.zig"); - _ = @import("compiler_rt/absvdi2.zig"); - _ = @import("compiler_rt/absvti2.zig"); + _ = @import("compiler_rt/subhf3.zig"); + _ = @import("compiler_rt/subsf3.zig"); + _ = @import("compiler_rt/subdf3.zig"); + _ = @import("compiler_rt/subtf3.zig"); + _ = @import("compiler_rt/subxf3.zig"); - _ = @import("compiler_rt/negv.zig"); - _ = @import("compiler_rt/addo.zig"); - _ = @import("compiler_rt/subo.zig"); - _ = @import("compiler_rt/mulo.zig"); - _ = @import("compiler_rt/cmp.zig"); + _ = @import("compiler_rt/mulf3.zig"); + _ = @import("compiler_rt/mulhf3.zig"); + _ = @import("compiler_rt/mulsf3.zig"); + _ = 
@import("compiler_rt/muldf3.zig"); + _ = @import("compiler_rt/multf3.zig"); + _ = @import("compiler_rt/mulxf3.zig"); + _ = @import("compiler_rt/divhf3.zig"); + _ = @import("compiler_rt/divsf3.zig"); + _ = @import("compiler_rt/divdf3.zig"); + _ = @import("compiler_rt/divxf3.zig"); + _ = @import("compiler_rt/divtf3.zig"); + + _ = @import("compiler_rt/neghf2.zig"); + _ = @import("compiler_rt/negsf2.zig"); + _ = @import("compiler_rt/negdf2.zig"); + _ = @import("compiler_rt/negtf2.zig"); + _ = @import("compiler_rt/negxf2.zig"); + + // other + _ = @import("compiler_rt/powiXf2.zig"); + _ = @import("compiler_rt/mulc3.zig"); + _ = @import("compiler_rt/mulhc3.zig"); + _ = @import("compiler_rt/mulsc3.zig"); + _ = @import("compiler_rt/muldc3.zig"); + _ = @import("compiler_rt/mulxc3.zig"); + _ = @import("compiler_rt/multc3.zig"); + + _ = @import("compiler_rt/divc3.zig"); + _ = @import("compiler_rt/divhc3.zig"); + _ = @import("compiler_rt/divsc3.zig"); + _ = @import("compiler_rt/divdc3.zig"); + _ = @import("compiler_rt/divxc3.zig"); + _ = @import("compiler_rt/divtc3.zig"); + + // Math routines. Alphabetically sorted. + _ = @import("compiler_rt/ceil.zig"); + _ = @import("compiler_rt/cos.zig"); + _ = @import("compiler_rt/exp.zig"); + _ = @import("compiler_rt/exp2.zig"); + _ = @import("compiler_rt/fabs.zig"); + _ = @import("compiler_rt/floor.zig"); + _ = @import("compiler_rt/fma.zig"); + _ = @import("compiler_rt/fmax.zig"); + _ = @import("compiler_rt/fmin.zig"); + _ = @import("compiler_rt/fmod.zig"); + _ = @import("compiler_rt/log.zig"); + _ = @import("compiler_rt/log10.zig"); + _ = @import("compiler_rt/log2.zig"); + _ = @import("compiler_rt/round.zig"); + _ = @import("compiler_rt/sin.zig"); + _ = @import("compiler_rt/sincos.zig"); + _ = @import("compiler_rt/sqrt.zig"); + _ = @import("compiler_rt/tan.zig"); + _ = @import("compiler_rt/trunc.zig"); + + // BigInt. Alphabetically sorted. + _ = @import("compiler_rt/udivmodei4.zig"); + _ = @import("compiler_rt/udivmodti4.zig"); + + // extra _ = @import("compiler_rt/os_version_check.zig"); _ = @import("compiler_rt/emutls.zig"); _ = @import("compiler_rt/arm.zig"); diff --git a/lib/compiler_rt/README.md b/lib/compiler_rt/README.md index 0590c33fde..fb581daf2a 100644 --- a/lib/compiler_rt/README.md +++ b/lib/compiler_rt/README.md @@ -331,7 +331,7 @@ Integer and Float Operations | ✓ | __negdf2 | f64 | ∅ | f64 | .. | | ✓ | __negtf2 | f128 | ∅ | f128 | .. | | ✓ | __negxf2 | f80 | ∅ | f80 | .. | -| | | | | | **Floating point raised to integer power** | +| | | | | | **Other** | | ✓ | __powihf2 | f16 | i32 | f16 | `a ^ b` | | ✓ | __powisf2 | f32 | i32 | f32 | .. | | ✓ | __powidf2 | f64 | i32 | f64 | .. | diff --git a/lib/compiler_rt/common.zig b/lib/compiler_rt/common.zig index 40a770070d..ee8aec18cd 100644 --- a/lib/compiler_rt/common.zig +++ b/lib/compiler_rt/common.zig @@ -1,5 +1,6 @@ const std = @import("std"); const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test) .Internal else .Weak; /// Determines the symbol's visibility to other objects. @@ -221,3 +222,20 @@ pub inline fn fneg(a: anytype) @TypeOf(a) { const negated = @bitCast(U, a) ^ sign_bit_mask; return @bitCast(F, negated); } + +/// Allows to access underlying bits as two equally sized lower and higher +/// signed or unsigned integers. 
+pub fn HalveInt(comptime T: type, comptime signed_half: bool) type { + return extern union { + pub const bits = @divExact(@typeInfo(T).Int.bits, 2); + pub const HalfTU = std.meta.Int(.unsigned, bits); + pub const HalfTS = std.meta.Int(.signed, bits); + pub const HalfT = if (signed_half) HalfTS else HalfTU; + + all: T, + s: if (native_endian == .Little) + extern struct { low: HalfT, high: HalfT } + else + extern struct { high: HalfT, low: HalfT }, + }; +} diff --git a/lib/compiler_rt/int.zig b/lib/compiler_rt/int.zig index 6a761807dd..47ff9e4c0c 100644 --- a/lib/compiler_rt/int.zig +++ b/lib/compiler_rt/int.zig @@ -16,7 +16,6 @@ pub const panic = common.panic; comptime { @export(__divmodti4, .{ .name = "__divmodti4", .linkage = common.linkage, .visibility = common.visibility }); @export(__udivmoddi4, .{ .name = "__udivmoddi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(__mulsi3, .{ .name = "__mulsi3", .linkage = common.linkage, .visibility = common.visibility }); @export(__divmoddi4, .{ .name = "__divmoddi4", .linkage = common.linkage, .visibility = common.visibility }); if (common.want_aeabi) { @export(__aeabi_idiv, .{ .name = "__aeabi_idiv", .linkage = common.linkage, .visibility = common.visibility }); @@ -663,59 +662,3 @@ fn test_one_umodsi3(a: u32, b: u32, expected_r: u32) !void { const r: u32 = __umodsi3(a, b); try testing.expect(r == expected_r); } - -pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 { - var ua = @bitCast(u32, a); - var ub = @bitCast(u32, b); - var r: u32 = 0; - - while (ua > 0) { - if ((ua & 1) != 0) r +%= ub; - ua >>= 1; - ub <<= 1; - } - - return @bitCast(i32, r); -} - -fn test_one_mulsi3(a: i32, b: i32, result: i32) !void { - try testing.expectEqual(result, __mulsi3(a, b)); -} - -test "mulsi3" { - try test_one_mulsi3(0, 0, 0); - try test_one_mulsi3(0, 1, 0); - try test_one_mulsi3(1, 0, 0); - try test_one_mulsi3(0, 10, 0); - try test_one_mulsi3(10, 0, 0); - try test_one_mulsi3(0, maxInt(i32), 0); - try test_one_mulsi3(maxInt(i32), 0, 0); - try test_one_mulsi3(0, -1, 0); - try test_one_mulsi3(-1, 0, 0); - try test_one_mulsi3(0, -10, 0); - try test_one_mulsi3(-10, 0, 0); - try test_one_mulsi3(0, minInt(i32), 0); - try test_one_mulsi3(minInt(i32), 0, 0); - try test_one_mulsi3(1, 1, 1); - try test_one_mulsi3(1, 10, 10); - try test_one_mulsi3(10, 1, 10); - try test_one_mulsi3(1, maxInt(i32), maxInt(i32)); - try test_one_mulsi3(maxInt(i32), 1, maxInt(i32)); - try test_one_mulsi3(1, -1, -1); - try test_one_mulsi3(1, -10, -10); - try test_one_mulsi3(-10, 1, -10); - try test_one_mulsi3(1, minInt(i32), minInt(i32)); - try test_one_mulsi3(minInt(i32), 1, minInt(i32)); - try test_one_mulsi3(46340, 46340, 2147395600); - try test_one_mulsi3(-46340, 46340, -2147395600); - try test_one_mulsi3(46340, -46340, -2147395600); - try test_one_mulsi3(-46340, -46340, 2147395600); - try test_one_mulsi3(4194303, 8192, @truncate(i32, 34359730176)); - try test_one_mulsi3(-4194303, 8192, @truncate(i32, -34359730176)); - try test_one_mulsi3(4194303, -8192, @truncate(i32, -34359730176)); - try test_one_mulsi3(-4194303, -8192, @truncate(i32, 34359730176)); - try test_one_mulsi3(8192, 4194303, @truncate(i32, 34359730176)); - try test_one_mulsi3(-8192, 4194303, @truncate(i32, -34359730176)); - try test_one_mulsi3(8192, -4194303, @truncate(i32, -34359730176)); - try test_one_mulsi3(-8192, -4194303, @truncate(i32, 34359730176)); -} diff --git a/lib/compiler_rt/mulXi3.zig b/lib/compiler_rt/mulXi3.zig new file mode 100644 index 0000000000..3999681034 --- /dev/null +++ 
b/lib/compiler_rt/mulXi3.zig @@ -0,0 +1,101 @@ +const builtin = @import("builtin"); +const std = @import("std"); +const testing = std.testing; +const common = @import("common.zig"); +const native_endian = builtin.cpu.arch.endian(); + +pub const panic = common.panic; + +comptime { + @export(__mulsi3, .{ .name = "__mulsi3", .linkage = common.linkage, .visibility = common.visibility }); + if (common.want_aeabi) { + @export(__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = common.linkage, .visibility = common.visibility }); + } else { + @export(__muldi3, .{ .name = "__muldi3", .linkage = common.linkage, .visibility = common.visibility }); + } + if (common.want_windows_v2u64_abi) { + @export(__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility }); + } else { + @export(__multi3, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility }); + } +} + +pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 { + var ua = @bitCast(u32, a); + var ub = @bitCast(u32, b); + var r: u32 = 0; + + while (ua > 0) { + if ((ua & 1) != 0) r +%= ub; + ua >>= 1; + ub <<= 1; + } + + return @bitCast(i32, r); +} + +pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 { + return mulX(i64, a, b); +} + +fn __aeabi_lmul(a: i64, b: i64) callconv(.AAPCS) i64 { + return mulX(i64, a, b); +} + +inline fn mulX(comptime T: type, a: T, b: T) T { + const word_t = common.HalveInt(T, false); + const x = word_t{ .all = a }; + const y = word_t{ .all = b }; + var r = switch (T) { + i64, i128 => word_t{ .all = muldXi(word_t.HalfT, x.s.low, y.s.low) }, + else => unreachable, + }; + r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high; + return r.all; +} + +fn DoubleInt(comptime T: type) type { + return switch (T) { + u32 => i64, + u64 => i128, + i32 => i64, + i64 => i128, + else => unreachable, + }; +} + +fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) { + const DT = DoubleInt(T); + const word_t = common.HalveInt(DT, false); + const bits_in_word_2 = @sizeOf(T) * 8 / 2; + const lower_mask = (~@as(T, 0)) >> bits_in_word_2; + + var r: word_t = undefined; + r.s.low = (a & lower_mask) *% (b & lower_mask); + var t: T = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (a >> bits_in_word_2) *% (b & lower_mask); + r.s.low +%= (t & lower_mask) << bits_in_word_2; + r.s.high = t >> bits_in_word_2; + t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t +%= (b >> bits_in_word_2) *% (a & lower_mask); + r.s.low +%= (t & lower_mask) << bits_in_word_2; + r.s.high +%= t >> bits_in_word_2; + r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2); + return r.all; +} + +pub fn __multi3(a: i128, b: i128) callconv(.C) i128 { + return mulX(i128, a, b); +} + +const v2u64 = @Vector(2, u64); + +fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 { + return @bitCast(v2u64, mulX(i128, @bitCast(i128, a), @bitCast(i128, b))); +} + +test { + _ = @import("mulXi3_test.zig"); +} diff --git a/lib/compiler_rt/mulXi3_test.zig b/lib/compiler_rt/mulXi3_test.zig new file mode 100644 index 0000000000..128f428af2 --- /dev/null +++ b/lib/compiler_rt/mulXi3_test.zig @@ -0,0 +1,147 @@ +const std = @import("std"); +const testing = std.testing; +const mulXi3 = @import("mulXi3.zig"); +const maxInt = std.math.maxInt; +const minInt = std.math.minInt; + +fn test_one_mulsi3(a: i32, b: i32, result: i32) !void { + try testing.expectEqual(result, mulXi3.__mulsi3(a, b)); +} + +fn test__muldi3(a: i64, b: i64, expected: i64) !void { + const x = mulXi3.__muldi3(a, b); + try 
testing.expect(x == expected); +} + +fn test__multi3(a: i128, b: i128, expected: i128) !void { + const x = mulXi3.__multi3(a, b); + try testing.expect(x == expected); +} + +test "mulsi3" { + try test_one_mulsi3(0, 0, 0); + try test_one_mulsi3(0, 1, 0); + try test_one_mulsi3(1, 0, 0); + try test_one_mulsi3(0, 10, 0); + try test_one_mulsi3(10, 0, 0); + try test_one_mulsi3(0, maxInt(i32), 0); + try test_one_mulsi3(maxInt(i32), 0, 0); + try test_one_mulsi3(0, -1, 0); + try test_one_mulsi3(-1, 0, 0); + try test_one_mulsi3(0, -10, 0); + try test_one_mulsi3(-10, 0, 0); + try test_one_mulsi3(0, minInt(i32), 0); + try test_one_mulsi3(minInt(i32), 0, 0); + try test_one_mulsi3(1, 1, 1); + try test_one_mulsi3(1, 10, 10); + try test_one_mulsi3(10, 1, 10); + try test_one_mulsi3(1, maxInt(i32), maxInt(i32)); + try test_one_mulsi3(maxInt(i32), 1, maxInt(i32)); + try test_one_mulsi3(1, -1, -1); + try test_one_mulsi3(1, -10, -10); + try test_one_mulsi3(-10, 1, -10); + try test_one_mulsi3(1, minInt(i32), minInt(i32)); + try test_one_mulsi3(minInt(i32), 1, minInt(i32)); + try test_one_mulsi3(46340, 46340, 2147395600); + try test_one_mulsi3(-46340, 46340, -2147395600); + try test_one_mulsi3(46340, -46340, -2147395600); + try test_one_mulsi3(-46340, -46340, 2147395600); + try test_one_mulsi3(4194303, 8192, @truncate(i32, 34359730176)); + try test_one_mulsi3(-4194303, 8192, @truncate(i32, -34359730176)); + try test_one_mulsi3(4194303, -8192, @truncate(i32, -34359730176)); + try test_one_mulsi3(-4194303, -8192, @truncate(i32, 34359730176)); + try test_one_mulsi3(8192, 4194303, @truncate(i32, 34359730176)); + try test_one_mulsi3(-8192, 4194303, @truncate(i32, -34359730176)); + try test_one_mulsi3(8192, -4194303, @truncate(i32, -34359730176)); + try test_one_mulsi3(-8192, -4194303, @truncate(i32, 34359730176)); +} + +test "muldi3" { + try test__muldi3(0, 0, 0); + try test__muldi3(0, 1, 0); + try test__muldi3(1, 0, 0); + try test__muldi3(0, 10, 0); + try test__muldi3(10, 0, 0); + try test__muldi3(0, 81985529216486895, 0); + try test__muldi3(81985529216486895, 0, 0); + + try test__muldi3(0, -1, 0); + try test__muldi3(-1, 0, 0); + try test__muldi3(0, -10, 0); + try test__muldi3(-10, 0, 0); + try test__muldi3(0, -81985529216486895, 0); + try test__muldi3(-81985529216486895, 0, 0); + + try test__muldi3(1, 1, 1); + try test__muldi3(1, 10, 10); + try test__muldi3(10, 1, 10); + try test__muldi3(1, 81985529216486895, 81985529216486895); + try test__muldi3(81985529216486895, 1, 81985529216486895); + + try test__muldi3(1, -1, -1); + try test__muldi3(1, -10, -10); + try test__muldi3(-10, 1, -10); + try test__muldi3(1, -81985529216486895, -81985529216486895); + try test__muldi3(-81985529216486895, 1, -81985529216486895); + + try test__muldi3(3037000499, 3037000499, 9223372030926249001); + try test__muldi3(-3037000499, 3037000499, -9223372030926249001); + try test__muldi3(3037000499, -3037000499, -9223372030926249001); + try test__muldi3(-3037000499, -3037000499, 9223372030926249001); + + try test__muldi3(4398046511103, 2097152, 9223372036852678656); + try test__muldi3(-4398046511103, 2097152, -9223372036852678656); + try test__muldi3(4398046511103, -2097152, -9223372036852678656); + try test__muldi3(-4398046511103, -2097152, 9223372036852678656); + + try test__muldi3(2097152, 4398046511103, 9223372036852678656); + try test__muldi3(-2097152, 4398046511103, -9223372036852678656); + try test__muldi3(2097152, -4398046511103, -9223372036852678656); + try test__muldi3(-2097152, -4398046511103, 9223372036852678656); +} + +test 
"multi3" { + try test__multi3(0, 0, 0); + try test__multi3(0, 1, 0); + try test__multi3(1, 0, 0); + try test__multi3(0, 10, 0); + try test__multi3(10, 0, 0); + try test__multi3(0, 81985529216486895, 0); + try test__multi3(81985529216486895, 0, 0); + + try test__multi3(0, -1, 0); + try test__multi3(-1, 0, 0); + try test__multi3(0, -10, 0); + try test__multi3(-10, 0, 0); + try test__multi3(0, -81985529216486895, 0); + try test__multi3(-81985529216486895, 0, 0); + + try test__multi3(1, 1, 1); + try test__multi3(1, 10, 10); + try test__multi3(10, 1, 10); + try test__multi3(1, 81985529216486895, 81985529216486895); + try test__multi3(81985529216486895, 1, 81985529216486895); + + try test__multi3(1, -1, -1); + try test__multi3(1, -10, -10); + try test__multi3(-10, 1, -10); + try test__multi3(1, -81985529216486895, -81985529216486895); + try test__multi3(-81985529216486895, 1, -81985529216486895); + + try test__multi3(3037000499, 3037000499, 9223372030926249001); + try test__multi3(-3037000499, 3037000499, -9223372030926249001); + try test__multi3(3037000499, -3037000499, -9223372030926249001); + try test__multi3(-3037000499, -3037000499, 9223372030926249001); + + try test__multi3(4398046511103, 2097152, 9223372036852678656); + try test__multi3(-4398046511103, 2097152, -9223372036852678656); + try test__multi3(4398046511103, -2097152, -9223372036852678656); + try test__multi3(-4398046511103, -2097152, 9223372036852678656); + + try test__multi3(2097152, 4398046511103, 9223372036852678656); + try test__multi3(-2097152, 4398046511103, -9223372036852678656); + try test__multi3(2097152, -4398046511103, -9223372036852678656); + try test__multi3(-2097152, -4398046511103, 9223372036852678656); + + try test__multi3(0x00000000000000B504F333F9DE5BE000, 0x000000000000000000B504F333F9DE5B, 0x7FFFFFFFFFFFF328DF915DA296E8A000); +} diff --git a/lib/compiler_rt/muldi3.zig b/lib/compiler_rt/muldi3.zig deleted file mode 100644 index c79713fed0..0000000000 --- a/lib/compiler_rt/muldi3.zig +++ /dev/null @@ -1,71 +0,0 @@ -//! Ported from -//! 
https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/compiler-rt/lib/builtins/muldi3.c - -const std = @import("std"); -const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); -const common = @import("common.zig"); - -pub const panic = common.panic; - -comptime { - if (common.want_aeabi) { - @export(__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = common.linkage, .visibility = common.visibility }); - } else { - @export(__muldi3, .{ .name = "__muldi3", .linkage = common.linkage, .visibility = common.visibility }); - } -} - -pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 { - return mul(a, b); -} - -fn __aeabi_lmul(a: i64, b: i64) callconv(.AAPCS) i64 { - return mul(a, b); -} - -inline fn mul(a: i64, b: i64) i64 { - const x = dwords{ .all = a }; - const y = dwords{ .all = b }; - var r = dwords{ .all = muldsi3(x.s.low, y.s.low) }; - r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high; - return r.all; -} - -const dwords = extern union { - all: i64, - s: switch (native_endian) { - .Little => extern struct { - low: u32, - high: u32, - }, - .Big => extern struct { - high: u32, - low: u32, - }, - }, -}; - -fn muldsi3(a: u32, b: u32) i64 { - const bits_in_word_2 = @sizeOf(i32) * 8 / 2; - const lower_mask = (~@as(u32, 0)) >> bits_in_word_2; - - var r: dwords = undefined; - r.s.low = (a & lower_mask) *% (b & lower_mask); - var t: u32 = r.s.low >> bits_in_word_2; - r.s.low &= lower_mask; - t += (a >> bits_in_word_2) *% (b & lower_mask); - r.s.low +%= (t & lower_mask) << bits_in_word_2; - r.s.high = t >> bits_in_word_2; - t = r.s.low >> bits_in_word_2; - r.s.low &= lower_mask; - t +%= (b >> bits_in_word_2) *% (a & lower_mask); - r.s.low +%= (t & lower_mask) << bits_in_word_2; - r.s.high +%= t >> bits_in_word_2; - r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2); - return r.all; -} - -test { - _ = @import("muldi3_test.zig"); -} diff --git a/lib/compiler_rt/muldi3_test.zig b/lib/compiler_rt/muldi3_test.zig deleted file mode 100644 index 6e005d67c8..0000000000 --- a/lib/compiler_rt/muldi3_test.zig +++ /dev/null @@ -1,51 +0,0 @@ -const __muldi3 = @import("muldi3.zig").__muldi3; -const testing = @import("std").testing; - -fn test__muldi3(a: i64, b: i64, expected: i64) !void { - const x = __muldi3(a, b); - try testing.expect(x == expected); -} - -test "muldi3" { - try test__muldi3(0, 0, 0); - try test__muldi3(0, 1, 0); - try test__muldi3(1, 0, 0); - try test__muldi3(0, 10, 0); - try test__muldi3(10, 0, 0); - try test__muldi3(0, 81985529216486895, 0); - try test__muldi3(81985529216486895, 0, 0); - - try test__muldi3(0, -1, 0); - try test__muldi3(-1, 0, 0); - try test__muldi3(0, -10, 0); - try test__muldi3(-10, 0, 0); - try test__muldi3(0, -81985529216486895, 0); - try test__muldi3(-81985529216486895, 0, 0); - - try test__muldi3(1, 1, 1); - try test__muldi3(1, 10, 10); - try test__muldi3(10, 1, 10); - try test__muldi3(1, 81985529216486895, 81985529216486895); - try test__muldi3(81985529216486895, 1, 81985529216486895); - - try test__muldi3(1, -1, -1); - try test__muldi3(1, -10, -10); - try test__muldi3(-10, 1, -10); - try test__muldi3(1, -81985529216486895, -81985529216486895); - try test__muldi3(-81985529216486895, 1, -81985529216486895); - - try test__muldi3(3037000499, 3037000499, 9223372030926249001); - try test__muldi3(-3037000499, 3037000499, -9223372030926249001); - try test__muldi3(3037000499, -3037000499, -9223372030926249001); - try test__muldi3(-3037000499, -3037000499, 9223372030926249001); - - try test__muldi3(4398046511103, 2097152, 
9223372036852678656); - try test__muldi3(-4398046511103, 2097152, -9223372036852678656); - try test__muldi3(4398046511103, -2097152, -9223372036852678656); - try test__muldi3(-4398046511103, -2097152, 9223372036852678656); - - try test__muldi3(2097152, 4398046511103, 9223372036852678656); - try test__muldi3(-2097152, 4398046511103, -9223372036852678656); - try test__muldi3(2097152, -4398046511103, -9223372036852678656); - try test__muldi3(-2097152, -4398046511103, 9223372036852678656); -} diff --git a/lib/compiler_rt/multi3.zig b/lib/compiler_rt/multi3.zig deleted file mode 100644 index 1918e8b976..0000000000 --- a/lib/compiler_rt/multi3.zig +++ /dev/null @@ -1,75 +0,0 @@ -//! Ported from git@github.com:llvm-project/llvm-project-20170507.git -//! ae684fad6d34858c014c94da69c15e7774a633c3 -//! 2018-08-13 - -const std = @import("std"); -const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); -const common = @import("common.zig"); - -pub const panic = common.panic; - -comptime { - if (common.want_windows_v2u64_abi) { - @export(__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility }); - } else { - @export(__multi3, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility }); - } -} - -pub fn __multi3(a: i128, b: i128) callconv(.C) i128 { - return mul(a, b); -} - -const v2u64 = @Vector(2, u64); - -fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 { - return @bitCast(v2u64, mul(@bitCast(i128, a), @bitCast(i128, b))); -} - -inline fn mul(a: i128, b: i128) i128 { - const x = twords{ .all = a }; - const y = twords{ .all = b }; - var r = twords{ .all = mulddi3(x.s.low, y.s.low) }; - r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high; - return r.all; -} - -fn mulddi3(a: u64, b: u64) i128 { - const bits_in_dword_2 = (@sizeOf(i64) * 8) / 2; - const lower_mask = ~@as(u64, 0) >> bits_in_dword_2; - var r: twords = undefined; - r.s.low = (a & lower_mask) *% (b & lower_mask); - var t: u64 = r.s.low >> bits_in_dword_2; - r.s.low &= lower_mask; - t +%= (a >> bits_in_dword_2) *% (b & lower_mask); - r.s.low +%= (t & lower_mask) << bits_in_dword_2; - r.s.high = t >> bits_in_dword_2; - t = r.s.low >> bits_in_dword_2; - r.s.low &= lower_mask; - t +%= (b >> bits_in_dword_2) *% (a & lower_mask); - r.s.low +%= (t & lower_mask) << bits_in_dword_2; - r.s.high +%= t >> bits_in_dword_2; - r.s.high +%= (a >> bits_in_dword_2) *% (b >> bits_in_dword_2); - return r.all; -} - -const twords = extern union { - all: i128, - s: S, - - const S = if (native_endian == .Little) - extern struct { - low: u64, - high: u64, - } - else - extern struct { - high: u64, - low: u64, - }; -}; - -test { - _ = @import("multi3_test.zig"); -} diff --git a/lib/compiler_rt/multi3_test.zig b/lib/compiler_rt/multi3_test.zig deleted file mode 100644 index e9eafc05de..0000000000 --- a/lib/compiler_rt/multi3_test.zig +++ /dev/null @@ -1,53 +0,0 @@ -const __multi3 = @import("multi3.zig").__multi3; -const testing = @import("std").testing; - -fn test__multi3(a: i128, b: i128, expected: i128) !void { - const x = __multi3(a, b); - try testing.expect(x == expected); -} - -test "multi3" { - try test__multi3(0, 0, 0); - try test__multi3(0, 1, 0); - try test__multi3(1, 0, 0); - try test__multi3(0, 10, 0); - try test__multi3(10, 0, 0); - try test__multi3(0, 81985529216486895, 0); - try test__multi3(81985529216486895, 0, 0); - - try test__multi3(0, -1, 0); - try test__multi3(-1, 0, 0); - try test__multi3(0, -10, 0); - try test__multi3(-10, 
0, 0); - try test__multi3(0, -81985529216486895, 0); - try test__multi3(-81985529216486895, 0, 0); - - try test__multi3(1, 1, 1); - try test__multi3(1, 10, 10); - try test__multi3(10, 1, 10); - try test__multi3(1, 81985529216486895, 81985529216486895); - try test__multi3(81985529216486895, 1, 81985529216486895); - - try test__multi3(1, -1, -1); - try test__multi3(1, -10, -10); - try test__multi3(-10, 1, -10); - try test__multi3(1, -81985529216486895, -81985529216486895); - try test__multi3(-81985529216486895, 1, -81985529216486895); - - try test__multi3(3037000499, 3037000499, 9223372030926249001); - try test__multi3(-3037000499, 3037000499, -9223372030926249001); - try test__multi3(3037000499, -3037000499, -9223372030926249001); - try test__multi3(-3037000499, -3037000499, 9223372030926249001); - - try test__multi3(4398046511103, 2097152, 9223372036852678656); - try test__multi3(-4398046511103, 2097152, -9223372036852678656); - try test__multi3(4398046511103, -2097152, -9223372036852678656); - try test__multi3(-4398046511103, -2097152, 9223372036852678656); - - try test__multi3(2097152, 4398046511103, 9223372036852678656); - try test__multi3(-2097152, 4398046511103, -9223372036852678656); - try test__multi3(2097152, -4398046511103, -9223372036852678656); - try test__multi3(-2097152, -4398046511103, 9223372036852678656); - - try test__multi3(0x00000000000000B504F333F9DE5BE000, 0x000000000000000000B504F333F9DE5B, 0x7FFFFFFFFFFFF328DF915DA296E8A000); -} diff --git a/lib/compiler_rt/shift.zig b/lib/compiler_rt/shift.zig index df6ce82059..4d8658dbc9 100644 --- a/lib/compiler_rt/shift.zig +++ b/lib/compiler_rt/shift.zig @@ -1,7 +1,6 @@ const std = @import("std"); const builtin = @import("builtin"); const Log2Int = std.math.Log2Int; -const native_endian = builtin.cpu.arch.endian(); const common = @import("common.zig"); pub const panic = common.panic; @@ -27,39 +26,24 @@ comptime { } } -fn Dwords(comptime T: type, comptime signed_half: bool) type { - return extern union { - const bits = @divExact(@typeInfo(T).Int.bits, 2); - const HalfTU = std.meta.Int(.unsigned, bits); - const HalfTS = std.meta.Int(.signed, bits); - const HalfT = if (signed_half) HalfTS else HalfTU; - - all: T, - s: if (native_endian == .Little) - extern struct { low: HalfT, high: HalfT } - else - extern struct { high: HalfT, low: HalfT }, - }; -} - // Arithmetic shift left: shift in 0 from right to left // Precondition: 0 <= b < bits_in_dword inline fn ashlXi3(comptime T: type, a: T, b: i32) T { - const dwords = Dwords(T, false); - const S = Log2Int(dwords.HalfT); + const word_t = common.HalveInt(T, false); + const S = Log2Int(word_t.HalfT); - const input = dwords{ .all = a }; - var output: dwords = undefined; + const input = word_t{ .all = a }; + var output: word_t = undefined; - if (b >= dwords.bits) { + if (b >= word_t.bits) { output.s.low = 0; - output.s.high = input.s.low << @intCast(S, b - dwords.bits); + output.s.high = input.s.low << @intCast(S, b - word_t.bits); } else if (b == 0) { return a; } else { output.s.low = input.s.low << @intCast(S, b); output.s.high = input.s.high << @intCast(S, b); - output.s.high |= input.s.low >> @intCast(S, dwords.bits - b); + output.s.high |= input.s.low >> @intCast(S, word_t.bits - b); } return output.all; @@ -68,24 +52,24 @@ inline fn ashlXi3(comptime T: type, a: T, b: i32) T { // Arithmetic shift right: shift in 1 from left to right // Precondition: 0 <= b < T.bit_count inline fn ashrXi3(comptime T: type, a: T, b: i32) T { - const dwords = Dwords(T, true); - const S = 
Log2Int(dwords.HalfT); + const word_t = common.HalveInt(T, true); + const S = Log2Int(word_t.HalfT); - const input = dwords{ .all = a }; - var output: dwords = undefined; + const input = word_t{ .all = a }; + var output: word_t = undefined; - if (b >= dwords.bits) { - output.s.high = input.s.high >> (dwords.bits - 1); - output.s.low = input.s.high >> @intCast(S, b - dwords.bits); + if (b >= word_t.bits) { + output.s.high = input.s.high >> (word_t.bits - 1); + output.s.low = input.s.high >> @intCast(S, b - word_t.bits); } else if (b == 0) { return a; } else { output.s.high = input.s.high >> @intCast(S, b); - output.s.low = input.s.high << @intCast(S, dwords.bits - b); + output.s.low = input.s.high << @intCast(S, word_t.bits - b); // Avoid sign-extension here output.s.low |= @bitCast( - dwords.HalfT, - @bitCast(dwords.HalfTU, input.s.low) >> @intCast(S, b), + word_t.HalfT, + @bitCast(word_t.HalfTU, input.s.low) >> @intCast(S, b), ); } @@ -95,20 +79,20 @@ inline fn ashrXi3(comptime T: type, a: T, b: i32) T { // Logical shift right: shift in 0 from left to right // Precondition: 0 <= b < T.bit_count inline fn lshrXi3(comptime T: type, a: T, b: i32) T { - const dwords = Dwords(T, false); - const S = Log2Int(dwords.HalfT); + const word_t = common.HalveInt(T, false); + const S = Log2Int(word_t.HalfT); - const input = dwords{ .all = a }; - var output: dwords = undefined; + const input = word_t{ .all = a }; + var output: word_t = undefined; - if (b >= dwords.bits) { + if (b >= word_t.bits) { output.s.high = 0; - output.s.low = input.s.high >> @intCast(S, b - dwords.bits); + output.s.low = input.s.high >> @intCast(S, b - word_t.bits); } else if (b == 0) { return a; } else { output.s.high = input.s.high >> @intCast(S, b); - output.s.low = input.s.high << @intCast(S, dwords.bits - b); + output.s.low = input.s.high << @intCast(S, word_t.bits - b); output.s.low |= input.s.low >> @intCast(S, b); } diff --git a/lib/docs/main.js b/lib/docs/main.js index 7a27f9db4f..a0647bbe61 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -106,7 +106,7 @@ const NAV_MODES = { // empty array means refers to the package itself declNames: [], // these will be all types, except the last one may be a type or a decl - declObjs: [], + declObjs: [], // (a, b, c, d) comptime call; result is the value the docs refer to callName: null, }; @@ -200,7 +200,7 @@ const NAV_MODES = { case NAV_MODES.GUIDES: document.title = "[G] " + curNav.activeGuide + suffix; return; - } + } } function isDecl(x) { @@ -401,7 +401,7 @@ const NAV_MODES = { domGuideSwitch.classList.add("active"); domApiSwitch.classList.remove("active"); domDocs.classList.add("hidden"); - domGuides.classList.remove("hidden"); + domGuides.classList.remove("hidden"); domApiMenu.classList.add("hidden"); // sidebar guides list @@ -422,7 +422,7 @@ const NAV_MODES = { if (list.length > 0) { domGuidesMenu.classList.remove("hidden"); } - + // main content const activeGuide = zigAnalysis.guides[curNav.activeGuide]; if (activeGuide == undefined) { @@ -454,7 +454,7 @@ const NAV_MODES = { Happy writing! 
`); - } else { + } else { domGuides.innerHTML = markdown(activeGuide); } } @@ -467,7 +467,7 @@ const NAV_MODES = { domDocs.classList.remove("hidden"); domApiMenu.classList.remove("hidden"); domGuidesMenu.classList.add("hidden"); - + domStatus.classList.add("hidden"); domFnProto.classList.add("hidden"); domSectParams.classList.add("hidden"); @@ -537,9 +537,9 @@ const NAV_MODES = { currentType = childDecl; curNav.declObjs.push(currentType); } - - - + + + window.x = currentType; renderNav(); @@ -586,15 +586,15 @@ const NAV_MODES = { switch (curNav.mode) { case NAV_MODES.API: case NAV_MODES.API_INTERNAL: - return renderApi(); + return renderApi(); case NAV_MODES.GUIDES: - return renderGuides(); + return renderGuides(); default: - throw "?"; - } + throw "?"; + } } - + function renderDocTest(decl) { if (!decl.decltest) return; const astNode = getAstNode(decl.decltest); @@ -651,7 +651,7 @@ const NAV_MODES = { wantLink: true, fnDecl, }); - + domFnSourceLink.innerHTML = "[src]"; let docsSource = null; @@ -895,7 +895,7 @@ const NAV_MODES = { function navLink(pkgNames, declNames, callName) { let base = curNav.mode; - + if (pkgNames.length === 0 && declNames.length === 0) { return base; } else if (declNames.length === 0 && callName == null) { @@ -919,18 +919,18 @@ const NAV_MODES = { function findDeclNavLink(declName) { if (curNav.declObjs.length == 0) return null; - const curFile = getAstNode(curNav.declObjs[curNav.declObjs.length-1].src).file; - - for (let i = curNav.declObjs.length -1; i >= 0; i--) { - const curDecl = curNav.declObjs[i]; - const curDeclName = curNav.declNames[i-1]; - if (curDeclName == declName) { - const declPath = curNav.declNames.slice(0,i); - return navLink(curNav.pkgNames, declPath); - } + const curFile = getAstNode(curNav.declObjs[curNav.declObjs.length - 1].src).file; - if (findSubDecl(curDecl, declName) != null) { - const declPath = curNav.declNames.slice(0,i).concat([declName]); + for (let i = curNav.declObjs.length - 1; i >= 0; i--) { + const curDecl = curNav.declObjs[i]; + const curDeclName = curNav.declNames[i - 1]; + if (curDeclName == declName) { + const declPath = curNav.declNames.slice(0, i); + return navLink(curNav.pkgNames, declPath); + } + + if (findSubDecl(curDecl, declName) != null) { + const declPath = curNav.declNames.slice(0, i).concat([declName]); return navLink(curNav.pkgNames, declPath); } } @@ -1366,10 +1366,6 @@ const NAV_MODES = { payloadHtml += "truncate"; break; } - case "align_cast": { - payloadHtml += "alignCast"; - break; - } case "has_decl": { payloadHtml += "hasDecl"; break; @@ -1700,15 +1696,15 @@ const NAV_MODES = { } case "declRef": { const name = getDecl(expr.declRef).name; - + if (opts.wantHtml) { let payloadHtml = ""; if (opts.wantLink) { - payloadHtml += ''; + payloadHtml += ''; } payloadHtml += '' + - name + + name + ""; if (opts.wantLink) payloadHtml += ""; return payloadHtml; @@ -1728,12 +1724,12 @@ const NAV_MODES = { if ("string" in expr.refPath[i]) { component = expr.refPath[i].string; } else { - component = exprName(expr.refPath[i], {...opts, wantLink: false}); + component = exprName(expr.refPath[i], { ...opts, wantLink: false }); if (opts.wantLink && "declRef" in expr.refPath[i]) { url += "." + getDecl(expr.refPath[i].declRef).name; - component = '' + + component = '' + component + - ""; + ""; } } name += "." + component; @@ -1792,12 +1788,12 @@ const NAV_MODES = { name = "struct { "; } } - if (structObj.fields.length > 1 && opts.wantHtml) {name += "
";} + if (structObj.fields.length > 1 && opts.wantHtml) { name += "
"; } let indent = ""; if (structObj.fields.length > 1 && opts.wantHtml) { indent = "    " } - if (opts.indent) { + if (opts.indent && structObj.fields.length > 1) { indent = opts.indent + indent; } let structNode = getAstNode(structObj.src); @@ -1808,7 +1804,7 @@ const NAV_MODES = { field_end += " "; } - for(let i = 0; i < structObj.fields.length; i += 1) { + for (let i = 0; i < structObj.fields.length; i += 1) { let fieldNode = getAstNode(structNode.fields[i]); let fieldName = fieldNode.name; let html = indent; @@ -1817,17 +1813,17 @@ const NAV_MODES = { } let fieldTypeExpr = structObj.fields[i]; - if(!structObj.is_tuple) { + if (!structObj.is_tuple) { html += ": "; } - html += exprName(fieldTypeExpr, {...opts, indent: indent}); + html += exprName(fieldTypeExpr, { ...opts, indent: indent }); html += field_end; name += html; } - if (opts.indent) { + if (opts.indent && structObj.fields.length > 1) { name += opts.indent; } name += "}"; @@ -1850,7 +1846,7 @@ const NAV_MODES = { if (enumObj.nonexhaustive) { fields_len += 1; } - if (fields_len > 1 && opts.wantHtml) {name += "
";} + if (fields_len > 1 && opts.wantHtml) { name += "
"; } let indent = ""; if (fields_len > 1) { if (opts.wantHtml) { @@ -1868,10 +1864,10 @@ const NAV_MODES = { } else { field_end += " "; } - for(let i = 0; i < enumNode.fields.length; i += 1) { + for (let i = 0; i < enumNode.fields.length; i += 1) { let fieldNode = getAstNode(enumNode.fields[i]); let fieldName = fieldNode.name; - let html = indent + escapeHtml(fieldName); + let html = indent + escapeHtml(fieldName); html += field_end; @@ -1895,16 +1891,16 @@ const NAV_MODES = { name = "union"; } if (unionObj.auto_tag) { - if (opts.wantHtml) { - name += " (enum"; - } else { - name += " (enum"; - } - if (unionObj.tag) { - name += "(" + exprName(unionObj.tag, opts) + "))"; - } else { - name += ")"; - } + if (opts.wantHtml) { + name += " (enum"; + } else { + name += " (enum"; + } + if (unionObj.tag) { + name += "(" + exprName(unionObj.tag, opts) + "))"; + } else { + name += ")"; + } } else if (unionObj.tag) { name += " (" + exprName(unionObj.tag, opts) + ")"; } @@ -1926,7 +1922,7 @@ const NAV_MODES = { } else { field_end += " "; } - for(let i = 0; i < unionObj.fields.length; i += 1) { + for (let i = 0; i < unionObj.fields.length; i += 1) { let fieldNode = getAstNode(unionNode.fields[i]); let fieldName = fieldNode.name; let html = indent + escapeHtml(fieldName); @@ -1934,7 +1930,7 @@ const NAV_MODES = { let fieldTypeExpr = unionObj.fields[i]; html += ": "; - html += exprName(fieldTypeExpr, {...opts, indent: indent}); + html += exprName(fieldTypeExpr, { ...opts, indent: indent }); html += field_end; @@ -2163,7 +2159,7 @@ const NAV_MODES = { opts.fnDecl = null; opts.linkFnNameDecl = null; let payloadHtml = ""; - if (opts.addParensIfFnSignature && fnObj.src == 0){ + if (opts.addParensIfFnSignature && fnObj.src == 0) { payloadHtml += "("; } if (opts.wantHtml) { @@ -2179,7 +2175,7 @@ const NAV_MODES = { if (linkFnNameDecl) { payloadHtml += '' + - escapeHtml(fnDecl.name) + + escapeHtml(fnDecl.name) + ""; } else { payloadHtml += escapeHtml(fnDecl.name); @@ -2198,7 +2194,7 @@ const NAV_MODES = { fields = fnNode.fields; isVarArgs = fnNode.varArgs; } - + for (let i = 0; i < fnObj.params.length; i += 1) { if (i != 0) { payloadHtml += ", "; @@ -2251,13 +2247,13 @@ const NAV_MODES = { } else if ("typeOf" in value) { payloadHtml += exprName(value, opts); } else if ("typeOf_peer" in value) { - payloadHtml += exprName(value, opts); + payloadHtml += exprName(value, opts); } else if ("declRef" in value) { - payloadHtml += exprName(value, opts); + payloadHtml += exprName(value, opts); } else if ("call" in value) { - payloadHtml += exprName(value, opts); + payloadHtml += exprName(value, opts); } else if ("refPath" in value) { - payloadHtml += exprName(value, opts); + payloadHtml += exprName(value, opts); } else if ("type" in value) { payloadHtml += exprName(value, opts); //payloadHtml += '' + name + ""; @@ -2301,7 +2297,7 @@ const NAV_MODES = { } if (fnObj.ret != null) { payloadHtml += exprName(fnObj.ret, { - ...opts, + ...opts, addParensIfFnSignature: true, }); } else if (opts.wantHtml) { @@ -2310,7 +2306,7 @@ const NAV_MODES = { payloadHtml += "anytype"; } - if (opts.addParensIfFnSignature && fnObj.src == 0){ + if (opts.addParensIfFnSignature && fnObj.src == 0) { payloadHtml += ")"; } return payloadHtml; @@ -2353,7 +2349,7 @@ const NAV_MODES = { ) { name = "std"; } else { - name = exprName({ type: typeObj }, {wantHtml: false, wantLink: false}); + name = exprName({ type: typeObj }, { wantHtml: false, wantLink: false }); } if (name != null && name != "") { domHdrName.innerText = @@ -2644,10 +2640,10 @@ const 
NAV_MODES = { } function sourceFileLink(decl) { - const srcNode = getAstNode(decl.src); - return sourceFileUrlTemplate. - replace("{{file}}", zigAnalysis.files[srcNode.file]). - replace("{{line}}", srcNode.line + 1); + const srcNode = getAstNode(decl.src); + return sourceFileUrlTemplate. + replace("{{file}}", zigAnalysis.files[srcNode.file]). + replace("{{line}}", srcNode.line + 1); } function renderContainer(container) { @@ -2783,8 +2779,8 @@ const NAV_MODES = { if (short != docs) { short = markdown(short); var long = markdown(docs); - tdDesc.innerHTML = - "
" + short + "
" + "
" + long + "
"; + tdDesc.innerHTML = + "
" + short + "
" + "
" + long + "
"; } else { tdDesc.innerHTML = markdown(short); @@ -2818,10 +2814,10 @@ const NAV_MODES = { html += ' = ' + fieldName + ""; } else { let fieldTypeExpr = container.fields[i]; - if(container.kind ==! typeKinds.Struct || !container.is_tuple) { + if (container.kind !== typeKinds.Struct || !container.is_tuple) { html += ": "; } - html += exprName(fieldTypeExpr, {wantHtml:true, wantLink:true}); + html += exprName(fieldTypeExpr, { wantHtml: true, wantLink: true }); let tsn = typeShorthandName(fieldTypeExpr); if (tsn) { html += " (" + tsn + ")"; @@ -3007,8 +3003,8 @@ const NAV_MODES = { throw new Error("No type 'type' found"); } - - function updateCurNav() { + + function updateCurNav() { curNav = { mode: NAV_MODES.API, pkgNames: [], @@ -3021,7 +3017,7 @@ const NAV_MODES = { const mode = location.hash.substring(0, 3); let query = location.hash.substring(3); - + const DEFAULT_HASH = NAV_MODES.API + zigAnalysis.packages[zigAnalysis.rootPkg].name; switch (mode) { case NAV_MODES.API: @@ -3037,7 +3033,7 @@ const NAV_MODES = { nonSearchPart = query.substring(0, qpos); curNavSearch = decodeURIComponent(query.substring(qpos + 1)); } - + let parts = nonSearchPart.split(":"); if (parts[0] == "") { location.hash = DEFAULT_HASH; @@ -3059,14 +3055,14 @@ const NAV_MODES = { curNav.mode = mode; curNav.activeGuide = query; - + return; default: location.hash = DEFAULT_HASH; return; } - } - + } + function onHashChange() { updateCurNav(); if (domSearch.value !== curNavSearch) { @@ -3103,7 +3099,7 @@ const NAV_MODES = { if (!callee.generic_ret) return null; resolvedGenericRet = resolveValue({ expr: callee.generic_ret }); } - + if ("type" in resolvedGenericRet.expr) { parentType = getType(resolvedGenericRet.expr.type); } @@ -3248,7 +3244,7 @@ const NAV_MODES = { }); } - function shortDesc(docs){ + function shortDesc(docs) { const trimmed_docs = docs.trim(); let index = trimmed_docs.indexOf("\n\n"); let cut = false; @@ -3319,14 +3315,16 @@ const NAV_MODES = { } else if (line.text.startsWith("#")) { line.type = "h1"; line.text = line.text.substr(1); - } else if (line.text.startsWith("-")) { - line.type = "ul"; - line.text = line.text.substr(1); - } else if (line.text.match(/^\d+\..*$/)) { - // if line starts with {number}{dot} - const match = line.text.match(/(\d+)\./); + } else if (line.text.match(/^-[ \t]+.*$/)) { + // line starts with a hyphen, followed by spaces or tabs + const match = line.text.match(/^-[ \t]+/); line.type = "ul"; line.text = line.text.substr(match[0].length); + } else if (line.text.match(/^\d+\.[ \t]+.*$/)) { + // line starts with {number}{dot}{spaces or tabs} + const match = line.text.match(/(\d+)\.[ \t]+/); + line.type = "ol"; + line.text = line.text.substr(match[0].length); line.ordered_number = Number(match[1].length); } else if (line.text == "```") { line.type = "skip"; @@ -3536,7 +3534,7 @@ const NAV_MODES = { case "ul": case "ol": if ( - !previousLineIs("ul", line_no) || + !previousLineIs(line.type, line_no) || getPreviousLineIndent(line_no) < line.indent ) { html += "<" + line.type + ">\n"; @@ -3545,7 +3543,7 @@ const NAV_MODES = { html += "
  • " + markdownInlines(line.text) + "
  • \n"; if ( - !nextLineIs("ul", line_no) || + !nextLineIs(line.type, line_no) || getNextLineIndent(line_no) < line.indent ) { html += "\n"; @@ -4067,4 +4065,4 @@ function toggleExpand(event) { if (!parent.open && parent.getBoundingClientRect().top < 0) { parent.parentElement.parentElement.scrollIntoView(true); } -} \ No newline at end of file +} diff --git a/lib/std/Build.zig b/lib/std/Build.zig index 678120847f..26919962e3 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -37,7 +37,7 @@ pub const FmtStep = @import("Build/FmtStep.zig"); pub const InstallArtifactStep = @import("Build/InstallArtifactStep.zig"); pub const InstallDirStep = @import("Build/InstallDirStep.zig"); pub const InstallFileStep = @import("Build/InstallFileStep.zig"); -pub const InstallRawStep = @import("Build/InstallRawStep.zig"); +pub const ObjCopyStep = @import("Build/ObjCopyStep.zig"); pub const CompileStep = @import("Build/CompileStep.zig"); pub const LogStep = @import("Build/LogStep.zig"); pub const OptionsStep = @import("Build/OptionsStep.zig"); @@ -1254,11 +1254,8 @@ pub fn installLibFile(self: *Build, src_path: []const u8, dest_rel_path: []const self.getInstallStep().dependOn(&self.addInstallFileWithDir(.{ .path = src_path }, .lib, dest_rel_path).step); } -/// Output format (BIN vs Intel HEX) determined by filename -pub fn installRaw(self: *Build, artifact: *CompileStep, dest_filename: []const u8, options: InstallRawStep.CreateOptions) *InstallRawStep { - const raw = self.addInstallRaw(artifact, dest_filename, options); - self.getInstallStep().dependOn(&raw.step); - return raw; +pub fn addObjCopy(b: *Build, source: FileSource, options: ObjCopyStep.Options) *ObjCopyStep { + return ObjCopyStep.create(b, source, options); } ///`dest_rel_path` is relative to install prefix path @@ -1280,10 +1277,6 @@ pub fn addInstallHeaderFile(b: *Build, src_path: []const u8, dest_rel_path: []co return b.addInstallFileWithDir(.{ .path = src_path }, .header, dest_rel_path); } -pub fn addInstallRaw(self: *Build, artifact: *CompileStep, dest_filename: []const u8, options: InstallRawStep.CreateOptions) *InstallRawStep { - return InstallRawStep.create(self, artifact, dest_filename, options); -} - pub fn addInstallFileWithDir( self: *Build, source: FileSource, @@ -1771,7 +1764,7 @@ test { _ = InstallArtifactStep; _ = InstallDirStep; _ = InstallFileStep; - _ = InstallRawStep; + _ = ObjCopyStep; _ = CompileStep; _ = LogStep; _ = OptionsStep; diff --git a/lib/std/Build/CompileStep.zig b/lib/std/Build/CompileStep.zig index a916de0fc6..ea2320cc89 100644 --- a/lib/std/Build/CompileStep.zig +++ b/lib/std/Build/CompileStep.zig @@ -21,7 +21,7 @@ const VcpkgRoot = std.Build.VcpkgRoot; const InstallDir = std.Build.InstallDir; const InstallArtifactStep = std.Build.InstallArtifactStep; const GeneratedFile = std.Build.GeneratedFile; -const InstallRawStep = std.Build.InstallRawStep; +const ObjCopyStep = std.Build.ObjCopyStep; const EmulatableRunStep = std.Build.EmulatableRunStep; const CheckObjectStep = std.Build.CheckObjectStep; const RunStep = std.Build.RunStep; @@ -432,10 +432,6 @@ pub fn install(self: *CompileStep) void { self.builder.installArtifact(self); } -pub fn installRaw(self: *CompileStep, dest_filename: []const u8, options: InstallRawStep.CreateOptions) *InstallRawStep { - return self.builder.installRaw(self, dest_filename, options); -} - pub fn installHeader(a: *CompileStep, src_path: []const u8, dest_rel_path: []const u8) void { const install_file = a.builder.addInstallHeaderFile(src_path, dest_rel_path); 
a.builder.getInstallStep().dependOn(&install_file.step); @@ -458,6 +454,7 @@ pub fn installConfigHeader( options.install_dir, dest_rel_path, ); + install_file.step.dependOn(&config_header.step); cs.builder.getInstallStep().dependOn(&install_file.step); cs.installed_headers.append(&install_file.step) catch @panic("OOM"); } @@ -506,6 +503,18 @@ pub fn installLibraryHeaders(a: *CompileStep, l: *CompileStep) void { a.installed_headers.appendSlice(l.installed_headers.items) catch @panic("OOM"); } +pub fn addObjCopy(cs: *CompileStep, options: ObjCopyStep.Options) *ObjCopyStep { + var copy = options; + if (copy.basename == null) { + if (options.format) |f| { + copy.basename = cs.builder.fmt("{s}.{s}", .{ cs.name, @tagName(f) }); + } else { + copy.basename = cs.name; + } + } + return cs.builder.addObjCopy(cs.getOutputSource(), copy); +} + /// Deprecated: use `std.Build.addRunArtifact` /// This function will run in the context of the package that created the executable, /// which is undesirable when running an executable provided by a dependency package. @@ -955,7 +964,10 @@ pub fn addFrameworkPath(self: *CompileStep, dir_path: []const u8) void { /// package's module table using `name`. pub fn addModule(cs: *CompileStep, name: []const u8, module: *Module) void { cs.modules.put(cs.builder.dupe(name), module) catch @panic("OOM"); - cs.addRecursiveBuildDeps(module); + + var done = std.AutoHashMap(*Module, void).init(cs.builder.allocator); + defer done.deinit(); + cs.addRecursiveBuildDeps(module, &done) catch @panic("OOM"); } /// Adds a module to be used with `@import` without exposing it in the current @@ -969,10 +981,12 @@ pub fn addOptions(cs: *CompileStep, module_name: []const u8, options: *OptionsSt addModule(cs, module_name, options.createModule()); } -fn addRecursiveBuildDeps(cs: *CompileStep, module: *Module) void { +fn addRecursiveBuildDeps(cs: *CompileStep, module: *Module, done: *std.AutoHashMap(*Module, void)) !void { + if (done.contains(module)) return; + try done.put(module, {}); module.source_file.addStepDependencies(&cs.step); for (module.dependencies.values()) |dep| { - cs.addRecursiveBuildDeps(dep); + try cs.addRecursiveBuildDeps(dep, done); } } @@ -1031,22 +1045,110 @@ fn linkLibraryOrObject(self: *CompileStep, other: *CompileStep) void { fn appendModuleArgs( cs: *CompileStep, zig_args: *ArrayList([]const u8), - name: []const u8, - module: *Module, ) error{OutOfMemory}!void { - try zig_args.append("--pkg-begin"); - try zig_args.append(name); - try zig_args.append(module.builder.pathFromRoot(module.source_file.getPath(module.builder))); + // First, traverse the whole dependency graph and give every module a unique name, ideally one + // named after what it's called somewhere in the graph. It will help here to have both a mapping + // from module to name and a set of all the currently-used names. + var mod_names = std.AutoHashMap(*Module, []const u8).init(cs.builder.allocator); + var names = std.StringHashMap(void).init(cs.builder.allocator); + var to_name = std.ArrayList(struct { + name: []const u8, + mod: *Module, + }).init(cs.builder.allocator); { - const keys = module.dependencies.keys(); - for (module.dependencies.values(), 0..) |sub_module, i| { - const sub_name = keys[i]; - try cs.appendModuleArgs(zig_args, sub_name, sub_module); + var it = cs.modules.iterator(); + while (it.next()) |kv| { + // While we're traversing the root dependencies, let's make sure that no module names + // have colons in them, since the CLI forbids it. 
We handle this for transitive + // dependencies further down. + if (std.mem.indexOfScalar(u8, kv.key_ptr.*, ':') != null) { + @panic("Module names cannot contain colons"); + } + try to_name.append(.{ + .name = kv.key_ptr.*, + .mod = kv.value_ptr.*, + }); } } - try zig_args.append("--pkg-end"); + while (to_name.popOrNull()) |dep| { + if (mod_names.contains(dep.mod)) continue; + + // We'll use this buffer to store the name we decide on + var buf = try cs.builder.allocator.alloc(u8, dep.name.len + 32); + // First, try just the exposed dependency name + std.mem.copy(u8, buf, dep.name); + var name = buf[0..dep.name.len]; + var n: usize = 0; + while (names.contains(name)) { + // If that failed, append an incrementing number to the end + name = std.fmt.bufPrint(buf, "{s}{}", .{ dep.name, n }) catch unreachable; + n += 1; + } + + try mod_names.put(dep.mod, name); + try names.put(name, {}); + + var it = dep.mod.dependencies.iterator(); + while (it.next()) |kv| { + // Same colon-in-name check as above, but for transitive dependencies. + if (std.mem.indexOfScalar(u8, kv.key_ptr.*, ':') != null) { + @panic("Module names cannot contain colons"); + } + try to_name.append(.{ + .name = kv.key_ptr.*, + .mod = kv.value_ptr.*, + }); + } + } + + // Since the module names given to the CLI are based off of the exposed names, we already know + // that none of the CLI names have colons in them, so there's no need to check that explicitly. + + // Every module in the graph is now named; output their definitions + { + var it = mod_names.iterator(); + while (it.next()) |kv| { + const mod = kv.key_ptr.*; + const name = kv.value_ptr.*; + + const deps_str = try constructDepString(cs.builder.allocator, mod_names, mod.dependencies); + const src = mod.builder.pathFromRoot(mod.source_file.getPath(mod.builder)); + try zig_args.append("--mod"); + try zig_args.append(try std.fmt.allocPrint(cs.builder.allocator, "{s}:{s}:{s}", .{ name, deps_str, src })); + } + } + + // Lastly, output the root dependencies + const deps_str = try constructDepString(cs.builder.allocator, mod_names, cs.modules); + if (deps_str.len > 0) { + try zig_args.append("--deps"); + try zig_args.append(deps_str); + } +} + +fn constructDepString( + allocator: std.mem.Allocator, + mod_names: std.AutoHashMap(*Module, []const u8), + deps: std.StringArrayHashMap(*Module), +) ![]const u8 { + var deps_str = std.ArrayList(u8).init(allocator); + var it = deps.iterator(); + while (it.next()) |kv| { + const expose = kv.key_ptr.*; + const name = mod_names.get(kv.value_ptr.*).?; + if (std.mem.eql(u8, expose, name)) { + try deps_str.writer().print("{s},", .{name}); + } else { + try deps_str.writer().print("{s}={s},", .{ expose, name }); + } + } + if (deps_str.items.len > 0) { + return deps_str.items[0 .. deps_str.items.len - 1]; // omit trailing comma + } else { + return ""; + } } fn make(step: *Step) !void { @@ -1573,13 +1675,7 @@ fn make(step: *Step) !void { try zig_args.append("--test-no-exec"); } - { - const keys = self.modules.keys(); - for (self.modules.values(), 0..) 
|module, i| { - const name = keys[i]; - try self.appendModuleArgs(&zig_args, name, module); - } - } + try self.appendModuleArgs(&zig_args); for (self.include_dirs.items) |include_dir| { switch (include_dir) { diff --git a/lib/std/Build/EmulatableRunStep.zig b/lib/std/Build/EmulatableRunStep.zig index 5517f7f9aa..d4b5238524 100644 --- a/lib/std/Build/EmulatableRunStep.zig +++ b/lib/std/Build/EmulatableRunStep.zig @@ -26,7 +26,7 @@ builder: *std.Build, exe: *CompileStep, /// Set this to `null` to ignore the exit code for the purpose of determining a successful execution -expected_exit_code: ?u8 = 0, +expected_term: ?std.ChildProcess.Term = .{ .Exited = 0 }, /// Override this field to modify the environment env_map: ?*EnvMap, @@ -131,7 +131,7 @@ fn make(step: *Step) !void { try RunStep.runCommand( argv_list.items, self.builder, - self.expected_exit_code, + self.expected_term, self.stdout_action, self.stderr_action, .Inherit, diff --git a/lib/std/Build/InstallRawStep.zig b/lib/std/Build/InstallRawStep.zig deleted file mode 100644 index 014c44f287..0000000000 --- a/lib/std/Build/InstallRawStep.zig +++ /dev/null @@ -1,110 +0,0 @@ -//! TODO: Rename this to ObjCopyStep now that it invokes the `zig objcopy` -//! subcommand rather than containing an implementation directly. - -const std = @import("std"); -const InstallRawStep = @This(); - -const Allocator = std.mem.Allocator; -const ArenaAllocator = std.heap.ArenaAllocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; -const File = std.fs.File; -const InstallDir = std.Build.InstallDir; -const CompileStep = std.Build.CompileStep; -const Step = std.Build.Step; -const elf = std.elf; -const fs = std.fs; -const io = std.io; -const sort = std.sort; - -pub const base_id = .install_raw; - -pub const RawFormat = enum { - bin, - hex, -}; - -step: Step, -builder: *std.Build, -artifact: *CompileStep, -dest_dir: InstallDir, -dest_filename: []const u8, -options: CreateOptions, -output_file: std.Build.GeneratedFile, - -pub const CreateOptions = struct { - format: ?RawFormat = null, - dest_dir: ?InstallDir = null, - only_section: ?[]const u8 = null, - pad_to: ?u64 = null, -}; - -pub fn create( - builder: *std.Build, - artifact: *CompileStep, - dest_filename: []const u8, - options: CreateOptions, -) *InstallRawStep { - const self = builder.allocator.create(InstallRawStep) catch @panic("OOM"); - self.* = InstallRawStep{ - .step = Step.init(.install_raw, builder.fmt("install raw binary {s}", .{artifact.step.name}), builder.allocator, make), - .builder = builder, - .artifact = artifact, - .dest_dir = if (options.dest_dir) |d| d else switch (artifact.kind) { - .obj => unreachable, - .@"test" => unreachable, - .exe, .test_exe => .bin, - .lib => unreachable, - }, - .dest_filename = dest_filename, - .options = options, - .output_file = std.Build.GeneratedFile{ .step = &self.step }, - }; - self.step.dependOn(&artifact.step); - - builder.pushInstalledFile(self.dest_dir, dest_filename); - return self; -} - -pub fn getOutputSource(self: *const InstallRawStep) std.Build.FileSource { - return std.Build.FileSource{ .generated = &self.output_file }; -} - -fn make(step: *Step) !void { - const self = @fieldParentPtr(InstallRawStep, "step", step); - const b = self.builder; - - if (self.artifact.target.getObjectFormat() != .elf) { - std.debug.print("InstallRawStep only works with ELF format.\n", .{}); - return error.InvalidObjectFormat; - } - - const full_src_path = self.artifact.getOutputSource().getPath(b); - const full_dest_path = b.getInstallPath(self.dest_dir, 
self.dest_filename); - self.output_file.path = full_dest_path; - - try fs.cwd().makePath(b.getInstallPath(self.dest_dir, "")); - - var argv_list = std.ArrayList([]const u8).init(b.allocator); - try argv_list.appendSlice(&.{ b.zig_exe, "objcopy" }); - - if (self.options.only_section) |only_section| { - try argv_list.appendSlice(&.{ "-j", only_section }); - } - if (self.options.pad_to) |pad_to| { - try argv_list.appendSlice(&.{ - "--pad-to", - b.fmt("{d}", .{pad_to}), - }); - } - if (self.options.format) |format| switch (format) { - .bin => try argv_list.appendSlice(&.{ "-O", "binary" }), - .hex => try argv_list.appendSlice(&.{ "-O", "hex" }), - }; - - try argv_list.appendSlice(&.{ full_src_path, full_dest_path }); - _ = try self.builder.execFromStep(argv_list.items, &self.step); -} - -test { - std.testing.refAllDecls(InstallRawStep); -} diff --git a/lib/std/Build/ObjCopyStep.zig b/lib/std/Build/ObjCopyStep.zig new file mode 100644 index 0000000000..aea5b8975c --- /dev/null +++ b/lib/std/Build/ObjCopyStep.zig @@ -0,0 +1,138 @@ +const std = @import("std"); +const ObjCopyStep = @This(); + +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; +const File = std.fs.File; +const InstallDir = std.Build.InstallDir; +const CompileStep = std.Build.CompileStep; +const Step = std.Build.Step; +const elf = std.elf; +const fs = std.fs; +const io = std.io; +const sort = std.sort; + +pub const base_id: Step.Id = .objcopy; + +pub const RawFormat = enum { + bin, + hex, +}; + +step: Step, +builder: *std.Build, +file_source: std.Build.FileSource, +basename: []const u8, +output_file: std.Build.GeneratedFile, + +format: ?RawFormat, +only_section: ?[]const u8, +pad_to: ?u64, + +pub const Options = struct { + basename: ?[]const u8 = null, + format: ?RawFormat = null, + only_section: ?[]const u8 = null, + pad_to: ?u64 = null, +}; + +pub fn create( + builder: *std.Build, + file_source: std.Build.FileSource, + options: Options, +) *ObjCopyStep { + const self = builder.allocator.create(ObjCopyStep) catch @panic("OOM"); + self.* = ObjCopyStep{ + .step = Step.init( + base_id, + builder.fmt("objcopy {s}", .{file_source.getDisplayName()}), + builder.allocator, + make, + ), + .builder = builder, + .file_source = file_source, + .basename = options.basename orelse file_source.getDisplayName(), + .output_file = std.Build.GeneratedFile{ .step = &self.step }, + + .format = options.format, + .only_section = options.only_section, + .pad_to = options.pad_to, + }; + file_source.addStepDependencies(&self.step); + return self; +} + +pub fn getOutputSource(self: *const ObjCopyStep) std.Build.FileSource { + return .{ .generated = &self.output_file }; +} + +fn make(step: *Step) !void { + const self = @fieldParentPtr(ObjCopyStep, "step", step); + const b = self.builder; + + var man = b.cache.obtain(); + defer man.deinit(); + + // Random bytes to make ObjCopyStep unique. Refresh this with new random + // bytes when ObjCopyStep implementation is modified incompatibly. + man.hash.add(@as(u32, 0xe18b7baf)); + + const full_src_path = self.file_source.getPath(b); + _ = try man.addFile(full_src_path, null); + man.hash.addOptionalBytes(self.only_section); + man.hash.addOptional(self.pad_to); + man.hash.addOptional(self.format); + + if (man.hit() catch |err| failWithCacheError(man, err)) { + // Cache hit, skip subprocess execution. 
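+ // The digest covers the input file's contents plus the only_section, pad_to, + // and format options hashed above, so any change to them yields a new digest. + // On a hit, the output produced by an earlier run already exists at + // <cache_root>/o/<digest>/<basename> and is reused without invoking + // `zig objcopy` again.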
+ const digest = man.final(); + self.output_file.path = try b.cache_root.join(b.allocator, &.{ + "o", &digest, self.basename, + }); + return; + } + + const digest = man.final(); + const full_dest_path = try b.cache_root.join(b.allocator, &.{ "o", &digest, self.basename }); + const cache_path = "o" ++ fs.path.sep_str ++ digest; + b.cache_root.handle.makePath(cache_path) catch |err| { + std.debug.print("unable to make path {s}: {s}\n", .{ cache_path, @errorName(err) }); + return err; + }; + + var argv = std.ArrayList([]const u8).init(b.allocator); + try argv.appendSlice(&.{ b.zig_exe, "objcopy" }); + + if (self.only_section) |only_section| { + try argv.appendSlice(&.{ "-j", only_section }); + } + if (self.pad_to) |pad_to| { + try argv.appendSlice(&.{ "--pad-to", b.fmt("{d}", .{pad_to}) }); + } + if (self.format) |format| switch (format) { + .bin => try argv.appendSlice(&.{ "-O", "binary" }), + .hex => try argv.appendSlice(&.{ "-O", "hex" }), + }; + + try argv.appendSlice(&.{ full_src_path, full_dest_path }); + _ = try self.builder.execFromStep(argv.items, &self.step); + + self.output_file.path = full_dest_path; + try man.writeManifest(); +} + +/// TODO consolidate this with the same function in RunStep? +/// Also properly deal with concurrency (see open PR) +fn failWithCacheError(man: std.Build.Cache.Manifest, err: anyerror) noreturn { + const i = man.failed_file_index orelse failWithSimpleError(err); + const pp = man.files.items[i].prefixed_path orelse failWithSimpleError(err); + const prefix = man.cache.prefixes()[pp.prefix].path orelse ""; + std.debug.print("{s}: {s}/{s}\n", .{ @errorName(err), prefix, pp.sub_path }); + std.process.exit(1); +} + +fn failWithSimpleError(err: anyerror) noreturn { + std.debug.print("{s}\n", .{@errorName(err)}); + std.process.exit(1); +} diff --git a/lib/std/Build/RunStep.zig b/lib/std/Build/RunStep.zig index d3f48e4e87..1aae37d2f3 100644 --- a/lib/std/Build/RunStep.zig +++ b/lib/std/Build/RunStep.zig @@ -35,7 +35,7 @@ stderr_action: StdIoAction = .inherit, stdin_behavior: std.ChildProcess.StdIo = .Inherit, /// Set this to `null` to ignore the exit code for the purpose of determining a successful execution -expected_exit_code: ?u8 = 0, +expected_term: ?std.ChildProcess.Term = .{ .Exited = 0 }, /// Print the command before running it print: bool, @@ -290,7 +290,7 @@ fn make(step: *Step) !void { try runCommand( argv_list.items, self.builder, - self.expected_exit_code, + self.expected_term, self.stdout_action, self.stderr_action, self.stdin_behavior, @@ -304,10 +304,55 @@ fn make(step: *Step) !void { } } +fn formatTerm( + term: ?std.ChildProcess.Term, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = fmt; + _ = options; + if (term) |t| switch (t) { + .Exited => |code| try writer.print("exited with code {}", .{code}), + .Signal => |sig| try writer.print("terminated with signal {}", .{sig}), + .Stopped => |sig| try writer.print("stopped with signal {}", .{sig}), + .Unknown => |code| try writer.print("terminated for unknown reason with code {}", .{code}), + } else { + try writer.writeAll("exited with any code"); + } +} +fn fmtTerm(term: ?std.ChildProcess.Term) std.fmt.Formatter(formatTerm) { + return .{ .data = term }; +} + +fn termMatches(expected: ?std.ChildProcess.Term, actual: std.ChildProcess.Term) bool { + return if (expected) |e| switch (e) { + .Exited => |expected_code| switch (actual) { + .Exited => |actual_code| expected_code == actual_code, + else => false, + }, + .Signal => |expected_sig| switch (actual) { 
+ .Signal => |actual_sig| expected_sig == actual_sig, + else => false, + }, + .Stopped => |expected_sig| switch (actual) { + .Stopped => |actual_sig| expected_sig == actual_sig, + else => false, + }, + .Unknown => |expected_code| switch (actual) { + .Unknown => |actual_code| expected_code == actual_code, + else => false, + }, + } else switch (actual) { + .Exited => true, + else => false, + }; +} + pub fn runCommand( argv: []const []const u8, builder: *std.Build, - expected_exit_code: ?u8, + expected_term: ?std.ChildProcess.Term, stdout_action: StdIoAction, stderr_action: StdIoAction, stdin_behavior: std.ChildProcess.StdIo, @@ -369,32 +414,14 @@ pub fn runCommand( return err; }; - switch (term) { - .Exited => |code| blk: { - const expected_code = expected_exit_code orelse break :blk; - - if (code != expected_code) { - if (builder.prominent_compile_errors) { - std.debug.print("Run step exited with error code {} (expected {})\n", .{ - code, - expected_code, - }); - } else { - std.debug.print("The following command exited with error code {} (expected {}):\n", .{ - code, - expected_code, - }); - printCmd(cwd, argv); - } - - return error.UnexpectedExitCode; - } - }, - else => { - std.debug.print("The following command terminated unexpectedly:\n", .{}); + if (!termMatches(expected_term, term)) { + if (builder.prominent_compile_errors) { + std.debug.print("Run step {} (expected {})\n", .{ fmtTerm(term), fmtTerm(expected_term) }); + } else { + std.debug.print("The following command {} (expected {}):\n", .{ fmtTerm(term), fmtTerm(expected_term) }); printCmd(cwd, argv); - return error.UncleanExit; - }, + } + return error.UnexpectedExit; } switch (stderr_action) { diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index ff0ceb2a51..82c39ac2cc 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -21,7 +21,7 @@ pub const Id = enum { check_file, check_object, config_header, - install_raw, + objcopy, options, custom, @@ -42,7 +42,7 @@ pub const Id = enum { .check_file => Build.CheckFileStep, .check_object => Build.CheckObjectStep, .config_header => Build.ConfigHeaderStep, - .install_raw => Build.InstallRawStep, + .objcopy => Build.ObjCopyStep, .options => Build.OptionsStep, .custom => @compileError("no type available for custom step"), }; diff --git a/lib/std/Build/WriteFileStep.zig b/lib/std/Build/WriteFileStep.zig index 1621295ad8..3a30aba190 100644 --- a/lib/std/Build/WriteFileStep.zig +++ b/lib/std/Build/WriteFileStep.zig @@ -1,55 +1,117 @@ -const std = @import("../std.zig"); -const Step = std.Build.Step; -const fs = std.fs; -const ArrayList = std.ArrayList; - -const WriteFileStep = @This(); - -pub const base_id = .write_file; +//! WriteFileStep is primarily used to create a directory in an appropriate +//! location inside the local cache which has a set of files that have either +//! been generated during the build, or are copied from the source package. +//! +//! However, this step has an additional capability of writing data to paths +//! relative to the package root, effectively mutating the package's source +//! files. Be careful with the latter functionality; it should not be used +//! during the normal build process, but as a utility run by a developer with +//! intention to update source files, which will then be committed to version +//! control. step: Step, builder: *std.Build, -files: std.TailQueue(File), +/// The elements here are pointers because we need stable pointers for the +/// GeneratedFile field. 
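+/// If the list were to reallocate and move the File values, any FileSource +/// already handed out via `getFileSource` would be left holding a dangling +/// pointer into freed memory.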
+files: std.ArrayListUnmanaged(*File), +output_source_files: std.ArrayListUnmanaged(OutputSourceFile), + +pub const base_id = .write_file; pub const File = struct { - source: std.Build.GeneratedFile, - basename: []const u8, + generated_file: std.Build.GeneratedFile, + sub_path: []const u8, + contents: Contents, +}; + +pub const OutputSourceFile = struct { + contents: Contents, + sub_path: []const u8, +}; + +pub const Contents = union(enum) { bytes: []const u8, + copy: std.Build.FileSource, }; pub fn init(builder: *std.Build) WriteFileStep { - return WriteFileStep{ + return .{ .builder = builder, .step = Step.init(.write_file, "writefile", builder.allocator, make), .files = .{}, + .output_source_files = .{}, }; } -pub fn add(self: *WriteFileStep, basename: []const u8, bytes: []const u8) void { - const node = self.builder.allocator.create(std.TailQueue(File).Node) catch @panic("unhandled error"); - node.* = .{ - .data = .{ - .source = std.Build.GeneratedFile{ .step = &self.step }, - .basename = self.builder.dupePath(basename), - .bytes = self.builder.dupe(bytes), - }, +pub fn add(wf: *WriteFileStep, sub_path: []const u8, bytes: []const u8) void { + const gpa = wf.builder.allocator; + const file = gpa.create(File) catch @panic("OOM"); + file.* = .{ + .generated_file = .{ .step = &wf.step }, + .sub_path = wf.builder.dupePath(sub_path), + .contents = .{ .bytes = wf.builder.dupe(bytes) }, }; - - self.files.append(node); + wf.files.append(gpa, file) catch @panic("OOM"); } -/// Gets a file source for the given basename. If the file does not exist, returns `null`. -pub fn getFileSource(step: *WriteFileStep, basename: []const u8) ?std.Build.FileSource { - var it = step.files.first; - while (it) |node| : (it = node.next) { - if (std.mem.eql(u8, node.data.basename, basename)) - return std.Build.FileSource{ .generated = &node.data.source }; +/// Place the file into the generated directory within the local cache, +/// along with all the rest of the files added to this step. The parameter +/// here is the destination path relative to the local cache directory +/// associated with this WriteFileStep. It may be a basename, or it may +/// include sub-directories, in which case this step will ensure the +/// required sub-path exists. +/// This is the option expected to be used most commonly with `addCopyFile`. +pub fn addCopyFile(wf: *WriteFileStep, source: std.Build.FileSource, sub_path: []const u8) void { + const gpa = wf.builder.allocator; + const file = gpa.create(File) catch @panic("OOM"); + file.* = .{ + .generated_file = .{ .step = &wf.step }, + .sub_path = wf.builder.dupePath(sub_path), + .contents = .{ .copy = source }, + }; + wf.files.append(gpa, file) catch @panic("OOM"); +} + +/// A path relative to the package root. +/// Be careful with this because it updates source files. This should not be +/// used as part of the normal build process, but as a utility occasionally +/// run by a developer with intent to modify source files and then commit +/// those changes to version control. +/// A file added this way is not available with `getFileSource`. +pub fn addCopyFileToSource(wf: *WriteFileStep, source: std.Build.FileSource, sub_path: []const u8) void { + wf.output_source_files.append(wf.builder.allocator, .{ + .contents = .{ .copy = source }, + .sub_path = sub_path, + }) catch @panic("OOM"); +} + +/// Gets a file source for the given sub_path. If the file does not exist, returns `null`. 
+pub fn getFileSource(wf: *WriteFileStep, sub_path: []const u8) ?std.Build.FileSource { + for (wf.files.items) |file| { + if (std.mem.eql(u8, file.sub_path, sub_path)) { + return .{ .generated = &file.generated_file }; + } } return null; } fn make(step: *Step) !void { - const self = @fieldParentPtr(WriteFileStep, "step", step); + const wf = @fieldParentPtr(WriteFileStep, "step", step); + + // Writing to source files is kind of an extra capability of this + // WriteFileStep - arguably it should be a different step. But anyway here + // it is, it happens unconditionally and does not interact with the other + // files here. + for (wf.output_source_files.items) |output_source_file| { + const basename = fs.path.basename(output_source_file.sub_path); + if (fs.path.dirname(output_source_file.sub_path)) |dirname| { + var dir = try wf.builder.build_root.handle.makeOpenPath(dirname, .{}); + defer dir.close(); + try writeFile(wf, dir, output_source_file.contents, basename); + } else { + try writeFile(wf, wf.builder.build_root.handle, output_source_file.contents, basename); + } + } // The cache is used here not really as a way to speed things up - because writing // the data to a file would probably be very fast - but as a way to find a canonical @@ -58,56 +120,96 @@ fn make(step: *Step) !void { // If, for example, a hard-coded path was used as the location to put WriteFileStep // files, then two WriteFileSteps executing in parallel might clobber each other. - // TODO port the cache system from the compiler to zig std lib. Until then - // we directly construct the path, and no "cache hit" detection happens; - // the files are always written. - // Note there is similar code over in ConfigHeaderStep. - const Hasher = std.crypto.auth.siphash.SipHash128(1, 3); + var man = wf.builder.cache.obtain(); + defer man.deinit(); + // Random bytes to make WriteFileStep unique. Refresh this with // new random bytes when WriteFileStep implementation is modified // in a non-backwards-compatible way. - var hash = Hasher.init("eagVR1dYXoE7ARDP"); + man.hash.add(@as(u32, 0xd767ee59)); - { - var it = self.files.first; - while (it) |node| : (it = node.next) { - hash.update(node.data.basename); - hash.update(node.data.bytes); - hash.update("|"); + for (wf.files.items) |file| { + man.hash.addBytes(file.sub_path); + switch (file.contents) { + .bytes => |bytes| { + man.hash.addBytes(bytes); + }, + .copy => |file_source| { + _ = try man.addFile(file_source.getPath(wf.builder), null); + }, } } - var digest: [16]u8 = undefined; - hash.final(&digest); - var hash_basename: [digest.len * 2]u8 = undefined; - _ = std.fmt.bufPrint( - &hash_basename, - "{s}", - .{std.fmt.fmtSliceHexLower(&digest)}, - ) catch unreachable; - const output_dir = try self.builder.cache_root.join(self.builder.allocator, &.{ - "o", &hash_basename, - }); - var dir = fs.cwd().makeOpenPath(output_dir, .{}) catch |err| { - std.debug.print("unable to make path {s}: {s}\n", .{ output_dir, @errorName(err) }); - return err; - }; - defer dir.close(); - { - var it = self.files.first; - while (it) |node| : (it = node.next) { - dir.writeFile(node.data.basename, node.data.bytes) catch |err| { - std.debug.print("unable to write {s} into {s}: {s}\n", .{ - node.data.basename, - output_dir, - @errorName(err), - }); - return err; - }; - node.data.source.path = try fs.path.join( - self.builder.allocator, - &[_][]const u8{ output_dir, node.data.basename }, + if (man.hit() catch |err| failWithCacheError(man, err)) { + // Cache hit, skip writing file data. 
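+ // A previous run already produced identical contents, so each + // generated_file is pointed at <cache_root>/o/<digest>/<sub_path> + // instead of rewriting the files.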
+ const digest = man.final(); + for (wf.files.items) |file| { + file.generated_file.path = try wf.builder.cache_root.join( + wf.builder.allocator, + &.{ "o", &digest, file.sub_path }, ); } + return; + } + + const digest = man.final(); + const cache_path = "o" ++ fs.path.sep_str ++ digest; + + var cache_dir = wf.builder.cache_root.handle.makeOpenPath(cache_path, .{}) catch |err| { + std.debug.print("unable to make path {s}: {s}\n", .{ cache_path, @errorName(err) }); + return err; + }; + defer cache_dir.close(); + + for (wf.files.items) |file| { + const basename = fs.path.basename(file.sub_path); + if (fs.path.dirname(file.sub_path)) |dirname| { + // Create sub_path's directories inside the cache directory so the + // path recorded in generated_file below actually exists. + var dir = try cache_dir.makeOpenPath(dirname, .{}); + defer dir.close(); + try writeFile(wf, dir, file.contents, basename); + } else { + try writeFile(wf, cache_dir, file.contents, basename); + } + + file.generated_file.path = try wf.builder.cache_root.join( + wf.builder.allocator, + &.{ cache_path, file.sub_path }, + ); + } + + try man.writeManifest(); +} + +fn writeFile(wf: *WriteFileStep, dir: fs.Dir, contents: Contents, basename: []const u8) !void { + // TODO after landing concurrency PR, improve error reporting here + switch (contents) { + .bytes => |bytes| return dir.writeFile(basename, bytes), + .copy => |file_source| { + const source_path = file_source.getPath(wf.builder); + const prev_status = try fs.Dir.updateFile(fs.cwd(), source_path, dir, basename, .{}); + _ = prev_status; // TODO logging (affected by open PR regarding concurrency) + }, } } + +/// TODO consolidate this with the same function in RunStep? +/// Also properly deal with concurrency (see open PR) +fn failWithCacheError(man: std.Build.Cache.Manifest, err: anyerror) noreturn { + const i = man.failed_file_index orelse failWithSimpleError(err); + const pp = man.files.items[i].prefixed_path orelse failWithSimpleError(err); + const prefix = man.cache.prefixes()[pp.prefix].path orelse ""; + std.debug.print("{s}: {s}/{s}\n", .{ @errorName(err), prefix, pp.sub_path }); + std.process.exit(1); +} + +fn failWithSimpleError(err: anyerror) noreturn { + std.debug.print("{s}\n", .{@errorName(err)}); + std.process.exit(1); +} + +const std = @import("../std.zig"); +const Step = std.Build.Step; +const fs = std.fs; +const ArrayList = std.ArrayList; + +const WriteFileStep = @This(); diff --git a/lib/std/RingBuffer.zig b/lib/std/RingBuffer.zig new file mode 100644 index 0000000000..857775b5a0 --- /dev/null +++ b/lib/std/RingBuffer.zig @@ -0,0 +1,136 @@ +//! This ring buffer stores read and write indices while being able to utilise +//! the full backing slice by incrementing the indices modulo twice the slice's +//! length and reducing indices modulo the slice's length on slice access. This +//! means that whether the ring buffer is full or empty can be distinguished by +//! looking at the difference between the read and write indices without adding +//! an extra boolean flag or having to reserve a slot in the buffer. +//! +//! This ring buffer has not been implemented with thread safety in mind, and +//! therefore should not be assumed to be suitable for use cases involving +//! separate reader and writer threads. + +const Allocator = @import("std").mem.Allocator; +const assert = @import("std").debug.assert; + +const RingBuffer = @This(); + +data: []u8, +read_index: usize, +write_index: usize, + +pub const Error = error{Full}; + +/// Allocate a new `RingBuffer`; `deinit()` should be called to free the buffer.
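+/// A short usage sketch (`allocator` is any `std.mem.Allocator`): +/// +///     var rb = try RingBuffer.init(allocator, 8); +///     defer rb.deinit(allocator); +///     try rb.writeSlice("ab"); +///     const byte = rb.read().?; // 'a'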
+pub fn init(allocator: Allocator, capacity: usize) Allocator.Error!RingBuffer { + const bytes = try allocator.alloc(u8, capacity); + return RingBuffer{ + .data = bytes, + .write_index = 0, + .read_index = 0, + }; +} + +/// Free the data backing a `RingBuffer`; must be passed the same `Allocator` as +/// `init()`. +pub fn deinit(self: *RingBuffer, allocator: Allocator) void { + allocator.free(self.data); + self.* = undefined; +} + +/// Returns `index` modulo the length of the backing slice. +pub fn mask(self: RingBuffer, index: usize) usize { + return index % self.data.len; +} + +/// Returns `index` modulo twice the length of the backing slice. +pub fn mask2(self: RingBuffer, index: usize) usize { + return index % (2 * self.data.len); +} + +/// Write `byte` into the ring buffer. Returns `error.Full` if the ring +/// buffer is full. +pub fn write(self: *RingBuffer, byte: u8) Error!void { + if (self.isFull()) return error.Full; + self.writeAssumeCapacity(byte); +} + +/// Write `byte` into the ring buffer. If the ring buffer is full, the +/// oldest byte is overwritten. +pub fn writeAssumeCapacity(self: *RingBuffer, byte: u8) void { + self.data[self.mask(self.write_index)] = byte; + self.write_index = self.mask2(self.write_index + 1); +} + +/// Write `bytes` into the ring buffer. Returns `error.Full` if the ring +/// buffer does not have enough space, without writing any data. +pub fn writeSlice(self: *RingBuffer, bytes: []const u8) Error!void { + if (self.len() + bytes.len > self.data.len) return error.Full; + self.writeSliceAssumeCapacity(bytes); +} + +/// Write `bytes` into the ring buffer. If there is not enough space, older +/// bytes will be overwritten. +pub fn writeSliceAssumeCapacity(self: *RingBuffer, bytes: []const u8) void { + for (bytes) |b| self.writeAssumeCapacity(b); +} + +/// Consume a byte from the ring buffer and return it. Returns `null` if the +/// ring buffer is empty. +pub fn read(self: *RingBuffer) ?u8 { + if (self.isEmpty()) return null; + return self.readAssumeLength(); +} + +/// Consume a byte from the ring buffer and return it; asserts that the buffer +/// is not empty. +pub fn readAssumeLength(self: *RingBuffer) u8 { + assert(!self.isEmpty()); + const byte = self.data[self.mask(self.read_index)]; + self.read_index = self.mask2(self.read_index + 1); + return byte; +} + +/// Returns `true` if the ring buffer is empty and `false` otherwise. +pub fn isEmpty(self: RingBuffer) bool { + return self.write_index == self.read_index; +} + +/// Returns `true` if the ring buffer is full and `false` otherwise. +pub fn isFull(self: RingBuffer) bool { + return self.mask2(self.write_index + self.data.len) == self.read_index; +} + +/// Returns the number of bytes currently available to read from the ring buffer. +pub fn len(self: RingBuffer) usize { + const wrap_offset = 2 * self.data.len * @boolToInt(self.write_index < self.read_index); + const adjusted_write_index = self.write_index + wrap_offset; + return adjusted_write_index - self.read_index; +} + +/// A `Slice` represents a region of a ring buffer. The region is split into two +/// sections as the ring buffer data will not be contiguous if the desired +/// region wraps to the start of the backing slice. +pub const Slice = struct { + first: []u8, + second: []u8, +}; + +/// Returns a `Slice` for the region of the ring buffer starting at +/// `self.mask(start_unmasked)` with the specified length.
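+/// For example, with a 4-byte buffer whose backing slice holds "abcd", +/// `sliceAt(3, 2)` returns `.first = "d"` and `.second = "a"`, the requested +/// region having wrapped around to the start of the slice.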
+pub fn sliceAt(self: RingBuffer, start_unmasked: usize, length: usize) Slice { + assert(length <= self.data.len); + const slice1_start = self.mask(start_unmasked); + const slice1_end = @min(self.data.len, slice1_start + length); + const slice1 = self.data[slice1_start..slice1_end]; + const slice2 = self.data[0 .. length - slice1.len]; + return Slice{ + .first = slice1, + .second = slice2, + }; +} + +/// Returns a `Slice` for the last `length` bytes written to the ring buffer. +/// Does not check that any bytes have been written into the region. +pub fn sliceLast(self: RingBuffer, length: usize) Slice { + return self.sliceAt(self.write_index + self.data.len - length, length); +} diff --git a/lib/std/child_process.zig b/lib/std/child_process.zig index 07dd1f27f5..c3bd53b880 100644 --- a/lib/std/child_process.zig +++ b/lib/std/child_process.zig @@ -197,6 +197,32 @@ pub const ChildProcess = struct { stderr: []u8, }; + /// Collect the output from the process's stdout and stderr. Will return once all output + /// has been collected. This does not mean that the process has ended. `wait` should still + /// be called to wait for and clean up the process. + /// + /// The process must be started with stdout_behavior and stderr_behavior == .Pipe + pub fn collectOutput( + child: ChildProcess, + stdout: *std.ArrayList(u8), + stderr: *std.ArrayList(u8), + max_output_bytes: usize, + ) !void { + debug.assert(child.stdout_behavior == .Pipe); + debug.assert(child.stderr_behavior == .Pipe); + if (builtin.os.tag == .haiku) { + const stdout_in = child.stdout.?.reader(); + const stderr_in = child.stderr.?.reader(); + + try stdout_in.readAllArrayList(stdout, max_output_bytes); + try stderr_in.readAllArrayList(stderr, max_output_bytes); + } else if (builtin.os.tag == .windows) { + try collectOutputWindows(child, stdout, stderr, max_output_bytes); + } else { + try collectOutputPosix(child, stdout, stderr, max_output_bytes); + } + } + fn collectOutputPosix( child: ChildProcess, stdout: *std.ArrayList(u8), @@ -297,8 +323,12 @@ pub const ChildProcess = struct { } } - fn collectOutputWindows(child: ChildProcess, outs: [2]*std.ArrayList(u8), max_output_bytes: usize) !void { + fn collectOutputWindows(child: ChildProcess, stdout: *std.ArrayList(u8), stderr: *std.ArrayList(u8), max_output_bytes: usize) !void { const bump_amt = 512; + const outs = [_]*std.ArrayList(u8){ + stdout, + stderr, + }; const handles = [_]windows.HANDLE{ child.stdout.?.handle, child.stderr.?.handle, @@ -391,24 +421,6 @@ pub const ChildProcess = struct { child.env_map = args.env_map; child.expand_arg0 = args.expand_arg0; - try child.spawn(); - - if (builtin.os.tag == .haiku) { - const stdout_in = child.stdout.?.reader(); - const stderr_in = child.stderr.?.reader(); - - const stdout = try stdout_in.readAllAlloc(args.allocator, args.max_output_bytes); - errdefer args.allocator.free(stdout); - const stderr = try stderr_in.readAllAlloc(args.allocator, args.max_output_bytes); - errdefer args.allocator.free(stderr); - - return ExecResult{ - .term = try child.wait(), - .stdout = stdout, - .stderr = stderr, - }; - } - var stdout = std.ArrayList(u8).init(args.allocator); var stderr = std.ArrayList(u8).init(args.allocator); errdefer { @@ -416,11 +428,8 @@ pub const ChildProcess = struct { stderr.deinit(); } - if (builtin.os.tag == .windows) { - try collectOutputWindows(child, [_]*std.ArrayList(u8){ &stdout, &stderr }, args.max_output_bytes); - } else { - try collectOutputPosix(child, &stdout, &stderr, args.max_output_bytes); - } + try child.spawn(); + try 
child.collectOutput(&stdout, &stderr, args.max_output_bytes); return ExecResult{ .term = try child.wait(), diff --git a/lib/std/compress.zig b/lib/std/compress.zig index 9af1b30259..7e81d9deba 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -6,6 +6,7 @@ pub const lzma = @import("compress/lzma.zig"); pub const lzma2 = @import("compress/lzma2.zig"); pub const xz = @import("compress/xz.zig"); pub const zlib = @import("compress/zlib.zig"); +pub const zstd = @import("compress/zstandard.zig"); pub fn HashedReader( comptime ReaderType: anytype, @@ -44,4 +45,5 @@ test { _ = lzma2; _ = xz; _ = zlib; + _ = zstd; } diff --git a/lib/std/compress/testdata/rfc8478.txt b/lib/std/compress/testdata/rfc8478.txt new file mode 100644 index 0000000000..e4ac22a302 --- /dev/null +++ b/lib/std/compress/testdata/rfc8478.txt @@ -0,0 +1,3027 @@ + + + + + + +Internet Engineering Task Force (IETF) Y. Collet +Request for Comments: 8478 M. Kucherawy, Ed. +Category: Informational Facebook +ISSN: 2070-1721 October 2018 + + + Zstandard Compression and the application/zstd Media Type + +Abstract + + Zstandard, or "zstd" (pronounced "zee standard"), is a data + compression mechanism. This document describes the mechanism and + registers a media type and content encoding to be used when + transporting zstd-compressed content via Multipurpose Internet Mail + Extensions (MIME). + + Despite use of the word "standard" as part of its name, readers are + advised that this document is not an Internet Standards Track + specification; it is being published for informational purposes only. + +Status of This Memo + + This document is not an Internet Standards Track specification; it is + published for informational purposes. + + This document is a product of the Internet Engineering Task Force + (IETF). It represents the consensus of the IETF community. It has + received public review and has been approved for publication by the + Internet Engineering Steering Group (IESG). Not all documents + approved by the IESG are candidates for any level of Internet + Standard; see Section 2 of RFC 7841. + + Information about the current status of this document, any errata, + and how to provide feedback on it may be obtained at + https://www.rfc-editor.org/info/rfc8478. + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 1] + +RFC 8478 application/zstd October 2018 + + +Copyright Notice + + Copyright (c) 2018 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (https://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 2] + +RFC 8478 application/zstd October 2018 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 4 + 2. Definitions . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 3. Compression Algorithm . . . . . . . . . . . . . . . . . . . . 5 + 3.1. Frames . . . . . . . . . . . . . . . . . . . . . . . . . 6 + 3.1.1. Zstandard Frames . 
. . . . . . . . . . . . . . . . . 6 + 3.1.1.1. Frame Header . . . . . . . . . . . . . . . . . . 7 + 3.1.1.2. Blocks . . . . . . . . . . . . . . . . . . . . . 12 + 3.1.1.3. Compressed Blocks . . . . . . . . . . . . . . . . 14 + 3.1.1.4. Sequence Execution . . . . . . . . . . . . . . . 28 + 3.1.1.5. Repeat Offsets . . . . . . . . . . . . . . . . . 29 + 3.1.2. Skippable Frames . . . . . . . . . . . . . . . . . . 30 + 4. Entropy Encoding . . . . . . . . . . . . . . . . . . . . . . 30 + 4.1. FSE . . . . . . . . . . . . . . . . . . . . . . . . . . . 31 + 4.1.1. FSE Table Description . . . . . . . . . . . . . . . . 31 + 4.2. Huffman Coding . . . . . . . . . . . . . . . . . . . . . 34 + 4.2.1. Huffman Tree Description . . . . . . . . . . . . . . 35 + 4.2.1.1. Huffman Tree Header . . . . . . . . . . . . . . . 36 + 4.2.1.2. FSE Compression of Huffman Weights . . . . . . . 37 + 4.2.1.3. Conversion from Weights to Huffman Prefix Codes . 38 + 4.2.2. Huffman-Coded Streams . . . . . . . . . . . . . . . . 39 + 5. Dictionary Format . . . . . . . . . . . . . . . . . . . . . . 40 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 42 + 6.1. The 'application/zstd' Media Type . . . . . . . . . . . . 42 + 6.2. Content Encoding . . . . . . . . . . . . . . . . . . . . 43 + 6.3. Dictionaries . . . . . . . . . . . . . . . . . . . . . . 43 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 43 + 8. Implementation Status . . . . . . . . . . . . . . . . . . . . 44 + 9. References . . . . . . . . . . . . . . . . . . . . . . . . . 45 + 9.1. Normative References . . . . . . . . . . . . . . . . . . 45 + 9.2. Informative References . . . . . . . . . . . . . . . . . 45 + Appendix A. Decoding Tables for Predefined Codes . . . . . . . . 46 + A.1. Literal Length Code Table . . . . . . . . . . . . . . . . 46 + A.2. Match Length Code Table . . . . . . . . . . . . . . . . . 49 + A.3. Offset Code Table . . . . . . . . . . . . . . . . . . . . 52 + Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . . . 53 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 54 + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 3] + +RFC 8478 application/zstd October 2018 + + +1. Introduction + + Zstandard, or "zstd" (pronounced "zee standard"), is a data + compression mechanism, akin to gzip [RFC1952]. + + Despite use of the word "standard" as part of its name, readers are + advised that this document is not an Internet Standards Track + specification; it is being published for informational purposes only. + + This document describes the Zstandard format. Also, to enable the + transport of a data object compressed with Zstandard, this document + registers a media type that can be used to identify such content when + it is used in a payload encoded using Multipurpose Internet Mail + Extensions (MIME). + +2. Definitions + + Some terms used elsewhere in this document are defined here for + clarity. + + uncompressed: Describes an arbitrary set of bytes in their original + form, prior to being subjected to compression. + + compress, compression: The act of processing a set of bytes via the + compression mechanism described here. + + compressed: Describes the result of passing a set of bytes through + this mechanism. The original input has thus been compressed. + + decompress, decompression: The act of processing a set of bytes + through the inverse of the compression mechanism described here, + in an attempt to recover the original set of bytes prior to + compression. 
+ + decompressed: Describes the result of passing a set of bytes through + the reverse of this mechanism. When this is successful, the + decompressed payload and the uncompressed payload are + indistinguishable. + + encode: The process of translating data from one form to another; + this may include compression or it may refer to other translations + done as part of this specification. + + decode: The reverse of "encode"; describes a process of reversing a + prior encoding to recover the original content. + + + + + + +Collet & Kucherawy Informational [Page 4] + +RFC 8478 application/zstd October 2018 + + + frame: Content compressed by Zstandard is transformed into a + Zstandard frame. Multiple frames can be appended into a single + file or stream. A frame is completely independent, has a defined + beginning and end, and has a set of parameters that tells the + decoder how to decompress it. + + block: A frame encapsulates one or multiple blocks. Each block + contains arbitrary content, which is described by its header, and + has a guaranteed maximum content size that depends upon frame + parameters. Unlike frames, each block depends on previous blocks + for proper decoding. However, each block can be decompressed + without waiting for its successor, allowing streaming operations. + + natural order: A sequence or ordering of objects or values that is + typical of that type of object or value. A set of unique + integers, for example, is in "natural order" if when progressing + from one element in the set or sequence to the next, there is + never a decrease in value. + + The naming convention for identifiers within the specification is + Mixed_Case_With_Underscores. Identifiers inside square brackets + indicate that the identifier is optional in the presented context. + +3. Compression Algorithm + + This section describes the Zstandard algorithm. + + The purpose of this document is to define a lossless compressed data + format that is a) independent of the CPU type, operating system, file + system, and character set and b) is suitable for file compression and + pipe and streaming compression, using the Zstandard algorithm. The + text of the specification assumes a basic background in programming + at the level of bits and other primitive data representations. + + The data can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a priori + bounded amount of intermediate storage, and hence can be used in data + communications. The format uses the Zstandard compression method, + and an optional xxHash-64 checksum method [XXHASH], for detection of + data corruption. + + The data format defined by this specification does not attempt to + allow random access to compressed data. + + Unless otherwise indicated below, a compliant compressor must produce + data sets that conform to the specifications presented here. + However, it does not need to support all options. + + + + +Collet & Kucherawy Informational [Page 5] + +RFC 8478 application/zstd October 2018 + + + A compliant decompressor must be able to decompress at least one + working set of parameters that conforms to the specifications + presented here. It may also ignore informative fields, such as the + checksum. Whenever it does not support a parameter defined in the + compressed stream, it must produce a non-ambiguous error code and + associated error message explaining which parameter is unsupported. 
+ + This specification is intended for use by implementers of software to + compress data into Zstandard format and/or decompress data from + Zstandard format. The Zstandard format is supported by an open + source reference implementation, written in portable C, and available + at [ZSTD]. + +3.1. Frames + + Zstandard compressed data is made up of one or more frames. Each + frame is independent and can be decompressed independently of other + frames. The decompressed content of multiple concatenated frames is + the concatenation of each frame's decompressed content. + + There are two frame formats defined for Zstandard: Zstandard frames + and skippable frames. Zstandard frames contain compressed data, + while skippable frames contain custom user metadata. + +3.1.1. Zstandard Frames + + The structure of a single Zstandard frame is as follows: + + +--------------------+------------+ + | Magic_Number | 4 bytes | + +--------------------+------------+ + | Frame_Header | 2-14 bytes | + +--------------------+------------+ + | Data_Block | n bytes | + +--------------------+------------+ + | [More Data_Blocks] | | + +--------------------+------------+ + | [Content_Checksum] | 0-4 bytes | + +--------------------+------------+ + + Magic_Number: 4 bytes, little-endian format. Value: 0xFD2FB528. + + Frame_Header: 2 to 14 bytes, detailed in Section 3.1.1.1. + + Data_Block: Detailed in Section 3.1.1.2. This is where data + appears. + + + + + +Collet & Kucherawy Informational [Page 6] + +RFC 8478 application/zstd October 2018 + + + Content_Checksum: An optional 32-bit checksum, only present if + Content_Checksum_Flag is set. The content checksum is the result + of the XXH64() hash function [XXHASH] digesting the original + (decoded) data as input, and a seed of zero. The low 4 bytes of + the checksum are stored in little-endian format. + + The magic number was selected to be less probable to find at the + beginning of an arbitrary file. It avoids trivial patterns (0x00, + 0xFF, repeated bytes, increasing bytes, etc.), contains byte values + outside of ASCII range, and doesn't map into UTF-8 space, all of + which reduce the likelihood of its appearance at the top of a text + file. + +3.1.1.1. Frame Header + + The frame header has a variable size, with a minimum of 2 bytes and + up to 14 bytes depending on optional parameters. The structure of + Frame_Header is as follows: + + +-------------------------+-----------+ + | Frame_Header_Descriptor | 1 byte | + +-------------------------+-----------+ + | [Window_Descriptor] | 0-1 byte | + +-------------------------+-----------+ + | [Dictionary_ID] | 0-4 bytes | + +-------------------------+-----------+ + | [Frame_Content_Size] | 0-8 bytes | + +-------------------------+-----------+ + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 7] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1. Frame_Header_Descriptor + + The first header's byte is called the Frame_Header_Descriptor. It + describes which other fields are present. Decoding this byte is + enough to tell the size of Frame_Header. 
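+ + As an illustrative (non-normative) example: a Frame_Header_Descriptor + byte of 0xC4 (binary 1100_0100) gives Frame_Content_Size_Flag = 3 (an + 8-byte Frame_Content_Size field), Single_Segment_Flag = 0 (so a 1-byte + Window_Descriptor follows), Content_Checksum_Flag = 1, and + Dictionary_ID_Flag = 0 (no Dictionary_ID), for a total Frame_Header + size of 1 + 1 + 0 + 8 = 10 bytes.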
+ + +------------+-------------------------+ + | Bit Number | Field Name | + +------------+-------------------------+ + | 7-6 | Frame_Content_Size_Flag | + +------------+-------------------------+ + | 5 | Single_Segment_Flag | + +------------+-------------------------+ + | 4 | (unused) | + +------------+-------------------------+ + | 3 | (reserved) | + +------------+-------------------------+ + | 2 | Content_Checksum_Flag | + +------------+-------------------------+ + | 1-0 | Dictionary_ID_Flag | + +------------+-------------------------+ + + In this table, bit 7 is the highest bit, while bit 0 is the lowest + one. + +3.1.1.1.1.1. Frame_Content_Size_Flag + + This is a 2-bit flag (equivalent to Frame_Header_Descriptor right- + shifted 6 bits) specifying whether Frame_Content_Size (the + decompressed data size) is provided within the header. Flag_Value + provides FCS_Field_Size, which is the number of bytes used by + Frame_Content_Size according to the following table: + + +----------------+--------+---+---+---+ + | Flag_Value | 0 | 1 | 2 | 3 | + +----------------+--------+---+---+---+ + | FCS_Field_Size | 0 or 1 | 2 | 4 | 8 | + +----------------+--------+---+---+---+ + + When Flag_Value is 0, FCS_Field_Size depends on Single_Segment_Flag: + If Single_Segment_Flag is set, FCS_Field_Size is 1. Otherwise, + FCS_Field_Size is 0; Frame_Content_Size is not provided. + + + + + + + + + +Collet & Kucherawy Informational [Page 8] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1.2. Single_Segment_Flag + + If this flag is set, data must be regenerated within a single + continuous memory segment. + + In this case, Window_Descriptor byte is skipped, but + Frame_Content_Size is necessarily present. As a consequence, the + decoder must allocate a memory segment of size equal or larger than + Frame_Content_Size. + + In order to protect the decoder from unreasonable memory + requirements, a decoder is allowed to reject a compressed frame that + requests a memory size beyond the decoder's authorized range. + + For broader compatibility, decoders are recommended to support memory + sizes of at least 8 MB. This is only a recommendation; each decoder + is free to support higher or lower limits, depending on local + limitations. + +3.1.1.1.1.3. Unused Bit + + A decoder compliant with this specification version shall not + interpret this bit. It might be used in a future version, to signal + a property that is not mandatory to properly decode the frame. An + encoder compliant with this specification must set this bit to zero. + +3.1.1.1.1.4. Reserved Bit + + This bit is reserved for some future feature. Its value must be + zero. A decoder compliant with this specification version must + ensure it is not set. This bit may be used in a future revision, to + signal a feature that must be interpreted to decode the frame + correctly. + +3.1.1.1.1.5. Content_Checksum_Flag + + If this flag is set, a 32-bit Content_Checksum will be present at the + frame's end. See the description of Content_Checksum above. + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 9] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1.6. Dictionary_ID_Flag + + This is a 2-bit flag (= Frame_Header_Descriptor & 0x3) indicating + whether a dictionary ID is provided within the header. 
It also + specifies the size of this field as DID_Field_Size: + + +----------------+---+---+---+---+ + | Flag_Value | 0 | 1 | 2 | 3 | + +----------------+---+---+---+---+ + | DID_Field_Size | 0 | 1 | 2 | 4 | + +----------------+---+---+---+---+ + +3.1.1.1.2. Window Descriptor + + This provides guarantees about the minimum memory buffer required to + decompress a frame. This information is important for decoders to + allocate enough memory. + + The Window_Descriptor byte is optional. When Single_Segment_Flag is + set, Window_Descriptor is not present. In this case, Window_Size is + Frame_Content_Size, which can be any value from 0 to 2^64-1 bytes (16 + ExaBytes). + + +------------+----------+----------+ + | Bit Number | 7-3 | 2-0 | + +------------+----------+----------+ + | Field Name | Exponent | Mantissa | + +------------+----------+----------+ + + The minimum memory buffer size is called Window_Size. It is + described by the following formulae: + + windowLog = 10 + Exponent; + windowBase = 1 << windowLog; + windowAdd = (windowBase / 8) * Mantissa; + Window_Size = windowBase + windowAdd; + + The minimum Window_Size is 1 KB. The maximum Window_Size is (1<<41) + + 7*(1<<38) bytes, which is 3.75 TB. + + In general, larger Window_Size values tend to improve the compression + ratio, but at the cost of increased memory usage. + + To properly decode compressed data, a decoder will need to allocate a + buffer of at least Window_Size bytes. + + + + + + +Collet & Kucherawy Informational [Page 10] + +RFC 8478 application/zstd October 2018 + + + In order to protect decoders from unreasonable memory requirements, a + decoder is allowed to reject a compressed frame that requests a + memory size beyond decoder's authorized range. + + For improved interoperability, it's recommended for decoders to + support values of Window_Size up to 8 MB and for encoders not to + generate frames requiring a Window_Size larger than 8 MB. It's + merely a recommendation though, and decoders are free to support + larger or lower limits, depending on local limitations. + +3.1.1.1.3. Dictionary_ID + + This is a variable size field, which contains the ID of the + dictionary required to properly decode the frame. This field is + optional. When it's not present, it's up to the decoder to know + which dictionary to use. + + Dictionary_ID field size is provided by DID_Field_Size. + DID_Field_Size is directly derived from the value of + Dictionary_ID_Flag. One byte can represent an ID 0-255; 2 bytes can + represent an ID 0-65535; 4 bytes can represent an ID 0-4294967295. + Format is little-endian. + + It is permitted to represent a small ID (for example, 13) with a + large 4-byte dictionary ID, even if it is less efficient. + + Within private environments, any dictionary ID can be used. However, + for frames and dictionaries distributed in public space, + Dictionary_ID must be attributed carefully. The following ranges are + reserved for use only with dictionaries that have been registered + with IANA (see Section 6.3): + + low range: <= 32767 + high range: >= (1 << 31) + + Any other value for Dictionary_ID can be used by private arrangement + between participants. + + Any payload presented for decompression that references an + unregistered reserved dictionary ID results in an error. + + + + + + + + + + + +Collet & Kucherawy Informational [Page 11] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.4. Frame Content Size + + This is the original (uncompressed) size. This information is + optional. 
Frame_Content_Size uses a variable number of bytes, + provided by FCS_Field_Size. FCS_Field_Size is provided by the value + of Frame_Content_Size_Flag. FCS_Field_Size can be equal to 0 (not + present), 1, 2, 4, or 8 bytes. + + +----------------+--------------+ + | FCS Field Size | Range | + +----------------+--------------+ + | 0 | unknown | + +----------------+--------------+ + | 1 | 0 - 255 | + +----------------+--------------+ + | 2 | 256 - 65791 | + +----------------+--------------+ + | 4 | 0 - 2^32 - 1 | + +----------------+--------------+ + | 8 | 0 - 2^64 - 1 | + +----------------+--------------+ + + Frame_Content_Size format is little-endian. When FCS_Field_Size is + 1, 4, or 8 bytes, the value is read directly. When FCS_Field_Size is + 2, the offset of 256 is added. It's allowed to represent a small + size (for example 18) using any compatible variant. + +3.1.1.2. Blocks + + After Magic_Number and Frame_Header, there are some number of blocks. + Each frame must have at least 1 block, but there is no upper limit on + the number of blocks per frame. + + The structure of a block is as follows: + + +--------------+---------------+ + | Block_Header | Block_Content | + +--------------+---------------+ + | 3 bytes | n bytes | + +--------------+---------------+ + + + + + + + + + + + +Collet & Kucherawy Informational [Page 12] + +RFC 8478 application/zstd October 2018 + + + Block_Header uses 3 bytes, written using little-endian convention. + It contains three fields: + + +------------+------------+------------+ + | Last_Block | Block_Type | Block_Size | + +------------+------------+------------+ + | bit 0 | bits 1-2 | bits 3-23 | + +------------+------------+------------+ + +3.1.1.2.1. Last_Block + + The lowest bit (Last_Block) signals whether this block is the last + one. The frame will end after this last block. It may be followed + by an optional Content_Checksum (see Section 3.1.1). + +3.1.1.2.2. Block_Type + + The next 2 bits represent the Block_Type. There are four block + types: + + +-----------+------------------+ + | Value | Block_Type | + +-----------+------------------+ + | 0 | Raw_Block | + +-----------+------------------+ + | 1 | RLE_Block | + +-----------+------------------+ + | 2 | Compressed_Block | + +-----------+------------------+ + | 3 | Reserved | + +-----------+------------------+ + + Raw_Block: This is an uncompressed block. Block_Content contains + Block_Size bytes. + + RLE_Block: This is a single byte, repeated Block_Size times. + Block_Content consists of a single byte. On the decompression + side, this byte must be repeated Block_Size times. + + Compressed_Block: This is a compressed block as described in + Section 3.1.1.3. Block_Size is the length of Block_Content, + namely the compressed data. The decompressed size is not known, + but its maximum possible value is guaranteed (see below). + + Reserved: This is not a block. This value cannot be used with the + current specification. If such a value is present, it is + considered to be corrupt data. + + + + +Collet & Kucherawy Informational [Page 13] + +RFC 8478 application/zstd October 2018 + + +3.1.1.2.3. Block_Size + + The upper 21 bits of Block_Header represent the Block_Size. + Block_Size is the size of the block excluding the header. A block + can contain any number of bytes (even zero), up to + Block_Maximum_Decompressed_Size, which is the smallest of: + + o Window_Size + + o 128 KB + + A Compressed_Block has the extra restriction that Block_Size is + always strictly less than the decompressed size. 
If this condition + cannot be respected, the block must be sent uncompressed instead + (i.e., treated as a Raw_Block). + +3.1.1.3. Compressed Blocks + + To decompress a compressed block, the compressed size must be + provided from the Block_Size field within Block_Header. + + A compressed block consists of two sections: a Literals + Section (Section 3.1.1.3.1) and a + Sequences_Section (Section 3.1.1.3.2). The results of the two + sections are then combined to produce the decompressed data in + Sequence Execution (Section 3.1.1.4). + + To decode a compressed block, the following elements are necessary: + + o Previous decoded data, up to a distance of Window_Size, or the + beginning of the Frame, whichever is smaller. Single_Segment_Flag + will be set in the latter case. + + o List of "recent offsets" from the previous Compressed_Block. + + o The previous Huffman tree, required by Treeless_Literals_Block + type. + + o Previous Finite State Entropy (FSE) decoding tables, required by + Repeat_Mode, for each symbol type (literals lengths, match + lengths, offsets). + + Note that decoding tables are not always from the previous + Compressed_Block: + + o Every decoding table can come from a dictionary. + + + + + +Collet & Kucherawy Informational [Page 14] + +RFC 8478 application/zstd October 2018 + + + o The Huffman tree comes from the previous + Compressed_Literals_Block. + +3.1.1.3.1. Literals_Section_Header + + All literals are regrouped in the first part of the block. They can + be decoded first and then copied during Sequence Execution (see + Section 3.1.1.4), or they can be decoded on the flow during Sequence + Execution. + + Literals can be stored uncompressed or compressed using Huffman + prefix codes. When compressed, an optional tree description can be + present, followed by 1 or 4 streams. + + +----------------------------+ + | Literals_Section_Header | + +----------------------------+ + | [Huffman_Tree_Description] | + +----------------------------+ + | [Jump_Table] | + +----------------------------+ + | Stream_1 | + +----------------------------+ + | [Stream_2] | + +----------------------------+ + | [Stream_3] | + +----------------------------+ + | [Stream_4] | + +----------------------------+ + +3.1.1.3.1.1. Literals_Section_Header + + This field describes how literals are packed. It's a byte-aligned + variable-size bit field, ranging from 1 to 5 bytes, using little- + endian convention. + + +---------------------+-----------+ + | Literals_Block_Type | 2 bits | + +---------------------+-----------+ + | Size_Format | 1-2 bits | + +---------------------+-----------+ + | Regenerated_Size | 5-20 bits | + +---------------------+-----------+ + | [Compressed_Size] | 0-18 bits | + +---------------------+-----------+ + + In this representation, bits at the top are the lowest bits. + + + + +Collet & Kucherawy Informational [Page 15] + +RFC 8478 application/zstd October 2018 + + + The Literals_Block_Type field uses the two lowest bits of the first + byte, describing four different block types: + + +---------------------------+-------+ + | Literals_Block_Type | Value | + +---------------------------+-------+ + | Raw_Literals_Block | 0 | + +---------------------------+-------+ + | RLE_Literals_Block | 1 | + +---------------------------+-------+ + | Compressed_Literals_Block | 2 | + +---------------------------+-------+ + | Treeless_Literals_Block | 3 | + +---------------------------+-------+ + + Raw_Literals_Block: Literals are stored uncompressed. 
+ Literals_Section_Content is Regenerated_Size. + + RLE_Literals_Block: Literals consist of a single-byte value repeated + Regenerated_Size times. Literals_Section_Content is 1. + + Compressed_Literals_Block: This is a standard Huffman-compressed + block, starting with a Huffman tree description. See details + below. Literals_Section_Content is Compressed_Size. + + Treeless_Literals_Block: This is a Huffman-compressed block, using + the Huffman tree from the previous Compressed_Literals_Block, or a + dictionary if there is no previous Huffman-compressed literals + block. Huffman_Tree_Description will be skipped. Note that if + this mode is triggered without any previous Huffman-table in the + frame (or dictionary, per Section 5), it should be treated as data + corruption. Literals_Section_Content is Compressed_Size. + + The Size_Format is divided into two families: + + o For Raw_Literals_Block and RLE_Literals_Block, it's only necessary + to decode Regenerated_Size. There is no Compressed_Size field. + + o For Compressed_Block and Treeless_Literals_Block, it's required to + decode both Compressed_Size and Regenerated_Size (the decompressed + size). It's also necessary to decode the number of streams (1 or + 4). + + For values spanning several bytes, the convention is little endian. + + Size_Format for Raw_Literals_Block and RLE_Literals_Block uses 1 or 2 + bits. Its value is (Literals_Section_Header[0]>>2) & 0x3. + + + + +Collet & Kucherawy Informational [Page 16] + +RFC 8478 application/zstd October 2018 + + + Size_Format == 00 or 10: Size_Format uses 1 bit. Regenerated_Size + uses 5 bits (value 0-31). Literals_Section_Header uses 1 byte. + Regenerated_Size = Literal_Section_Header[0]>>3. + + Size_Format == 01: Size_Format uses 2 bits. Regenerated_Size uses + 12 bits (values 0-4095). Literals_Section_Header uses 2 bytes. + Regenerated_Size = (Literals_Section_Header[0]>>4) + + (Literals_Section_Header[1]<<4). + + Size_Format == 11: Size_Format uses 2 bits. Regenerated_Size uses + 20 bits (values 0-1048575). Literals_Section_Header uses 3 bytes. + Regenerated_Size = (Literals_Section_Header[0]>>4) + + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12) + + Only Stream_1 is present for these cases. Note that it is permitted + to represent a short value (for example, 13) using a long format, + even if it's less efficient. + + Size_Format for Compressed_Literals_Block and Treeless_Literals_Block + always uses 2 bits. + + Size_Format == 00: A single stream. Both Regenerated_Size and + Compressed_Size use 10 bits (values 0-1023). + Literals_Section_Header uses 3 bytes. + + Size_Format == 01: 4 streams. Both Regenerated_Size and + Compressed_Size use 10 bits (values 0-1023). + Literals_Section_Header uses 3 bytes. + + Size_Format == 10: 4 streams. Both Regenerated_Size and + Compressed_Size use 14 bits (values 0-16383). + Literals_Section_Header uses 4 bytes. + + Size_Format == 11: 4 streams. Both Regenerated_Size and + Compressed_Size use 18 bits (values 0-262143). + Literals_Section_Header uses 5 bytes. + + Both the Compressed_Size and Regenerated_Size fields follow little- + endian convention. Note that Compressed_Size includes the size of + the Huffman_Tree_Description when it is present. + +3.1.1.3.1.2. Raw_Literals_Block + + The data in Stream_1 is Regenerated_Size bytes long. It contains the + raw literals data to be used during Sequence Execution + (Section 3.1.1.3.2). 
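+
+   The Literals_Section_Header packing rules above are compact but
+   easy to misread.  The following non-normative C sketch parses one
+   according to this section; the struct layout, names, and error
+   convention are illustrative only, and validation is reduced to a
+   length check.
+
+   #include <stdint.h>
+   #include <stddef.h>
+
+   typedef struct {
+       int      block_type;       /* 0 Raw, 1 RLE, 2 Compressed,
+                                     3 Treeless                    */
+       uint32_t regenerated_size;
+       uint32_t compressed_size;  /* types 2 and 3 only            */
+       size_t   header_size;      /* 1 to 5 bytes                  */
+   } LiteralsHeader;
+
+   static int parse_literals_header(const uint8_t *src, size_t len,
+                                    LiteralsHeader *h)
+   {
+       if (len < 1) return -1;
+       h->block_type = src[0] & 0x3;
+       int size_format = (src[0] >> 2) & 0x3;
+       h->compressed_size = 0;
+       if (h->block_type <= 1) {           /* Raw or RLE literals */
+           switch (size_format) {
+           case 0: case 2:                 /* 1-bit Size_Format */
+               h->regenerated_size = src[0] >> 3;
+               h->header_size = 1;
+               break;
+           case 1:
+               if (len < 2) return -1;
+               h->regenerated_size = (src[0] >> 4)
+                                   + ((uint32_t)src[1] << 4);
+               h->header_size = 2;
+               break;
+           default:                        /* size_format == 3 */
+               if (len < 3) return -1;
+               h->regenerated_size = (src[0] >> 4)
+                                   + ((uint32_t)src[1] << 4)
+                                   + ((uint32_t)src[2] << 12);
+               h->header_size = 3;
+               break;
+           }
+       } else {                            /* Compressed or Treeless */
+           static const int nbits[4] = { 10, 10, 14, 18 };
+           static const int hsize[4] = {  3,  3,  4,  5 };
+           int n = hsize[size_format];
+           if (len < (size_t)n) return -1;
+           uint64_t v = 0;                 /* little-endian bit field */
+           for (int i = 0; i < n; i++)
+               v |= (uint64_t)src[i] << (8 * i);
+           v >>= 4;                        /* drop type + format bits */
+           uint32_t mask = (1u << nbits[size_format]) - 1;
+           h->regenerated_size = (uint32_t)(v & mask);
+           h->compressed_size =
+               (uint32_t)((v >> nbits[size_format]) & mask);
+           h->header_size = (size_t)n;
+       }
+       return 0;
+   }
+
+   Note that Size_Format values 00 and 10 describe one single-byte
+   layout: the second format bit is the low bit of Regenerated_Size.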
+ + + + + +Collet & Kucherawy Informational [Page 17] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.1.3. RLE_Literals_Block + + Stream_1 consists of a single byte that should be repeated + Regenerated_Size times to generate the decoded literals. + +3.1.1.3.1.4. Compressed_Literals_Block and Treeless_Literals_Block + + Both of these modes contain Huffman-encoded data. For + Treeless_Literals_Block, the Huffman table comes from the previously + compressed literals block, or from a dictionary; see Section 5. + +3.1.1.3.1.5. Huffman_Tree_Description + + This section is only present when the Literals_Block_Type type is + Compressed_Literals_Block (2). The format of + Huffman_Tree_Description can be found in Section 4.2.1. The size of + Huffman_Tree_Description is determined during the decoding process. + It must be used to determine where streams begin. + + Total_Streams_Size = Compressed_Size + - Huffman_Tree_Description_Size + +3.1.1.3.1.6. Jump_Table + + The Jump_Table is only present when there are 4 Huffman-coded + streams. + + (Reminder: Huffman-compressed data consists of either 1 or 4 Huffman- + coded streams.) + + If only 1 stream is present, it is a single bitstream occupying the + entire remaining portion of the literals block, encoded as described + within Section 4.2.2. + + If there are 4 streams, Literals_Section_Header only provides enough + information to know the decompressed and compressed sizes of all 4 + streams combined. The decompressed size of each stream is equal to + (Regenerated_Size+3)/4, except for the last stream, which may be up + to 3 bytes smaller, to reach a total decompressed size as specified + in Regenerated_Size. + + The compressed size of each stream is provided explicitly in the + Jump_Table. The Jump_Table is 6 bytes long and consists of three + 2-byte little-endian fields, describing the compressed sizes of the + first 3 streams. Stream4_Size is computed from Total_Streams_Size + minus sizes of other streams. + + + + + +Collet & Kucherawy Informational [Page 18] + +RFC 8478 application/zstd October 2018 + + + Stream4_Size = Total_Streams_Size - 6 + - Stream1_Size - Stream2_Size + - Stream3_Size + + Note that if Stream1_Size + Stream2_Size + Stream3_Size exceeds + Total_Streams_Size, the data are considered corrupted. + + Each of these 4 bitstreams is then decoded independently as a + Huffman-Coded stream, as described in Section 4.2.2. + +3.1.1.3.2. Sequences_Section + + A compressed block is a succession of sequences. A sequence is a + literal copy command, followed by a match copy command. A literal + copy command specifies a length. It is the number of bytes to be + copied (or extracted) from the Literals Section. A match copy + command specifies an offset and a length. + + When all sequences are decoded, if there are literals left in the + literals section, these bytes are added at the end of the block. + + This is described in more detail in Section 3.1.1.4. + + The Sequences_Section regroups all symbols required to decode + commands. There are three symbol types: literals lengths, offsets, + and match lengths. They are encoded together, interleaved, in a + single "bitstream". + + The Sequences_Section starts by a header, followed by optional + probability tables for each symbol type, followed by the bitstream. + + Sequences_Section_Header + [Literals_Length_Table] + [Offset_Table] + [Match_Length_Table] + bitStream + + To decode the Sequences_Section, it's necessary to know its size. 
+ This size is deduced from the size of the Literals_Section: + Sequences_Section_Size = Block_Size - Literals_Section_Header - + Literals_Section_Content + + + + + + + + + + +Collet & Kucherawy Informational [Page 19] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.1. Sequences_Section_Header + + This header consists of two items: + + o Number_of_Sequences + + o Symbol_Compression_Modes + + Number_of_Sequences is a variable size field using between 1 and 3 + bytes. If the first byte is "byte0": + + o if (byte0 == 0): there are no sequences. The sequence section + stops here. Decompressed content is defined entirely as Literals + Section content. The FSE tables used in Repeat_Mode are not + updated. + + o if (byte0 < 128): Number_of_Sequences = byte0. Uses 1 byte. + + o if (byte0 < 255): Number_of_Sequences = ((byte0 - 128) << 8) + + byte1. Uses 2 bytes. + + o if (byte0 == 255): Number_of_Sequences = byte1 + (byte2 << 8) + + 0x7F00. Uses 3 bytes. + + Symbol_Compression_Modes is a single byte, defining the compression + mode of each symbol type. + + +-------------+----------------------+ + | Bit Number | Field Name | + +-------------+----------------------+ + | 7-6 | Literal_Lengths_Mode | + +-------------+----------------------+ + | 5-4 | Offsets_Mode | + +-------------+----------------------+ + | 3-2 | Match_Lengths_Mode | + +-------------+----------------------+ + | 1-0 | Reserved | + +-------------+----------------------+ + + The last field, Reserved, must be all zeroes. + + + + + + + + + + + +Collet & Kucherawy Informational [Page 20] + +RFC 8478 application/zstd October 2018 + + + Literals_Lengths_Mode, Offsets_Mode, and Match_Lengths_Mode define + the Compression_Mode of literals lengths, offsets, and match lengths + symbols, respectively. They follow the same enumeration: + + +-------+---------------------+ + | Value | Compression_Mode | + +-------+---------------------+ + | 0 | Predefined_Mode | + +-------+---------------------+ + | 1 | RLE_Mode | + +-------+---------------------+ + | 2 | FSE_Compressed_Mode | + +-------+---------------------+ + | 3 | Repeat_Mode | + +-------+---------------------+ + + Predefined_Mode: A predefined FSE (see Section 4.1) distribution + table is used, as defined in Section 3.1.1.3.2.2. No distribution + table will be present. + + RLE_Mode: The table description consists of a single byte, which + contains the symbol's value. This symbol will be used for all + sequences. + + FSE_Compressed_Mode: Standard FSE compression. A distribution table + will be present. The format of this distribution table is + described in Section 4.1.1. Note that the maximum allowed + accuracy log for literals length and match length tables is 9, and + the maximum accuracy log for the offsets table is 8. This mode + must not be used when only one symbol is present; RLE_Mode should + be used instead (although any other mode will work). + + Repeat_Mode: The table used in the previous Compressed_Block with + Number_Of_Sequences > 0 will be used again, or if this is the + first block, the table in the dictionary will be used. Note that + this includes RLE_Mode, so if Repeat_Mode follows RLE_Mode, the + same symbol will be repeated. It also includes Predefined_Mode, + in which case Repeat_Mode will have the same outcome as + Predefined_Mode. No distribution table will be present. If this + mode is used without any previous sequence table in the frame (or + dictionary; see Section 5) to repeat, this should be treated as + corruption. 
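+
+   A non-normative C sketch of the header decoding just described
+   (the function name and return convention are ours; it returns the
+   number of header bytes consumed, or -1 on error):
+
+   #include <stdint.h>
+   #include <stddef.h>
+
+   static int parse_sequences_header(const uint8_t *src, size_t len,
+                                     uint32_t *nb_seq, uint8_t *modes)
+   {
+       size_t pos = 0;
+       if (len < 1) return -1;
+       uint8_t byte0 = src[pos++];
+       if (byte0 == 0) {
+           *nb_seq = 0;      /* no sequences; the section stops here */
+           *modes = 0;
+           return (int)pos;
+       } else if (byte0 < 128) {
+           *nb_seq = byte0;
+       } else if (byte0 < 255) {
+           if (len < 2) return -1;
+           *nb_seq = ((uint32_t)(byte0 - 128) << 8) + src[pos++];
+       } else {              /* byte0 == 255 */
+           if (len < 3) return -1;
+           *nb_seq = src[pos] + ((uint32_t)src[pos + 1] << 8) + 0x7F00;
+           pos += 2;
+       }
+       if (len < pos + 1) return -1;
+       *modes = src[pos++];
+       if (*modes & 0x3) return -1;    /* Reserved must be zero */
+       /* Literals_Lengths_Mode = (*modes >> 6) & 3
+          Offsets_Mode          = (*modes >> 4) & 3
+          Match_Lengths_Mode    = (*modes >> 2) & 3 */
+       return (int)pos;
+   }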
+ + + + + + + + + +Collet & Kucherawy Informational [Page 21] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.1.1. Sequence Codes for Lengths and Offsets + + Each symbol is a code in its own context, which specifies Baseline + and Number_of_Bits to add. Codes are FSE compressed and interleaved + with raw additional bits in the same bitstream. + + Literals length codes are values ranging from 0 to 35 inclusive. + They define lengths from 0 to 131071 bytes. The literals length is + equal to the decoded Baseline plus the result of reading + Number_of_Bits bits from the bitstream, as a little-endian value. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 22] + +RFC 8478 application/zstd October 2018 + + + +----------------------+----------+----------------+ + | Literals_Length_Code | Baseline | Number_of_Bits | + +----------------------+----------+----------------+ + | 0-15 | length | 0 | + +----------------------+----------+----------------+ + | 16 | 16 | 1 | + +----------------------+----------+----------------+ + | 17 | 18 | 1 | + +----------------------+----------+----------------+ + | 18 | 20 | 1 | + +----------------------+----------+----------------+ + | 19 | 22 | 1 | + +----------------------+----------+----------------+ + | 20 | 24 | 2 | + +----------------------+----------+----------------+ + | 21 | 28 | 2 | + +----------------------+----------+----------------+ + | 22 | 32 | 3 | + +----------------------+----------+----------------+ + | 23 | 40 | 3 | + +----------------------+----------+----------------+ + | 24 | 48 | 4 | + +----------------------+----------+----------------+ + | 25 | 64 | 6 | + +----------------------+----------+----------------+ + | 26 | 128 | 7 | + +----------------------+----------+----------------+ + | 27 | 256 | 8 | + +----------------------+----------+----------------+ + | 28 | 512 | 9 | + +----------------------+----------+----------------+ + | 29 | 1024 | 10 | + +----------------------+----------+----------------+ + | 30 | 2048 | 11 | + +----------------------+----------+----------------+ + | 31 | 4096 | 12 | + +----------------------+----------+----------------+ + | 32 | 8192 | 13 | + +----------------------+----------+----------------+ + | 33 | 16384 | 14 | + +----------------------+----------+----------------+ + | 34 | 32768 | 15 | + +----------------------+----------+----------------+ + | 35 | 65536 | 16 | + +----------------------+----------+----------------+ + + + + + + +Collet & Kucherawy Informational [Page 23] + +RFC 8478 application/zstd October 2018 + + + Match length codes are values ranging from 0 to 52 inclusive. They + define lengths from 3 to 131074 bytes. The match length is equal to + the decoded Baseline plus the result of reading Number_of_Bits bits + from the bitstream, as a little-endian value. 
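+
+   Both code tables are used identically: the code supplies a
+   Baseline plus a count of extra bits to read.  As a non-normative
+   illustration, the literals length table above reduces to the
+   following sketch, where read_bits is a hypothetical little-endian
+   bit-reader over the sequences bitstream:
+
+   #include <stdint.h>
+
+   extern uint32_t read_bits(unsigned n);    /* assumed primitive */
+
+   /* Baselines and extra-bit counts for codes 16-35, copied from
+      the literals length table; codes 0-15 decode to themselves. */
+   static const uint32_t ll_base[20] = {
+       16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 128, 256, 512,
+       1024, 2048, 4096, 8192, 16384, 32768, 65536 };
+   static const uint8_t ll_extra[20] = {
+       1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8,
+       9, 10, 11, 12, 13, 14, 15, 16 };
+
+   static uint32_t decode_literals_length(unsigned code)  /* 0-35 */
+   {
+       if (code < 16)
+           return code;          /* Baseline is the length itself */
+       return ll_base[code - 16] + read_bits(ll_extra[code - 16]);
+   }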
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 24] + +RFC 8478 application/zstd October 2018 + + + +-------------------+-----------------------+----------------+ + | Match_Length_Code | Baseline | Number_of_Bits | + +-------------------+-----------------------+----------------+ + | 0-31 | Match_Length_Code + 3 | 0 | + +-------------------+-----------------------+----------------+ + | 32 | 35 | 1 | + +-------------------+-----------------------+----------------+ + | 33 | 37 | 1 | + +-------------------+-----------------------+----------------+ + | 34 | 39 | 1 | + +-------------------+-----------------------+----------------+ + | 35 | 41 | 1 | + +-------------------+-----------------------+----------------+ + | 36 | 43 | 2 | + +-------------------+-----------------------+----------------+ + | 37 | 47 | 2 | + +-------------------+-----------------------+----------------+ + | 38 | 51 | 3 | + +-------------------+-----------------------+----------------+ + | 39 | 59 | 3 | + +-------------------+-----------------------+----------------+ + | 40 | 67 | 4 | + +-------------------+-----------------------+----------------+ + | 41 | 83 | 4 | + +-------------------+-----------------------+----------------+ + | 42 | 99 | 5 | + +-------------------+-----------------------+----------------+ + | 43 | 131 | 7 | + +-------------------+-----------------------+----------------+ + | 44 | 259 | 8 | + +-------------------+-----------------------+----------------+ + | 45 | 515 | 9 | + +-------------------+-----------------------+----------------+ + | 46 | 1027 | 10 | + +-------------------+-----------------------+----------------+ + | 47 | 2051 | 11 | + +-------------------+-----------------------+----------------+ + | 48 | 4099 | 12 | + +-------------------+-----------------------+----------------+ + | 49 | 8195 | 13 | + +-------------------+-----------------------+----------------+ + | 50 | 16387 | 14 | + +-------------------+-----------------------+----------------+ + | 51 | 32771 | 15 | + +-------------------+-----------------------+----------------+ + | 52 | 65539 | 16 | + +-------------------+-----------------------+----------------+ + + + + +Collet & Kucherawy Informational [Page 25] + +RFC 8478 application/zstd October 2018 + + + Offset codes are values ranging from 0 to N. + + A decoder is free to limit its maximum supported value for N. + Support for values of at least 22 is recommended. At the time of + this writing, the reference decoder supports a maximum N value of 31. + + An offset code is also the number of additional bits to read in + little-endian fashion and can be translated into an Offset_Value + using the following formulas: + + Offset_Value = (1 << offsetCode) + readNBits(offsetCode); + if (Offset_Value > 3) Offset = Offset_Value - 3; + + This means that maximum Offset_Value is (2^(N+1))-1, supporting back- + reference distance up to (2^(N+1))-4, but it is limited by the + maximum back-reference distance (see Section 3.1.1.1.2). + + Offset_Value from 1 to 3 are special: they define "repeat codes". + This is described in more detail in Section 3.1.1.5. + +3.1.1.3.2.1.2. Decoding Sequences + + FSE bitstreams are read in reverse of the direction they are written. + In zstd, the compressor writes bits forward into a block, and the + decompressor must read the bitstream backwards. 
+ + To find the start of the bitstream, it is therefore necessary to know + the offset of the last byte of the block, which can be found by + counting Block_Size bytes after the block header. + + After writing the last bit containing information, the compressor + writes a single 1 bit and then fills the byte with 0-7 zero bits of + padding. The last byte of the compressed bitstream cannot be zero + for that reason. + + When decompressing, the last byte containing the padding is the first + byte to read. The decompressor needs to skip 0-7 initial zero bits + until the first 1 bit occurs. Afterwards, the useful part of the + bitstream begins. + + FSE decoding requires a 'state' to be carried from symbol to symbol. + For more explanation on FSE decoding, see Section 4.1. + + For sequence decoding, a separate state keeps track of each literal + lengths, offsets, and match lengths symbols. Some FSE primitives are + also used. For more details on the operation of these primitives, + see Section 4.1. + + + + +Collet & Kucherawy Informational [Page 26] + +RFC 8478 application/zstd October 2018 + + + The bitstream starts with initial FSE state values, each using the + required number of bits in their respective accuracy, decoded + previously from their normalized distribution. It starts with + Literals_Length_State, followed by Offset_State, and finally + Match_Length_State. + + Note that all values are read backward, so the 'start' of the + bitstream is at the highest position in memory, immediately before + the last 1 bit for padding. + + After decoding the starting states, a single sequence is decoded + Number_Of_Sequences times. These sequences are decoded in order from + first to last. Since the compressor writes the bitstream in the + forward direction, this means the compressor must encode the + sequences starting with the last one and ending with the first. + + For each of the symbol types, the FSE state can be used to determine + the appropriate code. The code then defines the Baseline and + Number_of_Bits to read for each type. The description of the codes + for how to determine these values can be found in + Section 3.1.1.3.2.1. + + Decoding starts by reading the Number_of_Bits required to decode + offset. It does the same for Match_Length and then for + Literals_Length. This sequence is then used for Sequence Execution + (see Section 3.1.1.4). + + If it is not the last sequence in the block, the next operation is to + update states. Using the rules pre-calculated in the decoding + tables, Literals_Length_State is updated, followed by + Match_Length_State, and then Offset_State. See Section 4.1 for + details on how to update states from the bitstream. + + This operation will be repeated Number_of_Sequences times. At the + end, the bitstream shall be entirely consumed; otherwise, the + bitstream is considered corrupted. + +3.1.1.3.2.2. Default Distributions + + If Predefined_Mode is selected for a symbol type, its FSE decoding + table is generated from a predefined distribution table defined here. + For details on how to convert this distribution into a decoding + table, see Section 4.1. + + + + + + + + +Collet & Kucherawy Informational [Page 27] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.2.1. Literals Length + + The decoding table uses an accuracy log of 6 bits (64 states). + + short literalsLength_defaultDistribution[36] = + { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 + }; + +3.1.1.3.2.2.2. 
Match Length + + The decoding table uses an accuracy log of 6 bits (64 states). + + short matchLengths_defaultDistribution[53] = + { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 + }; + +3.1.1.3.2.2.3. Offset Codes + + The decoding table uses an accuracy log of 5 bits (32 states), and + supports a maximum N value of 28, allowing offset values up to + 536,870,908. + + If any sequence in the compressed block requires a larger offset than + this, it's not possible to use the default distribution to represent + it. + + short offsetCodes_defaultDistribution[29] = + { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 + }; + +3.1.1.4. Sequence Execution + + Once literals and sequences have been decoded, they are combined to + produce the decoded content of a block. + + Each sequence consists of a tuple of (literals_length, offset_value, + match_length), decoded as described in the + Sequences_Section (Section 3.1.1.3.2). To execute a sequence, first + copy literals_length bytes from the decoded literals to the output. + + + + + + +Collet & Kucherawy Informational [Page 28] + +RFC 8478 application/zstd October 2018 + + + Then, match_length bytes are copied from previous decoded data. The + offset to copy from is determined by offset_value: + + o if Offset_Value > 3, then the offset is Offset_Value - 3; + + o if Offset_Value is from 1-3, the offset is a special repeat offset + value. See Section 3.1.1.5 for how the offset is determined in + this case. + + The offset is defined as from the current position (after copying the + literals), so an offset of 6 and a match length of 3 means that 3 + bytes should be copied from 6 bytes back. Note that all offsets + leading to previously decoded data must be smaller than Window_Size + defined in Frame_Header_Descriptor (Section 3.1.1.1.1). + +3.1.1.5. Repeat Offsets + + As seen above, the first three values define a repeated offset; we + will call them Repeated_Offset1, Repeated_Offset2, and + Repeated_Offset3. They are sorted in recency order, with + Repeated_Offset1 meaning "most recent one". + + If offset_value is 1, then the offset used is Repeated_Offset1, etc. + + There is one exception: When the current sequence's literals_length + is 0, repeated offsets are shifted by 1, so an offset_value of 1 + means Repeated_Offset2, an offset_value of 2 means Repeated_Offset3, + and an offset_value of 3 means Repeated_Offset1 - 1_byte. + + For the first block, the starting offset history is populated with + the following values: Repeated_Offset1 (1), Repeated_Offset2 (4), and + Repeated_Offset3 (8), unless a dictionary is used, in which case they + come from the dictionary. + + Then each block gets its starting offset history from the ending + values of the most recent Compressed_Block. Note that blocks that + are not Compressed_Block are skipped; they do not contribute to + offset history. + + The newest offset takes the lead in offset history, shifting others + back (up to its previous place if it was already present). This + means that when Repeated_Offset1 (most recent) is used, history is + unmodified. When Repeated_Offset2 is used, it is swapped with + Repeated_Offset1. If any other offset is used, it becomes + Repeated_Offset1, and the rest are shifted back by 1. + + + + + + +Collet & Kucherawy Informational [Page 29] + +RFC 8478 application/zstd October 2018 + + +3.1.2. 
Skippable Frames + + +--------------+------------+-----------+ + | Magic_Number | Frame_Size | User_Data | + +--------------+------------+-----------+ + | 4 bytes | 4 bytes | n bytes | + +--------------+------------+-----------+ + + Skippable frames allow the insertion of user-defined metadata into a + flow of concatenated frames. + + Skippable frames defined in this specification are compatible with + skippable frames in [LZ4]. + + From a compliant decoder perspective, skippable frames simply need to + be skipped, and their content ignored, resuming decoding after the + skippable frame. + + It should be noted that a skippable frame can be used to watermark a + stream of concatenated frames embedding any kind of tracking + information (even just a Universally Unique Identifier (UUID)). + Users wary of such possibility should scan the stream of concatenated + frames in an attempt to detect such frames for analysis or removal. + + The fields are: + + Magic_Number: 4 bytes, little-endian format. Value: 0x184D2A5?, + which means any value from 0x184D2A50 to 0x184D2A5F. All 16 + values are valid to identify a skippable frame. This + specification does not detail any specific tagging methods for + skippable frames. + + Frame_Size: This is the size, in bytes, of the following User_Data + (without including the magic number nor the size field itself). + This field is represented using 4 bytes, little-endian format, + unsigned 32 bits. This means User_Data can't be bigger than + (2^32-1) bytes. + + User_Data: This field can be anything. Data will just be skipped by + the decoder. + +4. Entropy Encoding + + Two types of entropy encoding are used by the Zstandard format: FSE + and Huffman coding. Huffman is used to compress literals, while FSE + is used for all other symbols (Literals_Length_Code, + Match_Length_Code, and offset codes) and to compress Huffman headers. + + + + +Collet & Kucherawy Informational [Page 30] + +RFC 8478 application/zstd October 2018 + + +4.1. FSE + + FSE, short for Finite State Entropy, is an entropy codec based on + [ANS]. FSE encoding/decoding involves a state that is carried over + between symbols, so decoding must be done in the opposite direction + as encoding. Therefore, all FSE bitstreams are read from end to + beginning. Note that the order of the bits in the stream is not + reversed; they are simply read in the reverse order from which they + were written. + + For additional details on FSE, see Finite State Entropy [FSE]. + + FSE decoding involves a decoding table that has a power of 2 size and + contains three elements: Symbol, Num_Bits, and Baseline. The base 2 + logarithm of the table size is its Accuracy_Log. An FSE state value + represents an index in this table. + + To obtain the initial state value, consume Accuracy_Log bits from the + stream as a little-endian value. The next symbol in the stream is + the Symbol indicated in the table for that state. To obtain the next + state value, the decoder should consume Num_Bits bits from the stream + as a little-endian value and add it to Baseline. + +4.1.1. FSE Table Description + + To decode FSE streams, it is necessary to construct the decoding + table. The Zstandard format encodes FSE table descriptions as + described here. + + An FSE distribution table describes the probabilities of all symbols + from 0 to the last present one (included) on a normalized scale of + (1 << Accuracy_Log). Note that there must be two or more symbols + with non-zero probability. 
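+
+   Before turning to how the table itself is transmitted, the decode
+   loop of Section 4.1 can be sketched as follows (non-normative;
+   the cell layout and names are ours, and read_bits stands for the
+   backward bit-reader described in Section 3.1.1.3.2.1.2):
+
+   #include <stdint.h>
+
+   extern uint32_t read_bits(unsigned n);    /* assumed primitive */
+
+   typedef struct {
+       uint8_t  symbol;    /* Symbol decoded in this state         */
+       uint8_t  num_bits;  /* Num_Bits consumed for the next state */
+       uint16_t baseline;  /* Baseline added to the consumed bits  */
+   } FseCell;
+
+   /* 'table' holds (1 << accuracy_log) cells; the initial state
+      consumes Accuracy_Log bits from the stream. */
+   static uint32_t fse_init_state(unsigned accuracy_log)
+   {
+       return read_bits(accuracy_log);
+   }
+
+   /* Emit one symbol, then step to the next state. */
+   static uint8_t fse_decode_step(const FseCell *table,
+                                  uint32_t *state)
+   {
+       uint8_t symbol = table[*state].symbol;
+       *state = table[*state].baseline
+              + read_bits(table[*state].num_bits);
+       return symbol;
+   }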
+ + A bitstream is read forward, in little-endian fashion. It is not + necessary to know its exact size, since the size will be discovered + and reported by the decoding process. The bitstream starts by + reporting on which scale it operates. If low4bits designates the + lowest 4 bits of the first byte, then Accuracy_Log = low4bits + 5. + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 31] + +RFC 8478 application/zstd October 2018 + + + This is followed by each symbol value, from 0 to the last present + one. The number of bits used by each field is variable and depends + on: + + Remaining probabilities + 1: For example, presuming an Accuracy_Log + of 8, and presuming 100 probabilities points have already been + distributed, the decoder may read any value from 0 to + (256 - 100 + 1) == 157, inclusive. Therefore, it must read + log2sup(157) == 8 bits. + + Value decoded: Small values use 1 fewer bit. For example, presuming + values from 0 to 157 (inclusive) are possible, 255 - 157 = 98 + values are remaining in an 8-bit field. The first 98 values + (hence from 0 to 97) use only 7 bits, and values from 98 to 157 + use 8 bits. This is achieved through this scheme: + + +------------+---------------+-----------+ + | Value Read | Value Decoded | Bits Used | + +------------+---------------+-----------+ + | 0 - 97 | 0 - 97 | 7 | + +------------+---------------+-----------+ + | 98 - 127 | 98 - 127 | 8 | + +------------+---------------+-----------+ + | 128 - 225 | 0 - 97 | 7 | + +------------+---------------+-----------+ + | 226 - 255 | 128 - 157 | 8 | + +------------+---------------+-----------+ + + Symbol probabilities are read one by one, in order. The probability + is obtained from Value decoded using the formula P = Value - 1. This + means the value 0 becomes the negative probability -1. This is a + special probability that means "less than 1". Its effect on the + distribution table is described below. For the purpose of + calculating total allocated probability points, it counts as 1. + + When a symbol has a probability of zero, it is followed by a 2-bit + repeat flag. This repeat flag tells how many probabilities of zeroes + follow the current one. It provides a number ranging from 0 to 3. + If it is a 3, another 2-bit repeat flag follows, and so on. + + When the last symbol reaches a cumulated total of + (1 << Accuracy_Log), decoding is complete. If the last symbol makes + the cumulated total go above (1 << Accuracy_Log), distribution is + considered corrupted. + + + + + + + +Collet & Kucherawy Informational [Page 32] + +RFC 8478 application/zstd October 2018 + + + Finally, the decoder can tell how many bytes were used in this + process and how many symbols are present. The bitstream consumes a + round number of bytes. Any remaining bit within the last byte is + simply unused. + + The distribution of normalized probabilities is enough to create a + unique decoding table. The table has a size of (1 << Accuracy_Log). + Each cell describes the symbol decoded and instructions to get the + next state. + + Symbols are scanned in their natural order for "less than 1" + probabilities as described above. Symbols with this probability are + being attributed a single cell, starting from the end of the table + and retreating. These symbols define a full state reset, reading + Accuracy_Log bits. + + All remaining symbols are allocated in their natural order. Starting + from symbol 0 and table position 0, each symbol gets allocated as + many cells as its probability. 
Cell allocation is spread, not + linear; each successor position follows this rule: + + position += (tableSize >> 1) + (tableSize >> 3) + 3; + position &= tableSize - 1; + + A position is skipped if it is already occupied by a "less than 1" + probability symbol. Position does not reset between symbols; it + simply iterates through each position in the table, switching to the + next symbol when enough states have been allocated to the current + one. + + The result is a list of state values. Each state will decode the + current symbol. + + To get the Number_of_Bits and Baseline required for the next state, + it is first necessary to sort all states in their natural order. The + lower states will need 1 more bit than higher ones. The process is + repeated for each symbol. + + For example, presuming a symbol has a probability of 5, it receives + five state values. States are sorted in natural order. The next + power of 2 is 8. The space of probabilities is divided into 8 equal + parts. Presuming the Accuracy_Log is 7, this defines 128 states, and + each share (divided by 8) is 16 in size. In order to reach 8, 8 - 5 + = 3 lowest states will count "double", doubling the number of shares + (32 in width), requiring 1 more bit in the process. + + + + + + +Collet & Kucherawy Informational [Page 33] + +RFC 8478 application/zstd October 2018 + + + Baseline is assigned starting from the higher states using fewer + bits, and proceeding naturally, then resuming at the first state, + each taking its allocated width from Baseline. + + +----------------+-------+-------+--------+------+-------+ + | state order | 0 | 1 | 2 | 3 | 4 | + +----------------+-------+-------+--------+------+-------+ + | width | 32 | 32 | 32 | 16 | 16 | + +----------------+-------+-------+--------+------+-------+ + | Number_of_Bits | 5 | 5 | 5 | 4 | 4 | + +----------------+-------+-------+--------+------+-------+ + | range number | 2 | 4 | 6 | 0 | 1 | + +----------------+-------+-------+--------+------+-------+ + | Baseline | 32 | 64 | 96 | 0 | 16 | + +----------------+-------+-------+--------+------+-------+ + | range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 | + +----------------+-------+-------+--------+------+-------+ + + The next state is determined from the current state by reading the + required Number_of_Bits and adding the specified Baseline. + + See Appendix A for the results of this process that are applied to + the default distributions. + +4.2. Huffman Coding + + Zstandard Huffman-coded streams are read backwards, similar to the + FSE bitstreams. Therefore, to find the start of the bitstream, it is + necessary to know the offset of the last byte of the Huffman-coded + stream. + + After writing the last bit containing information, the compressor + writes a single 1 bit and then fills the byte with 0-7 0 bits of + padding. The last byte of the compressed bitstream cannot be 0 for + that reason. + + When decompressing, the last byte containing the padding is the first + byte to read. The decompressor needs to skip 0-7 initial 0 bits and + the first 1 bit that occurs. Afterwards, the useful part of the + bitstream begins. + + The bitstream contains Huffman-coded symbols in little-endian order, + with the codes defined by the method below. + + + + + + + + +Collet & Kucherawy Informational [Page 34] + +RFC 8478 application/zstd October 2018 + + +4.2.1. 
Huffman Tree Description + + Prefix coding represents symbols from an a priori known alphabet by + bit sequences (codewords), one codeword for each symbol, in a manner + such that different symbols may be represented by bit sequences of + different lengths, but a parser can always parse an encoded string + unambiguously symbol by symbol. + + Given an alphabet with known symbol frequencies, the Huffman + algorithm allows the construction of an optimal prefix code using the + fewest bits of any possible prefix codes for that alphabet. + + The prefix code must not exceed a maximum code length. More bits + improve accuracy but yield a larger header size and require more + memory or more complex decoding operations. This specification + limits the maximum code length to 11 bits. + + All literal values from zero (included) to the last present one + (excluded) are represented by Weight with values from 0 to + Max_Number_of_Bits. Transformation from Weight to Number_of_Bits + follows this pseudocode: + + if Weight == 0 + Number_of_Bits = 0 + else + Number_of_Bits = Max_Number_of_Bits + 1 - Weight + + The last symbol's Weight is deduced from previously decoded ones, by + completing to the nearest power of 2. This power of 2 gives + Max_Number_of_Bits the depth of the current tree. + + For example, presume the following Huffman tree must be described: + + +---------------+----------------+ + | Literal Value | Number_of_Bits | + +---------------+----------------+ + | 0 | 1 | + +---------------+----------------+ + | 1 | 2 | + +---------------+----------------+ + | 2 | 3 | + +---------------+----------------+ + | 3 | 0 | + +---------------+----------------+ + | 4 | 4 | + +---------------+----------------+ + | 5 | 4 | + +---------------+----------------+ + + + +Collet & Kucherawy Informational [Page 35] + +RFC 8478 application/zstd October 2018 + + + The tree depth is 4, since its longest element uses 4 bits. (The + longest elements are those with the smallest frequencies.) Value 5 + will not be listed as it can be determined from the values for 0-4, + nor will values above 5 as they are all 0. Values from 0 to 4 will + be listed using Weight instead of Number_of_Bits. The pseudocode to + determine Weight is: + + if Number_of_Bits == 0 + Weight = 0 + else + Weight = Max_Number_of_Bits + 1 - Number_of_Bits + + It gives the following series of weights: + + +---------------+--------+ + | Literal Value | Weight | + +---------------+--------+ + | 0 | 4 | + +---------------+--------+ + | 1 | 3 | + +---------------+--------+ + | 2 | 2 | + +---------------+--------+ + | 3 | 0 | + +---------------+--------+ + | 4 | 1 | + +---------------+--------+ + + The decoder will do the inverse operation: having collected weights + of literals from 0 to 4, it knows the last literal, 5, is present + with a non-zero Weight. The Weight of 5 can be determined by + advancing to the next power of 2. The sum of 2^(Weight-1) (excluding + 0's) is 15. The nearest power of 2 is 16. Therefore, + Max_Number_of_Bits = 4 and Weight[5] = 16 - 15 = 1. + +4.2.1.1. Huffman Tree Header + + This is a single byte value (0-255), which describes how the series + of weights is encoded. + + headerByte < 128: The series of weights is compressed using FSE (see + below). The length of the FSE-compressed series is equal to + headerByte (0-127). 
+ + + + + + + + +Collet & Kucherawy Informational [Page 36] + +RFC 8478 application/zstd October 2018 + + + headerByte >= 128: This is a direct representation, where each + Weight is written directly as a 4-bit field (0-15). They are + encoded forward, 2 weights to a byte with the first weight taking + the top 4 bits and the second taking the bottom 4; for example, + the following operations could be used to read the weights: + + Weight[0] = (Byte[0] >> 4) + Weight[1] = (Byte[0] & 0xf), + etc. + + The full representation occupies ceiling(Number_of_Symbols/2) + bytes, meaning it uses only full bytes even if Number_of_Symbols + is odd. Number_of_Symbols = headerByte - 127. Note that maximum + Number_of_Symbols is 255 - 127 = 128. If any literal has a value + over 128, raw header mode is not possible, and it is necessary to + use FSE compression. + +4.2.1.2. FSE Compression of Huffman Weights + + In this case, the series of Huffman weights is compressed using FSE + compression. It is a single bitstream with two interleaved states, + sharing a single distribution table. + + To decode an FSE bitstream, it is necessary to know its compressed + size. Compressed size is provided by headerByte. It's also + necessary to know its maximum possible decompressed size, which is + 255, since literal values span from 0 to 255, and the last symbol's + Weight is not represented. + + An FSE bitstream starts by a header, describing probabilities + distribution. It will create a decoding table. For a list of + Huffman weights, the maximum accuracy log is 6 bits. For more + details, see Section 4.1.1. + + The Huffman header compression uses two states, which share the same + FSE distribution table. The first state (State1) encodes the even- + numbered index symbols, and the second (State2) encodes the odd- + numbered index symbols. State1 is initialized first, and then + State2, and they take turns decoding a single symbol and updating + their state. For more details on these FSE operations, see + Section 4.1. + + The number of symbols to be decoded is determined by tracking the + bitStream overflow condition: If updating state after decoding a + symbol would require more bits than remain in the stream, it is + assumed that extra bits are zero. Then, symbols for each of the + final states are decoded and the process is complete. + + + + +Collet & Kucherawy Informational [Page 37] + +RFC 8478 application/zstd October 2018 + + +4.2.1.3. Conversion from Weights to Huffman Prefix Codes + + All present symbols will now have a Weight value. It is possible to + transform weights into Number_of_Bits, using this formula: + + if Weight > 0 + Number_of_Bits = Max_Number_of_Bits + 1 - Weight + else + Number_of_Bits = 0 + + Symbols are sorted by Weight. Within the same Weight, symbols keep + natural sequential order. Symbols with a Weight of zero are removed. + Then, starting from the lowest Weight, prefix codes are distributed + in sequential order. 
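+
+   In code form, the conversion can be sketched as shown below
+   (non-normative; array names are ours, and symbols with Weight 0
+   receive no code).  The worked example that follows traces the
+   same procedure by hand.
+
+   #include <stdint.h>
+
+   static void weights_to_codes(const uint8_t *weights, int num_syms,
+                                uint8_t *nbits, uint16_t *codes)
+   {
+       /* Max_Number_of_Bits: the weight sum is completed to a power
+          of 2 by the last symbol (Section 4.2.1); its log2 is the
+          tree depth. */
+       uint32_t sum = 0;
+       for (int s = 0; s < num_syms; s++)
+           if (weights[s] > 0)
+               sum += 1u << (weights[s] - 1);
+       int max_bits = 0;
+       while ((1u << max_bits) < sum)
+           max_bits++;
+
+       for (int s = 0; s < num_syms; s++)
+           nbits[s] = weights[s]
+                    ? (uint8_t)(max_bits + 1 - weights[s]) : 0;
+
+       /* Distribute code values: lowest Weight (longest codes)
+          first; within a Weight, natural symbol order. */
+       uint16_t next = 0;
+       int cur_bits = max_bits;
+       for (int w = 1; w <= max_bits; w++) {
+           int bits = max_bits + 1 - w;
+           next >>= (cur_bits - bits);  /* codes shorten as Weight
+                                           rises */
+           cur_bits = bits;
+           for (int s = 0; s < num_syms; s++)
+               if (weights[s] == w)
+                   codes[s] = next++;
+       }
+   }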
+ + For example, assume the following list of weights has been decoded: + + +---------+--------+ + | Literal | Weight | + +---------+--------+ + | 0 | 4 | + +---------+--------+ + | 1 | 3 | + +---------+--------+ + | 2 | 2 | + +---------+--------+ + | 3 | 0 | + +---------+--------+ + | 4 | 1 | + +---------+--------+ + | 5 | 1 | + +---------+--------+ + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 38] + +RFC 8478 application/zstd October 2018 + + + Sorting by weight and then the natural sequential order yields the + following distribution: + + +---------+--------+----------------+--------------+ + | Literal | Weight | Number_Of_Bits | Prefix Codes | + +---------+--------+----------------|--------------+ + | 3 | 0 | 0 | N/A | + +---------+--------+----------------|--------------+ + | 4 | 1 | 4 | 0000 | + +---------+--------+----------------|--------------+ + | 5 | 1 | 4 | 0001 | + +---------+--------+----------------|--------------+ + | 2 | 2 | 3 | 001 | + +---------+--------+----------------|--------------+ + | 1 | 3 | 2 | 01 | + +---------+--------+----------------|--------------+ + | 0 | 4 | 1 | 1 | + +---------+--------+----------------|--------------+ + +4.2.2. Huffman-Coded Streams + + Given a Huffman decoding table, it is possible to decode a Huffman- + coded stream. + + Each bitstream must be read backward, which starts from the end and + goes up to the beginning. Therefore, it is necessary to know the + size of each bitstream. + + It is also necessary to know exactly which bit is the last. This is + detected by a final bit flag: the highest bit of the last byte is a + final-bit-flag. Consequently, a last byte of 0 is not possible. And + the final-bit-flag itself is not part of the useful bitstream. + Hence, the last byte contains between 0 and 7 useful bits. + + Starting from the end, it is possible to read the bitstream in a + little-endian fashion, keeping track of already used bits. Since the + bitstream is encoded in reverse order, starting from the end, read + symbols in forward order. + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 39] + +RFC 8478 application/zstd October 2018 + + + For example, if the literal sequence "0145" was encoded using the + above prefix code, it would be encoded (in reverse order) as: + + +---------+----------+ + | Symbol | Encoding | + +---------+----------+ + | 5 | 0000 | + +---------+----------+ + | 4 | 0001 | + +---------+----------+ + | 1 | 01 | + +---------+----------+ + | 0 | 1 | + +---------+----------+ + | Padding | 00001 | + +---------+----------+ + + This results in the following 2-byte bitstream: + + 00010000 00001101 + + Here is an alternative representation with the symbol codes separated + by underscores: + + 0001_0000 00001_1_01 + + Reading the highest Max_Number_of_Bits bits, it's possible to compare + the extracted value to the decoding table, determining the symbol to + decode and number of bits to discard. + + The process continues reading up to the required number of symbols + per stream. If a bitstream is not entirely and exactly consumed, + hence reaching exactly its beginning position with all bits consumed, + the decoding process is considered faulty. + +5. Dictionary Format + + Zstandard is compatible with "raw content" dictionaries, free of any + format restriction, except that they must be at least 8 bytes. These + dictionaries function as if they were just the content part of a + formatted dictionary. 
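+
+   In other words, a raw dictionary simply supplies pre-existing
+   history for match copies.  A minimal, non-normative sketch of
+   that view (names are ours; bounds checks omitted):
+
+   #include <stdint.h>
+   #include <stddef.h>
+
+   typedef struct {
+       const uint8_t *dict;    /* raw dictionary content       */
+       size_t         dict_len;
+       uint8_t       *out;     /* frame output decoded so far  */
+       size_t         out_len;
+   } History;
+
+   /* Fetch the byte 'dist' positions back from the current output
+      position; distances larger than out_len reach back into the
+      dictionary content. */
+   static uint8_t byte_at_distance(const History *h, size_t dist)
+   {
+       if (dist <= h->out_len)
+           return h->out[h->out_len - dist];
+       return h->dict[h->dict_len - (dist - h->out_len)];
+   }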
+ + However, dictionaries created by "zstd --train" in the reference + implementation follow a specific format, described here. + + Dictionaries are not included in the compressed content but rather + are provided out of band. That is, the Dictionary_ID identifies + which should be used, but this specification does not describe the + + + +Collet & Kucherawy Informational [Page 40] + +RFC 8478 application/zstd October 2018 + + + mechanism by which the dictionary is obtained prior to use during + compression or decompression. + + A dictionary has a size, defined either by a buffer limit or a file + size. The general format is: + + +--------------+---------------+----------------+---------+ + | Magic_Number | Dictionary_ID | Entropy_Tables | Content | + +--------------+---------------+----------------+---------+ + + Magic_Number: 4 bytes ID, value 0xEC30A437, little-endian format. + + Dictionary_ID: 4 bytes, stored in little-endian format. + Dictionary_ID can be any value, except 0 (which means no + Dictionary_ID). It is used by decoders to check if they use the + correct dictionary. If the frame is going to be distributed in a + private environment, any Dictionary_ID can be used. However, for + public distribution of compressed frames, the following ranges are + reserved and shall not be used: + + low range: <= 32767 + high range: >= (2^31) + + Entropy_Tables: Follow the same format as the tables in compressed + blocks. See the relevant FSE and Huffman sections for how to + decode these tables. They are stored in the following order: + Huffman table for literals, FSE table for offsets, FSE table for + match lengths, and FSE table for literals lengths. These tables + populate the Repeat Stats literals mode and Repeat distribution + mode for sequence decoding. It is finally followed by 3 offset + values, populating repeat offsets (instead of using {1,4,8}), + stored in order, 4-bytes little-endian each, for a total of 12 + bytes. Each repeat offset must have a value less than the + dictionary size. + + Content: The rest of the dictionary is its content. The content + acts as a "past" in front of data to be compressed or + decompressed, so it can be referenced in sequence commands. As + long as the amount of data decoded from this frame is less than or + equal to Window_Size, sequence commands may specify offsets longer + than the total length of decoded output so far to reference back + to the dictionary, even parts of the dictionary with offsets + larger than Window_Size. After the total output has surpassed + Window_Size, however, this is no longer allowed, and the + dictionary is no longer accessible. + + + + + + +Collet & Kucherawy Informational [Page 41] + +RFC 8478 application/zstd October 2018 + + +6. IANA Considerations + + IANA has made two registrations, as described below. + +6.1. The 'application/zstd' Media Type + + The 'application/zstd' media type identifies a block of data that is + compressed using zstd compression. The data is a stream of bytes as + described in this document. IANA has added the following to the + "Media Types" registry: + + Type name: application + + Subtype name: zstd + + Required parameters: N/A + + Optional parameters: N/A + + Encoding considerations: binary + + Security considerations: See Section 7 of RFC 8478 + + Interoperability considerations: N/A + + Published specification: RFC 8478 + + Applications that use this media type: anywhere data size is an + issue + + Additional information: + + Magic number(s): 4 bytes, little-endian format. 
+ Value: 0xFD2FB528 + + File extension(s): zst + + Macintosh file type code(s): N/A + + For further information: See [ZSTD] + + Intended usage: common + + Restrictions on usage: N/A + + Author: Murray S. Kucherawy + + Change Controller: IETF + + + +Collet & Kucherawy Informational [Page 42] + +RFC 8478 application/zstd October 2018 + + + Provisional registration: no + +6.2. Content Encoding + + IANA has added the following entry to the "HTTP Content Coding + Registry" within the "Hypertext Transfer Protocol (HTTP) Parameters" + registry: + + Name: zstd + + Description: A stream of bytes compressed using the Zstandard + protocol + + Pointer to specification text: RFC 8478 + +6.3. Dictionaries + + Work in progress includes development of dictionaries that will + optimize compression and decompression of particular types of data. + Specification of such dictionaries for public use will necessitate + registration of a code point from the reserved range described in + Section 3.1.1.1.3 and its association with a specific dictionary. + + However, there are at present no such dictionaries published for + public use, so this document makes no immediate request of IANA to + create such a registry. + +7. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Zstandard is no exception, and the usual precautions + apply. + + One should never compress a message whose content must remain secret + with a message generated by a third party. Such a compression can be + used to guess the content of the secret message through analysis of + entropy reduction. This was demonstrated in the Compression Ratio + Info-leak Made Easy (CRIME) attack [CRIME], for example. + + A decoder has to demonstrate capabilities to detect and prevent any + kind of data tampering in the compressed frame from triggering system + faults, such as reading or writing beyond allowed memory ranges. + This can be guaranteed by either the implementation language or + careful bound checkings. Of particular note is the encoding of + Number_of_Sequences values that cause the decoder to read into the + block header (and beyond), as well as the indication of a + Frame_Content_Size that is smaller than the actual decompressed data, + in an attempt to trigger a buffer overflow. It is highly recommended + + + +Collet & Kucherawy Informational [Page 43] + +RFC 8478 application/zstd October 2018 + + + to fuzz-test (i.e., provide invalid, unexpected, or random input and + verify safe operation of) decoder implementations to test and harden + their capability to detect bad frames and deal with them without any + adverse system side effect. + + An attacker may provide correctly formed compressed frames with + unreasonable memory requirements. A decoder must always control + memory requirements and enforce some (system-specific) limits in + order to protect memory usage from such scenarios. + + Compression can be optimized by training a dictionary on a variety of + related content payloads. This dictionary must then be available at + the decoder for decompression of the payload to be possible. While + this document does not specify how to acquire a dictionary for a + given compressed payload, it is worth noting that third-party + dictionaries may interact unexpectedly with a decoder, leading to + possible memory or other resource exhaustion attacks. 
We expect such + topics to be discussed in further detail in the Security + Considerations section of a forthcoming RFC for dictionary + acquisition and transmission, but highlight this issue now out of an + abundance of caution. + + As discussed in Section 3.1.2, it is possible to store arbitrary user + metadata in skippable frames. While such frames are ignored during + decompression of the data, they can be used as a watermark to track + the path of the compressed payload. + +8. Implementation Status + + Source code for a C language implementation of a Zstandard-compliant + library is available at [ZSTD-GITHUB]. This implementation is + considered to be the reference implementation and is production + ready; it implements the full range of the specification. It is + routinely tested against security hazards and widely deployed within + Facebook infrastructure. + + The reference version is optimized for speed and is highly portable. + It has been proven to run safely on multiple architectures (e.g., + x86, x64, ARM, MIPS, PowerPC, IA64) featuring 32- or 64-bit + addressing schemes, a little- or big-endian storage scheme, a number + of different operating systems (e.g., UNIX (including Linux, BSD, + OS-X, and Solaris) and Windows), and a number of compilers (e.g., + gcc, clang, visual, and icc). + + + + + + + + +Collet & Kucherawy Informational [Page 44] + +RFC 8478 application/zstd October 2018 + + +9. References + +9.1. Normative References + + [ZSTD] "Zstandard", . + +9.2. Informative References + + [ANS] Duda, J., "Asymmetric numeral systems: entropy coding + combining speed of Huffman coding with compression rate of + arithmetic coding", January 2014, + . + + [CRIME] "CRIME", June 2018, . + + [FSE] "FiniteStateEntropy", commit 6efa78a, June 2018, + . + + [LZ4] "LZ4 Frame Format Description", commit d03224b, January + 2018, . + + [RFC1952] Deutsch, P., "GZIP file format specification version 4.3", + RFC 1952, DOI 10.17487/RFC1952, May 1996, + . + + [XXHASH] "XXHASH Algorithm", . + + [ZSTD-GITHUB] + "zstd", commit 8514bd8, August 2018, + . + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 45] + +RFC 8478 application/zstd October 2018 + + +Appendix A. Decoding Tables for Predefined Codes + + This appendix contains FSE decoding tables for the predefined literal + length, match length, and offset codes. The tables have been + constructed using the algorithm as given above in Section 4.1.1. The + tables here can be used as examples to crosscheck that an + implementation has built its decoding tables correctly. + +A.1. 
Literal Length Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 4 | 0 | + +-------+--------+----------------+------+ + | 1 | 0 | 4 | 16 | + +-------+--------+----------------+------+ + | 2 | 1 | 5 | 32 | + +-------+--------+----------------+------+ + | 3 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 6 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 7 | 5 | 0 | + +-------+--------+----------------+------+ + | 7 | 9 | 5 | 0 | + +-------+--------+----------------+------+ + | 8 | 10 | 5 | 0 | + +-------+--------+----------------+------+ + | 9 | 12 | 5 | 0 | + +-------+--------+----------------+------+ + | 10 | 14 | 6 | 0 | + +-------+--------+----------------+------+ + | 11 | 16 | 5 | 0 | + +-------+--------+----------------+------+ + | 12 | 18 | 5 | 0 | + +-------+--------+----------------+------+ + | 13 | 19 | 5 | 0 | + +-------+--------+----------------+------+ + | 14 | 21 | 5 | 0 | + +-------+--------+----------------+------+ + | 15 | 22 | 5 | 0 | + +-------+--------+----------------+------+ + | 16 | 24 | 5 | 0 | + + + +Collet & Kucherawy Informational [Page 46] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 17 | 25 | 5 | 32 | + +-------+--------+----------------+------+ + | 18 | 26 | 5 | 0 | + +-------+--------+----------------+------+ + | 19 | 27 | 6 | 0 | + +-------+--------+----------------+------+ + | 20 | 29 | 6 | 0 | + +-------+--------+----------------+------+ + | 21 | 31 | 6 | 0 | + +-------+--------+----------------+------+ + | 22 | 0 | 4 | 32 | + +-------+--------+----------------+------+ + | 23 | 1 | 4 | 0 | + +-------+--------+----------------+------+ + | 24 | 2 | 5 | 0 | + +-------+--------+----------------+------+ + | 25 | 4 | 5 | 32 | + +-------+--------+----------------+------+ + | 26 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 27 | 7 | 5 | 32 | + +-------+--------+----------------+------+ + | 28 | 8 | 5 | 0 | + +-------+--------+----------------+------+ + | 29 | 10 | 5 | 32 | + +-------+--------+----------------+------+ + | 30 | 11 | 5 | 0 | + +-------+--------+----------------+------+ + | 31 | 13 | 6 | 0 | + +-------+--------+----------------+------+ + | 32 | 16 | 5 | 32 | + +-------+--------+----------------+------+ + | 33 | 17 | 5 | 0 | + +-------+--------+----------------+------+ + | 34 | 19 | 5 | 32 | + +-------+--------+----------------+------+ + | 35 | 20 | 5 | 0 | + +-------+--------+----------------+------+ + | 36 | 22 | 5 | 32 | + +-------+--------+----------------+------+ + | 37 | 23 | 5 | 0 | + +-------+--------+----------------+------+ + | 38 | 25 | 4 | 0 | + +-------+--------+----------------+------+ + | 39 | 25 | 4 | 16 | + +-------+--------+----------------+------+ + | 40 | 26 | 5 | 32 | + + + +Collet & Kucherawy Informational [Page 47] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 41 | 28 | 6 | 0 | + +-------+--------+----------------+------+ + | 42 | 30 | 6 | 0 | + +-------+--------+----------------+------+ + | 43 | 0 | 4 | 48 | + +-------+--------+----------------+------+ + | 44 | 1 | 4 | 16 | + +-------+--------+----------------+------+ + | 45 | 2 | 5 | 32 | + +-------+--------+----------------+------+ + | 46 | 3 | 5 | 32 | + 
+-------+--------+----------------+------+ + | 47 | 5 | 5 | 32 | + +-------+--------+----------------+------+ + | 48 | 6 | 5 | 32 | + +-------+--------+----------------+------+ + | 49 | 8 | 5 | 32 | + +-------+--------+----------------+------+ + | 50 | 9 | 5 | 32 | + +-------+--------+----------------+------+ + | 51 | 11 | 5 | 32 | + +-------+--------+----------------+------+ + | 52 | 12 | 5 | 32 | + +-------+--------+----------------+------+ + | 53 | 15 | 6 | 0 | + +-------+--------+----------------+------+ + | 54 | 17 | 5 | 32 | + +-------+--------+----------------+------+ + | 55 | 18 | 5 | 32 | + +-------+--------+----------------+------+ + | 56 | 20 | 5 | 32 | + +-------+--------+----------------+------+ + | 57 | 21 | 5 | 32 | + +-------+--------+----------------+------+ + | 58 | 23 | 5 | 32 | + +-------+--------+----------------+------+ + | 59 | 24 | 5 | 32 | + +-------+--------+----------------+------+ + | 60 | 35 | 6 | 0 | + +-------+--------+----------------+------+ + | 61 | 34 | 6 | 0 | + +-------+--------+----------------+------+ + | 62 | 33 | 6 | 0 | + +-------+--------+----------------+------+ + | 63 | 32 | 6 | 0 | + +-------+--------+----------------+------+ + + + + +Collet & Kucherawy Informational [Page 48] + +RFC 8478 application/zstd October 2018 + + +A.2. Match Length Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 6 | 0 | + +-------+--------+----------------+------+ + | 1 | 1 | 4 | 0 | + +-------+--------+----------------+------+ + | 2 | 2 | 5 | 32 | + +-------+--------+----------------+------+ + | 3 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 6 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 8 | 5 | 0 | + +-------+--------+----------------+------+ + | 7 | 10 | 6 | 0 | + +-------+--------+----------------+------+ + | 8 | 13 | 6 | 0 | + +-------+--------+----------------+------+ + | 9 | 16 | 6 | 0 | + +-------+--------+----------------+------+ + | 10 | 19 | 6 | 0 | + +-------+--------+----------------+------+ + | 11 | 22 | 6 | 0 | + +-------+--------+----------------+------+ + | 12 | 25 | 6 | 0 | + +-------+--------+----------------+------+ + | 13 | 28 | 6 | 0 | + +-------+--------+----------------+------+ + | 14 | 31 | 6 | 0 | + +-------+--------+----------------+------+ + | 15 | 33 | 6 | 0 | + +-------+--------+----------------+------+ + | 16 | 35 | 6 | 0 | + +-------+--------+----------------+------+ + | 17 | 37 | 6 | 0 | + +-------+--------+----------------+------+ + | 18 | 39 | 6 | 0 | + +-------+--------+----------------+------+ + | 19 | 41 | 6 | 0 | + +-------+--------+----------------+------+ + | 20 | 43 | 6 | 0 | + + + +Collet & Kucherawy Informational [Page 49] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 21 | 45 | 6 | 0 | + +-------+--------+----------------+------+ + | 22 | 1 | 4 | 16 | + +-------+--------+----------------+------+ + | 23 | 2 | 4 | 0 | + +-------+--------+----------------+------+ + | 24 | 3 | 5 | 32 | + +-------+--------+----------------+------+ + | 25 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 26 | 6 | 5 | 32 | + +-------+--------+----------------+------+ + | 27 | 7 | 5 | 0 | + +-------+--------+----------------+------+ + | 28 | 9 | 6 | 0 | + +-------+--------+----------------+------+ + | 29 | 12 | 6 
| 0 | + +-------+--------+----------------+------+ + | 30 | 15 | 6 | 0 | + +-------+--------+----------------+------+ + | 31 | 18 | 6 | 0 | + +-------+--------+----------------+------+ + | 32 | 21 | 6 | 0 | + +-------+--------+----------------+------+ + | 33 | 24 | 6 | 0 | + +-------+--------+----------------+------+ + | 34 | 27 | 6 | 0 | + +-------+--------+----------------+------+ + | 35 | 30 | 6 | 0 | + +-------+--------+----------------+------+ + | 36 | 32 | 6 | 0 | + +-------+--------+----------------+------+ + | 37 | 34 | 6 | 0 | + +-------+--------+----------------+------+ + | 38 | 36 | 6 | 0 | + +-------+--------+----------------+------+ + | 39 | 38 | 6 | 0 | + +-------+--------+----------------+------+ + | 40 | 40 | 6 | 0 | + +-------+--------+----------------+------+ + | 41 | 42 | 6 | 0 | + +-------+--------+----------------+------+ + | 42 | 44 | 6 | 0 | + +-------+--------+----------------+------+ + | 43 | 1 | 4 | 32 | + +-------+--------+----------------+------+ + | 44 | 1 | 4 | 48 | + + + +Collet & Kucherawy Informational [Page 50] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 45 | 2 | 4 | 16 | + +-------+--------+----------------+------+ + | 46 | 4 | 5 | 32 | + +-------+--------+----------------+------+ + | 47 | 5 | 5 | 32 | + +-------+--------+----------------+------+ + | 48 | 7 | 5 | 32 | + +-------+--------+----------------+------+ + | 49 | 8 | 5 | 32 | + +-------+--------+----------------+------+ + | 50 | 11 | 6 | 0 | + +-------+--------+----------------+------+ + | 51 | 14 | 6 | 0 | + +-------+--------+----------------+------+ + | 52 | 17 | 6 | 0 | + +-------+--------+----------------+------+ + | 53 | 20 | 6 | 0 | + +-------+--------+----------------+------+ + | 54 | 23 | 6 | 0 | + +-------+--------+----------------+------+ + | 55 | 26 | 6 | 0 | + +-------+--------+----------------+------+ + | 56 | 29 | 6 | 0 | + +-------+--------+----------------+------+ + | 57 | 52 | 6 | 0 | + +-------+--------+----------------+------+ + | 58 | 51 | 6 | 0 | + +-------+--------+----------------+------+ + | 59 | 50 | 6 | 0 | + +-------+--------+----------------+------+ + | 60 | 49 | 6 | 0 | + +-------+--------+----------------+------+ + | 61 | 48 | 6 | 0 | + +-------+--------+----------------+------+ + | 62 | 47 | 6 | 0 | + +-------+--------+----------------+------+ + | 63 | 46 | 6 | 0 | + +-------+--------+----------------+------+ + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 51] + +RFC 8478 application/zstd October 2018 + + +A.3. 
Offset Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 5 | 0 | + +-------+--------+----------------+------+ + | 1 | 6 | 4 | 0 | + +-------+--------+----------------+------+ + | 2 | 9 | 5 | 0 | + +-------+--------+----------------+------+ + | 3 | 15 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 21 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 7 | 4 | 0 | + +-------+--------+----------------+------+ + | 7 | 12 | 5 | 0 | + +-------+--------+----------------+------+ + | 8 | 18 | 5 | 0 | + +-------+--------+----------------+------+ + | 9 | 23 | 5 | 0 | + +-------+--------+----------------+------+ + | 10 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 11 | 8 | 4 | 0 | + +-------+--------+----------------+------+ + | 12 | 14 | 5 | 0 | + +-------+--------+----------------+------+ + | 13 | 20 | 5 | 0 | + +-------+--------+----------------+------+ + | 14 | 2 | 5 | 0 | + +-------+--------+----------------+------+ + | 15 | 7 | 4 | 16 | + +-------+--------+----------------+------+ + | 16 | 11 | 5 | 0 | + +-------+--------+----------------+------+ + | 17 | 17 | 5 | 0 | + +-------+--------+----------------+------+ + | 18 | 22 | 5 | 0 | + +-------+--------+----------------+------+ + | 19 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 20 | 8 | 4 | 16 | + + + +Collet & Kucherawy Informational [Page 52] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 21 | 13 | 5 | 0 | + +-------+--------+----------------+------+ + | 22 | 19 | 5 | 0 | + +-------+--------+----------------+------+ + | 23 | 1 | 5 | 0 | + +-------+--------+----------------+------+ + | 24 | 6 | 4 | 16 | + +-------+--------+----------------+------+ + | 25 | 10 | 5 | 0 | + +-------+--------+----------------+------+ + | 26 | 16 | 5 | 0 | + +-------+--------+----------------+------+ + | 27 | 28 | 5 | 0 | + +-------+--------+----------------+------+ + | 28 | 27 | 5 | 0 | + +-------+--------+----------------+------+ + | 29 | 26 | 5 | 0 | + +-------+--------+----------------+------+ + | 30 | 25 | 5 | 0 | + +-------+--------+----------------+------+ + | 31 | 24 | 5 | 0 | + +-------+--------+----------------+------+ + +Acknowledgments + + zstd was developed by Yann Collet. + + Bobo Bose-Kolanu, Felix Handte, Kyle Nekritz, Nick Terrell, and David + Schleimer provided helpful feedback during the development of this + document. + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 53] + +RFC 8478 application/zstd October 2018 + + +Authors' Addresses + + Yann Collet + Facebook + 1 Hacker Way + Menlo Park, CA 94025 + United States of America + + Email: cyan@fb.com + + + Murray S. 
Kucherawy (editor) + Facebook + 1 Hacker Way + Menlo Park, CA 94025 + United States of America + + Email: msk@fb.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 54] + diff --git a/lib/std/compress/testdata/rfc8478.txt.zst.19 b/lib/std/compress/testdata/rfc8478.txt.zst.19 new file mode 100644 index 0000000000..e0cf325af2 Binary files /dev/null and b/lib/std/compress/testdata/rfc8478.txt.zst.19 differ diff --git a/lib/std/compress/testdata/rfc8478.txt.zst.3 b/lib/std/compress/testdata/rfc8478.txt.zst.3 new file mode 100644 index 0000000000..781601a8a2 Binary files /dev/null and b/lib/std/compress/testdata/rfc8478.txt.zst.3 differ diff --git a/lib/std/compress/zstandard.zig b/lib/std/compress/zstandard.zig new file mode 100644 index 0000000000..f59de87e6b --- /dev/null +++ b/lib/std/compress/zstandard.zig @@ -0,0 +1,286 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const RingBuffer = std.RingBuffer; + +const types = @import("zstandard/types.zig"); +pub const frame = types.frame; +pub const compressed_block = types.compressed_block; + +pub const decompress = @import("zstandard/decompress.zig"); + +pub const DecompressStreamOptions = struct { + verify_checksum: bool = true, + window_size_max: usize = 1 << 23, // 8MiB default maximum window size, +}; + +pub fn DecompressStream( + comptime ReaderType: type, + comptime options: DecompressStreamOptions, +) type { + return struct { + const Self = @This(); + + allocator: Allocator, + source: std.io.CountingReader(ReaderType), + state: enum { NewFrame, InFrame, LastBlock }, + decode_state: decompress.block.DecodeState, + frame_context: decompress.FrameContext, + buffer: RingBuffer, + literal_fse_buffer: []types.compressed_block.Table.Fse, + match_fse_buffer: []types.compressed_block.Table.Fse, + offset_fse_buffer: []types.compressed_block.Table.Fse, + literals_buffer: []u8, + sequence_buffer: []u8, + checksum: if (options.verify_checksum) ?u32 else void, + current_frame_decompressed_size: usize, + + pub const Error = ReaderType.Error || error{ + ChecksumFailure, + DictionaryIdFlagUnsupported, + MalformedBlock, + MalformedFrame, + OutOfMemory, + }; + + pub const Reader = std.io.Reader(*Self, Error, read); + + pub fn init(allocator: Allocator, source: ReaderType) Self { + return Self{ + .allocator = allocator, + .source = std.io.countingReader(source), + .state = .NewFrame, + .decode_state = undefined, + .frame_context = undefined, + .buffer = undefined, + .literal_fse_buffer = undefined, + .match_fse_buffer = undefined, + .offset_fse_buffer = undefined, + .literals_buffer = undefined, + .sequence_buffer = undefined, + .checksum = undefined, + .current_frame_decompressed_size = undefined, + }; + } + + fn frameInit(self: *Self) !void { + const source_reader = self.source.reader(); + switch (try decompress.decodeFrameHeader(source_reader)) { + .skippable => |header| { + try source_reader.skipBytes(header.frame_size, .{}); + self.state = .NewFrame; + }, + .zstandard => |header| { + const frame_context = context: { + break :context try decompress.FrameContext.init( + header, + options.window_size_max, + options.verify_checksum, + ); + }; + + const literal_fse_buffer = try self.allocator.alloc( + types.compressed_block.Table.Fse, + types.compressed_block.table_size_max.literal, + ); + errdefer self.allocator.free(literal_fse_buffer); + + const match_fse_buffer = try self.allocator.alloc( + types.compressed_block.Table.Fse, + types.compressed_block.table_size_max.match, + 
); + errdefer self.allocator.free(match_fse_buffer); + + const offset_fse_buffer = try self.allocator.alloc( + types.compressed_block.Table.Fse, + types.compressed_block.table_size_max.offset, + ); + errdefer self.allocator.free(offset_fse_buffer); + + const decode_state = decompress.block.DecodeState.init( + literal_fse_buffer, + match_fse_buffer, + offset_fse_buffer, + ); + const buffer = try RingBuffer.init(self.allocator, frame_context.window_size); + + const literals_data = try self.allocator.alloc(u8, options.window_size_max); + errdefer self.allocator.free(literals_data); + + const sequence_data = try self.allocator.alloc(u8, options.window_size_max); + errdefer self.allocator.free(sequence_data); + + self.literal_fse_buffer = literal_fse_buffer; + self.match_fse_buffer = match_fse_buffer; + self.offset_fse_buffer = offset_fse_buffer; + self.literals_buffer = literals_data; + self.sequence_buffer = sequence_data; + + self.buffer = buffer; + + self.decode_state = decode_state; + self.frame_context = frame_context; + + self.checksum = if (options.verify_checksum) null else {}; + self.current_frame_decompressed_size = 0; + + self.state = .InFrame; + }, + } + } + + pub fn deinit(self: *Self) void { + if (self.state == .NewFrame) return; + self.allocator.free(self.decode_state.literal_fse_buffer); + self.allocator.free(self.decode_state.match_fse_buffer); + self.allocator.free(self.decode_state.offset_fse_buffer); + self.allocator.free(self.literals_buffer); + self.allocator.free(self.sequence_buffer); + self.buffer.deinit(self.allocator); + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + + pub fn read(self: *Self, buffer: []u8) Error!usize { + if (buffer.len == 0) return 0; + + var size: usize = 0; + while (size == 0) { + while (self.state == .NewFrame) { + const initial_count = self.source.bytes_read; + self.frameInit() catch |err| switch (err) { + error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported, + error.EndOfStream => return if (self.source.bytes_read == initial_count) + 0 + else + error.MalformedFrame, + error.OutOfMemory => return error.OutOfMemory, + else => return error.MalformedFrame, + }; + } + size = try self.readInner(buffer); + } + return size; + } + + fn readInner(self: *Self, buffer: []u8) Error!usize { + std.debug.assert(self.state != .NewFrame); + + const source_reader = self.source.reader(); + while (self.buffer.isEmpty() and self.state != .LastBlock) { + const header_bytes = source_reader.readBytesNoEof(3) catch + return error.MalformedFrame; + const block_header = decompress.block.decodeBlockHeader(&header_bytes); + + decompress.block.decodeBlockReader( + &self.buffer, + source_reader, + block_header, + &self.decode_state, + self.frame_context.block_size_max, + self.literals_buffer, + self.sequence_buffer, + ) catch + return error.MalformedBlock; + + if (self.frame_context.content_size) |size| { + if (self.current_frame_decompressed_size > size) return error.MalformedFrame; + } + + const size = self.buffer.len(); + self.current_frame_decompressed_size += size; + + if (self.frame_context.hasher_opt) |*hasher| { + if (size > 0) { + const written_slice = self.buffer.sliceLast(size); + hasher.update(written_slice.first); + hasher.update(written_slice.second); + } + } + if (block_header.last_block) { + self.state = .LastBlock; + if (self.frame_context.has_checksum) { + const checksum = source_reader.readIntLittle(u32) catch + return error.MalformedFrame; + if (comptime options.verify_checksum) { + if 
(self.frame_context.hasher_opt) |*hasher| {
+                                if (checksum != decompress.computeChecksum(hasher))
+                                    return error.ChecksumFailure;
+                            }
+                        }
+                    }
+                    if (self.frame_context.content_size) |content_size| {
+                        if (content_size != self.current_frame_decompressed_size) {
+                            return error.MalformedFrame;
+                        }
+                    }
+                }
+            }
+
+            const size = @min(self.buffer.len(), buffer.len);
+            for (0..size) |i| {
+                buffer[i] = self.buffer.read().?;
+            }
+            if (self.state == .LastBlock and self.buffer.len() == 0) {
+                self.state = .NewFrame;
+                self.allocator.free(self.literal_fse_buffer);
+                self.allocator.free(self.match_fse_buffer);
+                self.allocator.free(self.offset_fse_buffer);
+                self.allocator.free(self.literals_buffer);
+                self.allocator.free(self.sequence_buffer);
+                self.buffer.deinit(self.allocator);
+            }
+            return size;
+        }
+    };
+}
+
+pub fn decompressStreamOptions(
+    allocator: Allocator,
+    reader: anytype,
+    comptime options: DecompressStreamOptions,
+) DecompressStream(@TypeOf(reader), options) {
+    return DecompressStream(@TypeOf(reader), options).init(allocator, reader);
+}
+
+pub fn decompressStream(
+    allocator: Allocator,
+    reader: anytype,
+) DecompressStream(@TypeOf(reader), .{}) {
+    return DecompressStream(@TypeOf(reader), .{}).init(allocator, reader);
+}
+
+fn testDecompress(data: []const u8) ![]u8 {
+    var in_stream = std.io.fixedBufferStream(data);
+    var zstd_stream = decompressStream(std.testing.allocator, in_stream.reader());
+    defer zstd_stream.deinit();
+    const result = zstd_stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
+    return result;
+}
+
+fn testReader(data: []const u8, comptime expected: []const u8) !void {
+    const buf = try testDecompress(data);
+    defer std.testing.allocator.free(buf);
+    try std.testing.expectEqualSlices(u8, expected, buf);
+}
+
+test "zstandard decompression" {
+    const uncompressed = @embedFile("testdata/rfc8478.txt");
+    const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
+    const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
+
+    var buffer = try std.testing.allocator.alloc(u8, uncompressed.len);
+    defer std.testing.allocator.free(buffer);
+
+    const res3 = try decompress.decode(buffer, compressed3, true);
+    try std.testing.expectEqual(uncompressed.len, res3);
+    try std.testing.expectEqualSlices(u8, uncompressed, buffer);
+
+    const res19 = try decompress.decode(buffer, compressed19, true);
+    try std.testing.expectEqual(uncompressed.len, res19);
+    try std.testing.expectEqualSlices(u8, uncompressed, buffer);
+
+    try testReader(compressed3, uncompressed);
+    try testReader(compressed19, uncompressed);
+}
diff --git a/lib/std/compress/zstandard/decode/block.zig b/lib/std/compress/zstandard/decode/block.zig
new file mode 100644
index 0000000000..4b7353f63c
--- /dev/null
+++ b/lib/std/compress/zstandard/decode/block.zig
@@ -0,0 +1,1149 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const RingBuffer = std.RingBuffer;
+
+const types = @import("../types.zig");
+const frame = types.frame;
+const Table = types.compressed_block.Table;
+const LiteralsSection = types.compressed_block.LiteralsSection;
+const SequencesSection = types.compressed_block.SequencesSection;
+
+const huffman = @import("huffman.zig");
+const readers = @import("../readers.zig");
+
+const decodeFseTable = @import("fse.zig").decodeFseTable;
+
+const readInt = std.mem.readIntLittle;
+
+pub const Error = error{
+    BlockSizeOverMaximum,
+    MalformedBlockSize,
+    ReservedBlock,
+    MalformedRleBlock,
+    MalformedCompressedBlock,
+};
+
+pub const DecodeState = struct {
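+    // Decoding state carried across the blocks of a frame: the three repeat
+    // offsets, an FSE state/table per sequence code (offset, match, literal
+    // lengths), and the position within the literals stream(s). A rough
+    // sketch of the call order for one compressed block (buffer names are
+    // illustrative, not part of this API):
+    //
+    //   var state = DecodeState.init(literal_buf, match_buf, offset_buf);
+    //   try state.prepare(source, literals, sequences_header);
+    //   try state.readInitialFseState(&bit_reader);
+    //   // then decodeSequenceSlice()/decodeSequenceRingBuffer() per sequence
+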
+    repeat_offsets: [3]u32,
+
+    offset: StateData(8),
+    match: StateData(9),
+    literal: StateData(9),
+
+    offset_fse_buffer: []Table.Fse,
+    match_fse_buffer: []Table.Fse,
+    literal_fse_buffer: []Table.Fse,
+
+    fse_tables_undefined: bool,
+
+    literal_stream_reader: readers.ReverseBitReader,
+    literal_stream_index: usize,
+    literal_streams: LiteralsSection.Streams,
+    literal_header: LiteralsSection.Header,
+    huffman_tree: ?LiteralsSection.HuffmanTree,
+
+    literal_written_count: usize,
+    written_count: usize = 0,
+
+    fn StateData(comptime max_accuracy_log: comptime_int) type {
+        return struct {
+            state: State,
+            table: Table,
+            accuracy_log: u8,
+
+            const State = std.meta.Int(.unsigned, max_accuracy_log);
+        };
+    }
+
+    pub fn init(
+        literal_fse_buffer: []Table.Fse,
+        match_fse_buffer: []Table.Fse,
+        offset_fse_buffer: []Table.Fse,
+    ) DecodeState {
+        return DecodeState{
+            .repeat_offsets = .{
+                types.compressed_block.start_repeated_offset_1,
+                types.compressed_block.start_repeated_offset_2,
+                types.compressed_block.start_repeated_offset_3,
+            },
+
+            .offset = undefined,
+            .match = undefined,
+            .literal = undefined,
+
+            .literal_fse_buffer = literal_fse_buffer,
+            .match_fse_buffer = match_fse_buffer,
+            .offset_fse_buffer = offset_fse_buffer,
+
+            .fse_tables_undefined = true,
+
+            .literal_written_count = 0,
+            .literal_header = undefined,
+            .literal_streams = undefined,
+            .literal_stream_reader = undefined,
+            .literal_stream_index = undefined,
+            .huffman_tree = null,
+
+            .written_count = 0,
+        };
+    }
+
+    /// Prepare the decoder to decode a compressed block. Loads the literals
+    /// stream and Huffman tree from `literals` and reads the FSE tables from
+    /// `source`.
+    ///
+    /// Errors returned:
+    /// - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
+    ///   first byte does not have any bits set
+    /// - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
+    ///   section and the decode state does not have a Huffman tree from a
+    ///   previous block
+    /// - `error.RepeatModeFirst` on the first call if one of the sequence FSE
+    ///   tables is set to repeat mode
+    /// - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy log
+    /// - `error.MalformedFseTable` if there are errors decoding an FSE table
+    /// - `error.EndOfStream` if `source` ends before all FSE tables are read
+    pub fn prepare(
+        self: *DecodeState,
+        source: anytype,
+        literals: LiteralsSection,
+        sequences_header: SequencesSection.Header,
+    ) !void {
+        self.literal_written_count = 0;
+        self.literal_header = literals.header;
+        self.literal_streams = literals.streams;
+
+        if (literals.huffman_tree) |tree| {
+            self.huffman_tree = tree;
+        } else if (literals.header.block_type == .treeless and self.huffman_tree == null) {
+            return error.TreelessLiteralsFirst;
+        }
+
+        switch (literals.header.block_type) {
+            .raw, .rle => {},
+            .compressed, .treeless => {
+                self.literal_stream_index = 0;
+                switch (literals.streams) {
+                    .one => |slice| try self.initLiteralStream(slice),
+                    .four => |streams| try self.initLiteralStream(streams[0]),
+                }
+            },
+        }
+
+        if (sequences_header.sequence_count > 0) {
+            try self.updateFseTable(source, .literal, sequences_header.literal_lengths);
+            try self.updateFseTable(source, .offset, sequences_header.offsets);
+            try self.updateFseTable(source, .match, sequences_header.match_lengths);
+            self.fse_tables_undefined = false;
+        }
+    }
+
+    /// Read initial FSE states for sequence decoding.
+    ///
+    /// Errors returned:
+    /// - `error.EndOfStream` if `bit_reader` does not contain enough bits.
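+    ///
+    /// The states are read in literal, offset, match order, each using that
+    /// code's accuracy log as its bit width; with the predefined tables
+    /// (accuracy logs 6, 5, and 6) this consumes 17 bits from `bit_reader`.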
+ pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void { + self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log); + self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log); + self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log); + } + + fn updateRepeatOffset(self: *DecodeState, offset: u32) void { + self.repeat_offsets[2] = self.repeat_offsets[1]; + self.repeat_offsets[1] = self.repeat_offsets[0]; + self.repeat_offsets[0] = offset; + } + + fn useRepeatOffset(self: *DecodeState, index: usize) u32 { + if (index == 1) + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]) + else if (index == 2) { + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]); + std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]); + } + return self.repeat_offsets[0]; + } + + const DataType = enum { offset, match, literal }; + + fn updateState( + self: *DecodeState, + comptime choice: DataType, + bit_reader: *readers.ReverseBitReader, + ) error{ MalformedFseBits, EndOfStream }!void { + switch (@field(self, @tagName(choice)).table) { + .rle => {}, + .fse => |table| { + const data = table[@field(self, @tagName(choice)).state]; + const T = @TypeOf(@field(self, @tagName(choice))).State; + const bits_summand = try bit_reader.readBitsNoEof(T, data.bits); + const next_state = std.math.cast( + @TypeOf(@field(self, @tagName(choice))).State, + data.baseline + bits_summand, + ) orelse return error.MalformedFseBits; + @field(self, @tagName(choice)).state = next_state; + }, + } + } + + const FseTableError = error{ + MalformedFseTable, + MalformedAccuracyLog, + RepeatModeFirst, + EndOfStream, + }; + + fn updateFseTable( + self: *DecodeState, + source: anytype, + comptime choice: DataType, + mode: SequencesSection.Header.Mode, + ) !void { + const field_name = @tagName(choice); + switch (mode) { + .predefined => { + @field(self, field_name).accuracy_log = + @field(types.compressed_block.default_accuracy_log, field_name); + + @field(self, field_name).table = + @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table"); + }, + .rle => { + @field(self, field_name).accuracy_log = 0; + @field(self, field_name).table = .{ .rle = try source.readByte() }; + }, + .fse => { + var bit_reader = readers.bitReader(source); + + const table_size = try decodeFseTable( + &bit_reader, + @field(types.compressed_block.table_symbol_count_max, field_name), + @field(types.compressed_block.table_accuracy_log_max, field_name), + @field(self, field_name ++ "_fse_buffer"), + ); + @field(self, field_name).table = .{ + .fse = @field(self, field_name ++ "_fse_buffer")[0..table_size], + }; + @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size); + }, + .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst, + } + } + + const Sequence = struct { + literal_length: u32, + match_length: u32, + offset: u32, + }; + + fn nextSequence( + self: *DecodeState, + bit_reader: *readers.ReverseBitReader, + ) error{ InvalidBitStream, EndOfStream }!Sequence { + const raw_code = self.getCode(.offset); + const offset_code = std.math.cast(u5, raw_code) orelse { + return error.InvalidBitStream; + }; + const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code); + + const match_code = self.getCode(.match); + if (match_code >= types.compressed_block.match_length_code_table.len) + return error.InvalidBitStream; + 
const match = types.compressed_block.match_length_code_table[match_code]; + const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]); + + const literal_code = self.getCode(.literal); + if (literal_code >= types.compressed_block.literals_length_code_table.len) + return error.InvalidBitStream; + const literal = types.compressed_block.literals_length_code_table[literal_code]; + const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]); + + const offset = if (offset_value > 3) offset: { + const offset = offset_value - 3; + self.updateRepeatOffset(offset); + break :offset offset; + } else offset: { + if (literal_length == 0) { + if (offset_value == 3) { + const offset = self.repeat_offsets[0] - 1; + self.updateRepeatOffset(offset); + break :offset offset; + } + break :offset self.useRepeatOffset(offset_value); + } + break :offset self.useRepeatOffset(offset_value - 1); + }; + + if (offset == 0) return error.InvalidBitStream; + + return .{ + .literal_length = literal_length, + .match_length = match_length, + .offset = offset, + }; + } + + fn executeSequenceSlice( + self: *DecodeState, + dest: []u8, + write_pos: usize, + sequence: Sequence, + ) (error{MalformedSequence} || DecodeLiteralsError)!void { + if (sequence.offset > write_pos + sequence.literal_length) return error.MalformedSequence; + + try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length); + const copy_start = write_pos + sequence.literal_length - sequence.offset; + const copy_end = copy_start + sequence.match_length; + // NOTE: we ignore the usage message for std.mem.copy and copy with dest.ptr >= src.ptr + // to allow repeats + std.mem.copy(u8, dest[write_pos + sequence.literal_length ..], dest[copy_start..copy_end]); + self.written_count += sequence.match_length; + } + + fn executeSequenceRingBuffer( + self: *DecodeState, + dest: *RingBuffer, + sequence: Sequence, + ) (error{MalformedSequence} || DecodeLiteralsError)!void { + if (sequence.offset > @min(dest.data.len, self.written_count + sequence.literal_length)) + return error.MalformedSequence; + + try self.decodeLiteralsRingBuffer(dest, sequence.literal_length); + const copy_start = dest.write_index + dest.data.len - sequence.offset; + const copy_slice = dest.sliceAt(copy_start, sequence.match_length); + // TODO: would std.mem.copy and figuring out dest slice be better/faster? + for (copy_slice.first) |b| dest.writeAssumeCapacity(b); + for (copy_slice.second) |b| dest.writeAssumeCapacity(b); + self.written_count += sequence.match_length; + } + + const DecodeSequenceError = error{ + InvalidBitStream, + EndOfStream, + MalformedSequence, + MalformedFseBits, + } || DecodeLiteralsError; + + /// Decode one sequence from `bit_reader` into `dest`, written starting at + /// `write_pos` and update FSE states if `last_sequence` is `false`. + /// `prepare()` must be called for the block before attempting to decode + /// sequences. + /// + /// Errors returned: + /// - `error.MalformedSequence` if the decompressed sequence would be + /// longer than `sequence_size_limit` or the sequence's offset is too + /// large + /// - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal + /// streams do not contain enough literals for the sequence (this may + /// mean the literal stream or the sequence is malformed). 
+    /// - `error.InvalidBitStream` if the FSE sequence bitstream is malformed
+    /// - `error.EndOfStream` if `bit_reader` does not contain enough bits
+    /// - `error.DestTooSmall` if `dest` is not large enough to hold the
+    ///   decompressed sequence
+    pub fn decodeSequenceSlice(
+        self: *DecodeState,
+        dest: []u8,
+        write_pos: usize,
+        bit_reader: *readers.ReverseBitReader,
+        sequence_size_limit: usize,
+        last_sequence: bool,
+    ) (error{DestTooSmall} || DecodeSequenceError)!usize {
+        const sequence = try self.nextSequence(bit_reader);
+        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
+        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
+        if (sequence_length > dest[write_pos..].len) return error.DestTooSmall;
+
+        try self.executeSequenceSlice(dest, write_pos, sequence);
+        if (!last_sequence) {
+            try self.updateState(.literal, bit_reader);
+            try self.updateState(.match, bit_reader);
+            try self.updateState(.offset, bit_reader);
+        }
+        return sequence_length;
+    }
+
+    /// Decode one sequence from `bit_reader` into `dest`; see
+    /// `decodeSequenceSlice`.
+    pub fn decodeSequenceRingBuffer(
+        self: *DecodeState,
+        dest: *RingBuffer,
+        bit_reader: anytype,
+        sequence_size_limit: usize,
+        last_sequence: bool,
+    ) DecodeSequenceError!usize {
+        const sequence = try self.nextSequence(bit_reader);
+        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
+        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
+
+        try self.executeSequenceRingBuffer(dest, sequence);
+        if (!last_sequence) {
+            try self.updateState(.literal, bit_reader);
+            try self.updateState(.match, bit_reader);
+            try self.updateState(.offset, bit_reader);
+        }
+        return sequence_length;
+    }
+
+    fn nextLiteralMultiStream(
+        self: *DecodeState,
+    ) error{BitStreamHasNoStartBit}!void {
+        self.literal_stream_index += 1;
+        try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
+    }
+
+    fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
+        try self.literal_stream_reader.init(bytes);
+    }
+
+    fn isLiteralStreamEmpty(self: *DecodeState) bool {
+        switch (self.literal_streams) {
+            .one => return self.literal_stream_reader.isEmpty(),
+            .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
+        }
+    }
+
+    const LiteralBitsError = error{
+        BitStreamHasNoStartBit,
+        UnexpectedEndOfLiteralStream,
+    };
+    fn readLiteralsBits(
+        self: *DecodeState,
+        bit_count_to_read: usize,
+    ) LiteralBitsError!u16 {
+        return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: {
+            if (self.literal_streams == .four and self.literal_stream_index < 3) {
+                try self.nextLiteralMultiStream();
+                break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
+                    return error.UnexpectedEndOfLiteralStream;
+            } else {
+                return error.UnexpectedEndOfLiteralStream;
+            }
+        };
+    }
+
+    const DecodeLiteralsError = error{
+        MalformedLiteralsLength,
+        NotFound,
+    } || LiteralBitsError;
+
+    /// Decode `len` bytes of literals into `dest`.
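+    /// Raw literals are copied directly and RLE literals are filled from a
+    /// single byte; Huffman-coded (compressed or treeless) literals are
+    /// decoded bit by bit from the literal stream(s).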
+ /// + /// Errors returned: + /// - `error.MalformedLiteralsLength` if the number of literal bytes + /// decoded by `self` plus `len` is greater than the regenerated size of + /// `literals` + /// - `error.UnexpectedEndOfLiteralStream` and `error.NotFound` if there + /// are problems decoding Huffman compressed literals + pub fn decodeLiteralsSlice( + self: *DecodeState, + dest: []u8, + len: usize, + ) DecodeLiteralsError!void { + if (self.literal_written_count + len > self.literal_header.regenerated_size) + return error.MalformedLiteralsLength; + + switch (self.literal_header.block_type) { + .raw => { + const literals_end = self.literal_written_count + len; + const literal_data = self.literal_streams.one[self.literal_written_count..literals_end]; + std.mem.copy(u8, dest, literal_data); + self.literal_written_count += len; + self.written_count += len; + }, + .rle => { + for (0..len) |i| { + dest[i] = self.literal_streams.one[0]; + } + self.literal_written_count += len; + self.written_count += len; + }, + .compressed, .treeless => { + // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4; + const huffman_tree = self.huffman_tree orelse unreachable; + const max_bit_count = huffman_tree.max_bit_count; + const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount( + huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, + max_bit_count, + ); + var bits_read: u4 = 0; + var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; + var bit_count_to_read: u4 = starting_bit_count; + for (0..len) |i| { + var prefix: u16 = 0; + while (true) { + const new_bits = self.readLiteralsBits(bit_count_to_read) catch |err| { + return err; + }; + prefix <<= bit_count_to_read; + prefix |= new_bits; + bits_read += bit_count_to_read; + const result = huffman_tree.query(huffman_tree_index, prefix) catch |err| { + return err; + }; + + switch (result) { + .symbol => |sym| { + dest[i] = sym; + bit_count_to_read = starting_bit_count; + bits_read = 0; + huffman_tree_index = huffman_tree.symbol_count_minus_one; + break; + }, + .index => |index| { + huffman_tree_index = index; + const bit_count = LiteralsSection.HuffmanTree.weightToBitCount( + huffman_tree.nodes[index].weight, + max_bit_count, + ); + bit_count_to_read = bit_count - bits_read; + }, + } + } + } + self.literal_written_count += len; + self.written_count += len; + }, + } + } + + /// Decode literals into `dest`; see `decodeLiteralsSlice()`. 
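+    /// Writes use the ring buffer's assume-capacity methods, so `dest` must
+    /// have at least `len` bytes of free capacity (callers size it to the
+    /// frame's window size).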
+ pub fn decodeLiteralsRingBuffer( + self: *DecodeState, + dest: *RingBuffer, + len: usize, + ) DecodeLiteralsError!void { + if (self.literal_written_count + len > self.literal_header.regenerated_size) + return error.MalformedLiteralsLength; + + switch (self.literal_header.block_type) { + .raw => { + const literals_end = self.literal_written_count + len; + const literal_data = self.literal_streams.one[self.literal_written_count..literals_end]; + dest.writeSliceAssumeCapacity(literal_data); + self.literal_written_count += len; + self.written_count += len; + }, + .rle => { + for (0..len) |_| { + dest.writeAssumeCapacity(self.literal_streams.one[0]); + } + self.literal_written_count += len; + self.written_count += len; + }, + .compressed, .treeless => { + // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4; + const huffman_tree = self.huffman_tree orelse unreachable; + const max_bit_count = huffman_tree.max_bit_count; + const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount( + huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, + max_bit_count, + ); + var bits_read: u4 = 0; + var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; + var bit_count_to_read: u4 = starting_bit_count; + for (0..len) |_| { + var prefix: u16 = 0; + while (true) { + const new_bits = try self.readLiteralsBits(bit_count_to_read); + prefix <<= bit_count_to_read; + prefix |= new_bits; + bits_read += bit_count_to_read; + const result = try huffman_tree.query(huffman_tree_index, prefix); + + switch (result) { + .symbol => |sym| { + dest.writeAssumeCapacity(sym); + bit_count_to_read = starting_bit_count; + bits_read = 0; + huffman_tree_index = huffman_tree.symbol_count_minus_one; + break; + }, + .index => |index| { + huffman_tree_index = index; + const bit_count = LiteralsSection.HuffmanTree.weightToBitCount( + huffman_tree.nodes[index].weight, + max_bit_count, + ); + bit_count_to_read = bit_count - bits_read; + }, + } + } + } + self.literal_written_count += len; + self.written_count += len; + }, + } + } + + fn getCode(self: *DecodeState, comptime choice: DataType) u32 { + return switch (@field(self, @tagName(choice)).table) { + .rle => |value| value, + .fse => |table| table[@field(self, @tagName(choice)).state].symbol, + }; + } +}; + +/// Decode a single block from `src` into `dest`. The beginning of `src` must be +/// the start of the block content (i.e. directly after the block header). +/// Increments `consumed_count` by the number of bytes read from `src` to decode +/// the block and returns the decompressed size of the block. 
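+///
+/// A minimal sketch of driving this decoder by hand (buffer names and the
+/// `1 << 17` block size cap are illustrative assumptions, not fixed by this
+/// API):
+///
+///     const block_header = try decodeBlockHeaderSlice(src);
+///     var consumed_count: usize = 0;
+///     const written = try decodeBlock(
+///         dest, src[3..], block_header, &decode_state, &consumed_count, 1 << 17, 0);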
+///
+/// Errors returned:
+///
+/// - `error.BlockSizeOverMaximum` if the block's size is larger than 1 << 17 or
+///   `dest[written_count..].len`
+/// - `error.MalformedBlockSize` if `src.len` is smaller than the block size
+///   and the block is a raw or compressed block
+/// - `error.ReservedBlock` if the block is a reserved block
+/// - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
+/// - `error.MalformedCompressedBlock` if there are errors decoding a
+///   compressed block
+/// - `error.DestTooSmall` if `dest` is not large enough to hold the
+///   decompressed block
+pub fn decodeBlock(
+    dest: []u8,
+    src: []const u8,
+    block_header: frame.Zstandard.Block.Header,
+    decode_state: *DecodeState,
+    consumed_count: *usize,
+    block_size_max: usize,
+    written_count: usize,
+) (error{DestTooSmall} || Error)!usize {
+    const block_size = block_header.block_size;
+    if (block_size_max < block_size) return error.BlockSizeOverMaximum;
+    switch (block_header.block_type) {
+        .raw => {
+            if (src.len < block_size) return error.MalformedBlockSize;
+            if (dest[written_count..].len < block_size) return error.DestTooSmall;
+            const data = src[0..block_size];
+            std.mem.copy(u8, dest[written_count..], data);
+            consumed_count.* += block_size;
+            decode_state.written_count += block_size;
+            return block_size;
+        },
+        .rle => {
+            if (src.len < 1) return error.MalformedRleBlock;
+            if (dest[written_count..].len < block_size) return error.DestTooSmall;
+            for (written_count..block_size + written_count) |write_pos| {
+                dest[write_pos] = src[0];
+            }
+            consumed_count.* += 1;
+            decode_state.written_count += block_size;
+            return block_size;
+        },
+        .compressed => {
+            if (src.len < block_size) return error.MalformedBlockSize;
+            var bytes_read: usize = 0;
+            const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
+                return error.MalformedCompressedBlock;
+            var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
+            const fbs_reader = fbs.reader();
+            const sequences_header = decodeSequencesHeader(fbs_reader) catch
+                return error.MalformedCompressedBlock;
+
+            decode_state.prepare(fbs_reader, literals, sequences_header) catch
+                return error.MalformedCompressedBlock;
+
+            bytes_read += fbs.pos;
+
+            var bytes_written: usize = 0;
+            {
+                const bit_stream_bytes = src[bytes_read..block_size];
+                var bit_stream: readers.ReverseBitReader = undefined;
+                bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
+
+                if (sequences_header.sequence_count > 0) {
+                    decode_state.readInitialFseState(&bit_stream) catch
+                        return error.MalformedCompressedBlock;
+
+                    var sequence_size_limit = block_size_max;
+                    for (0..sequences_header.sequence_count) |i| {
+                        const write_pos = written_count + bytes_written;
+                        const decompressed_size = decode_state.decodeSequenceSlice(
+                            dest,
+                            write_pos,
+                            &bit_stream,
+                            sequence_size_limit,
+                            i == sequences_header.sequence_count - 1,
+                        ) catch |err| switch (err) {
+                            error.DestTooSmall => return error.DestTooSmall,
+                            else => return error.MalformedCompressedBlock,
+                        };
+                        bytes_written += decompressed_size;
+                        sequence_size_limit -= decompressed_size;
+                    }
+                }
+
+                if (!bit_stream.isEmpty()) {
+                    return error.MalformedCompressedBlock;
+                }
+            }
+
+            if (decode_state.literal_written_count < literals.header.regenerated_size) {
+                const len = literals.header.regenerated_size - decode_state.literal_written_count;
+                if (len > dest[written_count + bytes_written ..].len) return error.DestTooSmall;
+                decode_state.decodeLiteralsSlice(dest[written_count + bytes_written
..], len) catch + return error.MalformedCompressedBlock; + bytes_written += len; + } + + switch (decode_state.literal_header.block_type) { + .treeless, .compressed => { + if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock; + }, + .raw, .rle => {}, + } + + consumed_count.* += block_size; + return bytes_written; + }, + .reserved => return error.ReservedBlock, + } +} + +/// Decode a single block from `src` into `dest`; see `decodeBlock()`. Returns +/// the size of the decompressed block, which can be used with `dest.sliceLast()` +/// to get the decompressed bytes. `error.BlockSizeOverMaximum` is returned if +/// the block's compressed or decompressed size is larger than `block_size_max`. +pub fn decodeBlockRingBuffer( + dest: *RingBuffer, + src: []const u8, + block_header: frame.Zstandard.Block.Header, + decode_state: *DecodeState, + consumed_count: *usize, + block_size_max: usize, +) Error!usize { + const block_size = block_header.block_size; + if (block_size_max < block_size) return error.BlockSizeOverMaximum; + switch (block_header.block_type) { + .raw => { + if (src.len < block_size) return error.MalformedBlockSize; + const data = src[0..block_size]; + dest.writeSliceAssumeCapacity(data); + consumed_count.* += block_size; + decode_state.written_count += block_size; + return block_size; + }, + .rle => { + if (src.len < 1) return error.MalformedRleBlock; + for (0..block_size) |_| { + dest.writeAssumeCapacity(src[0]); + } + consumed_count.* += 1; + decode_state.written_count += block_size; + return block_size; + }, + .compressed => { + if (src.len < block_size) return error.MalformedBlockSize; + var bytes_read: usize = 0; + const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch + return error.MalformedCompressedBlock; + var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]); + const fbs_reader = fbs.reader(); + const sequences_header = decodeSequencesHeader(fbs_reader) catch + return error.MalformedCompressedBlock; + + decode_state.prepare(fbs_reader, literals, sequences_header) catch + return error.MalformedCompressedBlock; + + bytes_read += fbs.pos; + + var bytes_written: usize = 0; + { + const bit_stream_bytes = src[bytes_read..block_size]; + var bit_stream: readers.ReverseBitReader = undefined; + bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock; + + if (sequences_header.sequence_count > 0) { + decode_state.readInitialFseState(&bit_stream) catch + return error.MalformedCompressedBlock; + + var sequence_size_limit = block_size_max; + for (0..sequences_header.sequence_count) |i| { + const decompressed_size = decode_state.decodeSequenceRingBuffer( + dest, + &bit_stream, + sequence_size_limit, + i == sequences_header.sequence_count - 1, + ) catch return error.MalformedCompressedBlock; + bytes_written += decompressed_size; + sequence_size_limit -= decompressed_size; + } + } + + if (!bit_stream.isEmpty()) { + return error.MalformedCompressedBlock; + } + } + + if (decode_state.literal_written_count < literals.header.regenerated_size) { + const len = literals.header.regenerated_size - decode_state.literal_written_count; + decode_state.decodeLiteralsRingBuffer(dest, len) catch + return error.MalformedCompressedBlock; + bytes_written += len; + } + + switch (decode_state.literal_header.block_type) { + .treeless, .compressed => { + if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock; + }, + .raw, .rle => {}, + } + + consumed_count.* += block_size; + if (bytes_written > 
block_size_max) return error.BlockSizeOverMaximum;
+            return bytes_written;
+        },
+        .reserved => return error.ReservedBlock,
+    }
+}
+
+/// Decode a single block from `source` into `dest`. Literal and sequence data
+/// from the block is copied into `literals_buffer` and `sequence_buffer`, which
+/// must be large enough or `error.LiteralsBufferTooSmall` and
+/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
+/// upper bound for the size of both buffers). See `decodeBlock`
+/// and `decodeBlockRingBuffer` for functions that can decode a block without
+/// these extra copies. `error.EndOfStream` is returned if `source` does not
+/// contain enough bytes.
+pub fn decodeBlockReader(
+    dest: *RingBuffer,
+    source: anytype,
+    block_header: frame.Zstandard.Block.Header,
+    decode_state: *DecodeState,
+    block_size_max: usize,
+    literals_buffer: []u8,
+    sequence_buffer: []u8,
+) !void {
+    const block_size = block_header.block_size;
+    var block_reader_limited = std.io.limitedReader(source, block_size);
+    const block_reader = block_reader_limited.reader();
+    if (block_size_max < block_size) return error.BlockSizeOverMaximum;
+    switch (block_header.block_type) {
+        .raw => {
+            if (block_size == 0) return;
+            const slice = dest.sliceAt(dest.write_index, block_size);
+            try source.readNoEof(slice.first);
+            try source.readNoEof(slice.second);
+            dest.write_index = dest.mask2(dest.write_index + block_size);
+            decode_state.written_count += block_size;
+        },
+        .rle => {
+            const byte = try source.readByte();
+            for (0..block_size) |_| {
+                dest.writeAssumeCapacity(byte);
+            }
+            decode_state.written_count += block_size;
+        },
+        .compressed => {
+            const literals = try decodeLiteralsSection(block_reader, literals_buffer);
+            const sequences_header = try decodeSequencesHeader(block_reader);
+
+            try decode_state.prepare(block_reader, literals, sequences_header);
+
+            var bytes_written: usize = 0;
+            {
+                const size = try block_reader.readAll(sequence_buffer);
+                var bit_stream: readers.ReverseBitReader = undefined;
+                try bit_stream.init(sequence_buffer[0..size]);
+
+                if (sequences_header.sequence_count > 0) {
+                    if (sequence_buffer.len < block_reader_limited.bytes_left)
+                        return error.SequenceBufferTooSmall;
+
+                    decode_state.readInitialFseState(&bit_stream) catch
+                        return error.MalformedCompressedBlock;
+
+                    var sequence_size_limit = block_size_max;
+                    for (0..sequences_header.sequence_count) |i| {
+                        const decompressed_size = decode_state.decodeSequenceRingBuffer(
+                            dest,
+                            &bit_stream,
+                            sequence_size_limit,
+                            i == sequences_header.sequence_count - 1,
+                        ) catch return error.MalformedCompressedBlock;
+                        sequence_size_limit -= decompressed_size;
+                        bytes_written += decompressed_size;
+                    }
+                }
+
+                if (!bit_stream.isEmpty()) {
+                    return error.MalformedCompressedBlock;
+                }
+            }
+
+            if (decode_state.literal_written_count < literals.header.regenerated_size) {
+                const len = literals.header.regenerated_size - decode_state.literal_written_count;
+                decode_state.decodeLiteralsRingBuffer(dest, len) catch
+                    return error.MalformedCompressedBlock;
+                bytes_written += len;
+            }
+
+            switch (decode_state.literal_header.block_type) {
+                .treeless, .compressed => {
+                    if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
+                },
+                .raw, .rle => {},
+            }
+
+            if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
+            if (block_reader_limited.bytes_left != 0) return error.MalformedCompressedBlock;
+            decode_state.literal_written_count = 0;
+        },
+        .reserved => return error.ReservedBlock,
+    }
+}
+
+/// Decode the header of a block.
+pub fn decodeBlockHeader(src: *const [3]u8) frame.Zstandard.Block.Header {
+    const last_block = src[0] & 1 == 1;
+    const block_type = @intToEnum(frame.Zstandard.Block.Type, (src[0] & 0b110) >> 1);
+    const block_size = ((src[0] & 0b11111000) >> 3) + (@as(u21, src[1]) << 5) + (@as(u21, src[2]) << 13);
+    return .{
+        .last_block = last_block,
+        .block_type = block_type,
+        .block_size = block_size,
+    };
+}
+
+/// Decode the header of a block.
+///
+/// Errors returned:
+/// - `error.EndOfStream` if `src.len < 3`
+pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.Zstandard.Block.Header {
+    if (src.len < 3) return error.EndOfStream;
+    return decodeBlockHeader(src[0..3]);
+}
+
+/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
+/// number of bytes the section uses.
+///
+/// Errors returned:
+/// - `error.MalformedLiteralsHeader` if the header is invalid
+/// - `error.MalformedLiteralsSection` if there are decoding errors
+/// - `error.MalformedAccuracyLog` if compressed literals have an invalid
+///   accuracy log
+/// - `error.MalformedFseTable` if compressed literals have an invalid FSE table
+/// - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
+/// - `error.EndOfStream` if there are not enough bytes in `src`
+pub fn decodeLiteralsSectionSlice(
+    src: []const u8,
+    consumed_count: *usize,
+) (error{ MalformedLiteralsHeader, MalformedLiteralsSection, EndOfStream } || huffman.Error)!LiteralsSection {
+    var bytes_read: usize = 0;
+    const header = header: {
+        var fbs = std.io.fixedBufferStream(src);
+        defer bytes_read = fbs.pos;
+        break :header decodeLiteralsHeader(fbs.reader()) catch return error.MalformedLiteralsHeader;
+    };
+    switch (header.block_type) {
+        .raw => {
+            if (src.len < bytes_read + header.regenerated_size) return error.MalformedLiteralsSection;
+            const stream = src[bytes_read .. bytes_read + header.regenerated_size];
+            consumed_count.* += header.regenerated_size + bytes_read;
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = null,
+                .streams = .{ .one = stream },
+            };
+        },
+        .rle => {
+            if (src.len < bytes_read + 1) return error.MalformedLiteralsSection;
+            const stream = src[bytes_read .. bytes_read + 1];
+            consumed_count.* += 1 + bytes_read;
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = null,
+                .streams = .{ .one = stream },
+            };
+        },
+        .compressed, .treeless => {
+            const huffman_tree_start = bytes_read;
+            const huffman_tree = if (header.block_type == .compressed)
+                try huffman.decodeHuffmanTreeSlice(src[bytes_read..], &bytes_read)
+            else
+                null;
+            const huffman_tree_size = bytes_read - huffman_tree_start;
+            const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
+                return error.MalformedLiteralsSection;
+
+            if (src.len < bytes_read + total_streams_size) return error.MalformedLiteralsSection;
+            const stream_data = src[bytes_read .. bytes_read + total_streams_size];
+
+            const streams = try decodeStreams(header.size_format, stream_data);
+            consumed_count.* += bytes_read + total_streams_size;
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = huffman_tree,
+                .streams = streams,
+            };
+        },
+    }
+}
+
+/// Decode a `LiteralsSection` from `source` into `buffer`; see
+/// `decodeLiteralsSectionSlice()`.
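+/// `buffer` backs the returned section's streams; if the compressed streams
+/// do not fit in it, `error.LiteralsBufferTooSmall` is returned.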
+pub fn decodeLiteralsSection( + source: anytype, + buffer: []u8, +) !LiteralsSection { + const header = try decodeLiteralsHeader(source); + switch (header.block_type) { + .raw => { + if (buffer.len < header.regenerated_size) return error.LiteralsBufferTooSmall; + try source.readNoEof(buffer[0..header.regenerated_size]); + return LiteralsSection{ + .header = header, + .huffman_tree = null, + .streams = .{ .one = buffer[0..header.regenerated_size] }, + }; + }, + .rle => { + if (buffer.len < 1) return error.LiteralsBufferTooSmall; + buffer[0] = try source.readByte(); + return LiteralsSection{ + .header = header, + .huffman_tree = null, + .streams = .{ .one = buffer[0..1] }, + }; + }, + .compressed, .treeless => { + var counting_reader = std.io.countingReader(source); + const huffman_tree = if (header.block_type == .compressed) + try huffman.decodeHuffmanTree(counting_reader.reader(), buffer) + else + null; + const huffman_tree_size = @intCast(usize, counting_reader.bytes_read); + const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch + return error.MalformedLiteralsSection; + + if (total_streams_size > buffer.len) return error.LiteralsBufferTooSmall; + try source.readNoEof(buffer[0..total_streams_size]); + const stream_data = buffer[0..total_streams_size]; + + const streams = try decodeStreams(header.size_format, stream_data); + return LiteralsSection{ + .header = header, + .huffman_tree = huffman_tree, + .streams = streams, + }; + }, + } +} + +fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Streams { + if (size_format == 0) { + return .{ .one = stream_data }; + } + + if (stream_data.len < 6) return error.MalformedLiteralsSection; + + const stream_1_length = @as(usize, readInt(u16, stream_data[0..2])); + const stream_2_length = @as(usize, readInt(u16, stream_data[2..4])); + const stream_3_length = @as(usize, readInt(u16, stream_data[4..6])); + + const stream_1_start = 6; + const stream_2_start = stream_1_start + stream_1_length; + const stream_3_start = stream_2_start + stream_2_length; + const stream_4_start = stream_3_start + stream_3_length; + + if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection; + + return .{ .four = .{ + stream_data[stream_1_start .. stream_1_start + stream_1_length], + stream_data[stream_2_start .. stream_2_start + stream_2_length], + stream_data[stream_3_start .. stream_3_start + stream_3_length], + stream_data[stream_4_start..], + } }; +} + +/// Decode a literals section header.
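+/// +/// Worked example (illustrative): the header byte 0x28 = 0b00101000 has +/// block_type bits 00 (raw) and size_format bits 10, so it uses the short +/// form where `regenerated_size = byte0 >> 3 = 5`.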
+/// +/// Errors returned: +/// - `error.EndOfStream` if there are not enough bytes in `source` +pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header { + const byte0 = try source.readByte(); + const block_type = @intToEnum(LiteralsSection.BlockType, byte0 & 0b11); + const size_format = @intCast(u2, (byte0 & 0b1100) >> 2); + var regenerated_size: u20 = undefined; + var compressed_size: ?u18 = null; + switch (block_type) { + .raw, .rle => { + switch (size_format) { + 0, 2 => { + regenerated_size = byte0 >> 3; + }, + 1 => regenerated_size = (byte0 >> 4) + (@as(u20, try source.readByte()) << 4), + 3 => regenerated_size = (byte0 >> 4) + + (@as(u20, try source.readByte()) << 4) + + (@as(u20, try source.readByte()) << 12), + } + }, + .compressed, .treeless => { + const byte1 = try source.readByte(); + const byte2 = try source.readByte(); + switch (size_format) { + 0, 1 => { + regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4); + compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2); + }, + 2 => { + const byte3 = try source.readByte(); + regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12); + compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6); + }, + 3 => { + const byte3 = try source.readByte(); + const byte4 = try source.readByte(); + regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12); + compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10); + }, + } + }, + } + return LiteralsSection.Header{ + .block_type = block_type, + .size_format = size_format, + .regenerated_size = regenerated_size, + .compressed_size = compressed_size, + }; +} + +/// Decode a sequences section header. 
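+/// +/// The sequence count uses a variable-length encoding (see the code below): +/// a first byte below 128 is the count itself; a first byte in 128..254 +/// combines with one more byte, e.g. bytes { 130, 5 } give +/// ((130 - 128) << 8) + 5 = 517; and a first byte of 255 signals a two-byte +/// little-endian count offset by 0x7F00.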
+/// +/// Errors returned: +/// - `error.ReservedBitSet` if the reserved bit is set +/// - `error.EndOfStream` if there are not enough bytes in `source` +pub fn decodeSequencesHeader( + source: anytype, +) !SequencesSection.Header { + var sequence_count: u24 = undefined; + + const byte0 = try source.readByte(); + if (byte0 == 0) { + return SequencesSection.Header{ + .sequence_count = 0, + .offsets = undefined, + .match_lengths = undefined, + .literal_lengths = undefined, + }; + } else if (byte0 < 128) { + sequence_count = byte0; + } else if (byte0 < 255) { + sequence_count = (@as(u24, (byte0 - 128)) << 8) + try source.readByte(); + } else { + sequence_count = (try source.readByte()) + (@as(u24, try source.readByte()) << 8) + 0x7F00; + } + + const compression_modes = try source.readByte(); + + const matches_mode = @intToEnum(SequencesSection.Header.Mode, (compression_modes & 0b00001100) >> 2); + const offsets_mode = @intToEnum(SequencesSection.Header.Mode, (compression_modes & 0b00110000) >> 4); + const literal_mode = @intToEnum(SequencesSection.Header.Mode, (compression_modes & 0b11000000) >> 6); + if (compression_modes & 0b11 != 0) return error.ReservedBitSet; + + return SequencesSection.Header{ + .sequence_count = sequence_count, + .offsets = offsets_mode, + .match_lengths = matches_mode, + .literal_lengths = literal_mode, + }; +} diff --git a/lib/std/compress/zstandard/decode/fse.zig b/lib/std/compress/zstandard/decode/fse.zig new file mode 100644 index 0000000000..41a34d0fc1 --- /dev/null +++ b/lib/std/compress/zstandard/decode/fse.zig @@ -0,0 +1,153 @@ +const std = @import("std"); +const assert = std.debug.assert; + +const types = @import("../types.zig"); +const Table = types.compressed_block.Table; + +pub fn decodeFseTable( + bit_reader: anytype, + expected_symbol_count: usize, + max_accuracy_log: u4, + entries: []Table.Fse, +) !usize { + const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4); + if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog; + const accuracy_log = accuracy_log_biased + 5; + + var values: [256]u16 = undefined; + var value_count: usize = 0; + + const total_probability = @as(u16, 1) << accuracy_log; + var accumulated_probability: u16 = 0; + + while (accumulated_probability < total_probability) { + // WARNING: The RFC is poorly worded and would suggest std.math.log2_int_ceil is correct here, + // but when (remaining probabilities + 1) is a power of two, max_bits needs to be 1 more.
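+ // For example, with 31 of 64 probability points still unassigned, + // remaining + 1 = 32 is a power of two and there are 33 candidate values: + // log2_int_ceil(u16, 32) = 5 would be one bit short, while + // log2_int(u16, 32) + 1 = 6 distinguishes them all.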
+ const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1; + const small = try bit_reader.readBitsNoEof(u16, max_bits - 1); + + const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1); + + const value = if (small < cutoff) + small + else value: { + const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1)); + break :value if (value_read < @as(u16, 1) << (max_bits - 1)) + value_read + else + value_read - cutoff; + }; + + accumulated_probability += if (value != 0) value - 1 else 1; + + values[value_count] = value; + value_count += 1; + + if (value == 1) { + while (true) { + const repeat_flag = try bit_reader.readBitsNoEof(u2, 2); + if (repeat_flag + value_count > 256) return error.MalformedFseTable; + for (0..repeat_flag) |_| { + values[value_count] = 1; + value_count += 1; + } + if (repeat_flag < 3) break; + } + } + if (value_count == 256) break; + } + bit_reader.alignToByte(); + + if (value_count < 2) return error.MalformedFseTable; + if (accumulated_probability != total_probability) return error.MalformedFseTable; + if (value_count > expected_symbol_count) return error.MalformedFseTable; + + const table_size = total_probability; + + try buildFseTable(values[0..value_count], entries[0..table_size]); + return table_size; +} + +fn buildFseTable(values: []const u16, entries: []Table.Fse) !void { + const total_probability = @intCast(u16, entries.len); + const accuracy_log = std.math.log2_int(u16, total_probability); + assert(total_probability <= 1 << 9); + + var less_than_one_count: usize = 0; + for (values, 0..) |value, i| { + if (value == 0) { + entries[entries.len - 1 - less_than_one_count] = Table.Fse{ + .symbol = @intCast(u8, i), + .baseline = 0, + .bits = accuracy_log, + }; + less_than_one_count += 1; + } + } + + var position: usize = 0; + var temp_states: [1 << 9]u16 = undefined; + for (values, 0..) 
|value, symbol| { + if (value == 0 or value == 1) continue; + const probability = value - 1; + + const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch + return error.MalformedFseTable; + const share_size = @divExact(total_probability, state_share_dividend); + const double_state_count = state_share_dividend - probability; + const single_state_count = probability - double_state_count; + const share_size_log = std.math.log2_int(u16, share_size); + + for (0..probability) |i| { + temp_states[i] = @intCast(u16, position); + position += (entries.len >> 1) + (entries.len >> 3) + 3; + position &= entries.len - 1; + while (position >= entries.len - less_than_one_count) { + position += (entries.len >> 1) + (entries.len >> 3) + 3; + position &= entries.len - 1; + } + } + std.sort.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16)); + for (0..probability) |i| { + entries[temp_states[i]] = if (i < double_state_count) Table.Fse{ + .symbol = @intCast(u8, symbol), + .bits = share_size_log + 1, + .baseline = single_state_count * share_size + @intCast(u16, i) * 2 * share_size, + } else Table.Fse{ + .symbol = @intCast(u8, symbol), + .bits = share_size_log, + .baseline = (@intCast(u16, i) - double_state_count) * share_size, + }; + } + } +} + +test buildFseTable { + const literals_length_default_values = [36]u16{ + 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2, + 0, 0, 0, 0, + }; + + const match_lengths_default_values = [53]u16{ + 2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, + 0, 0, 0, 0, 0, + }; + + const offset_codes_default_values = [29]u16{ + 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, + }; + + var entries: [64]Table.Fse = undefined; + try buildFseTable(&literals_length_default_values, &entries); + try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_literal_fse_table.fse, &entries); + + try buildFseTable(&match_lengths_default_values, &entries); + try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_match_fse_table.fse, &entries); + + try buildFseTable(&offset_codes_default_values, entries[0..32]); + try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_offset_fse_table.fse, entries[0..32]); +} diff --git a/lib/std/compress/zstandard/decode/huffman.zig b/lib/std/compress/zstandard/decode/huffman.zig new file mode 100644 index 0000000000..2914198268 --- /dev/null +++ b/lib/std/compress/zstandard/decode/huffman.zig @@ -0,0 +1,234 @@ +const std = @import("std"); + +const types = @import("../types.zig"); +const LiteralsSection = types.compressed_block.LiteralsSection; +const Table = types.compressed_block.Table; + +const readers = @import("../readers.zig"); + +const decodeFseTable = @import("fse.zig").decodeFseTable; + +pub const Error = error{ + MalformedHuffmanTree, + MalformedFseTable, + MalformedAccuracyLog, + EndOfStream, +}; + +fn decodeFseHuffmanTree( + source: anytype, + compressed_size: usize, + buffer: []u8, + weights: *[256]u4, +) !usize { + var stream = std.io.limitedReader(source, compressed_size); + var bit_reader = readers.bitReader(stream.reader()); + + var entries: [1 << 6]Table.Fse = undefined; + const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) { + error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e, + error.EndOfStream => return 
error.MalformedFseTable, + else => |e| return e, + }; + const accuracy_log = std.math.log2_int_ceil(usize, table_size); + + const amount = try stream.reader().readAll(buffer); + var huff_bits: readers.ReverseBitReader = undefined; + huff_bits.init(buffer[0..amount]) catch return error.MalformedHuffmanTree; + + return assignWeights(&huff_bits, accuracy_log, &entries, weights); +} + +fn decodeFseHuffmanTreeSlice(src: []const u8, compressed_size: usize, weights: *[256]u4) !usize { + if (src.len < compressed_size) return error.MalformedHuffmanTree; + var stream = std.io.fixedBufferStream(src[0..compressed_size]); + var counting_reader = std.io.countingReader(stream.reader()); + var bit_reader = readers.bitReader(counting_reader.reader()); + + var entries: [1 << 6]Table.Fse = undefined; + const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) { + error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e, + error.EndOfStream => return error.MalformedFseTable, + }; + const accuracy_log = std.math.log2_int_ceil(usize, table_size); + + const start_index = std.math.cast(usize, counting_reader.bytes_read) orelse + return error.MalformedHuffmanTree; + var huff_data = src[start_index..compressed_size]; + var huff_bits: readers.ReverseBitReader = undefined; + huff_bits.init(huff_data) catch return error.MalformedHuffmanTree; + + return assignWeights(&huff_bits, accuracy_log, &entries, weights); +} + +fn assignWeights( + huff_bits: *readers.ReverseBitReader, + accuracy_log: usize, + entries: *[1 << 6]Table.Fse, + weights: *[256]u4, +) !usize { + var i: usize = 0; + var even_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree; + var odd_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree; + + while (i < 254) { + const even_data = entries[even_state]; + var read_bits: usize = 0; + const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable; + weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree; + i += 1; + if (read_bits < even_data.bits) { + weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree; + i += 1; + break; + } + even_state = even_data.baseline + even_bits; + + read_bits = 0; + const odd_data = entries[odd_state]; + const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable; + weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree; + i += 1; + if (read_bits < odd_data.bits) { + if (i == 255) return error.MalformedHuffmanTree; + weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree; + i += 1; + break; + } + odd_state = odd_data.baseline + odd_bits; + } else return error.MalformedHuffmanTree; + + if (!huff_bits.isEmpty()) { + return error.MalformedHuffmanTree; + } + + return i + 1; // stream contains all but the last symbol +} + +fn decodeDirectHuffmanTree(source: anytype, encoded_symbol_count: usize, weights: *[256]u4) !usize { + const weights_byte_count = (encoded_symbol_count + 1) / 2; + for (0..weights_byte_count) |i| { + const byte = try source.readByte(); + weights[2 * i] = @intCast(u4, byte >> 4); + weights[2 * i + 1] = @intCast(u4, byte & 0xF); + } + return encoded_symbol_count + 1; +} + +fn assignSymbols(weight_sorted_prefixed_symbols: []LiteralsSection.HuffmanTree.PrefixedSymbol, weights: [256]u4) usize { + for 
(0..weight_sorted_prefixed_symbols.len) |i| { + weight_sorted_prefixed_symbols[i] = .{ + .symbol = @intCast(u8, i), + .weight = undefined, + .prefix = undefined, + }; + } + + std.sort.sort( + LiteralsSection.HuffmanTree.PrefixedSymbol, + weight_sorted_prefixed_symbols, + weights, + lessThanByWeight, + ); + + var prefix: u16 = 0; + var prefixed_symbol_count: usize = 0; + var sorted_index: usize = 0; + const symbol_count = weight_sorted_prefixed_symbols.len; + while (sorted_index < symbol_count) { + var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; + const weight = weights[symbol]; + if (weight == 0) { + sorted_index += 1; + continue; + } + + while (sorted_index < symbol_count) : ({ + sorted_index += 1; + prefixed_symbol_count += 1; + prefix += 1; + }) { + symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; + if (weights[symbol] != weight) { + prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1; + break; + } + weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol; + weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix; + weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight; + } + } + return prefixed_symbol_count; +} + +fn buildHuffmanTree(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!LiteralsSection.HuffmanTree { + var weight_power_sum_big: u32 = 0; + for (weights[0 .. symbol_count - 1]) |value| { + weight_power_sum_big += (@as(u16, 1) << value) >> 1; + } + if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree; + const weight_power_sum = @intCast(u16, weight_power_sum_big); + + // advance to next power of two (even if weight_power_sum is a power of 2) + // TODO: is it valid to have weight_power_sum == 0? + const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1; + const next_power_of_two = @as(u16, 1) << max_number_of_bits; + weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1; + + var weight_sorted_prefixed_symbols: [256]LiteralsSection.HuffmanTree.PrefixedSymbol = undefined; + const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*); + const tree = LiteralsSection.HuffmanTree{ + .max_bit_count = max_number_of_bits, + .symbol_count_minus_one = @intCast(u8, prefixed_symbol_count - 1), + .nodes = weight_sorted_prefixed_symbols, + }; + return tree; +} + +pub fn decodeHuffmanTree( + source: anytype, + buffer: []u8, +) (@TypeOf(source).Error || Error)!LiteralsSection.HuffmanTree { + const header = try source.readByte(); + var weights: [256]u4 = undefined; + const symbol_count = if (header < 128) + // FSE compressed weights + try decodeFseHuffmanTree(source, header, buffer, &weights) + else + try decodeDirectHuffmanTree(source, header - 127, &weights); + + return buildHuffmanTree(&weights, symbol_count); +} + +pub fn decodeHuffmanTreeSlice( + src: []const u8, + consumed_count: *usize, +) Error!LiteralsSection.HuffmanTree { + if (src.len == 0) return error.MalformedHuffmanTree; + const header = src[0]; + var bytes_read: usize = 1; + var weights: [256]u4 = undefined; + const symbol_count = if (header < 128) count: { + // FSE compressed weights + bytes_read += header; + break :count try decodeFseHuffmanTreeSlice(src[1..], header, &weights); + } else count: { + var fbs = std.io.fixedBufferStream(src[1..]); + defer bytes_read += fbs.pos; + break :count try decodeDirectHuffmanTree(fbs.reader(), header - 127, &weights); + }; + + consumed_count.* += 
bytes_read; + return buildHuffmanTree(&weights, symbol_count); +} + +fn lessThanByWeight( + weights: [256]u4, + lhs: LiteralsSection.HuffmanTree.PrefixedSymbol, + rhs: LiteralsSection.HuffmanTree.PrefixedSymbol, +) bool { + // NOTE: this function relies on the use of a stable sorting algorithm, + // otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs; + // should be added + return weights[lhs.symbol] < weights[rhs.symbol]; +} diff --git a/lib/std/compress/zstandard/decompress.zig b/lib/std/compress/zstandard/decompress.zig new file mode 100644 index 0000000000..a2ba59e688 --- /dev/null +++ b/lib/std/compress/zstandard/decompress.zig @@ -0,0 +1,636 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const RingBuffer = std.RingBuffer; + +const types = @import("types.zig"); +const frame = types.frame; +const LiteralsSection = types.compressed_block.LiteralsSection; +const SequencesSection = types.compressed_block.SequencesSection; +const SkippableHeader = types.frame.Skippable.Header; +const ZstandardHeader = types.frame.Zstandard.Header; +const Table = types.compressed_block.Table; + +pub const block = @import("decode/block.zig"); + +const readers = @import("readers.zig"); + +const readInt = std.mem.readIntLittle; +const readIntSlice = std.mem.readIntSliceLittle; + +/// Returns `true` if `magic` is a valid magic number for a skippable frame. +pub fn isSkippableMagic(magic: u32) bool { + return frame.Skippable.magic_number_min <= magic and magic <= frame.Skippable.magic_number_max; +} + +/// Returns the kind of frame at the beginning of `source`. +/// +/// Errors returned: +/// - `error.BadMagic` if `source` begins with bytes not equal to the +/// Zstandard frame magic number, or outside the range of magic numbers for +/// skippable frames. +/// - `error.EndOfStream` if `source` contains fewer than 4 bytes +pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind { + const magic = try source.readIntLittle(u32); + return frameType(magic); +} + +/// Returns the kind of frame associated with `magic`. +/// +/// Errors returned: +/// - `error.BadMagic` if `magic` is not a valid magic number. +pub fn frameType(magic: u32) error{BadMagic}!frame.Kind { + return if (magic == frame.Zstandard.magic_number) + .zstandard + else if (isSkippableMagic(magic)) + .skippable + else + error.BadMagic; +} + +pub const FrameHeader = union(enum) { + zstandard: ZstandardHeader, + skippable: SkippableHeader, +}; + +pub const HeaderError = error{ BadMagic, EndOfStream, ReservedBitSet }; + +/// Returns the header of the frame at the beginning of `source`. +/// +/// Errors returned: +/// - `error.BadMagic` if `source` begins with bytes not equal to the +/// Zstandard frame magic number, or outside the range of magic numbers for +/// skippable frames.
+/// - `error.EndOfStream` if `source` contains fewer than 4 bytes +/// - `error.ReservedBitSet` if the frame is a Zstandard frame and any of the +/// reserved bits are set +pub fn decodeFrameHeader(source: anytype) (@TypeOf(source).Error || HeaderError)!FrameHeader { + const magic = try source.readIntLittle(u32); + const frame_type = try frameType(magic); + switch (frame_type) { + .zstandard => return FrameHeader{ .zstandard = try decodeZstandardHeader(source) }, + .skippable => return FrameHeader{ + .skippable = .{ + .magic_number = magic, + .frame_size = try source.readIntLittle(u32), + }, + }, + } +} + +pub const ReadWriteCount = struct { + read_count: usize, + write_count: usize, +}; + +/// Decodes frames from `src` into `dest`; returns the length of the result. +/// The stream should not have extra trailing bytes - either all bytes in `src` +/// will be decoded, or an error will be returned. An error will be returned if +/// a Zstandard frame in `src` does not declare its content size. +/// +/// Errors returned: +/// - `error.DictionaryIdFlagUnsupported` if `src` contains a frame that +/// uses a dictionary +/// - `error.MalformedFrame` if a frame in `src` is invalid +/// - `error.UnknownContentSizeUnsupported` if a frame in `src` does not +/// declare its content size +pub fn decode(dest: []u8, src: []const u8, verify_checksum: bool) error{ + MalformedFrame, + UnknownContentSizeUnsupported, + DictionaryIdFlagUnsupported, +}!usize { + var write_count: usize = 0; + var read_count: usize = 0; + while (read_count < src.len) { + const counts = decodeFrame(dest, src[read_count..], verify_checksum) catch |err| { + switch (err) { + error.UnknownContentSizeUnsupported => return error.UnknownContentSizeUnsupported, + error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported, + else => return error.MalformedFrame, + } + }; + read_count += counts.read_count; + write_count += counts.write_count; + } + return write_count; +} + +/// Decodes a stream of frames from `src`; returns the decoded bytes. The stream +/// should not have extra trailing bytes - either all bytes in `src` will be +/// decoded, or an error will be returned. +/// +/// Errors returned: +/// - `error.DictionaryIdFlagUnsupported` if `src` contains a frame that +/// uses a dictionary +/// - `error.MalformedFrame` if a frame in `src` is invalid +/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory +pub fn decodeAlloc( + allocator: Allocator, + src: []const u8, + verify_checksum: bool, + window_size_max: usize, +) error{ DictionaryIdFlagUnsupported, MalformedFrame, OutOfMemory }![]u8 { + var result = std.ArrayList(u8).init(allocator); + errdefer result.deinit(); + + var read_count: usize = 0; + while (read_count < src.len) { + read_count += decodeFrameArrayList( + allocator, + &result, + src[read_count..], + verify_checksum, + window_size_max, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported, + else => return error.MalformedFrame, + }; + } + return result.toOwnedSlice(); +} + +/// Decodes the frame at the start of `src` into `dest`. Returns the number of +/// bytes read from `src` and written to `dest`. This function can only decode +/// frames that declare the decompressed content size.
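+/// +/// A usage sketch (illustrative; assumes `compressed` holds one complete +/// frame whose declared content size fits in `out`): +/// +///     var out: [4096]u8 = undefined; +///     const counts = try decodeFrame(&out, compressed, true); +///     const decompressed = out[0..counts.write_count];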
+/// +/// Errors returned: +/// - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic +/// number for a Zstandard or skippable frame +/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the +/// uncompressed content size +/// - `error.WindowSizeUnknown` if the frame does not have a valid window size +/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data +/// size declared by the frame header +/// - `error.ContentSizeTooLarge` if the frame header indicates a content size +/// that is larger than `std.math.maxInt(usize)` +/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary +/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame +/// contains a checksum that does not match the checksum of the decompressed +/// data +/// - `error.ReservedBitSet` if any of the reserved bits of the frame header +/// are set +/// - `error.EndOfStream` if `src` does not contain a complete frame +/// - `error.BadContentSize` if the content size declared by the frame does +/// not equal the actual size of decompressed data +/// - an error in `block.Error` if there are errors decoding a block +/// - `error.SkippableSizeTooLarge` if the frame is skippable and reports a +/// size greater than `src.len` +pub fn decodeFrame( + dest: []u8, + src: []const u8, + verify_checksum: bool, +) (error{ + BadMagic, + UnknownContentSizeUnsupported, + ContentTooLarge, + ContentSizeTooLarge, + WindowSizeUnknown, + DictionaryIdFlagUnsupported, + SkippableSizeTooLarge, +} || FrameError)!ReadWriteCount { + var fbs = std.io.fixedBufferStream(src); + switch (try decodeFrameType(fbs.reader())) { + .zstandard => return decodeZstandardFrame(dest, src, verify_checksum), + .skippable => { + const content_size = try fbs.reader().readIntLittle(u32); + if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge; + const read_count = @as(usize, content_size) + 8; + if (read_count > src.len) return error.SkippableSizeTooLarge; + return ReadWriteCount{ + .read_count = read_count, + .write_count = 0, + }; + }, + } +} + +/// Decodes the frame at the start of `src` into `dest`. Returns the number of +/// bytes read from `src`.
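+/// +/// A usage sketch (illustrative; the 1 << 23 window limit is an arbitrary +/// example, not a recommended default): +/// +///     var list = std.ArrayList(u8).init(allocator); +///     defer list.deinit(); +///     const n_read = try decodeFrameArrayList(allocator, &list, src, true, 1 << 23);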
+/// +/// Errors returned: +/// - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic +/// number for a Zstandard or skippable frame +/// - `error.WindowSizeUnknown` if the frame does not have a valid window size +/// - `error.WindowTooLarge` if the window size is larger than +/// `window_size_max` +/// - `error.ContentSizeTooLarge` if the frame header indicates a content size +/// that is larger than `std.math.maxInt(usize)` +/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary +/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame +/// contains a checksum that does not match the checksum of the decompressed +/// data +/// - `error.ReservedBitSet` if any of the reserved bits of the frame header +/// are set +/// - `error.EndOfStream` if `src` does not contain a complete frame +/// - `error.BadContentSize` if the content size declared by the frame does +/// not equal the actual size of decompressed data +/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory +/// - an error in `block.Error` if there are errors decoding a block +/// - `error.SkippableSizeTooLarge` if the frame is skippable and reports a +/// size greater than `src.len` +pub fn decodeFrameArrayList( + allocator: Allocator, + dest: *std.ArrayList(u8), + src: []const u8, + verify_checksum: bool, + window_size_max: usize, +) (error{ BadMagic, OutOfMemory, SkippableSizeTooLarge } || FrameContext.Error || FrameError)!usize { + var fbs = std.io.fixedBufferStream(src); + const reader = fbs.reader(); + const magic = try reader.readIntLittle(u32); + switch (try frameType(magic)) { + .zstandard => return decodeZstandardFrameArrayList( + allocator, + dest, + src, + verify_checksum, + window_size_max, + ), + .skippable => { + const content_size = try fbs.reader().readIntLittle(u32); + if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge; + const read_count = @as(usize, content_size) + 8; + if (read_count > src.len) return error.SkippableSizeTooLarge; + return read_count; + }, + } +} + +/// Returns the frame checksum corresponding to the data fed into `hasher`. +pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 { + const hash = hasher.final(); + return @intCast(u32, hash & 0xFFFFFFFF); +} + +const FrameError = error{ + ChecksumFailure, + BadContentSize, + EndOfStream, + ReservedBitSet, +} || block.Error; + +/// Decode a Zstandard frame from `src` into `dest`, returning the number of +/// bytes read from `src` and written to `dest`. The first four bytes of `src` +/// must be the magic number for a Zstandard frame.
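+/// +/// Callers that have not already validated the magic number can check it +/// first with `std.mem.readIntLittle(u32, src[0..4]) == frame.Zstandard.magic_number` +/// (0xFD2FB528), since this function asserts the magic rather than returning +/// `error.BadMagic`.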
+/// +/// Errors returned: +/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the +/// uncompressed content size +/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data +/// size declared by the frame header +/// - `error.WindowSizeUnknown` if the frame does not have a valid window size +/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary +/// - `error.ContentSizeTooLarge` if the frame header indicates a content size +/// that is larger than `std.math.maxInt(usize)` +/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame +/// contains a checksum that does not match the checksum of the decompressed +/// data +/// - `error.ReservedBitSet` if the reserved bit of the frame header is set +/// - `error.EndOfStream` if `src` does not contain a complete frame +/// - an error in `block.Error` if there are errors decoding a block +/// - `error.BadContentSize` if the content size declared by the frame does +/// not equal the actual size of decompressed data +pub fn decodeZstandardFrame( + dest: []u8, + src: []const u8, + verify_checksum: bool, +) (error{ + UnknownContentSizeUnsupported, + ContentTooLarge, + ContentSizeTooLarge, + WindowSizeUnknown, + DictionaryIdFlagUnsupported, +} || FrameError)!ReadWriteCount { + assert(readInt(u32, src[0..4]) == frame.Zstandard.magic_number); + var consumed_count: usize = 4; + + var frame_context = context: { + var fbs = std.io.fixedBufferStream(src[consumed_count..]); + var source = fbs.reader(); + const frame_header = try decodeZstandardHeader(source); + consumed_count += fbs.pos; + break :context FrameContext.init( + frame_header, + std.math.maxInt(usize), + verify_checksum, + ) catch |err| switch (err) { + error.WindowTooLarge => unreachable, + inline else => |e| return e, + }; + }; + const counts = try decodeZStandardFrameBlocks( + dest, + src[consumed_count..], + &frame_context, + ); + return ReadWriteCount{ + .read_count = counts.read_count + consumed_count, + .write_count = counts.write_count, + }; +} + +pub fn decodeZStandardFrameBlocks( + dest: []u8, + src: []const u8, + frame_context: *FrameContext, +) (error{ ContentTooLarge, UnknownContentSizeUnsupported } || FrameError)!ReadWriteCount { + const content_size = frame_context.content_size orelse + return error.UnknownContentSizeUnsupported; + if (dest.len < content_size) return error.ContentTooLarge; + + var consumed_count: usize = 0; + const written_count = decodeFrameBlocksInner( + dest[0..content_size], + src[consumed_count..], + &consumed_count, + if (frame_context.hasher_opt) |*hasher| hasher else null, + frame_context.block_size_max, + ) catch |err| switch (err) { + error.DestTooSmall => return error.BadContentSize, + inline else => |e| return e, + }; + + if (written_count != content_size) return error.BadContentSize; + if (frame_context.has_checksum) { + if (src.len < consumed_count + 4) return error.EndOfStream; + const checksum = readIntSlice(u32, src[consumed_count ..
consumed_count + 4]); + consumed_count += 4; + if (frame_context.hasher_opt) |*hasher| { + if (checksum != computeChecksum(hasher)) return error.ChecksumFailure; + } + } + return ReadWriteCount{ .read_count = consumed_count, .write_count = written_count }; +} + +pub const FrameContext = struct { + hasher_opt: ?std.hash.XxHash64, + window_size: usize, + has_checksum: bool, + block_size_max: usize, + content_size: ?usize, + + const Error = error{ + DictionaryIdFlagUnsupported, + WindowSizeUnknown, + WindowTooLarge, + ContentSizeTooLarge, + }; + /// Validates `frame_header` and returns the associated `FrameContext`. + /// + /// Errors returned: + /// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary + /// - `error.WindowSizeUnknown` if the frame does not have a valid window + /// size + /// - `error.WindowTooLarge` if the window size is larger than + /// `window_size_max` + /// - `error.ContentSizeTooLarge` if the frame header indicates a content + /// size larger than `std.math.maxInt(usize)` + pub fn init( + frame_header: ZstandardHeader, + window_size_max: usize, + verify_checksum: bool, + ) Error!FrameContext { + if (frame_header.descriptor.dictionary_id_flag != 0) + return error.DictionaryIdFlagUnsupported; + + const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown; + const window_size = if (window_size_raw > window_size_max) + return error.WindowTooLarge + else + @intCast(usize, window_size_raw); + + const should_compute_checksum = + frame_header.descriptor.content_checksum_flag and verify_checksum; + + const content_size = if (frame_header.content_size) |size| + std.math.cast(usize, size) orelse return error.ContentSizeTooLarge + else + null; + + return .{ + .hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null, + .window_size = window_size, + .has_checksum = frame_header.descriptor.content_checksum_flag, + .block_size_max = @min(1 << 17, window_size), + .content_size = content_size, + }; + } +}; + +/// Decode a Zstandard frame from `src` and return the number of bytes read; see +/// `decodeZstandardFrame()`. The first four bytes of `src` must be the magic +/// number for a Zstandard frame.
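+/// +/// Unlike `decodeZstandardFrame()`, this variant decodes through a +/// window-sized ring buffer allocated with `allocator`, so `window_size_max` +/// bounds that allocation; frames declaring a larger window fail with +/// `error.WindowTooLarge`.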
+/// +/// Errors returned: +/// - `error.WindowSizeUnknown` if the frame does not have a valid window size +/// - `error.WindowTooLarge` if the window size is larger than +/// `window_size_max` +/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary +/// - `error.ContentSizeTooLarge` if the frame header indicates a content size +/// that is larger than `std.math.maxInt(usize)` +/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame +/// contains a checksum that does not match the checksum of the decompressed +/// data +/// - `error.ReservedBitSet` if the reserved bit of the frame header is set +/// - `error.EndOfStream` if `src` does not contain a complete frame +/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory +/// - an error in `block.Error` if there are errors decoding a block +/// - `error.BadContentSize` if the content size declared by the frame does +/// not equal the size of decompressed data +pub fn decodeZstandardFrameArrayList( + allocator: Allocator, + dest: *std.ArrayList(u8), + src: []const u8, + verify_checksum: bool, + window_size_max: usize, +) (error{OutOfMemory} || FrameContext.Error || FrameError)!usize { + assert(readInt(u32, src[0..4]) == frame.Zstandard.magic_number); + var consumed_count: usize = 4; + + var frame_context = context: { + var fbs = std.io.fixedBufferStream(src[consumed_count..]); + var source = fbs.reader(); + const frame_header = try decodeZstandardHeader(source); + consumed_count += fbs.pos; + break :context try FrameContext.init(frame_header, window_size_max, verify_checksum); + }; + + consumed_count += try decodeZstandardFrameBlocksArrayList( + allocator, + dest, + src[consumed_count..], + &frame_context, + ); + return consumed_count; +} + +pub fn decodeZstandardFrameBlocksArrayList( + allocator: Allocator, + dest: *std.ArrayList(u8), + src: []const u8, + frame_context: *FrameContext, +) (error{OutOfMemory} || FrameError)!usize { + const initial_len = dest.items.len; + + var ring_buffer = try RingBuffer.init(allocator, frame_context.window_size); + defer ring_buffer.deinit(allocator); + + // These tables take 7680 bytes + var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined; + var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined; + var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined; + + var block_header = try block.decodeBlockHeaderSlice(src); + var consumed_count: usize = 3; + var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data); + while (true) : ({ + block_header = try block.decodeBlockHeaderSlice(src[consumed_count..]); + consumed_count += 3; + }) { + const written_size = try block.decodeBlockRingBuffer( + &ring_buffer, + src[consumed_count..], + block_header, + &decode_state, + &consumed_count, + frame_context.block_size_max, + ); + if (frame_context.content_size) |size| { + if (dest.items.len - initial_len > size) { + return error.BadContentSize; + } + } + if (written_size > 0) { + const written_slice = ring_buffer.sliceLast(written_size); + try dest.appendSlice(written_slice.first); + try dest.appendSlice(written_slice.second); + if (frame_context.hasher_opt) |*hasher| { + hasher.update(written_slice.first); + hasher.update(written_slice.second); + } + } + if (block_header.last_block) break; + } + if (frame_context.content_size) |size| { + if (dest.items.len - initial_len != size) { + return error.BadContentSize; + } + } + + if 
(frame_context.has_checksum) { + if (src.len < consumed_count + 4) return error.EndOfStream; + const checksum = readIntSlice(u32, src[consumed_count .. consumed_count + 4]); + consumed_count += 4; + if (frame_context.hasher_opt) |*hasher| { + if (checksum != computeChecksum(hasher)) return error.ChecksumFailure; + } + } + return consumed_count; +} + +fn decodeFrameBlocksInner( + dest: []u8, + src: []const u8, + consumed_count: *usize, + hash: ?*std.hash.XxHash64, + block_size_max: usize, +) (error{ EndOfStream, DestTooSmall } || block.Error)!usize { + // These tables take 7680 bytes + var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined; + var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined; + var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined; + + var block_header = try block.decodeBlockHeaderSlice(src); + var bytes_read: usize = 3; + defer consumed_count.* += bytes_read; + var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data); + var count: usize = 0; + while (true) : ({ + block_header = try block.decodeBlockHeaderSlice(src[bytes_read..]); + bytes_read += 3; + }) { + const written_size = try block.decodeBlock( + dest, + src[bytes_read..], + block_header, + &decode_state, + &bytes_read, + block_size_max, + count, + ); + if (hash) |hash_state| hash_state.update(dest[count .. count + written_size]); + count += written_size; + if (block_header.last_block) break; + } + return count; +} + +/// Decode the header of a skippable frame. The first four bytes of `src` must +/// be a valid magic number for a skippable frame. +pub fn decodeSkippableHeader(src: *const [8]u8) SkippableHeader { + const magic = readInt(u32, src[0..4]); + assert(isSkippableMagic(magic)); + const frame_size = readInt(u32, src[4..8]); + return .{ + .magic_number = magic, + .frame_size = frame_size, + }; +} + +/// Returns the window size required to decompress a frame, or `null` if it +/// cannot be determined (which indicates a malformed frame header). +pub fn frameWindowSize(header: ZstandardHeader) ?u64 { + if (header.window_descriptor) |descriptor| { + const exponent = (descriptor & 0b11111000) >> 3; + const mantissa = descriptor & 0b00000111; + const window_log = 10 + exponent; + const window_base = @as(u64, 1) << @intCast(u6, window_log); + const window_add = (window_base / 8) * mantissa; + return window_base + window_add; + } else return header.content_size; +} + +/// Decode the header of a Zstandard frame. 
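+/// +/// Worked example (illustrative): a descriptor byte 0xC4 = 0b11000100 has +/// content_size_flag = 0b11, single_segment_flag = false, and +/// content_checksum_flag = true, so the header continues with a window +/// descriptor byte and an 8-byte little-endian content size, and the frame +/// ends with a 4-byte checksum.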
+/// +/// Errors returned: +/// - `error.ReservedBitSet` if any of the reserved bits of the header are set +/// - `error.EndOfStream` if `source` does not contain a complete header +pub fn decodeZstandardHeader( + source: anytype, +) (@TypeOf(source).Error || error{ EndOfStream, ReservedBitSet })!ZstandardHeader { + const descriptor = @bitCast(ZstandardHeader.Descriptor, try source.readByte()); + + if (descriptor.reserved) return error.ReservedBitSet; + + var window_descriptor: ?u8 = null; + if (!descriptor.single_segment_flag) { + window_descriptor = try source.readByte(); + } + + var dictionary_id: ?u32 = null; + if (descriptor.dictionary_id_flag > 0) { + // if flag is 3 then field_size = 4, else field_size = flag + const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1; + dictionary_id = try source.readVarInt(u32, .Little, field_size); + } + + var content_size: ?u64 = null; + if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) { + const field_size = @as(u4, 1) << descriptor.content_size_flag; + content_size = try source.readVarInt(u64, .Little, field_size); + if (field_size == 2) content_size.? += 256; + } + + const header = ZstandardHeader{ + .descriptor = descriptor, + .window_descriptor = window_descriptor, + .dictionary_id = dictionary_id, + .content_size = content_size, + }; + return header; +} + +test { + std.testing.refAllDecls(@This()); +} diff --git a/lib/std/compress/zstandard/readers.zig b/lib/std/compress/zstandard/readers.zig new file mode 100644 index 0000000000..e2f62ddc51 --- /dev/null +++ b/lib/std/compress/zstandard/readers.zig @@ -0,0 +1,82 @@ +const std = @import("std"); + +pub const ReversedByteReader = struct { + remaining_bytes: usize, + bytes: []const u8, + + const Reader = std.io.Reader(*ReversedByteReader, error{}, readFn); + + pub fn init(bytes: []const u8) ReversedByteReader { + return .{ + .bytes = bytes, + .remaining_bytes = bytes.len, + }; + } + + pub fn reader(self: *ReversedByteReader) Reader { + return .{ .context = self }; + } + + fn readFn(ctx: *ReversedByteReader, buffer: []u8) !usize { + if (ctx.remaining_bytes == 0) return 0; + const byte_index = ctx.remaining_bytes - 1; + buffer[0] = ctx.bytes[byte_index]; + // buffer[0] = @bitReverse(ctx.bytes[byte_index]); + ctx.remaining_bytes = byte_index; + return 1; + } +}; + +/// A bit reader for reading the reversed bit streams used to encode +/// FSE compressed data. 
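+/// +/// Bytes are consumed from the end of the stream toward the beginning, and +/// `init` skips padding zero bits until the first set bit, which marks the +/// start of the stream; a final byte with no set bit yields +/// `error.BitStreamHasNoStartBit`.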
+pub const ReverseBitReader = struct { + byte_reader: ReversedByteReader, + bit_reader: std.io.BitReader(.Big, ReversedByteReader.Reader), + + pub fn init(self: *ReverseBitReader, bytes: []const u8) error{BitStreamHasNoStartBit}!void { + self.byte_reader = ReversedByteReader.init(bytes); + self.bit_reader = std.io.bitReader(.Big, self.byte_reader.reader()); + if (bytes.len == 0) return; + var i: usize = 0; + while (i < 8 and 0 == self.readBitsNoEof(u1, 1) catch unreachable) : (i += 1) {} + if (i == 8) return error.BitStreamHasNoStartBit; + } + + pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: usize) error{EndOfStream}!U { + return self.bit_reader.readBitsNoEof(U, num_bits); + } + + pub fn readBits(self: *@This(), comptime U: type, num_bits: usize, out_bits: *usize) error{}!U { + return try self.bit_reader.readBits(U, num_bits, out_bits); + } + + pub fn alignToByte(self: *@This()) void { + self.bit_reader.alignToByte(); + } + + pub fn isEmpty(self: ReverseBitReader) bool { + return self.byte_reader.remaining_bytes == 0 and self.bit_reader.bit_count == 0; + } +}; + +pub fn BitReader(comptime Reader: type) type { + return struct { + underlying: std.io.BitReader(.Little, Reader), + + pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: usize) !U { + return self.underlying.readBitsNoEof(U, num_bits); + } + + pub fn readBits(self: *@This(), comptime U: type, num_bits: usize, out_bits: *usize) !U { + return self.underlying.readBits(U, num_bits, out_bits); + } + + pub fn alignToByte(self: *@This()) void { + self.underlying.alignToByte(); + } + }; +} + +pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) { + return .{ .underlying = std.io.bitReader(.Little, reader) }; +} diff --git a/lib/std/compress/zstandard/types.zig b/lib/std/compress/zstandard/types.zig new file mode 100644 index 0000000000..db4fbdee2d --- /dev/null +++ b/lib/std/compress/zstandard/types.zig @@ -0,0 +1,401 @@ +pub const frame = struct { + pub const Kind = enum { zstandard, skippable }; + + pub const Zstandard = struct { + pub const magic_number = 0xFD2FB528; + + header: Header, + data_blocks: []Block, + checksum: ?u32, + + pub const Header = struct { + descriptor: Descriptor, + window_descriptor: ?u8, + dictionary_id: ?u32, + content_size: ?u64, + + pub const Descriptor = packed struct { + dictionary_id_flag: u2, + content_checksum_flag: bool, + reserved: bool, + unused: bool, + single_segment_flag: bool, + content_size_flag: u2, + }; + }; + + pub const Block = struct { + pub const Header = struct { + last_block: bool, + block_type: Block.Type, + block_size: u21, + }; + + pub const Type = enum(u2) { + raw, + rle, + compressed, + reserved, + }; + }; + }; + + pub const Skippable = struct { + pub const magic_number_min = 0x184D2A50; + pub const magic_number_max = 0x184D2A5F; + + pub const Header = struct { + magic_number: u32, + frame_size: u32, + }; + }; +}; + +pub const compressed_block = struct { + pub const LiteralsSection = struct { + header: Header, + huffman_tree: ?HuffmanTree, + streams: Streams, + + pub const Streams = union(enum) { + one: []const u8, + four: [4][]const u8, + }; + + pub const Header = struct { + block_type: BlockType, + size_format: u2, + regenerated_size: u20, + compressed_size: ?u18, + }; + + pub const BlockType = enum(u2) { + raw, + rle, + compressed, + treeless, + }; + + pub const HuffmanTree = struct { + max_bit_count: u4, + symbol_count_minus_one: u8, + nodes: [256]PrefixedSymbol, + + pub const PrefixedSymbol = struct { + symbol: u8, + prefix: u16, + weight: 
u4, + }; + + pub const Result = union(enum) { + symbol: u8, + index: usize, + }; + + pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{NotFound}!Result { + var node = self.nodes[index]; + const weight = node.weight; + var i: usize = index; + while (node.weight == weight) { + if (node.prefix == prefix) return Result{ .symbol = node.symbol }; + if (i == 0) return error.NotFound; + i -= 1; + node = self.nodes[i]; + } + return Result{ .index = i }; + } + + pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 { + return if (weight == 0) 0 else ((max_bit_count + 1) - weight); + } + }; + + pub const StreamCount = enum { one, four }; + pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount { + return switch (block_type) { + .raw, .rle => .one, + .compressed, .treeless => if (size_format == 0) .one else .four, + }; + } + }; + + pub const SequencesSection = struct { + header: SequencesSection.Header, + literals_length_table: Table, + offset_table: Table, + match_length_table: Table, + + pub const Header = struct { + sequence_count: u24, + match_lengths: Mode, + offsets: Mode, + literal_lengths: Mode, + + pub const Mode = enum(u2) { + predefined, + rle, + fse, + repeat, + }; + }; + }; + + pub const Table = union(enum) { + fse: []const Fse, + rle: u8, + + pub const Fse = struct { + symbol: u8, + baseline: u16, + bits: u8, + }; + }; + + pub const literals_length_code_table = [36]struct { u32, u5 }{ + .{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 }, + .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, + .{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, + .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 }, + .{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 }, + .{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 }, + .{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 }, + .{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 }, + .{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 }, + }; + + pub const match_length_code_table = [53]struct { u32, u5 }{ + .{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 }, + .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, + .{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 }, + .{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 }, + .{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 }, + .{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 }, + .{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 }, + .{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 }, + .{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 }, + }; + + pub const literals_length_default_distribution = [36]i16{ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1, -1, -1, -1, + }; + + pub const match_lengths_default_distribution = [53]i16{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, + -1, -1, -1, -1, -1, + }; + + pub const offset_codes_default_distribution = [29]i16{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, + }; + + pub const predefined_literal_fse_table = Table{ + .fse = &[64]Table.Fse{ + .{ .symbol = 0, .bits = 4, .baseline = 0 }, + .{ .symbol = 0, .bits = 4, .baseline = 16 }, + .{ .symbol = 1, .bits = 5, .baseline = 32 }, + .{ .symbol = 3, .bits = 5, .baseline = 0 }, + .{ .symbol = 4, .bits = 5, .baseline = 0 }, + 
.{ .symbol = 6, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 5, .baseline = 0 }, + .{ .symbol = 9, .bits = 5, .baseline = 0 }, + .{ .symbol = 10, .bits = 5, .baseline = 0 }, + .{ .symbol = 12, .bits = 5, .baseline = 0 }, + .{ .symbol = 14, .bits = 6, .baseline = 0 }, + .{ .symbol = 16, .bits = 5, .baseline = 0 }, + .{ .symbol = 18, .bits = 5, .baseline = 0 }, + .{ .symbol = 19, .bits = 5, .baseline = 0 }, + .{ .symbol = 21, .bits = 5, .baseline = 0 }, + .{ .symbol = 22, .bits = 5, .baseline = 0 }, + .{ .symbol = 24, .bits = 5, .baseline = 0 }, + .{ .symbol = 25, .bits = 5, .baseline = 32 }, + .{ .symbol = 26, .bits = 5, .baseline = 0 }, + .{ .symbol = 27, .bits = 6, .baseline = 0 }, + .{ .symbol = 29, .bits = 6, .baseline = 0 }, + .{ .symbol = 31, .bits = 6, .baseline = 0 }, + .{ .symbol = 0, .bits = 4, .baseline = 32 }, + .{ .symbol = 1, .bits = 4, .baseline = 0 }, + .{ .symbol = 2, .bits = 5, .baseline = 0 }, + .{ .symbol = 4, .bits = 5, .baseline = 32 }, + .{ .symbol = 5, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 5, .baseline = 32 }, + .{ .symbol = 8, .bits = 5, .baseline = 0 }, + .{ .symbol = 10, .bits = 5, .baseline = 32 }, + .{ .symbol = 11, .bits = 5, .baseline = 0 }, + .{ .symbol = 13, .bits = 6, .baseline = 0 }, + .{ .symbol = 16, .bits = 5, .baseline = 32 }, + .{ .symbol = 17, .bits = 5, .baseline = 0 }, + .{ .symbol = 19, .bits = 5, .baseline = 32 }, + .{ .symbol = 20, .bits = 5, .baseline = 0 }, + .{ .symbol = 22, .bits = 5, .baseline = 32 }, + .{ .symbol = 23, .bits = 5, .baseline = 0 }, + .{ .symbol = 25, .bits = 4, .baseline = 0 }, + .{ .symbol = 25, .bits = 4, .baseline = 16 }, + .{ .symbol = 26, .bits = 5, .baseline = 32 }, + .{ .symbol = 28, .bits = 6, .baseline = 0 }, + .{ .symbol = 30, .bits = 6, .baseline = 0 }, + .{ .symbol = 0, .bits = 4, .baseline = 48 }, + .{ .symbol = 1, .bits = 4, .baseline = 16 }, + .{ .symbol = 2, .bits = 5, .baseline = 32 }, + .{ .symbol = 3, .bits = 5, .baseline = 32 }, + .{ .symbol = 5, .bits = 5, .baseline = 32 }, + .{ .symbol = 6, .bits = 5, .baseline = 32 }, + .{ .symbol = 8, .bits = 5, .baseline = 32 }, + .{ .symbol = 9, .bits = 5, .baseline = 32 }, + .{ .symbol = 11, .bits = 5, .baseline = 32 }, + .{ .symbol = 12, .bits = 5, .baseline = 32 }, + .{ .symbol = 15, .bits = 6, .baseline = 0 }, + .{ .symbol = 17, .bits = 5, .baseline = 32 }, + .{ .symbol = 18, .bits = 5, .baseline = 32 }, + .{ .symbol = 20, .bits = 5, .baseline = 32 }, + .{ .symbol = 21, .bits = 5, .baseline = 32 }, + .{ .symbol = 23, .bits = 5, .baseline = 32 }, + .{ .symbol = 24, .bits = 5, .baseline = 32 }, + .{ .symbol = 35, .bits = 6, .baseline = 0 }, + .{ .symbol = 34, .bits = 6, .baseline = 0 }, + .{ .symbol = 33, .bits = 6, .baseline = 0 }, + .{ .symbol = 32, .bits = 6, .baseline = 0 }, + }, + }; + + pub const predefined_match_fse_table = Table{ + .fse = &[64]Table.Fse{ + .{ .symbol = 0, .bits = 6, .baseline = 0 }, + .{ .symbol = 1, .bits = 4, .baseline = 0 }, + .{ .symbol = 2, .bits = 5, .baseline = 32 }, + .{ .symbol = 3, .bits = 5, .baseline = 0 }, + .{ .symbol = 5, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 5, .baseline = 0 }, + .{ .symbol = 8, .bits = 5, .baseline = 0 }, + .{ .symbol = 10, .bits = 6, .baseline = 0 }, + .{ .symbol = 13, .bits = 6, .baseline = 0 }, + .{ .symbol = 16, .bits = 6, .baseline = 0 }, + .{ .symbol = 19, .bits = 6, .baseline = 0 }, + .{ .symbol = 22, .bits = 6, .baseline = 0 }, + .{ .symbol = 25, .bits = 6, .baseline = 0 }, + .{ .symbol = 28, .bits = 6, .baseline = 0 }, + .{ .symbol = 31, .bits = 6, 
.baseline = 0 }, + .{ .symbol = 33, .bits = 6, .baseline = 0 }, + .{ .symbol = 35, .bits = 6, .baseline = 0 }, + .{ .symbol = 37, .bits = 6, .baseline = 0 }, + .{ .symbol = 39, .bits = 6, .baseline = 0 }, + .{ .symbol = 41, .bits = 6, .baseline = 0 }, + .{ .symbol = 43, .bits = 6, .baseline = 0 }, + .{ .symbol = 45, .bits = 6, .baseline = 0 }, + .{ .symbol = 1, .bits = 4, .baseline = 16 }, + .{ .symbol = 2, .bits = 4, .baseline = 0 }, + .{ .symbol = 3, .bits = 5, .baseline = 32 }, + .{ .symbol = 4, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 5, .baseline = 32 }, + .{ .symbol = 7, .bits = 5, .baseline = 0 }, + .{ .symbol = 9, .bits = 6, .baseline = 0 }, + .{ .symbol = 12, .bits = 6, .baseline = 0 }, + .{ .symbol = 15, .bits = 6, .baseline = 0 }, + .{ .symbol = 18, .bits = 6, .baseline = 0 }, + .{ .symbol = 21, .bits = 6, .baseline = 0 }, + .{ .symbol = 24, .bits = 6, .baseline = 0 }, + .{ .symbol = 27, .bits = 6, .baseline = 0 }, + .{ .symbol = 30, .bits = 6, .baseline = 0 }, + .{ .symbol = 32, .bits = 6, .baseline = 0 }, + .{ .symbol = 34, .bits = 6, .baseline = 0 }, + .{ .symbol = 36, .bits = 6, .baseline = 0 }, + .{ .symbol = 38, .bits = 6, .baseline = 0 }, + .{ .symbol = 40, .bits = 6, .baseline = 0 }, + .{ .symbol = 42, .bits = 6, .baseline = 0 }, + .{ .symbol = 44, .bits = 6, .baseline = 0 }, + .{ .symbol = 1, .bits = 4, .baseline = 32 }, + .{ .symbol = 1, .bits = 4, .baseline = 48 }, + .{ .symbol = 2, .bits = 4, .baseline = 16 }, + .{ .symbol = 4, .bits = 5, .baseline = 32 }, + .{ .symbol = 5, .bits = 5, .baseline = 32 }, + .{ .symbol = 7, .bits = 5, .baseline = 32 }, + .{ .symbol = 8, .bits = 5, .baseline = 32 }, + .{ .symbol = 11, .bits = 6, .baseline = 0 }, + .{ .symbol = 14, .bits = 6, .baseline = 0 }, + .{ .symbol = 17, .bits = 6, .baseline = 0 }, + .{ .symbol = 20, .bits = 6, .baseline = 0 }, + .{ .symbol = 23, .bits = 6, .baseline = 0 }, + .{ .symbol = 26, .bits = 6, .baseline = 0 }, + .{ .symbol = 29, .bits = 6, .baseline = 0 }, + .{ .symbol = 52, .bits = 6, .baseline = 0 }, + .{ .symbol = 51, .bits = 6, .baseline = 0 }, + .{ .symbol = 50, .bits = 6, .baseline = 0 }, + .{ .symbol = 49, .bits = 6, .baseline = 0 }, + .{ .symbol = 48, .bits = 6, .baseline = 0 }, + .{ .symbol = 47, .bits = 6, .baseline = 0 }, + .{ .symbol = 46, .bits = 6, .baseline = 0 }, + }, + }; + + pub const predefined_offset_fse_table = Table{ + .fse = &[32]Table.Fse{ + .{ .symbol = 0, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 4, .baseline = 0 }, + .{ .symbol = 9, .bits = 5, .baseline = 0 }, + .{ .symbol = 15, .bits = 5, .baseline = 0 }, + .{ .symbol = 21, .bits = 5, .baseline = 0 }, + .{ .symbol = 3, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 4, .baseline = 0 }, + .{ .symbol = 12, .bits = 5, .baseline = 0 }, + .{ .symbol = 18, .bits = 5, .baseline = 0 }, + .{ .symbol = 23, .bits = 5, .baseline = 0 }, + .{ .symbol = 5, .bits = 5, .baseline = 0 }, + .{ .symbol = 8, .bits = 4, .baseline = 0 }, + .{ .symbol = 14, .bits = 5, .baseline = 0 }, + .{ .symbol = 20, .bits = 5, .baseline = 0 }, + .{ .symbol = 2, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 4, .baseline = 16 }, + .{ .symbol = 11, .bits = 5, .baseline = 0 }, + .{ .symbol = 17, .bits = 5, .baseline = 0 }, + .{ .symbol = 22, .bits = 5, .baseline = 0 }, + .{ .symbol = 4, .bits = 5, .baseline = 0 }, + .{ .symbol = 8, .bits = 4, .baseline = 16 }, + .{ .symbol = 13, .bits = 5, .baseline = 0 }, + .{ .symbol = 19, .bits = 5, .baseline = 0 }, + .{ .symbol = 1, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 4, 
.baseline = 16 }, + .{ .symbol = 10, .bits = 5, .baseline = 0 }, + .{ .symbol = 16, .bits = 5, .baseline = 0 }, + .{ .symbol = 28, .bits = 5, .baseline = 0 }, + .{ .symbol = 27, .bits = 5, .baseline = 0 }, + .{ .symbol = 26, .bits = 5, .baseline = 0 }, + .{ .symbol = 25, .bits = 5, .baseline = 0 }, + .{ .symbol = 24, .bits = 5, .baseline = 0 }, + }, + }; + pub const start_repeated_offset_1 = 1; + pub const start_repeated_offset_2 = 4; + pub const start_repeated_offset_3 = 8; + + pub const table_accuracy_log_max = struct { + pub const literal = 9; + pub const match = 9; + pub const offset = 8; + }; + + pub const table_symbol_count_max = struct { + pub const literal = 36; + pub const match = 53; + pub const offset = 32; + }; + + pub const default_accuracy_log = struct { + pub const literal = 6; + pub const match = 6; + pub const offset = 5; + }; + pub const table_size_max = struct { + pub const literal = 1 << table_accuracy_log_max.literal; + pub const match = 1 << table_accuracy_log_max.match; + pub const offset = 1 << table_accuracy_log_max.offset; + }; +}; + +test { + const testing = @import("std").testing; + testing.refAllDeclsRecursive(@This()); +} diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index c52758b181..71c22f2b4c 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -178,7 +178,7 @@ pub fn benchmarkBatchSignatureVerification(comptime Signature: anytype, comptime const sig = try key_pair.sign(&msg, null); var batch: [64]Signature.BatchElement = undefined; - for (batch) |*element| { + for (&batch) |*element| { element.* = Signature.BatchElement{ .sig = sig, .msg = &msg, .public_key = key_pair.public_key }; } diff --git a/lib/std/hash.zig b/lib/std/hash.zig index 2680a8e263..8e92b4c9de 100644 --- a/lib/std/hash.zig +++ b/lib/std/hash.zig @@ -32,6 +32,10 @@ pub const CityHash64 = cityhash.CityHash64; const wyhash = @import("hash/wyhash.zig"); pub const Wyhash = wyhash.Wyhash; +const xxhash = @import("hash/xxhash.zig"); +pub const XxHash64 = xxhash.XxHash64; +pub const XxHash32 = xxhash.XxHash32; + test "hash" { _ = adler; _ = auto_hash; @@ -40,4 +44,5 @@ test "hash" { _ = murmur; _ = cityhash; _ = wyhash; + _ = xxhash; } diff --git a/lib/std/hash/xxhash.zig b/lib/std/hash/xxhash.zig new file mode 100644 index 0000000000..bf4877e029 --- /dev/null +++ b/lib/std/hash/xxhash.zig @@ -0,0 +1,268 @@ +const std = @import("std"); +const mem = std.mem; +const expectEqual = std.testing.expectEqual; + +const rotl = std.math.rotl; + +pub const XxHash64 = struct { + acc1: u64, + acc2: u64, + acc3: u64, + acc4: u64, + + seed: u64, + buf: [32]u8, + buf_len: usize, + byte_count: usize, + + const prime_1 = 0x9E3779B185EBCA87; // 0b1001111000110111011110011011000110000101111010111100101010000111 + const prime_2 = 0xC2B2AE3D27D4EB4F; // 0b1100001010110010101011100011110100100111110101001110101101001111 + const prime_3 = 0x165667B19E3779F9; // 0b0001011001010110011001111011000110011110001101110111100111111001 + const prime_4 = 0x85EBCA77C2B2AE63; // 0b1000010111101011110010100111011111000010101100101010111001100011 + const prime_5 = 0x27D4EB2F165667C5; // 0b0010011111010100111010110010111100010110010101100110011111000101 + + pub fn init(seed: u64) XxHash64 { + return XxHash64{ + .seed = seed, + .acc1 = seed +% prime_1 +% prime_2, + .acc2 = seed +% prime_2, + .acc3 = seed, + .acc4 = seed -% prime_1, + .buf = undefined, + .buf_len = 0, + .byte_count = 0, + }; + } + + pub fn update(self: *XxHash64, input: []const u8) void { + if (input.len < 32 -
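(Editorial aside on the predefined FSE tables that close above: each `Table.Fse` entry describes one decoder state. A decode step emits `symbol`, then refills the state as `baseline` plus the next `bits` bits from the stream. A minimal Zig sketch of that transition; the `readBits` bit-reader method and the entry field widths are assumptions for illustration, not the decoder's actual API:)

    // Sketch only: one FSE state transition over a predefined table.
    // `bit_reader` is a hypothetical reader exposing `readBits(count: u8) !u16`.
    fn fseStep(table: []const Table.Fse, state: *u16, bit_reader: anytype) !u8 {
        const entry = table[state.*];
        // Emit the symbol for the current state, then compute the next state:
        // next_state = baseline + `bits` freshly read bits.
        state.* = entry.baseline + try bit_reader.readBits(entry.bits);
        return entry.symbol;
    }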
self.buf_len) { + mem.copy(u8, self.buf[self.buf_len..], input); + self.buf_len += input.len; + return; + } + + var i: usize = 0; + + if (self.buf_len > 0) { + i = 32 - self.buf_len; + mem.copy(u8, self.buf[self.buf_len..], input[0..i]); + self.processStripe(&self.buf); + self.buf_len = 0; + } + + while (i + 32 <= input.len) : (i += 32) { + self.processStripe(input[i..][0..32]); + } + + const remaining_bytes = input[i..]; + mem.copy(u8, &self.buf, remaining_bytes); + self.buf_len = remaining_bytes.len; + } + + inline fn processStripe(self: *XxHash64, buf: *const [32]u8) void { + self.acc1 = round(self.acc1, mem.readIntLittle(u64, buf[0..8])); + self.acc2 = round(self.acc2, mem.readIntLittle(u64, buf[8..16])); + self.acc3 = round(self.acc3, mem.readIntLittle(u64, buf[16..24])); + self.acc4 = round(self.acc4, mem.readIntLittle(u64, buf[24..32])); + self.byte_count += 32; + } + + inline fn round(acc: u64, lane: u64) u64 { + const a = acc +% (lane *% prime_2); + const b = rotl(u64, a, 31); + return b *% prime_1; + } + + pub fn final(self: *XxHash64) u64 { + var acc: u64 = undefined; + + if (self.byte_count < 32) { + acc = self.seed +% prime_5; + } else { + acc = rotl(u64, self.acc1, 1) +% rotl(u64, self.acc2, 7) +% + rotl(u64, self.acc3, 12) +% rotl(u64, self.acc4, 18); + acc = mergeAccumulator(acc, self.acc1); + acc = mergeAccumulator(acc, self.acc2); + acc = mergeAccumulator(acc, self.acc3); + acc = mergeAccumulator(acc, self.acc4); + } + + acc = acc +% @as(u64, self.byte_count) +% @as(u64, self.buf_len); + + var pos: usize = 0; + while (pos + 8 <= self.buf_len) : (pos += 8) { + const lane = mem.readIntLittle(u64, self.buf[pos..][0..8]); + acc ^= round(0, lane); + acc = rotl(u64, acc, 27) *% prime_1; + acc +%= prime_4; + } + + if (pos + 4 <= self.buf_len) { + const lane = @as(u64, mem.readIntLittle(u32, self.buf[pos..][0..4])); + acc ^= lane *% prime_1; + acc = rotl(u64, acc, 23) *% prime_2; + acc +%= prime_3; + pos += 4; + } + + while (pos < self.buf_len) : (pos += 1) { + const lane = @as(u64, self.buf[pos]); + acc ^= lane *% prime_5; + acc = rotl(u64, acc, 11) *% prime_1; + } + + acc ^= acc >> 33; + acc *%= prime_2; + acc ^= acc >> 29; + acc *%= prime_3; + acc ^= acc >> 32; + + return acc; + } + + inline fn mergeAccumulator(acc: u64, other: u64) u64 { + const a = acc ^ round(0, other); + const b = a *% prime_1; + return b +% prime_4; + } + + pub fn hash(input: []const u8) u64 { + var hasher = XxHash64.init(0); + hasher.update(input); + return hasher.final(); + } +}; + +pub const XxHash32 = struct { + acc1: u32, + acc2: u32, + acc3: u32, + acc4: u32, + + seed: u32, + buf: [16]u8, + buf_len: usize, + byte_count: usize, + + const prime_1 = 0x9E3779B1; // 0b10011110001101110111100110110001 + const prime_2 = 0x85EBCA77; // 0b10000101111010111100101001110111 + const prime_3 = 0xC2B2AE3D; // 0b11000010101100101010111000111101 + const prime_4 = 0x27D4EB2F; // 0b00100111110101001110101100101111 + const prime_5 = 0x165667B1; // 0b00010110010101100110011110110001 + + pub fn init(seed: u32) XxHash32 { + return XxHash32{ + .seed = seed, + .acc1 = seed +% prime_1 +% prime_2, + .acc2 = seed +% prime_2, + .acc3 = seed, + .acc4 = seed -% prime_1, + .buf = undefined, + .buf_len = 0, + .byte_count = 0, + }; + } + + pub fn update(self: *XxHash32, input: []const u8) void { + if (input.len < 16 - self.buf_len) { + mem.copy(u8, self.buf[self.buf_len..], input); + self.buf_len += input.len; + return; + } + + var i: usize = 0; + + if (self.buf_len > 0) { + i = 16 - self.buf_len; + mem.copy(u8, 
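(Editorial aside: the XxHash64 API above offers both a one-shot wrapper and a streaming hasher, and the two agree regardless of how the input is chunked. A sketch based only on the declarations above:)

    const std = @import("std");

    test "xxhash one-shot and streaming agree" {
        const msg = "message digest";
        // One-shot convenience wrapper (seed 0).
        const a = std.hash.XxHash64.hash(msg);
        // Equivalent streaming use: feed the input in arbitrary pieces.
        var hasher = std.hash.XxHash64.init(0);
        hasher.update(msg[0..7]);
        hasher.update(msg[7..]);
        try std.testing.expectEqual(a, hasher.final());
    }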
self.buf[self.buf_len..], input[0..i]); + self.processStripe(&self.buf); + self.buf_len = 0; + } + + while (i + 16 <= input.len) : (i += 16) { + self.processStripe(input[i..][0..16]); + } + + const remaining_bytes = input[i..]; + mem.copy(u8, &self.buf, remaining_bytes); + self.buf_len = remaining_bytes.len; + } + + inline fn processStripe(self: *XxHash32, buf: *const [16]u8) void { + self.acc1 = round(self.acc1, mem.readIntLittle(u32, buf[0..4])); + self.acc2 = round(self.acc2, mem.readIntLittle(u32, buf[4..8])); + self.acc3 = round(self.acc3, mem.readIntLittle(u32, buf[8..12])); + self.acc4 = round(self.acc4, mem.readIntLittle(u32, buf[12..16])); + self.byte_count += 16; + } + + inline fn round(acc: u32, lane: u32) u32 { + const a = acc +% (lane *% prime_2); + const b = rotl(u32, a, 13); + return b *% prime_1; + } + + pub fn final(self: *XxHash32) u32 { + var acc: u32 = undefined; + + if (self.byte_count < 16) { + acc = self.seed +% prime_5; + } else { + acc = rotl(u32, self.acc1, 1) +% rotl(u32, self.acc2, 7) +% + rotl(u32, self.acc3, 12) +% rotl(u32, self.acc4, 18); + } + + acc = acc +% @intCast(u32, self.byte_count) +% @intCast(u32, self.buf_len); + + var pos: usize = 0; + while (pos + 4 <= self.buf_len) : (pos += 4) { + const lane = mem.readIntLittle(u32, self.buf[pos..][0..4]); + acc +%= lane *% prime_3; + acc = rotl(u32, acc, 17) *% prime_4; + } + + while (pos < self.buf_len) : (pos += 1) { + const lane = @as(u32, self.buf[pos]); + acc +%= lane *% prime_5; + acc = rotl(u32, acc, 11) *% prime_1; + } + + acc ^= acc >> 15; + acc *%= prime_2; + acc ^= acc >> 13; + acc *%= prime_3; + acc ^= acc >> 16; + + return acc; + } + + pub fn hash(input: []const u8) u32 { + var hasher = XxHash32.init(0); + hasher.update(input); + return hasher.final(); + } +}; + +test "xxhash64" { + const hash = XxHash64.hash; + + try expectEqual(hash(""), 0xef46db3751d8e999); + try expectEqual(hash("a"), 0xd24ec4f1a98c6e5b); + try expectEqual(hash("abc"), 0x44bc2cf5ad770999); + try expectEqual(hash("message digest"), 0x066ed728fceeb3be); + try expectEqual(hash("abcdefghijklmnopqrstuvwxyz"), 0xcfe1f278fa89835c); + try expectEqual(hash("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0xaaa46907d3047814); + try expectEqual(hash("12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0xe04a477f19ee145d); +} + +test "xxhash32" { + const hash = XxHash32.hash; + + try expectEqual(hash(""), 0x02cc5d05); + try expectEqual(hash("a"), 0x550d7456); + try expectEqual(hash("abc"), 0x32d153ff); + try expectEqual(hash("message digest"), 0x7c948494); + try expectEqual(hash("abcdefghijklmnopqrstuvwxyz"), 0x63a14d5f); + try expectEqual(hash("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x9c285e64); + try expectEqual(hash("12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x9c05f475); +} diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 78fcf68b56..164b81d651 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -508,7 +508,7 @@ pub fn HashMap( /// If a new entry needs to be stored, this function asserts there /// is enough capacity to store it. 
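(Editorial aside: the fix below matters because the managed wrapper was forwarding its allocator to an unmanaged method that takes none. A hedged usage sketch for the function documented above; `map` and the `Adapted` context are hypothetical names, and `ensureUnusedCapacity` must run first because this call cannot allocate:)

    // Assume `map` is a std.HashMap whose Adapted context hashes []const u8 keys.
    try map.ensureUnusedCapacity(1);
    const gop = map.getOrPutAssumeCapacityAdapted(@as([]const u8, "key"), Adapted{});
    if (!gop.found_existing) gop.value_ptr.* = 0;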
pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult { - return self.unmanaged.getOrPutAssumeCapacityAdapted(self.allocator, key, ctx); + return self.unmanaged.getOrPutAssumeCapacityAdapted(key, ctx); } pub fn getOrPutValue(self: *Self, key: K, value: V) Allocator.Error!Entry { @@ -2130,7 +2130,7 @@ test "std.hash_map getOrPutAdapted" { try testing.expectEqual(map.count(), keys.len); inline for (keys, 0..) |key_str, i| { - const result = try map.getOrPutAdapted(key_str, AdaptedContext{}); + const result = map.getOrPutAssumeCapacityAdapted(key_str, AdaptedContext{}); try testing.expect(result.found_existing); try testing.expectEqual(real_keys[i], result.key_ptr.*); try testing.expectEqual(@as(u64, i) * 2, result.value_ptr.*); diff --git a/lib/std/mem.zig b/lib/std/mem.zig index fdd1c05862..b9b5fb1004 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -941,21 +941,21 @@ pub fn allEqual(comptime T: type, slice: []const T, scalar: T) bool { return true; } -/// Remove values from the beginning of a slice. +/// Remove a set of values from the beginning of a slice. pub fn trimLeft(comptime T: type, slice: []const T, values_to_strip: []const T) []const T { var begin: usize = 0; while (begin < slice.len and indexOfScalar(T, values_to_strip, slice[begin]) != null) : (begin += 1) {} return slice[begin..]; } -/// Remove values from the end of a slice. +/// Remove a set of values from the end of a slice. pub fn trimRight(comptime T: type, slice: []const T, values_to_strip: []const T) []const T { var end: usize = slice.len; while (end > 0 and indexOfScalar(T, values_to_strip, slice[end - 1]) != null) : (end -= 1) {} return slice[0..end]; } -/// Remove values from the beginning and end of a slice. +/// Remove a set of values from the beginning and end of a slice. 
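(Editorial aside: concretely, for the three functions documented above, `values_to_strip` is a *set* of elements, not a prefix or suffix sequence. For example:)

    const std = @import("std");

    test "trim strips set members from both ends" {
        // Both ' ' and '\n' are in the strip set; interior whitespace survives.
        try std.testing.expectEqualStrings("hello world", std.mem.trim(u8, " \n hello world \n ", " \n"));
        try std.testing.expectEqualStrings("hello world \n ", std.mem.trimLeft(u8, " \n hello world \n ", " \n"));
    }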
pub fn trim(comptime T: type, slice: []const T, values_to_strip: []const T) []const T { var begin: usize = 0; var end: usize = slice.len; diff --git a/lib/std/multi_array_list.zig b/lib/std/multi_array_list.zig index afdd6a5a8d..56b36aaa81 100644 --- a/lib/std/multi_array_list.zig +++ b/lib/std/multi_array_list.zig @@ -433,15 +433,9 @@ pub fn MultiArrayList(comptime S: type) type { } fn capacityInBytes(capacity: usize) usize { - if (builtin.zig_backend == .stage2_c) { - var bytes: usize = 0; - for (sizes.bytes) |size| bytes += size * capacity; - return bytes; - } else { - const sizes_vector: @Vector(sizes.bytes.len, usize) = sizes.bytes; - const capacity_vector = @splat(sizes.bytes.len, capacity); - return @reduce(.Add, capacity_vector * sizes_vector); - } + comptime var elem_bytes: usize = 0; + inline for (sizes.bytes) |size| elem_bytes += size; + return elem_bytes * capacity; } fn allocatedBytes(self: Self) []align(@alignOf(S)) u8 { diff --git a/lib/std/os.zig b/lib/std/os.zig index c5eeb34b1c..bd6719ec8f 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -7056,3 +7056,21 @@ pub fn timerfd_gettime(fd: i32) TimerFdGetError!linux.itimerspec { else => |err| return unexpectedErrno(err), }; } + +pub const have_sigpipe_support = @hasDecl(@This(), "SIG") and @hasDecl(SIG, "PIPE"); + +fn noopSigHandler(_: c_int) callconv(.C) void {} + +pub fn maybeIgnoreSigpipe() void { + if (have_sigpipe_support and !std.options.keep_sigpipe) { + const act = Sigaction{ + // We set handler to a noop function instead of SIG.IGN so we don't leak our + // signal disposition to a child process + .handler = .{ .handler = noopSigHandler }, + .mask = empty_sigset, + .flags = 0, + }; + sigaction(SIG.PIPE, &act, null) catch |err| + std.debug.panic("failed to install noop SIGPIPE handler with '{s}'", .{@errorName(err)}); + } +} diff --git a/lib/std/os/windows/advapi32.zig b/lib/std/os/windows/advapi32.zig index 6d7ea3f8e0..67234a26e0 100644 --- a/lib/std/os/windows/advapi32.zig +++ b/lib/std/os/windows/advapi32.zig @@ -21,10 +21,10 @@ pub extern "advapi32" fn RegOpenKeyExW( pub extern "advapi32" fn RegQueryValueExW( hKey: HKEY, lpValueName: LPCWSTR, - lpReserved: *DWORD, - lpType: *DWORD, - lpData: *BYTE, - lpcbData: *DWORD, + lpReserved: ?*DWORD, + lpType: ?*DWORD, + lpData: ?*BYTE, + lpcbData: ?*DWORD, ) callconv(WINAPI) LSTATUS; // RtlGenRandom is known as SystemFunction036 under advapi32 diff --git a/lib/std/sort.zig b/lib/std/sort.zig index fa1e33e7ce..405ab658d1 100644 --- a/lib/std/sort.zig +++ b/lib/std/sort.zig @@ -6,10 +6,10 @@ const math = std.math; pub fn binarySearch( comptime T: type, - key: T, + key: anytype, items: []const T, context: anytype, - comptime compareFn: fn (context: @TypeOf(context), lhs: T, rhs: T) math.Order, + comptime compareFn: fn (context: @TypeOf(context), key: @TypeOf(key), mid: T) math.Order, ) ?usize { var left: usize = 0; var right: usize = items.len; @@ -41,35 +41,69 @@ test "binarySearch" { }; try testing.expectEqual( @as(?usize, null), - binarySearch(u32, 1, &[_]u32{}, {}, S.order_u32), + binarySearch(u32, @as(u32, 1), &[_]u32{}, {}, S.order_u32), ); try testing.expectEqual( @as(?usize, 0), - binarySearch(u32, 1, &[_]u32{1}, {}, S.order_u32), + binarySearch(u32, @as(u32, 1), &[_]u32{1}, {}, S.order_u32), ); try testing.expectEqual( @as(?usize, null), - binarySearch(u32, 1, &[_]u32{0}, {}, S.order_u32), + binarySearch(u32, @as(u32, 1), &[_]u32{0}, {}, S.order_u32), ); try testing.expectEqual( @as(?usize, null), - binarySearch(u32, 0, &[_]u32{1}, {}, S.order_u32), + 
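// (Editorial note) With the generalized signature above, `key` no longer has
// to be the element type `T`: compareFn now receives the key and a `mid: T`
// element. The `R` struct test below relies on exactly that, locating which
// inclusive [b, e] range contains a bare i32 point.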
binarySearch(u32, @as(u32, 0), &[_]u32{1}, {}, S.order_u32), ); try testing.expectEqual( @as(?usize, 4), - binarySearch(u32, 5, &[_]u32{ 1, 2, 3, 4, 5 }, {}, S.order_u32), + binarySearch(u32, @as(u32, 5), &[_]u32{ 1, 2, 3, 4, 5 }, {}, S.order_u32), ); try testing.expectEqual( @as(?usize, 0), - binarySearch(u32, 2, &[_]u32{ 2, 4, 8, 16, 32, 64 }, {}, S.order_u32), + binarySearch(u32, @as(u32, 2), &[_]u32{ 2, 4, 8, 16, 32, 64 }, {}, S.order_u32), ); try testing.expectEqual( @as(?usize, 1), - binarySearch(i32, -4, &[_]i32{ -7, -4, 0, 9, 10 }, {}, S.order_i32), + binarySearch(i32, @as(i32, -4), &[_]i32{ -7, -4, 0, 9, 10 }, {}, S.order_i32), ); try testing.expectEqual( @as(?usize, 3), - binarySearch(i32, 98, &[_]i32{ -100, -25, 2, 98, 99, 100 }, {}, S.order_i32), + binarySearch(i32, @as(i32, 98), &[_]i32{ -100, -25, 2, 98, 99, 100 }, {}, S.order_i32), + ); + const R = struct { + b: i32, + e: i32, + + fn r(b: i32, e: i32) @This() { + return @This(){ .b = b, .e = e }; + } + + fn order(context: void, key: i32, mid: @This()) math.Order { + _ = context; + + if (key < mid.b) { + return .lt; + } + + if (key > mid.e) { + return .gt; + } + + return .eq; + } + }; + try testing.expectEqual( + @as(?usize, null), + binarySearch(R, @as(i32, -45), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), + ); + try testing.expectEqual( + @as(?usize, 2), + binarySearch(R, @as(i32, 10), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), + ); + try testing.expectEqual( + @as(?usize, 1), + binarySearch(R, @as(i32, -20), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), ); } diff --git a/lib/std/start.zig b/lib/std/start.zig index ea221d1539..6edebde122 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -496,6 +496,7 @@ fn callMainWithArgs(argc: usize, argv: [*][*:0]u8, envp: [][*:0]u8) u8 { std.os.environ = envp; std.debug.maybeEnableSegfaultHandler(); + std.os.maybeIgnoreSigpipe(); return initEventLoopAndCallMain(); } diff --git a/lib/std/std.zig b/lib/std/std.zig index e02be2ebaf..4a6d330003 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -31,6 +31,7 @@ pub const PackedIntSliceEndian = @import("packed_int_array.zig").PackedIntSliceE pub const PriorityQueue = @import("priority_queue.zig").PriorityQueue; pub const PriorityDequeue = @import("priority_dequeue.zig").PriorityDequeue; pub const Progress = @import("Progress.zig"); +pub const RingBuffer = @import("RingBuffer.zig"); pub const SegmentedList = @import("segmented_list.zig").SegmentedList; pub const SemanticVersion = @import("SemanticVersion.zig"); pub const SinglyLinkedList = @import("linked_list.zig").SinglyLinkedList; @@ -167,6 +168,22 @@ pub const options = struct { options_override.crypto_always_getrandom else false; + + /// By default Zig disables SIGPIPE by installing a "no-op" handler for it. Set this option + /// to `true` to prevent that. + /// + /// Note that we use a "no-op" handler rather than SIG_IGN because, unlike an ignored + /// disposition, a custom handler is not inherited by child processes after exec. + /// + /// SIGPIPE is triggered when a process attempts to write to a broken pipe. By default, SIGPIPE + /// terminates the process without triggering the panic handler, so in many cases it is unclear + /// why the process exited. By capturing SIGPIPE instead, functions that write to broken pipes + /// return the EPIPE error (error.BrokenPipe) and the program can handle it like any other error.
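(Editorial aside: a usage sketch for the option documented above. It assumes this era's override mechanism, in which the root source file publishes a `std_options` struct of declarations that `options_override` picks up; hedged, not normative:)

    // In the application's root file (e.g. main.zig), hypothetical opt-out:
    pub const std_options = struct {
        // Keep the default SIGPIPE disposition; do not install the no-op handler.
        pub const keep_sigpipe = true;
    };

    pub fn main() !void {
        // With keep_sigpipe = true, writing to a broken pipe terminates the
        // process via SIGPIPE instead of surfacing error.BrokenPipe.
    }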
+ pub const keep_sigpipe: bool = if (@hasDecl(options_override, "keep_sigpipe")) + options_override.keep_sigpipe + else + false; }; // This forces the start.zig file to be imported, and the comptime logic inside that diff --git a/lib/std/target/loongarch.zig b/lib/std/target/loongarch.zig index b4b289fa91..dcc6bd43fc 100644 --- a/lib/std/target/loongarch.zig +++ b/lib/std/target/loongarch.zig @@ -89,7 +89,7 @@ pub const all_features = blk: { .dependencies = featureSet(&[_]Feature{}), }; const ti = @typeInfo(Feature); - for (result) |*elem, i| { + for (&result, 0..) |*elem, i| { elem.index = i; elem.name = ti.Enum.fields[i].name; } diff --git a/lib/std/target/xtensa.zig b/lib/std/target/xtensa.zig index 98b38a4026..5979abcaf1 100644 --- a/lib/std/target/xtensa.zig +++ b/lib/std/target/xtensa.zig @@ -23,7 +23,7 @@ pub const all_features = blk: { .dependencies = featureSet(&[_]Feature{}), }; const ti = @typeInfo(Feature); - for (result) |*elem, i| { + for (&result, 0..) |*elem, i| { elem.index = i; elem.name = ti.Enum.fields[i].name; } diff --git a/lib/zig.h b/lib/zig.h index 0756d9f731..c10720d1bd 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -1,8 +1,11 @@ #undef linux +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ #define __STDC_WANT_IEC_60559_TYPES_EXT__ +#endif #include #include +#include #include #include @@ -75,6 +78,32 @@ typedef char bool; #define zig_cold #endif +#if zig_has_attribute(flatten) +#define zig_maybe_flatten __attribute__((flatten)) +#else +#define zig_maybe_flatten +#endif + +#if zig_has_attribute(noinline) +#define zig_never_inline __attribute__((noinline)) zig_maybe_flatten +#elif defined(_MSC_VER) +#define zig_never_inline __declspec(noinline) zig_maybe_flatten +#else +#define zig_never_inline zig_never_inline_unavailable +#endif + +#if zig_has_attribute(not_tail_called) +#define zig_never_tail __attribute__((not_tail_called)) zig_never_inline +#else +#define zig_never_tail zig_never_tail_unavailable +#endif + +#if zig_has_attribute(always_inline) +#define zig_always_tail __attribute__((musttail)) +#else +#define zig_always_tail zig_always_tail_unavailable +#endif + #if __STDC_VERSION__ >= 199901L #define zig_restrict restrict #elif defined(__GNUC__) @@ -286,701 +315,802 @@ typedef char bool; #endif #if __STDC_VERSION__ >= 201112L -#define zig_noreturn _Noreturn void +#define zig_noreturn _Noreturn #elif zig_has_attribute(noreturn) || defined(zig_gnuc) -#define zig_noreturn __attribute__((noreturn)) void +#define zig_noreturn __attribute__((noreturn)) #elif _MSC_VER -#define zig_noreturn __declspec(noreturn) void +#define zig_noreturn __declspec(noreturn) #else -#define zig_noreturn void +#define zig_noreturn #endif #define zig_bitSizeOf(T) (CHAR_BIT * sizeof(T)) -typedef uintptr_t zig_usize; -typedef intptr_t zig_isize; -typedef signed short int zig_c_short; -typedef unsigned short int zig_c_ushort; -typedef signed int zig_c_int; -typedef unsigned int zig_c_uint; -typedef signed long int zig_c_long; -typedef unsigned long int zig_c_ulong; -typedef signed long long int zig_c_longlong; -typedef unsigned long long int zig_c_ulonglong; +#define zig_compiler_rt_abbrev_uint32_t si +#define zig_compiler_rt_abbrev_int32_t si +#define zig_compiler_rt_abbrev_uint64_t di +#define zig_compiler_rt_abbrev_int64_t di +#define zig_compiler_rt_abbrev_zig_u128 ti +#define zig_compiler_rt_abbrev_zig_i128 ti +#define zig_compiler_rt_abbrev_zig_f16 hf +#define zig_compiler_rt_abbrev_zig_f32 sf +#define zig_compiler_rt_abbrev_zig_f64 df +#define zig_compiler_rt_abbrev_zig_f80 xf +#define 
zig_compiler_rt_abbrev_zig_f128 tf -typedef uint8_t zig_u8; -typedef int8_t zig_i8; -typedef uint16_t zig_u16; -typedef int16_t zig_i16; -typedef uint32_t zig_u32; -typedef int32_t zig_i32; -typedef uint64_t zig_u64; -typedef int64_t zig_i64; +zig_extern void *memcpy (void *zig_restrict, void const *zig_restrict, size_t); +zig_extern void *memset (void *, int, size_t); -#define zig_as_u8(val) UINT8_C(val) -#define zig_as_i8(val) INT8_C(val) -#define zig_as_u16(val) UINT16_C(val) -#define zig_as_i16(val) INT16_C(val) -#define zig_as_u32(val) UINT32_C(val) -#define zig_as_i32(val) INT32_C(val) -#define zig_as_u64(val) UINT64_C(val) -#define zig_as_i64(val) INT64_C(val) +/* ===================== 8/16/32/64-bit Integer Support ===================== */ + +#if __STDC_VERSION__ >= 199901L || _MSC_VER +#include <stdint.h> +#else + +#if SCHAR_MIN == ~0x7F && SCHAR_MAX == 0x7F && UCHAR_MAX == 0xFF +typedef unsigned char uint8_t; +typedef signed char int8_t; +#define INT8_C(c) c +#define UINT8_C(c) c##U +#elif SHRT_MIN == ~0x7F && SHRT_MAX == 0x7F && USHRT_MAX == 0xFF +typedef unsigned short uint8_t; +typedef signed short int8_t; +#define INT8_C(c) c +#define UINT8_C(c) c##U +#elif INT_MIN == ~0x7F && INT_MAX == 0x7F && UINT_MAX == 0xFF +typedef unsigned int uint8_t; +typedef signed int int8_t; +#define INT8_C(c) c +#define UINT8_C(c) c##U +#elif LONG_MIN == ~0x7F && LONG_MAX == 0x7F && ULONG_MAX == 0xFF +typedef unsigned long uint8_t; +typedef signed long int8_t; +#define INT8_C(c) c##L +#define UINT8_C(c) c##LU +#elif LLONG_MIN == ~0x7F && LLONG_MAX == 0x7F && ULLONG_MAX == 0xFF +typedef unsigned long long uint8_t; +typedef signed long long int8_t; +#define INT8_C(c) c##LL +#define UINT8_C(c) c##LLU +#endif +#define INT8_MIN (~INT8_C(0x7F)) +#define INT8_MAX ( INT8_C(0x7F)) +#define UINT8_MAX ( UINT8_C(0xFF)) + +#if SCHAR_MIN == ~0x7FFF && SCHAR_MAX == 0x7FFF && UCHAR_MAX == 0xFFFF +typedef unsigned char uint16_t; +typedef signed char int16_t; +#define INT16_C(c) c +#define UINT16_C(c) c##U +#elif SHRT_MIN == ~0x7FFF && SHRT_MAX == 0x7FFF && USHRT_MAX == 0xFFFF +typedef unsigned short uint16_t; +typedef signed short int16_t; +#define INT16_C(c) c +#define UINT16_C(c) c##U +#elif INT_MIN == ~0x7FFF && INT_MAX == 0x7FFF && UINT_MAX == 0xFFFF +typedef unsigned int uint16_t; +typedef signed int int16_t; +#define INT16_C(c) c +#define UINT16_C(c) c##U +#elif LONG_MIN == ~0x7FFF && LONG_MAX == 0x7FFF && ULONG_MAX == 0xFFFF +typedef unsigned long uint16_t; +typedef signed long int16_t; +#define INT16_C(c) c##L +#define UINT16_C(c) c##LU +#elif LLONG_MIN == ~0x7FFF && LLONG_MAX == 0x7FFF && ULLONG_MAX == 0xFFFF +typedef unsigned long long uint16_t; +typedef signed long long int16_t; +#define INT16_C(c) c##LL +#define UINT16_C(c) c##LLU +#endif +#define INT16_MIN (~INT16_C(0x7FFF)) +#define INT16_MAX ( INT16_C(0x7FFF)) +#define UINT16_MAX ( UINT16_C(0xFFFF)) + +#if SCHAR_MIN == ~0x7FFFFFFF && SCHAR_MAX == 0x7FFFFFFF && UCHAR_MAX == 0xFFFFFFFF +typedef unsigned char uint32_t; +typedef signed char int32_t; +#define INT32_C(c) c +#define UINT32_C(c) c##U +#elif SHRT_MIN == ~0x7FFFFFFF && SHRT_MAX == 0x7FFFFFFF && USHRT_MAX == 0xFFFFFFFF +typedef unsigned short uint32_t; +typedef signed short int32_t; +#define INT32_C(c) c +#define UINT32_C(c) c##U +#elif INT_MIN == ~0x7FFFFFFF && INT_MAX == 0x7FFFFFFF && UINT_MAX == 0xFFFFFFFF +typedef unsigned int uint32_t; +typedef signed int int32_t; +#define INT32_C(c) c +#define UINT32_C(c) c##U +#elif LONG_MIN == ~0x7FFFFFFF && LONG_MAX == 0x7FFFFFFF && ULONG_MAX == 0xFFFFFFFF
+typedef unsigned long uint32_t; +typedef signed long int32_t; +#define INT32_C(c) c##L +#define UINT32_C(c) c##LU +#elif LLONG_MIN == ~0x7FFFFFFF && LLONG_MAX == 0x7FFFFFFF && ULLONG_MAX == 0xFFFFFFFF +typedef unsigned long long uint32_t; +typedef signed long long int32_t; +#define INT32_C(c) c##LL +#define UINT32_C(c) c##LLU +#endif +#define INT32_MIN (~INT32_C(0x7FFFFFFF)) +#define INT32_MAX ( INT32_C(0x7FFFFFFF)) +#define UINT32_MAX ( UINT32_C(0xFFFFFFFF)) + +#if SCHAR_MIN == ~0x7FFFFFFFFFFFFFFF && SCHAR_MAX == 0x7FFFFFFFFFFFFFFF && UCHAR_MAX == 0xFFFFFFFFFFFFFFFF +typedef unsigned char uint64_t; +typedef signed char int64_t; +#define INT64_C(c) c +#define UINT64_C(c) c##U +#elif SHRT_MIN == ~0x7FFFFFFFFFFFFFFF && SHRT_MAX == 0x7FFFFFFFFFFFFFFF && USHRT_MAX == 0xFFFFFFFFFFFFFFFF +typedef unsigned short uint64_t; +typedef signed short int64_t; +#define INT64_C(c) c +#define UINT64_C(c) c##U +#elif INT_MIN == ~0x7FFFFFFFFFFFFFFF && INT_MAX == 0x7FFFFFFFFFFFFFFF && UINT_MAX == 0xFFFFFFFFFFFFFFFF +typedef unsigned int uint64_t; +typedef signed int int64_t; +#define INT64_C(c) c +#define UINT64_C(c) c##U +#elif LONG_MIN == ~0x7FFFFFFFFFFFFFFF && LONG_MAX == 0x7FFFFFFFFFFFFFFF && ULONG_MAX == 0xFFFFFFFFFFFFFFFF +typedef unsigned long uint64_t; +typedef signed long int64_t; +#define INT64_C(c) c##L +#define UINT64_C(c) c##LU +#elif LLONG_MIN == ~0x7FFFFFFFFFFFFFFF && LLONG_MAX == 0x7FFFFFFFFFFFFFFF && ULLONG_MAX == 0xFFFFFFFFFFFFFFFF +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +#define INT64_C(c) c##LL +#define UINT64_C(c) c##LLU +#endif +#define INT64_MIN (~INT64_C(0x7FFFFFFFFFFFFFFF)) +#define INT64_MAX ( INT64_C(0x7FFFFFFFFFFFFFFF)) +#define UINT64_MAX ( UINT64_C(0xFFFFFFFFFFFFFFFF)) + +typedef size_t uintptr_t; +typedef ptrdiff_t intptr_t; + +#endif -#define zig_minInt_u8 zig_as_u8(0) -#define zig_maxInt_u8 UINT8_MAX #define zig_minInt_i8 INT8_MIN #define zig_maxInt_i8 INT8_MAX -#define zig_minInt_u16 zig_as_u16(0) -#define zig_maxInt_u16 UINT16_MAX +#define zig_minInt_u8 UINT8_C(0) +#define zig_maxInt_u8 UINT8_MAX #define zig_minInt_i16 INT16_MIN #define zig_maxInt_i16 INT16_MAX -#define zig_minInt_u32 zig_as_u32(0) -#define zig_maxInt_u32 UINT32_MAX +#define zig_minInt_u16 UINT16_C(0) +#define zig_maxInt_u16 UINT16_MAX #define zig_minInt_i32 INT32_MIN #define zig_maxInt_i32 INT32_MAX -#define zig_minInt_u64 zig_as_u64(0) -#define zig_maxInt_u64 UINT64_MAX +#define zig_minInt_u32 UINT32_C(0) +#define zig_maxInt_u32 UINT32_MAX #define zig_minInt_i64 INT64_MIN #define zig_maxInt_i64 INT64_MAX +#define zig_minInt_u64 UINT64_C(0) +#define zig_maxInt_u64 UINT64_MAX -#define zig_compiler_rt_abbrev_u32 si -#define zig_compiler_rt_abbrev_i32 si -#define zig_compiler_rt_abbrev_u64 di -#define zig_compiler_rt_abbrev_i64 di -#define zig_compiler_rt_abbrev_u128 ti -#define zig_compiler_rt_abbrev_i128 ti -#define zig_compiler_rt_abbrev_f16 hf -#define zig_compiler_rt_abbrev_f32 sf -#define zig_compiler_rt_abbrev_f64 df -#define zig_compiler_rt_abbrev_f80 xf -#define zig_compiler_rt_abbrev_f128 tf - -zig_extern void *memcpy (void *zig_restrict, void const *zig_restrict, zig_usize); -zig_extern void *memset (void *, int, zig_usize); - -/* ==================== 8/16/32/64-bit Integer Routines ===================== */ - -#define zig_maxInt(Type, bits) zig_shr_##Type(zig_maxInt_##Type, (zig_bitSizeOf(zig_##Type) - bits)) -#define zig_expand_maxInt(Type, bits) zig_maxInt(Type, bits) -#define zig_minInt(Type, bits) zig_not_##Type(zig_maxInt(Type, bits), bits) -#define
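/* (Editorial note) The zig_intLimit helpers introduced just below derive
 * limits for arbitrary bit widths by shifting the full-width limit down.
 * Worked example: zig_maxInt_u(32, 5) expands to
 * zig_shr_u32(zig_maxInt_u32, 32 - 5), i.e. UINT32_MAX >> 27 == 31, and
 * zig_minInt_i(32, 5) arithmetic-shifts INT32_MIN down to -16. */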
zig_expand_minInt(Type, bits) zig_minInt(Type, bits) +#define zig_intLimit(s, w, limit, bits) zig_shr_##s##w(zig_##limit##Int_##s##w, w - (bits)) +#define zig_minInt_i(w, bits) zig_intLimit(i, w, min, bits) +#define zig_maxInt_i(w, bits) zig_intLimit(i, w, max, bits) +#define zig_minInt_u(w, bits) zig_intLimit(u, w, min, bits) +#define zig_maxInt_u(w, bits) zig_intLimit(u, w, max, bits) #define zig_int_operator(Type, RhsType, operation, operator) \ - static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##RhsType rhs) { \ + static inline Type zig_##operation(Type lhs, RhsType rhs) { \ return lhs operator rhs; \ } #define zig_int_basic_operator(Type, operation, operator) \ - zig_int_operator(Type, Type, operation, operator) + zig_int_operator(Type, Type, operation, operator) #define zig_int_shift_operator(Type, operation, operator) \ - zig_int_operator(Type, u8, operation, operator) + zig_int_operator(Type, uint8_t, operation, operator) #define zig_int_helpers(w) \ - zig_int_basic_operator(u##w, and, &) \ - zig_int_basic_operator(i##w, and, &) \ - zig_int_basic_operator(u##w, or, |) \ - zig_int_basic_operator(i##w, or, |) \ - zig_int_basic_operator(u##w, xor, ^) \ - zig_int_basic_operator(i##w, xor, ^) \ - zig_int_shift_operator(u##w, shl, <<) \ - zig_int_shift_operator(i##w, shl, <<) \ - zig_int_shift_operator(u##w, shr, >>) \ + zig_int_basic_operator(uint##w##_t, and_u##w, &) \ + zig_int_basic_operator( int##w##_t, and_i##w, &) \ + zig_int_basic_operator(uint##w##_t, or_u##w, |) \ + zig_int_basic_operator( int##w##_t, or_i##w, |) \ + zig_int_basic_operator(uint##w##_t, xor_u##w, ^) \ + zig_int_basic_operator( int##w##_t, xor_i##w, ^) \ + zig_int_shift_operator(uint##w##_t, shl_u##w, <<) \ + zig_int_shift_operator( int##w##_t, shl_i##w, <<) \ + zig_int_shift_operator(uint##w##_t, shr_u##w, >>) \ \ - static inline zig_i##w zig_shr_i##w(zig_i##w lhs, zig_u8 rhs) { \ - zig_i##w sign_mask = lhs < zig_as_i##w(0) ? -zig_as_i##w(1) : zig_as_i##w(0); \ + static inline int##w##_t zig_shr_i##w(int##w##_t lhs, uint8_t rhs) { \ + int##w##_t sign_mask = lhs < INT##w##_C(0) ? -INT##w##_C(1) : INT##w##_C(0); \ return ((lhs ^ sign_mask) >> rhs) ^ sign_mask; \ } \ \ - static inline zig_u##w zig_not_u##w(zig_u##w val, zig_u8 bits) { \ - return val ^ zig_maxInt(u##w, bits); \ + static inline uint##w##_t zig_not_u##w(uint##w##_t val, uint8_t bits) { \ + return val ^ zig_maxInt_u(w, bits); \ } \ \ - static inline zig_i##w zig_not_i##w(zig_i##w val, zig_u8 bits) { \ + static inline int##w##_t zig_not_i##w(int##w##_t val, uint8_t bits) { \ (void)bits; \ return ~val; \ } \ \ - static inline zig_u##w zig_wrap_u##w(zig_u##w val, zig_u8 bits) { \ - return val & zig_maxInt(u##w, bits); \ + static inline uint##w##_t zig_wrap_u##w(uint##w##_t val, uint8_t bits) { \ + return val & zig_maxInt_u(w, bits); \ } \ \ - static inline zig_i##w zig_wrap_i##w(zig_i##w val, zig_u8 bits) { \ - return (val & zig_as_u##w(1) << (bits - zig_as_u8(1))) != 0 \ - ? val | zig_minInt(i##w, bits) : val & zig_maxInt(i##w, bits); \ + static inline int##w##_t zig_wrap_i##w(int##w##_t val, uint8_t bits) { \ + return (val & UINT##w##_C(1) << (bits - UINT8_C(1))) != 0 \ + ? 
val | zig_minInt_i(w, bits) : val & zig_maxInt_i(w, bits); \ } \ \ - zig_int_basic_operator(u##w, div_floor, /) \ + zig_int_basic_operator(uint##w##_t, div_floor_u##w, /) \ \ - static inline zig_i##w zig_div_floor_i##w(zig_i##w lhs, zig_i##w rhs) { \ - return lhs / rhs - (((lhs ^ rhs) & (lhs % rhs)) < zig_as_i##w(0)); \ + static inline int##w##_t zig_div_floor_i##w(int##w##_t lhs, int##w##_t rhs) { \ + return lhs / rhs - (((lhs ^ rhs) & (lhs % rhs)) < INT##w##_C(0)); \ } \ \ - zig_int_basic_operator(u##w, mod, %) \ + zig_int_basic_operator(uint##w##_t, mod_u##w, %) \ \ - static inline zig_i##w zig_mod_i##w(zig_i##w lhs, zig_i##w rhs) { \ - zig_i##w rem = lhs % rhs; \ - return rem + (((lhs ^ rhs) & rem) < zig_as_i##w(0) ? rhs : zig_as_i##w(0)); \ + static inline int##w##_t zig_mod_i##w(int##w##_t lhs, int##w##_t rhs) { \ + int##w##_t rem = lhs % rhs; \ + return rem + (((lhs ^ rhs) & rem) < INT##w##_C(0) ? rhs : INT##w##_C(0)); \ } \ \ - static inline zig_u##w zig_shlw_u##w(zig_u##w lhs, zig_u8 rhs, zig_u8 bits) { \ + static inline uint##w##_t zig_shlw_u##w(uint##w##_t lhs, uint8_t rhs, uint8_t bits) { \ return zig_wrap_u##w(zig_shl_u##w(lhs, rhs), bits); \ } \ \ - static inline zig_i##w zig_shlw_i##w(zig_i##w lhs, zig_u8 rhs, zig_u8 bits) { \ - return zig_wrap_i##w((zig_i##w)zig_shl_u##w((zig_u##w)lhs, (zig_u##w)rhs), bits); \ + static inline int##w##_t zig_shlw_i##w(int##w##_t lhs, uint8_t rhs, uint8_t bits) { \ + return zig_wrap_i##w((int##w##_t)zig_shl_u##w((uint##w##_t)lhs, (uint##w##_t)rhs), bits); \ } \ \ - static inline zig_u##w zig_addw_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + static inline uint##w##_t zig_addw_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ return zig_wrap_u##w(lhs + rhs, bits); \ } \ \ - static inline zig_i##w zig_addw_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - return zig_wrap_i##w((zig_i##w)((zig_u##w)lhs + (zig_u##w)rhs), bits); \ + static inline int##w##_t zig_addw_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + return zig_wrap_i##w((int##w##_t)((uint##w##_t)lhs + (uint##w##_t)rhs), bits); \ } \ \ - static inline zig_u##w zig_subw_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + static inline uint##w##_t zig_subw_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ return zig_wrap_u##w(lhs - rhs, bits); \ } \ \ - static inline zig_i##w zig_subw_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - return zig_wrap_i##w((zig_i##w)((zig_u##w)lhs - (zig_u##w)rhs), bits); \ + static inline int##w##_t zig_subw_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + return zig_wrap_i##w((int##w##_t)((uint##w##_t)lhs - (uint##w##_t)rhs), bits); \ } \ \ - static inline zig_u##w zig_mulw_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + static inline uint##w##_t zig_mulw_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ return zig_wrap_u##w(lhs * rhs, bits); \ } \ \ - static inline zig_i##w zig_mulw_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - return zig_wrap_i##w((zig_i##w)((zig_u##w)lhs * (zig_u##w)rhs), bits); \ + static inline int##w##_t zig_mulw_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + return zig_wrap_i##w((int##w##_t)((uint##w##_t)lhs * (uint##w##_t)rhs), bits); \ } zig_int_helpers(8) zig_int_helpers(16) zig_int_helpers(32) zig_int_helpers(64) -static inline bool zig_addo_u32(zig_u32 *res, zig_u32 lhs, zig_u32 rhs, zig_u8 bits) { +static inline bool zig_addo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_u32 
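/* (Editorial note) Shared pattern for every zig_addo_*/zig_subo_*/zig_mulo_*
 * helper in this file: compute the full-width result (preferring the
 * compiler's checked builtin when available), wrap it to the requested bit
 * width with zig_wrap_*, and report overflow when either the native
 * operation overflowed or the full result lies outside the bits-wide range. */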
full_res; + uint32_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_u32(full_res, bits); - return overflow || full_res < zig_minInt(u32, bits) || full_res > zig_maxInt(u32, bits); + return overflow || full_res < zig_minInt_u(32, bits) || full_res > zig_maxInt_u(32, bits); #else *res = zig_addw_u32(lhs, rhs, bits); return *res < lhs; #endif } -static inline void zig_vaddo_u32(zig_u8 *ov, zig_u32 *res, int n, - const zig_u32 *lhs, const zig_u32 *rhs, zig_u8 bits) +static inline void zig_vaddo_u32(uint8_t *ov, uint32_t *res, int n, + const uint32_t *lhs, const uint32_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_u32(&res[i], lhs[i], rhs[i], bits); } -zig_extern zig_i32 __addosi4(zig_i32 lhs, zig_i32 rhs, zig_c_int *overflow); -static inline bool zig_addo_i32(zig_i32 *res, zig_i32 lhs, zig_i32 rhs, zig_u8 bits) { +zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow); +static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_i32 full_res; + int32_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; - zig_i32 full_res = __addosi4(lhs, rhs, &overflow_int); + int overflow_int; + int32_t full_res = __addosi4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i32(full_res, bits); - return overflow || full_res < zig_minInt(i32, bits) || full_res > zig_maxInt(i32, bits); + return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); } -static inline void zig_vaddo_i32(zig_u8 *ov, zig_i32 *res, int n, - const zig_i32 *lhs, const zig_i32 *rhs, zig_u8 bits) +static inline void zig_vaddo_i32(uint8_t *ov, int32_t *res, int n, + const int32_t *lhs, const int32_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_i32(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_addo_u64(zig_u64 *res, zig_u64 lhs, zig_u64 rhs, zig_u8 bits) { +static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_u64 full_res; + uint64_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_u64(full_res, bits); - return overflow || full_res < zig_minInt(u64, bits) || full_res > zig_maxInt(u64, bits); + return overflow || full_res < zig_minInt_u(64, bits) || full_res > zig_maxInt_u(64, bits); #else *res = zig_addw_u64(lhs, rhs, bits); return *res < lhs; #endif } -static inline void zig_vaddo_u64(zig_u8 *ov, zig_u64 *res, int n, - const zig_u64 *lhs, const zig_u64 *rhs, zig_u8 bits) +static inline void zig_vaddo_u64(uint8_t *ov, uint64_t *res, int n, + const uint64_t *lhs, const uint64_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_u64(&res[i], lhs[i], rhs[i], bits); } -zig_extern zig_i64 __addodi4(zig_i64 lhs, zig_i64 rhs, zig_c_int *overflow); -static inline bool zig_addo_i64(zig_i64 *res, zig_i64 lhs, zig_i64 rhs, zig_u8 bits) { +zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow); +static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_i64 full_res; + int64_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; - zig_i64 full_res = __addodi4(lhs, rhs, &overflow_int); + int overflow_int; + int64_t full_res = __addodi4(lhs, 
rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i64(full_res, bits); - return overflow || full_res < zig_minInt(i64, bits) || full_res > zig_maxInt(i64, bits); + return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); } -static inline void zig_vaddo_i64(zig_u8 *ov, zig_i64 *res, int n, - const zig_i64 *lhs, const zig_i64 *rhs, zig_u8 bits) +static inline void zig_vaddo_i64(uint8_t *ov, int64_t *res, int n, + const int64_t *lhs, const int64_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_i64(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_addo_u8(zig_u8 *res, zig_u8 lhs, zig_u8 rhs, zig_u8 bits) { +static inline bool zig_addo_u8(uint8_t *res, uint8_t lhs, uint8_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_u8 full_res; + uint8_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_u8(full_res, bits); - return overflow || full_res < zig_minInt(u8, bits) || full_res > zig_maxInt(u8, bits); + return overflow || full_res < zig_minInt_u(8, bits) || full_res > zig_maxInt_u(8, bits); #else - zig_u32 full_res; + uint32_t full_res; bool overflow = zig_addo_u32(&full_res, lhs, rhs, bits); - *res = (zig_u8)full_res; + *res = (uint8_t)full_res; return overflow; #endif } -static inline void zig_vaddo_u8(zig_u8 *ov, zig_u8 *res, int n, - const zig_u8 *lhs, const zig_u8 *rhs, zig_u8 bits) +static inline void zig_vaddo_u8(uint8_t *ov, uint8_t *res, int n, + const uint8_t *lhs, const uint8_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_u8(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_addo_i8(zig_i8 *res, zig_i8 lhs, zig_i8 rhs, zig_u8 bits) { +static inline bool zig_addo_i8(int8_t *res, int8_t lhs, int8_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_i8 full_res; + int8_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_i8(full_res, bits); - return overflow || full_res < zig_minInt(i8, bits) || full_res > zig_maxInt(i8, bits); + return overflow || full_res < zig_minInt_i(8, bits) || full_res > zig_maxInt_i(8, bits); #else - zig_i32 full_res; + int32_t full_res; bool overflow = zig_addo_i32(&full_res, lhs, rhs, bits); - *res = (zig_i8)full_res; + *res = (int8_t)full_res; return overflow; #endif } -static inline void zig_vaddo_i8(zig_u8 *ov, zig_i8 *res, int n, - const zig_i8 *lhs, const zig_i8 *rhs, zig_u8 bits) +static inline void zig_vaddo_i8(uint8_t *ov, int8_t *res, int n, + const int8_t *lhs, const int8_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_i8(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_addo_u16(zig_u16 *res, zig_u16 lhs, zig_u16 rhs, zig_u8 bits) { +static inline bool zig_addo_u16(uint16_t *res, uint16_t lhs, uint16_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_u16 full_res; + uint16_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_u16(full_res, bits); - return overflow || full_res < zig_minInt(u16, bits) || full_res > zig_maxInt(u16, bits); + return overflow || full_res < zig_minInt_u(16, bits) || full_res > zig_maxInt_u(16, bits); #else - zig_u32 full_res; + uint32_t full_res; bool overflow = zig_addo_u32(&full_res, lhs, rhs, bits); - *res = (zig_u16)full_res; + *res = (uint16_t)full_res; return overflow; #endif } -static inline void zig_vaddo_u16(zig_u8 *ov, zig_u16 *res, int n, - const zig_u16 *lhs, const 
zig_u16 *rhs, zig_u8 bits) +static inline void zig_vaddo_u16(uint8_t *ov, uint16_t *res, int n, + const uint16_t *lhs, const uint16_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_u16(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_addo_i16(zig_i16 *res, zig_i16 lhs, zig_i16 rhs, zig_u8 bits) { +static inline bool zig_addo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gnuc) - zig_i16 full_res; + int16_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_i16(full_res, bits); - return overflow || full_res < zig_minInt(i16, bits) || full_res > zig_maxInt(i16, bits); + return overflow || full_res < zig_minInt_i(16, bits) || full_res > zig_maxInt_i(16, bits); #else - zig_i32 full_res; + int32_t full_res; bool overflow = zig_addo_i32(&full_res, lhs, rhs, bits); - *res = (zig_i16)full_res; + *res = (int16_t)full_res; return overflow; #endif } -static inline void zig_vaddo_i16(zig_u8 *ov, zig_i16 *res, int n, - const zig_i16 *lhs, const zig_i16 *rhs, zig_u8 bits) +static inline void zig_vaddo_i16(uint8_t *ov, int16_t *res, int n, + const int16_t *lhs, const int16_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_addo_i16(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_subo_u32(zig_u32 *res, zig_u32 lhs, zig_u32 rhs, zig_u8 bits) { +static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_u32 full_res; + uint32_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); *res = zig_wrap_u32(full_res, bits); - return overflow || full_res < zig_minInt(u32, bits) || full_res > zig_maxInt(u32, bits); + return overflow || full_res < zig_minInt_u(32, bits) || full_res > zig_maxInt_u(32, bits); #else *res = zig_subw_u32(lhs, rhs, bits); return *res > lhs; #endif } -static inline void zig_vsubo_u32(zig_u8 *ov, zig_u32 *res, int n, - const zig_u32 *lhs, const zig_u32 *rhs, zig_u8 bits) +static inline void zig_vsubo_u32(uint8_t *ov, uint32_t *res, int n, + const uint32_t *lhs, const uint32_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_u32(&res[i], lhs[i], rhs[i], bits); } -zig_extern zig_i32 __subosi4(zig_i32 lhs, zig_i32 rhs, zig_c_int *overflow); -static inline bool zig_subo_i32(zig_i32 *res, zig_i32 lhs, zig_i32 rhs, zig_u8 bits) { +zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow); +static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_i32 full_res; + int32_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; - zig_i32 full_res = __subosi4(lhs, rhs, &overflow_int); + int overflow_int; + int32_t full_res = __subosi4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i32(full_res, bits); - return overflow || full_res < zig_minInt(i32, bits) || full_res > zig_maxInt(i32, bits); + return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); } -static inline void zig_vsubo_i32(zig_u8 *ov, zig_i32 *res, int n, - const zig_i32 *lhs, const zig_i32 *rhs, zig_u8 bits) +static inline void zig_vsubo_i32(uint8_t *ov, int32_t *res, int n, + const int32_t *lhs, const int32_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_i32(&res[i], lhs[i], rhs[i], bits); } -static inline bool 
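/* (Editorial note) The unsigned subtraction fallback detects borrow with
 * `*res > lhs`: wrapped subtraction yields a result greater than the
 * minuend exactly when the true difference is negative. */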
zig_subo_u64(zig_u64 *res, zig_u64 lhs, zig_u64 rhs, zig_u8 bits) { +static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_u64 full_res; + uint64_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); *res = zig_wrap_u64(full_res, bits); - return overflow || full_res < zig_minInt(u64, bits) || full_res > zig_maxInt(u64, bits); + return overflow || full_res < zig_minInt_u(64, bits) || full_res > zig_maxInt_u(64, bits); #else *res = zig_subw_u64(lhs, rhs, bits); return *res > lhs; #endif } -static inline void zig_vsubo_u64(zig_u8 *ov, zig_u64 *res, int n, - const zig_u64 *lhs, const zig_u64 *rhs, zig_u8 bits) +static inline void zig_vsubo_u64(uint8_t *ov, uint64_t *res, int n, + const uint64_t *lhs, const uint64_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_u64(&res[i], lhs[i], rhs[i], bits); } -zig_extern zig_i64 __subodi4(zig_i64 lhs, zig_i64 rhs, zig_c_int *overflow); -static inline bool zig_subo_i64(zig_i64 *res, zig_i64 lhs, zig_i64 rhs, zig_u8 bits) { +zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow); +static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_i64 full_res; + int64_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; - zig_i64 full_res = __subodi4(lhs, rhs, &overflow_int); + int overflow_int; + int64_t full_res = __subodi4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i64(full_res, bits); - return overflow || full_res < zig_minInt(i64, bits) || full_res > zig_maxInt(i64, bits); + return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); } -static inline void zig_vsubo_i64(zig_u8 *ov, zig_i64 *res, int n, - const zig_i64 *lhs, const zig_i64 *rhs, zig_u8 bits) +static inline void zig_vsubo_i64(uint8_t *ov, int64_t *res, int n, + const int64_t *lhs, const int64_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_i64(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_subo_u8(zig_u8 *res, zig_u8 lhs, zig_u8 rhs, zig_u8 bits) { +static inline bool zig_subo_u8(uint8_t *res, uint8_t lhs, uint8_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_u8 full_res; + uint8_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); *res = zig_wrap_u8(full_res, bits); - return overflow || full_res < zig_minInt(u8, bits) || full_res > zig_maxInt(u8, bits); + return overflow || full_res < zig_minInt_u(8, bits) || full_res > zig_maxInt_u(8, bits); #else - zig_u32 full_res; + uint32_t full_res; bool overflow = zig_subo_u32(&full_res, lhs, rhs, bits); - *res = (zig_u8)full_res; + *res = (uint8_t)full_res; return overflow; #endif } -static inline void zig_vsubo_u8(zig_u8 *ov, zig_u8 *res, int n, - const zig_u8 *lhs, const zig_u8 *rhs, zig_u8 bits) +static inline void zig_vsubo_u8(uint8_t *ov, uint8_t *res, int n, + const uint8_t *lhs, const uint8_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_u8(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_subo_i8(zig_i8 *res, zig_i8 lhs, zig_i8 rhs, zig_u8 bits) { +static inline bool zig_subo_i8(int8_t *res, int8_t lhs, int8_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_i8 full_res; + int8_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, 
&full_res); *res = zig_wrap_i8(full_res, bits); - return overflow || full_res < zig_minInt(i8, bits) || full_res > zig_maxInt(i8, bits); + return overflow || full_res < zig_minInt_i(8, bits) || full_res > zig_maxInt_i(8, bits); #else - zig_i32 full_res; + int32_t full_res; bool overflow = zig_subo_i32(&full_res, lhs, rhs, bits); - *res = (zig_i8)full_res; + *res = (int8_t)full_res; return overflow; #endif } -static inline void zig_vsubo_i8(zig_u8 *ov, zig_i8 *res, int n, - const zig_i8 *lhs, const zig_i8 *rhs, zig_u8 bits) +static inline void zig_vsubo_i8(uint8_t *ov, int8_t *res, int n, + const int8_t *lhs, const int8_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_i8(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_subo_u16(zig_u16 *res, zig_u16 lhs, zig_u16 rhs, zig_u8 bits) { +static inline bool zig_subo_u16(uint16_t *res, uint16_t lhs, uint16_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_u16 full_res; + uint16_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); *res = zig_wrap_u16(full_res, bits); - return overflow || full_res < zig_minInt(u16, bits) || full_res > zig_maxInt(u16, bits); + return overflow || full_res < zig_minInt_u(16, bits) || full_res > zig_maxInt_u(16, bits); #else - zig_u32 full_res; + uint32_t full_res; bool overflow = zig_subo_u32(&full_res, lhs, rhs, bits); - *res = (zig_u16)full_res; + *res = (uint16_t)full_res; return overflow; #endif } -static inline void zig_vsubo_u16(zig_u8 *ov, zig_u16 *res, int n, - const zig_u16 *lhs, const zig_u16 *rhs, zig_u8 bits) +static inline void zig_vsubo_u16(uint8_t *ov, uint16_t *res, int n, + const uint16_t *lhs, const uint16_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_u16(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_subo_i16(zig_i16 *res, zig_i16 lhs, zig_i16 rhs, zig_u8 bits) { +static inline bool zig_subo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gnuc) - zig_i16 full_res; + int16_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); *res = zig_wrap_i16(full_res, bits); - return overflow || full_res < zig_minInt(i16, bits) || full_res > zig_maxInt(i16, bits); + return overflow || full_res < zig_minInt_i(16, bits) || full_res > zig_maxInt_i(16, bits); #else - zig_i32 full_res; + int32_t full_res; bool overflow = zig_subo_i32(&full_res, lhs, rhs, bits); - *res = (zig_i16)full_res; + *res = (int16_t)full_res; return overflow; #endif } -static inline void zig_vsubo_i16(zig_u8 *ov, zig_i16 *res, int n, - const zig_i16 *lhs, const zig_i16 *rhs, zig_u8 bits) +static inline void zig_vsubo_i16(uint8_t *ov, int16_t *res, int n, + const int16_t *lhs, const int16_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_subo_i16(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_mulo_u32(zig_u32 *res, zig_u32 lhs, zig_u32 rhs, zig_u8 bits) { +static inline bool zig_mulo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_u32 full_res; + uint32_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_u32(full_res, bits); - return overflow || full_res < zig_minInt(u32, bits) || full_res > zig_maxInt(u32, bits); + return overflow || full_res < zig_minInt_u(32, bits) || full_res > zig_maxInt_u(32, bits); #else *res = zig_mulw_u32(lhs, rhs, bits); - return rhs != zig_as_u32(0) && lhs > zig_maxInt(u32, bits) / rhs; + 
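/* (Editorial note) Division-based overflow test: for nonzero rhs,
 * lhs * rhs exceeds the bits-wide maximum exactly when
 * lhs > zig_maxInt_u(32, bits) / rhs, so no wider intermediate is needed. */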
return rhs != UINT32_C(0) && lhs > zig_maxInt_u(32, bits) / rhs; #endif } -static inline void zig_vmulo_u32(zig_u8 *ov, zig_u32 *res, int n, - const zig_u32 *lhs, const zig_u32 *rhs, zig_u8 bits) +static inline void zig_vmulo_u32(uint8_t *ov, uint32_t *res, int n, + const uint32_t *lhs, const uint32_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u32(&res[i], lhs[i], rhs[i], bits); } -zig_extern zig_i32 __mulosi4(zig_i32 lhs, zig_i32 rhs, zig_c_int *overflow); -static inline bool zig_mulo_i32(zig_i32 *res, zig_i32 lhs, zig_i32 rhs, zig_u8 bits) { +zig_extern int32_t __mulosi4(int32_t lhs, int32_t rhs, int *overflow); +static inline bool zig_mulo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_i32 full_res; + int32_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; - zig_i32 full_res = __mulosi4(lhs, rhs, &overflow_int); + int overflow_int; + int32_t full_res = __mulosi4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i32(full_res, bits); - return overflow || full_res < zig_minInt(i32, bits) || full_res > zig_maxInt(i32, bits); + return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); } -static inline void zig_vmulo_i32(zig_u8 *ov, zig_i32 *res, int n, - const zig_i32 *lhs, const zig_i32 *rhs, zig_u8 bits) +static inline void zig_vmulo_i32(uint8_t *ov, int32_t *res, int n, + const int32_t *lhs, const int32_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i32(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_mulo_u64(zig_u64 *res, zig_u64 lhs, zig_u64 rhs, zig_u8 bits) { +static inline bool zig_mulo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_u64 full_res; + uint64_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_u64(full_res, bits); - return overflow || full_res < zig_minInt(u64, bits) || full_res > zig_maxInt(u64, bits); + return overflow || full_res < zig_minInt_u(64, bits) || full_res > zig_maxInt_u(64, bits); #else *res = zig_mulw_u64(lhs, rhs, bits); - return rhs != zig_as_u64(0) && lhs > zig_maxInt(u64, bits) / rhs; + return rhs != UINT64_C(0) && lhs > zig_maxInt_u(64, bits) / rhs; #endif } -static inline void zig_vmulo_u64(zig_u8 *ov, zig_u64 *res, int n, - const zig_u64 *lhs, const zig_u64 *rhs, zig_u8 bits) +static inline void zig_vmulo_u64(uint8_t *ov, uint64_t *res, int n, + const uint64_t *lhs, const uint64_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u64(&res[i], lhs[i], rhs[i], bits); } -zig_extern zig_i64 __mulodi4(zig_i64 lhs, zig_i64 rhs, zig_c_int *overflow); -static inline bool zig_mulo_i64(zig_i64 *res, zig_i64 lhs, zig_i64 rhs, zig_u8 bits) { +zig_extern int64_t __mulodi4(int64_t lhs, int64_t rhs, int *overflow); +static inline bool zig_mulo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_i64 full_res; + int64_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; - zig_i64 full_res = __mulodi4(lhs, rhs, &overflow_int); + int overflow_int; + int64_t full_res = __mulodi4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i64(full_res, bits); - return overflow || full_res < zig_minInt(i64, bits) || full_res > zig_maxInt(i64, bits); + 
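/* (Editorial note) When __builtin_mul_overflow is unavailable, the signed
 * helpers fall back to compiler_rt's __mulosi4/__mulodi4, which report
 * full-width overflow through the out-parameter; the range check below then
 * narrows the result to the requested `bits`. */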
return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); } -static inline void zig_vmulo_i64(zig_u8 *ov, zig_i64 *res, int n, - const zig_i64 *lhs, const zig_i64 *rhs, zig_u8 bits) +static inline void zig_vmulo_i64(uint8_t *ov, int64_t *res, int n, + const int64_t *lhs, const int64_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i64(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_mulo_u8(zig_u8 *res, zig_u8 lhs, zig_u8 rhs, zig_u8 bits) { +static inline bool zig_mulo_u8(uint8_t *res, uint8_t lhs, uint8_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_u8 full_res; + uint8_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_u8(full_res, bits); - return overflow || full_res < zig_minInt(u8, bits) || full_res > zig_maxInt(u8, bits); + return overflow || full_res < zig_minInt_u(8, bits) || full_res > zig_maxInt_u(8, bits); #else - zig_u32 full_res; + uint32_t full_res; bool overflow = zig_mulo_u32(&full_res, lhs, rhs, bits); - *res = (zig_u8)full_res; + *res = (uint8_t)full_res; return overflow; #endif } -static inline void zig_vmulo_u8(zig_u8 *ov, zig_u8 *res, int n, - const zig_u8 *lhs, const zig_u8 *rhs, zig_u8 bits) +static inline void zig_vmulo_u8(uint8_t *ov, uint8_t *res, int n, + const uint8_t *lhs, const uint8_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u8(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_mulo_i8(zig_i8 *res, zig_i8 lhs, zig_i8 rhs, zig_u8 bits) { +static inline bool zig_mulo_i8(int8_t *res, int8_t lhs, int8_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_i8 full_res; + int8_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_i8(full_res, bits); - return overflow || full_res < zig_minInt(i8, bits) || full_res > zig_maxInt(i8, bits); + return overflow || full_res < zig_minInt_i(8, bits) || full_res > zig_maxInt_i(8, bits); #else - zig_i32 full_res; + int32_t full_res; bool overflow = zig_mulo_i32(&full_res, lhs, rhs, bits); - *res = (zig_i8)full_res; + *res = (int8_t)full_res; return overflow; #endif } -static inline void zig_vmulo_i8(zig_u8 *ov, zig_i8 *res, int n, - const zig_i8 *lhs, const zig_i8 *rhs, zig_u8 bits) +static inline void zig_vmulo_i8(uint8_t *ov, int8_t *res, int n, + const int8_t *lhs, const int8_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i8(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_mulo_u16(zig_u16 *res, zig_u16 lhs, zig_u16 rhs, zig_u8 bits) { +static inline bool zig_mulo_u16(uint16_t *res, uint16_t lhs, uint16_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_u16 full_res; + uint16_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_u16(full_res, bits); - return overflow || full_res < zig_minInt(u16, bits) || full_res > zig_maxInt(u16, bits); + return overflow || full_res < zig_minInt_u(16, bits) || full_res > zig_maxInt_u(16, bits); #else - zig_u32 full_res; + uint32_t full_res; bool overflow = zig_mulo_u32(&full_res, lhs, rhs, bits); - *res = (zig_u16)full_res; + *res = (uint16_t)full_res; return overflow; #endif } -static inline void zig_vmulo_u16(zig_u8 *ov, zig_u16 *res, int n, - const zig_u16 *lhs, const zig_u16 *rhs, zig_u8 bits) +static inline void zig_vmulo_u16(uint8_t *ov, uint16_t *res, int n, + const uint16_t *lhs, const uint16_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = 
zig_mulo_u16(&res[i], lhs[i], rhs[i], bits); } -static inline bool zig_mulo_i16(zig_i16 *res, zig_i16 lhs, zig_i16 rhs, zig_u8 bits) { +static inline bool zig_mulo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) || defined(zig_gnuc) - zig_i16 full_res; + int16_t full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_i16(full_res, bits); - return overflow || full_res < zig_minInt(i16, bits) || full_res > zig_maxInt(i16, bits); + return overflow || full_res < zig_minInt_i(16, bits) || full_res > zig_maxInt_i(16, bits); #else - zig_i32 full_res; + int32_t full_res; bool overflow = zig_mulo_i32(&full_res, lhs, rhs, bits); - *res = (zig_i16)full_res; + *res = (int16_t)full_res; return overflow; #endif } -static inline void zig_vmulo_i16(zig_u8 *ov, zig_i16 *res, int n, - const zig_i16 *lhs, const zig_i16 *rhs, zig_u8 bits) +static inline void zig_vmulo_i16(uint8_t *ov, int16_t *res, int n, + const int16_t *lhs, const int16_t *rhs, uint8_t bits) { for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i16(&res[i], lhs[i], rhs[i], bits); } #define zig_int_builtins(w) \ - static inline bool zig_shlo_u##w(zig_u##w *res, zig_u##w lhs, zig_u8 rhs, zig_u8 bits) { \ + static inline bool zig_shlo_u##w(uint##w##_t *res, uint##w##_t lhs, uint8_t rhs, uint8_t bits) { \ *res = zig_shlw_u##w(lhs, rhs, bits); \ - return lhs > zig_maxInt(u##w, bits) >> rhs; \ + return lhs > zig_maxInt_u(w, bits) >> rhs; \ } \ \ - static inline bool zig_shlo_i##w(zig_i##w *res, zig_i##w lhs, zig_u8 rhs, zig_u8 bits) { \ + static inline bool zig_shlo_i##w(int##w##_t *res, int##w##_t lhs, uint8_t rhs, uint8_t bits) { \ *res = zig_shlw_i##w(lhs, rhs, bits); \ - zig_i##w mask = (zig_i##w)(zig_maxInt_u##w << (bits - rhs - 1)); \ - return (lhs & mask) != zig_as_i##w(0) && (lhs & mask) != mask; \ + int##w##_t mask = (int##w##_t)(UINT##w##_MAX << (bits - rhs - 1)); \ + return (lhs & mask) != INT##w##_C(0) && (lhs & mask) != mask; \ } \ \ - static inline zig_u##w zig_shls_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ - zig_u##w res; \ - if (rhs >= bits) return lhs != zig_as_u##w(0) ? zig_maxInt(u##w, bits) : lhs; \ - return zig_shlo_u##w(&res, lhs, (zig_u8)rhs, bits) ? zig_maxInt(u##w, bits) : res; \ + static inline uint##w##_t zig_shls_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ + uint##w##_t res; \ + if (rhs >= bits) return lhs != UINT##w##_C(0) ? zig_maxInt_u(w, bits) : lhs; \ + return zig_shlo_u##w(&res, lhs, (uint8_t)rhs, bits) ? zig_maxInt_u(w, bits) : res; \ } \ \ - static inline zig_i##w zig_shls_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - zig_i##w res; \ - if ((zig_u##w)rhs < (zig_u##w)bits && !zig_shlo_i##w(&res, lhs, rhs, bits)) return res; \ - return lhs < zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + static inline int##w##_t zig_shls_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + int##w##_t res; \ + if ((uint##w##_t)rhs < (uint##w##_t)bits && !zig_shlo_i##w(&res, lhs, (uint8_t)rhs, bits)) return res; \ + return lhs < INT##w##_C(0) ? zig_minInt_i(w, bits) : zig_maxInt_i(w, bits); \ } \ \ - static inline zig_u##w zig_adds_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ - zig_u##w res; \ - return zig_addo_u##w(&res, lhs, rhs, bits) ? zig_maxInt(u##w, bits) : res; \ + static inline uint##w##_t zig_adds_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ + uint##w##_t res; \ + return zig_addo_u##w(&res, lhs, rhs, bits) ? 
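Note: the `zig_shls_*` helpers generated below saturate instead of wrapping — any shift that would lose a set bit clamps to the extreme value for the bit width. The unsigned case as a standalone sketch (hypothetical names):

    #include <stdint.h>

    static uint8_t shls_u8(uint8_t lhs, uint8_t rhs, uint8_t bits) {
        uint8_t max = (uint8_t)((1u << bits) - 1);  /* maxInt of a `bits`-wide uint */
        if (rhs >= bits) return lhs != 0 ? max : 0; /* every bit shifts out */
        return lhs > (uint8_t)(max >> rhs) ? max : (uint8_t)((lhs << rhs) & max);
    }
    /* shls_u8(3, 3, 5) == 24; shls_u8(3, 4, 5) == 31 (saturated) */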
zig_maxInt_u(w, bits) : res; \ } \ \ - static inline zig_i##w zig_adds_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - zig_i##w res; \ + static inline int##w##_t zig_adds_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + int##w##_t res; \ if (!zig_addo_i##w(&res, lhs, rhs, bits)) return res; \ - return res >= zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + return res >= INT##w##_C(0) ? zig_minInt_i(w, bits) : zig_maxInt_i(w, bits); \ } \ \ - static inline zig_u##w zig_subs_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ - zig_u##w res; \ - return zig_subo_u##w(&res, lhs, rhs, bits) ? zig_minInt(u##w, bits) : res; \ + static inline uint##w##_t zig_subs_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ + uint##w##_t res; \ + return zig_subo_u##w(&res, lhs, rhs, bits) ? zig_minInt_u(w, bits) : res; \ } \ \ - static inline zig_i##w zig_subs_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - zig_i##w res; \ + static inline int##w##_t zig_subs_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + int##w##_t res; \ if (!zig_subo_i##w(&res, lhs, rhs, bits)) return res; \ - return res >= zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + return res >= INT##w##_C(0) ? zig_minInt_i(w, bits) : zig_maxInt_i(w, bits); \ } \ \ - static inline zig_u##w zig_muls_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ - zig_u##w res; \ - return zig_mulo_u##w(&res, lhs, rhs, bits) ? zig_maxInt(u##w, bits) : res; \ + static inline uint##w##_t zig_muls_u##w(uint##w##_t lhs, uint##w##_t rhs, uint8_t bits) { \ + uint##w##_t res; \ + return zig_mulo_u##w(&res, lhs, rhs, bits) ? zig_maxInt_u(w, bits) : res; \ } \ \ - static inline zig_i##w zig_muls_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ - zig_i##w res; \ + static inline int##w##_t zig_muls_i##w(int##w##_t lhs, int##w##_t rhs, uint8_t bits) { \ + int##w##_t res; \ if (!zig_mulo_i##w(&res, lhs, rhs, bits)) return res; \ - return (lhs ^ rhs) < zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + return (lhs ^ rhs) < INT##w##_C(0) ? 
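Note: for the signed saturating ops the clamp direction is read off the wrapped result's sign — on overflow the wrapped value has the opposite sign of the true sum, so `res >= 0` means the true sum was negative and clamps to the minimum. The same semantics written with an explicit wide sum (hypothetical name):

    #include <stdint.h>

    static int8_t adds_i8(int8_t lhs, int8_t rhs) {
        int16_t full = (int16_t)lhs + rhs; /* exact: cannot overflow 16 bits */
        if (full > INT8_MAX) return INT8_MAX;
        if (full < INT8_MIN) return INT8_MIN;
        return (int8_t)full;
    }
    /* adds_i8(100, 100) == 127; adds_i8(-100, -100) == -128 */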
zig_minInt_i(w, bits) : zig_maxInt_i(w, bits); \ } zig_int_builtins(8) zig_int_builtins(16) @@ -988,89 +1118,89 @@ zig_int_builtins(32) zig_int_builtins(64) #define zig_builtin8(name, val) __builtin_##name(val) -typedef zig_c_uint zig_Builtin8; +typedef unsigned int zig_Builtin8; #define zig_builtin16(name, val) __builtin_##name(val) -typedef zig_c_uint zig_Builtin16; +typedef unsigned int zig_Builtin16; #if INT_MIN <= INT32_MIN #define zig_builtin32(name, val) __builtin_##name(val) -typedef zig_c_uint zig_Builtin32; +typedef unsigned int zig_Builtin32; #elif LONG_MIN <= INT32_MIN #define zig_builtin32(name, val) __builtin_##name##l(val) -typedef zig_c_ulong zig_Builtin32; +typedef unsigned long zig_Builtin32; #endif #if INT_MIN <= INT64_MIN #define zig_builtin64(name, val) __builtin_##name(val) -typedef zig_c_uint zig_Builtin64; +typedef unsigned int zig_Builtin64; #elif LONG_MIN <= INT64_MIN #define zig_builtin64(name, val) __builtin_##name##l(val) -typedef zig_c_ulong zig_Builtin64; +typedef unsigned long zig_Builtin64; #elif LLONG_MIN <= INT64_MIN #define zig_builtin64(name, val) __builtin_##name##ll(val) -typedef zig_c_ulonglong zig_Builtin64; +typedef unsigned long long zig_Builtin64; #endif -static inline zig_u8 zig_byte_swap_u8(zig_u8 val, zig_u8 bits) { +static inline uint8_t zig_byte_swap_u8(uint8_t val, uint8_t bits) { return zig_wrap_u8(val >> (8 - bits), bits); } -static inline zig_i8 zig_byte_swap_i8(zig_i8 val, zig_u8 bits) { - return zig_wrap_i8((zig_i8)zig_byte_swap_u8((zig_u8)val, bits), bits); +static inline int8_t zig_byte_swap_i8(int8_t val, uint8_t bits) { + return zig_wrap_i8((int8_t)zig_byte_swap_u8((uint8_t)val, bits), bits); } -static inline zig_u16 zig_byte_swap_u16(zig_u16 val, zig_u8 bits) { - zig_u16 full_res; +static inline uint16_t zig_byte_swap_u16(uint16_t val, uint8_t bits) { + uint16_t full_res; #if zig_has_builtin(bswap16) || defined(zig_gnuc) full_res = __builtin_bswap16(val); #else - full_res = (zig_u16)zig_byte_swap_u8((zig_u8)(val >> 0), 8) << 8 | - (zig_u16)zig_byte_swap_u8((zig_u8)(val >> 8), 8) >> 0; + full_res = (uint16_t)zig_byte_swap_u8((uint8_t)(val >> 0), 8) << 8 | + (uint16_t)zig_byte_swap_u8((uint8_t)(val >> 8), 8) >> 0; #endif return zig_wrap_u16(full_res >> (16 - bits), bits); } -static inline zig_i16 zig_byte_swap_i16(zig_i16 val, zig_u8 bits) { - return zig_wrap_i16((zig_i16)zig_byte_swap_u16((zig_u16)val, bits), bits); +static inline int16_t zig_byte_swap_i16(int16_t val, uint8_t bits) { + return zig_wrap_i16((int16_t)zig_byte_swap_u16((uint16_t)val, bits), bits); } -static inline zig_u32 zig_byte_swap_u32(zig_u32 val, zig_u8 bits) { - zig_u32 full_res; +static inline uint32_t zig_byte_swap_u32(uint32_t val, uint8_t bits) { + uint32_t full_res; #if zig_has_builtin(bswap32) || defined(zig_gnuc) full_res = __builtin_bswap32(val); #else - full_res = (zig_u32)zig_byte_swap_u16((zig_u16)(val >> 0), 16) << 16 | - (zig_u32)zig_byte_swap_u16((zig_u16)(val >> 16), 16) >> 0; + full_res = (uint32_t)zig_byte_swap_u16((uint16_t)(val >> 0), 16) << 16 | + (uint32_t)zig_byte_swap_u16((uint16_t)(val >> 16), 16) >> 0; #endif return zig_wrap_u32(full_res >> (32 - bits), bits); } -static inline zig_i32 zig_byte_swap_i32(zig_i32 val, zig_u8 bits) { - return zig_wrap_i32((zig_i32)zig_byte_swap_u32((zig_u32)val, bits), bits); +static inline int32_t zig_byte_swap_i32(int32_t val, uint8_t bits) { + return zig_wrap_i32((int32_t)zig_byte_swap_u32((uint32_t)val, bits), bits); } -static inline zig_u64 zig_byte_swap_u64(zig_u64 val, zig_u8 bits) { - zig_u64 
full_res; +static inline uint64_t zig_byte_swap_u64(uint64_t val, uint8_t bits) { + uint64_t full_res; #if zig_has_builtin(bswap64) || defined(zig_gnuc) full_res = __builtin_bswap64(val); #else - full_res = (zig_u64)zig_byte_swap_u32((zig_u32)(val >> 0), 32) << 32 | - (zig_u64)zig_byte_swap_u32((zig_u32)(val >> 32), 32) >> 0; + full_res = (uint64_t)zig_byte_swap_u32((uint32_t)(val >> 0), 32) << 32 | + (uint64_t)zig_byte_swap_u32((uint32_t)(val >> 32), 32) >> 0; #endif return zig_wrap_u64(full_res >> (64 - bits), bits); } -static inline zig_i64 zig_byte_swap_i64(zig_i64 val, zig_u8 bits) { - return zig_wrap_i64((zig_i64)zig_byte_swap_u64((zig_u64)val, bits), bits); +static inline int64_t zig_byte_swap_i64(int64_t val, uint8_t bits) { + return zig_wrap_i64((int64_t)zig_byte_swap_u64((uint64_t)val, bits), bits); } -static inline zig_u8 zig_bit_reverse_u8(zig_u8 val, zig_u8 bits) { - zig_u8 full_res; +static inline uint8_t zig_bit_reverse_u8(uint8_t val, uint8_t bits) { + uint8_t full_res; #if zig_has_builtin(bitreverse8) full_res = __builtin_bitreverse8(val); #else - static zig_u8 const lut[0x10] = { + static uint8_t const lut[0x10] = { 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf }; @@ -1079,62 +1209,62 @@ static inline zig_u8 zig_bit_reverse_u8(zig_u8 val, zig_u8 bits) { return zig_wrap_u8(full_res >> (8 - bits), bits); } -static inline zig_i8 zig_bit_reverse_i8(zig_i8 val, zig_u8 bits) { - return zig_wrap_i8((zig_i8)zig_bit_reverse_u8((zig_u8)val, bits), bits); +static inline int8_t zig_bit_reverse_i8(int8_t val, uint8_t bits) { + return zig_wrap_i8((int8_t)zig_bit_reverse_u8((uint8_t)val, bits), bits); } -static inline zig_u16 zig_bit_reverse_u16(zig_u16 val, zig_u8 bits) { - zig_u16 full_res; +static inline uint16_t zig_bit_reverse_u16(uint16_t val, uint8_t bits) { + uint16_t full_res; #if zig_has_builtin(bitreverse16) full_res = __builtin_bitreverse16(val); #else - full_res = (zig_u16)zig_bit_reverse_u8((zig_u8)(val >> 0), 8) << 8 | - (zig_u16)zig_bit_reverse_u8((zig_u8)(val >> 8), 8) >> 0; + full_res = (uint16_t)zig_bit_reverse_u8((uint8_t)(val >> 0), 8) << 8 | + (uint16_t)zig_bit_reverse_u8((uint8_t)(val >> 8), 8) >> 0; #endif return zig_wrap_u16(full_res >> (16 - bits), bits); } -static inline zig_i16 zig_bit_reverse_i16(zig_i16 val, zig_u8 bits) { - return zig_wrap_i16((zig_i16)zig_bit_reverse_u16((zig_u16)val, bits), bits); +static inline int16_t zig_bit_reverse_i16(int16_t val, uint8_t bits) { + return zig_wrap_i16((int16_t)zig_bit_reverse_u16((uint16_t)val, bits), bits); } -static inline zig_u32 zig_bit_reverse_u32(zig_u32 val, zig_u8 bits) { - zig_u32 full_res; +static inline uint32_t zig_bit_reverse_u32(uint32_t val, uint8_t bits) { + uint32_t full_res; #if zig_has_builtin(bitreverse32) full_res = __builtin_bitreverse32(val); #else - full_res = (zig_u32)zig_bit_reverse_u16((zig_u16)(val >> 0), 16) << 16 | - (zig_u32)zig_bit_reverse_u16((zig_u16)(val >> 16), 16) >> 0; + full_res = (uint32_t)zig_bit_reverse_u16((uint16_t)(val >> 0), 16) << 16 | + (uint32_t)zig_bit_reverse_u16((uint16_t)(val >> 16), 16) >> 0; #endif return zig_wrap_u32(full_res >> (32 - bits), bits); } -static inline zig_i32 zig_bit_reverse_i32(zig_i32 val, zig_u8 bits) { - return zig_wrap_i32((zig_i32)zig_bit_reverse_u32((zig_u32)val, bits), bits); +static inline int32_t zig_bit_reverse_i32(int32_t val, uint8_t bits) { + return zig_wrap_i32((int32_t)zig_bit_reverse_u32((uint32_t)val, bits), bits); } -static inline zig_u64 zig_bit_reverse_u64(zig_u64 val, zig_u8 bits) { - zig_u64 
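Note: the non-builtin 8-bit reversal above looks up one nibble in a 16-entry table and composes two lookups; each wider width then reverses its two halves and swaps them. The 8-bit core, standalone (same table values as the LUT in the patch):

    #include <stdint.h>

    static const uint8_t rev4[16] = {
        0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
        0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
    };

    static uint8_t bit_reverse_u8(uint8_t v) {
        return (uint8_t)(rev4[v & 0xf] << 4 | rev4[v >> 4]);
    }
    /* bit_reverse_u8(0x01) == 0x80; bit_reverse_u8(0xb4) == 0x2d */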
full_res; +static inline uint64_t zig_bit_reverse_u64(uint64_t val, uint8_t bits) { + uint64_t full_res; #if zig_has_builtin(bitreverse64) full_res = __builtin_bitreverse64(val); #else - full_res = (zig_u64)zig_bit_reverse_u32((zig_u32)(val >> 0), 32) << 32 | - (zig_u64)zig_bit_reverse_u32((zig_u32)(val >> 32), 32) >> 0; + full_res = (uint64_t)zig_bit_reverse_u32((uint32_t)(val >> 0), 32) << 32 | + (uint64_t)zig_bit_reverse_u32((uint32_t)(val >> 32), 32) >> 0; #endif return zig_wrap_u64(full_res >> (64 - bits), bits); } -static inline zig_i64 zig_bit_reverse_i64(zig_i64 val, zig_u8 bits) { - return zig_wrap_i64((zig_i64)zig_bit_reverse_u64((zig_u64)val, bits), bits); +static inline int64_t zig_bit_reverse_i64(int64_t val, uint8_t bits) { + return zig_wrap_i64((int64_t)zig_bit_reverse_u64((uint64_t)val, bits), bits); } #define zig_builtin_popcount_common(w) \ - static inline zig_u8 zig_popcount_i##w(zig_i##w val, zig_u8 bits) { \ - return zig_popcount_u##w((zig_u##w)val, bits); \ + static inline uint8_t zig_popcount_i##w(int##w##_t val, uint8_t bits) { \ + return zig_popcount_u##w((uint##w##_t)val, bits); \ } #if zig_has_builtin(popcount) || defined(zig_gnuc) #define zig_builtin_popcount(w) \ - static inline zig_u8 zig_popcount_u##w(zig_u##w val, zig_u8 bits) { \ + static inline uint8_t zig_popcount_u##w(uint##w##_t val, uint8_t bits) { \ (void)bits; \ return zig_builtin##w(popcount, val); \ } \ @@ -1142,12 +1272,12 @@ static inline zig_i64 zig_bit_reverse_i64(zig_i64 val, zig_u8 bits) { zig_builtin_popcount_common(w) #else #define zig_builtin_popcount(w) \ - static inline zig_u8 zig_popcount_u##w(zig_u##w val, zig_u8 bits) { \ + static inline uint8_t zig_popcount_u##w(uint##w##_t val, uint8_t bits) { \ (void)bits; \ - zig_u##w temp = val - ((val >> 1) & (zig_maxInt_u##w / 3)); \ - temp = (temp & (zig_maxInt_u##w / 5)) + ((temp >> 2) & (zig_maxInt_u##w / 5)); \ - temp = (temp + (temp >> 4)) & (zig_maxInt_u##w / 17); \ - return temp * (zig_maxInt_u##w / 255) >> (w - 8); \ + uint##w##_t temp = val - ((val >> 1) & (UINT##w##_MAX / 3)); \ + temp = (temp & (UINT##w##_MAX / 5)) + ((temp >> 2) & (UINT##w##_MAX / 5)); \ + temp = (temp + (temp >> 4)) & (UINT##w##_MAX / 17); \ + return temp * (UINT##w##_MAX / 255) >> (w - 8); \ } \ \ zig_builtin_popcount_common(w) @@ -1158,12 +1288,12 @@ zig_builtin_popcount(32) zig_builtin_popcount(64) #define zig_builtin_ctz_common(w) \ - static inline zig_u8 zig_ctz_i##w(zig_i##w val, zig_u8 bits) { \ - return zig_ctz_u##w((zig_u##w)val, bits); \ + static inline uint8_t zig_ctz_i##w(int##w##_t val, uint8_t bits) { \ + return zig_ctz_u##w((uint##w##_t)val, bits); \ } #if zig_has_builtin(ctz) || defined(zig_gnuc) #define zig_builtin_ctz(w) \ - static inline zig_u8 zig_ctz_u##w(zig_u##w val, zig_u8 bits) { \ + static inline uint8_t zig_ctz_u##w(uint##w##_t val, uint8_t bits) { \ if (val == 0) return bits; \ return zig_builtin##w(ctz, val); \ } \ @@ -1171,7 +1301,7 @@ zig_builtin_popcount(64) zig_builtin_ctz_common(w) #else #define zig_builtin_ctz(w) \ - static inline zig_u8 zig_ctz_u##w(zig_u##w val, zig_u8 bits) { \ + static inline uint8_t zig_ctz_u##w(uint##w##_t val, uint8_t bits) { \ return zig_popcount_u##w(zig_not_u##w(val, bits) & zig_subw_u##w(val, 1, bits), bits); \ } \ \ @@ -1183,12 +1313,12 @@ zig_builtin_ctz(32) zig_builtin_ctz(64) #define zig_builtin_clz_common(w) \ - static inline zig_u8 zig_clz_i##w(zig_i##w val, zig_u8 bits) { \ - return zig_clz_u##w((zig_u##w)val, bits); \ + static inline uint8_t zig_clz_i##w(int##w##_t val, uint8_t bits) { \ + 
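Note: the fallback popcount is the classic branch-free SWAR reduction; the divided constants are just the repeating masks 0x55…, 0x33…, 0x0f…, 0x01… written width-generically. Expanded for 32 bits:

    #include <stdint.h>

    static uint8_t popcount_u32(uint32_t v) {
        v = v - ((v >> 1) & (UINT32_MAX / 3));                      /* 0x55555555: 2-bit field sums */
        v = (v & (UINT32_MAX / 5)) + ((v >> 2) & (UINT32_MAX / 5)); /* 0x33333333: 4-bit field sums */
        v = (v + (v >> 4)) & (UINT32_MAX / 17);                     /* 0x0f0f0f0f: per-byte sums */
        return (uint8_t)(v * (UINT32_MAX / 255) >> 24);             /* 0x01010101: total lands in the top byte */
    }
    /* popcount_u32(0xf0f0f0f0) == 16 */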
return zig_clz_u##w((uint##w##_t)val, bits); \ } #if zig_has_builtin(clz) || defined(zig_gnuc) #define zig_builtin_clz(w) \ - static inline zig_u8 zig_clz_u##w(zig_u##w val, zig_u8 bits) { \ + static inline uint8_t zig_clz_u##w(uint##w##_t val, uint8_t bits) { \ if (val == 0) return bits; \ return zig_builtin##w(clz, val) - (zig_bitSizeOf(zig_Builtin##w) - bits); \ } \ @@ -1196,7 +1326,7 @@ zig_builtin_ctz(64) zig_builtin_clz_common(w) #else #define zig_builtin_clz(w) \ - static inline zig_u8 zig_clz_u##w(zig_u##w val, zig_u8 bits) { \ + static inline uint8_t zig_clz_u##w(uint##w##_t val, uint8_t bits) { \ return zig_ctz_u##w(zig_bit_reverse_u##w(val, bits), bits); \ } \ \ @@ -1207,7 +1337,7 @@ zig_builtin_clz(16) zig_builtin_clz(32) zig_builtin_clz(64) -/* ======================== 128-bit Integer Routines ======================== */ +/* ======================== 128-bit Integer Support ========================= */ #if !defined(zig_has_int128) # if defined(__SIZEOF_INT128__) @@ -1222,18 +1352,18 @@ zig_builtin_clz(64) typedef unsigned __int128 zig_u128; typedef signed __int128 zig_i128; -#define zig_as_u128(hi, lo) ((zig_u128)(hi)<<64|(lo)) -#define zig_as_i128(hi, lo) ((zig_i128)zig_as_u128(hi, lo)) -#define zig_as_constant_u128(hi, lo) zig_as_u128(hi, lo) -#define zig_as_constant_i128(hi, lo) zig_as_i128(hi, lo) -#define zig_hi_u128(val) ((zig_u64)((val) >> 64)) -#define zig_lo_u128(val) ((zig_u64)((val) >> 0)) -#define zig_hi_i128(val) ((zig_i64)((val) >> 64)) -#define zig_lo_i128(val) ((zig_u64)((val) >> 0)) +#define zig_make_u128(hi, lo) ((zig_u128)(hi)<<64|(lo)) +#define zig_make_i128(hi, lo) ((zig_i128)zig_make_u128(hi, lo)) +#define zig_make_constant_u128(hi, lo) zig_make_u128(hi, lo) +#define zig_make_constant_i128(hi, lo) zig_make_i128(hi, lo) +#define zig_hi_u128(val) ((uint64_t)((val) >> 64)) +#define zig_lo_u128(val) ((uint64_t)((val) >> 0)) +#define zig_hi_i128(val) (( int64_t)((val) >> 64)) +#define zig_lo_i128(val) ((uint64_t)((val) >> 0)) #define zig_bitcast_u128(val) ((zig_u128)(val)) #define zig_bitcast_i128(val) ((zig_i128)(val)) #define zig_cmp_int128(Type) \ - static inline zig_i32 zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \ + static inline int32_t zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \ return (lhs > rhs) - (lhs < rhs); \ } #define zig_bit_int128(Type, operation, operator) \ @@ -1244,31 +1374,31 @@ typedef signed __int128 zig_i128; #else /* zig_has_int128 */ #if __LITTLE_ENDIAN__ || _MSC_VER -typedef struct { zig_align(16) zig_u64 lo; zig_u64 hi; } zig_u128; -typedef struct { zig_align(16) zig_u64 lo; zig_i64 hi; } zig_i128; +typedef struct { zig_align(16) uint64_t lo; uint64_t hi; } zig_u128; +typedef struct { zig_align(16) uint64_t lo; int64_t hi; } zig_i128; #else -typedef struct { zig_align(16) zig_u64 hi; zig_u64 lo; } zig_u128; -typedef struct { zig_align(16) zig_i64 hi; zig_u64 lo; } zig_i128; +typedef struct { zig_align(16) uint64_t hi; uint64_t lo; } zig_u128; +typedef struct { zig_align(16) int64_t hi; uint64_t lo; } zig_i128; #endif -#define zig_as_u128(hi, lo) ((zig_u128){ .h##i = (hi), .l##o = (lo) }) -#define zig_as_i128(hi, lo) ((zig_i128){ .h##i = (hi), .l##o = (lo) }) +#define zig_make_u128(hi, lo) ((zig_u128){ .h##i = (hi), .l##o = (lo) }) +#define zig_make_i128(hi, lo) ((zig_i128){ .h##i = (hi), .l##o = (lo) }) -#if _MSC_VER -#define zig_as_constant_u128(hi, lo) { .h##i = (hi), .l##o = (lo) } -#define zig_as_constant_i128(hi, lo) { .h##i = (hi), .l##o = (lo) } -#else -#define zig_as_constant_u128(hi, lo) zig_as_u128(hi, lo) 
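Note: `__builtin_clz` counts leading zeros of a full C `unsigned`, so the builtin branch above subtracts the extra high-order zeros that lie beyond the emulated width. A standalone sketch (assumes a GCC/Clang-style builtin):

    #include <limits.h>
    #include <stdint.h>

    static uint8_t clz_narrow(unsigned val, uint8_t bits) {
        if (val == 0) return bits; /* the builtin is undefined for 0 */
        return (uint8_t)(__builtin_clz(val) - ((int)(sizeof(unsigned) * CHAR_BIT) - bits));
    }
    /* clz_narrow(1, 7) == 6; clz_narrow(0x40, 7) == 0 */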
-#define zig_as_constant_i128(hi, lo) zig_as_i128(hi, lo) +#if _MSC_VER /* MSVC doesn't allow struct literals in constant expressions */ +#define zig_make_constant_u128(hi, lo) { .h##i = (hi), .l##o = (lo) } +#define zig_make_constant_i128(hi, lo) { .h##i = (hi), .l##o = (lo) } +#else /* But non-MSVC doesn't like the unprotected commas */ +#define zig_make_constant_u128(hi, lo) zig_make_u128(hi, lo) +#define zig_make_constant_i128(hi, lo) zig_make_i128(hi, lo) #endif #define zig_hi_u128(val) ((val).hi) #define zig_lo_u128(val) ((val).lo) #define zig_hi_i128(val) ((val).hi) #define zig_lo_i128(val) ((val).lo) -#define zig_bitcast_u128(val) zig_as_u128((zig_u64)(val).hi, (val).lo) -#define zig_bitcast_i128(val) zig_as_i128((zig_i64)(val).hi, (val).lo) +#define zig_bitcast_u128(val) zig_make_u128((uint64_t)(val).hi, (val).lo) +#define zig_bitcast_i128(val) zig_make_i128(( int64_t)(val).hi, (val).lo) #define zig_cmp_int128(Type) \ - static inline zig_i32 zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \ + static inline int32_t zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \ return (lhs.hi == rhs.hi) \ ? (lhs.lo > rhs.lo) - (lhs.lo < rhs.lo) \ : (lhs.hi > rhs.hi) - (lhs.hi < rhs.hi); \ @@ -1280,10 +1410,10 @@ typedef struct { zig_align(16) zig_i64 hi; zig_u64 lo; } zig_i128; #endif /* zig_has_int128 */ -#define zig_minInt_u128 zig_as_u128(zig_minInt_u64, zig_minInt_u64) -#define zig_maxInt_u128 zig_as_u128(zig_maxInt_u64, zig_maxInt_u64) -#define zig_minInt_i128 zig_as_i128(zig_minInt_i64, zig_minInt_u64) -#define zig_maxInt_i128 zig_as_i128(zig_maxInt_i64, zig_maxInt_u64) +#define zig_minInt_u128 zig_make_u128(zig_minInt_u64, zig_minInt_u64) +#define zig_maxInt_u128 zig_make_u128(zig_maxInt_u64, zig_maxInt_u64) +#define zig_minInt_i128 zig_make_i128(zig_minInt_i64, zig_minInt_u64) +#define zig_maxInt_i128 zig_make_i128(zig_maxInt_i64, zig_maxInt_u64) zig_cmp_int128(u128) zig_cmp_int128(i128) @@ -1297,28 +1427,33 @@ zig_bit_int128(i128, or, |) zig_bit_int128(u128, xor, ^) zig_bit_int128(i128, xor, ^) -static inline zig_u128 zig_shr_u128(zig_u128 lhs, zig_u8 rhs); +static inline zig_u128 zig_shr_u128(zig_u128 lhs, uint8_t rhs); #if zig_has_int128 -static inline zig_u128 zig_not_u128(zig_u128 val, zig_u8 bits) { - return val ^ zig_maxInt(u128, bits); +static inline zig_u128 zig_not_u128(zig_u128 val, uint8_t bits) { + return val ^ zig_maxInt_u(128, bits); } -static inline zig_i128 zig_not_i128(zig_i128 val, zig_u8 bits) { +static inline zig_i128 zig_not_i128(zig_i128 val, uint8_t bits) { (void)bits; return ~val; } -static inline zig_u128 zig_shr_u128(zig_u128 lhs, zig_u8 rhs) { +static inline zig_u128 zig_shr_u128(zig_u128 lhs, uint8_t rhs) { return lhs >> rhs; } -static inline zig_u128 zig_shl_u128(zig_u128 lhs, zig_u8 rhs) { +static inline zig_u128 zig_shl_u128(zig_u128 lhs, uint8_t rhs) { return lhs << rhs; } -static inline zig_i128 zig_shl_i128(zig_i128 lhs, zig_u8 rhs) { +static inline zig_i128 zig_shr_i128(zig_i128 lhs, uint8_t rhs) { + zig_i128 sign_mask = lhs < zig_make_i128(0, 0) ? 
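Note: the two constant-making forms exist because a compound literal is not a constant expression under MSVC, while the bare brace form is initializer-only and its unprotected comma cannot pass through another macro's argument list. An illustrative contrast (hypothetical type and names):

    #include <stdint.h>

    typedef struct { uint64_t lo; uint64_t hi; } u128;

    /* Expression form: usable anywhere a value is, but not constant on MSVC. */
    #define MAKE_U128(hi_, lo_) ((u128){ .hi = (hi_), .lo = (lo_) })
    /* Initializer-only form: constant everywhere, but only valid as an initializer. */
    #define CONST_U128(hi_, lo_) { .hi = (hi_), .lo = (lo_) }

    static const u128 k = CONST_U128(1, 2);
    static u128 twice_k(void) { return MAKE_U128(k.hi * 2, k.lo * 2); }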
-zig_make_i128(0, 1) : zig_make_i128(0, 0); + return ((lhs ^ sign_mask) >> rhs) ^ sign_mask; +} + +static inline zig_i128 zig_shl_i128(zig_i128 lhs, uint8_t rhs) { return lhs << rhs; } @@ -1363,40 +1498,46 @@ static inline zig_i128 zig_rem_i128(zig_i128 lhs, zig_i128 rhs) { } static inline zig_i128 zig_div_floor_i128(zig_i128 lhs, zig_i128 rhs) { - return zig_div_trunc_i128(lhs, rhs) - (((lhs ^ rhs) & zig_rem_i128(lhs, rhs)) < zig_as_i128(0, 0)); + return zig_div_trunc_i128(lhs, rhs) - (((lhs ^ rhs) & zig_rem_i128(lhs, rhs)) < zig_make_i128(0, 0)); } static inline zig_i128 zig_mod_i128(zig_i128 lhs, zig_i128 rhs) { zig_i128 rem = zig_rem_i128(lhs, rhs); - return rem + (((lhs ^ rhs) & rem) < zig_as_i128(0, 0) ? rhs : zig_as_i128(0, 0)); + return rem + (((lhs ^ rhs) & rem) < zig_make_i128(0, 0) ? rhs : zig_make_i128(0, 0)); } #else /* zig_has_int128 */ -static inline zig_u128 zig_not_u128(zig_u128 val, zig_u8 bits) { - return (zig_u128){ .hi = zig_not_u64(val.hi, bits - zig_as_u8(64)), .lo = zig_not_u64(val.lo, zig_as_u8(64)) }; +static inline zig_u128 zig_not_u128(zig_u128 val, uint8_t bits) { + return (zig_u128){ .hi = zig_not_u64(val.hi, bits - UINT8_C(64)), .lo = zig_not_u64(val.lo, UINT8_C(64)) }; } -static inline zig_i128 zig_not_i128(zig_i128 val, zig_u8 bits) { - return (zig_i128){ .hi = zig_not_i64(val.hi, bits - zig_as_u8(64)), .lo = zig_not_u64(val.lo, zig_as_u8(64)) }; +static inline zig_i128 zig_not_i128(zig_i128 val, uint8_t bits) { + return (zig_i128){ .hi = zig_not_i64(val.hi, bits - UINT8_C(64)), .lo = zig_not_u64(val.lo, UINT8_C(64)) }; } -static inline zig_u128 zig_shr_u128(zig_u128 lhs, zig_u8 rhs) { - if (rhs == zig_as_u8(0)) return lhs; - if (rhs >= zig_as_u8(64)) return (zig_u128){ .hi = zig_minInt_u64, .lo = lhs.hi >> (rhs - zig_as_u8(64)) }; - return (zig_u128){ .hi = lhs.hi >> rhs, .lo = lhs.hi << (zig_as_u8(64) - rhs) | lhs.lo >> rhs }; +static inline zig_u128 zig_shr_u128(zig_u128 lhs, uint8_t rhs) { + if (rhs == UINT8_C(0)) return lhs; + if (rhs >= UINT8_C(64)) return (zig_u128){ .hi = zig_minInt_u64, .lo = lhs.hi >> (rhs - UINT8_C(64)) }; + return (zig_u128){ .hi = lhs.hi >> rhs, .lo = lhs.hi << (UINT8_C(64) - rhs) | lhs.lo >> rhs }; } -static inline zig_u128 zig_shl_u128(zig_u128 lhs, zig_u8 rhs) { - if (rhs == zig_as_u8(0)) return lhs; - if (rhs >= zig_as_u8(64)) return (zig_u128){ .hi = lhs.lo << (rhs - zig_as_u8(64)), .lo = zig_minInt_u64 }; - return (zig_u128){ .hi = lhs.hi << rhs | lhs.lo >> (zig_as_u8(64) - rhs), .lo = lhs.lo << rhs }; +static inline zig_u128 zig_shl_u128(zig_u128 lhs, uint8_t rhs) { + if (rhs == UINT8_C(0)) return lhs; + if (rhs >= UINT8_C(64)) return (zig_u128){ .hi = lhs.lo << (rhs - UINT8_C(64)), .lo = zig_minInt_u64 }; + return (zig_u128){ .hi = lhs.hi << rhs | lhs.lo >> (UINT8_C(64) - rhs), .lo = lhs.lo << rhs }; } -static inline zig_i128 zig_shl_i128(zig_i128 lhs, zig_u8 rhs) { - if (rhs == zig_as_u8(0)) return lhs; - if (rhs >= zig_as_u8(64)) return (zig_i128){ .hi = lhs.lo << (rhs - zig_as_u8(64)), .lo = zig_minInt_u64 }; - return (zig_i128){ .hi = lhs.hi << rhs | lhs.lo >> (zig_as_u8(64) - rhs), .lo = lhs.lo << rhs }; +static inline zig_i128 zig_shr_i128(zig_i128 lhs, uint8_t rhs) { + if (rhs == UINT8_C(0)) return lhs; + if (rhs >= UINT8_C(64)) return (zig_i128){ .hi = zig_shr_i64(lhs.hi, 63), .lo = zig_shr_i64(lhs.hi, (rhs - UINT8_C(64))) }; + return (zig_i128){ .hi = zig_shr_i64(lhs.hi, rhs), .lo = lhs.lo >> rhs | (uint64_t)lhs.hi << (UINT8_C(64) - rhs) }; +} + +static inline zig_i128 zig_shl_i128(zig_i128 lhs, uint8_t rhs) { 
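Note: the new `zig_shr_i128` builds an arithmetic shift out of a logical one via the identity `x >>arith n == ((x ^ m) >>logical n) ^ m`, where `m` is all-ones iff `x` is negative — complementing a negative value makes it non-negative, and complementing back replays the sign bit into the vacated positions. The same trick at 64 bits (assumes the usual two's-complement conversions):

    #include <stdint.h>

    static int64_t sar64(int64_t x, uint8_t n) {
        uint64_t m = x < 0 ? UINT64_MAX : 0; /* all-ones iff x is negative */
        return (int64_t)((((uint64_t)x ^ m) >> n) ^ m);
    }
    /* sar64(-8, 1) == -4; sar64(-1, 63) == -1 */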
+ if (rhs == UINT8_C(0)) return lhs; + if (rhs >= UINT8_C(64)) return (zig_i128){ .hi = lhs.lo << (rhs - UINT8_C(64)), .lo = zig_minInt_u64 }; + return (zig_i128){ .hi = lhs.hi << rhs | lhs.lo >> (UINT8_C(64) - rhs), .lo = lhs.lo << rhs }; } static inline zig_u128 zig_add_u128(zig_u128 lhs, zig_u128 rhs) { @@ -1424,14 +1565,14 @@ static inline zig_i128 zig_sub_i128(zig_i128 lhs, zig_i128 rhs) { } zig_extern zig_i128 __multi3(zig_i128 lhs, zig_i128 rhs); -static zig_u128 zig_mul_u128(zig_u128 lhs, zig_u128 rhs) { - return zig_bitcast_u128(__multi3(zig_bitcast_i128(lhs), zig_bitcast_i128(rhs))); -} - static zig_i128 zig_mul_i128(zig_i128 lhs, zig_i128 rhs) { return __multi3(lhs, rhs); } +static zig_u128 zig_mul_u128(zig_u128 lhs, zig_u128 rhs) { + return zig_bitcast_u128(zig_mul_i128(zig_bitcast_i128(lhs), zig_bitcast_i128(rhs))); +} + zig_extern zig_u128 __udivti3(zig_u128 lhs, zig_u128 rhs); static zig_u128 zig_div_trunc_u128(zig_u128 lhs, zig_u128 rhs) { return __udivti3(lhs, rhs); @@ -1454,11 +1595,11 @@ static zig_i128 zig_rem_i128(zig_i128 lhs, zig_i128 rhs) { static inline zig_i128 zig_mod_i128(zig_i128 lhs, zig_i128 rhs) { zig_i128 rem = zig_rem_i128(lhs, rhs); - return zig_add_i128(rem, (((lhs.hi ^ rhs.hi) & rem.hi) < zig_as_i64(0) ? rhs : zig_as_i128(0, 0))); + return zig_add_i128(rem, ((lhs.hi ^ rhs.hi) & rem.hi) < INT64_C(0) ? rhs : zig_make_i128(0, 0)); } static inline zig_i128 zig_div_floor_i128(zig_i128 lhs, zig_i128 rhs) { - return zig_sub_i128(zig_div_trunc_i128(lhs, rhs), zig_as_i128(0, zig_cmp_i128(zig_and_i128(zig_xor_i128(lhs, rhs), zig_rem_i128(lhs, rhs)), zig_as_i128(0, 0)) < zig_as_i32(0))); + return zig_sub_i128(zig_div_trunc_i128(lhs, rhs), zig_make_i128(0, zig_cmp_i128(zig_and_i128(zig_xor_i128(lhs, rhs), zig_rem_i128(lhs, rhs)), zig_make_i128(0, 0)) < INT32_C(0))); } #endif /* zig_has_int128 */ @@ -1471,323 +1612,294 @@ static inline zig_u128 zig_nand_u128(zig_u128 lhs, zig_u128 rhs) { } static inline zig_u128 zig_min_u128(zig_u128 lhs, zig_u128 rhs) { - return zig_cmp_u128(lhs, rhs) < zig_as_i32(0) ? lhs : rhs; + return zig_cmp_u128(lhs, rhs) < INT32_C(0) ? lhs : rhs; } static inline zig_i128 zig_min_i128(zig_i128 lhs, zig_i128 rhs) { - return zig_cmp_i128(lhs, rhs) < zig_as_i32(0) ? lhs : rhs; + return zig_cmp_i128(lhs, rhs) < INT32_C(0) ? lhs : rhs; } static inline zig_u128 zig_max_u128(zig_u128 lhs, zig_u128 rhs) { - return zig_cmp_u128(lhs, rhs) > zig_as_i32(0) ? lhs : rhs; + return zig_cmp_u128(lhs, rhs) > INT32_C(0) ? lhs : rhs; } static inline zig_i128 zig_max_i128(zig_i128 lhs, zig_i128 rhs) { - return zig_cmp_i128(lhs, rhs) > zig_as_i32(0) ? lhs : rhs; + return zig_cmp_i128(lhs, rhs) > INT32_C(0) ? lhs : rhs; } -static inline zig_i128 zig_shr_i128(zig_i128 lhs, zig_u8 rhs) { - zig_i128 sign_mask = zig_cmp_i128(lhs, zig_as_i128(0, 0)) < zig_as_i32(0) ? 
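Note: the emulated shifts split into three cases: zero (because `lo >> 64` would be undefined), a shift of 64 or more (one half alone supplies the result), and the general case where bits cross the half boundary. A shift-left sketch over a two-word layout (hypothetical standalone type, little-endian half order as above):

    #include <stdint.h>

    typedef struct { uint64_t lo; uint64_t hi; } u128;

    static u128 shl_u128(u128 v, uint8_t n) {
        if (n == 0) return v;                 /* v.lo >> 64 below would be UB */
        if (n >= 64) return (u128){ .lo = 0, .hi = v.lo << (n - 64) };
        return (u128){ .lo = v.lo << n, .hi = v.hi << n | v.lo >> (64 - n) };
    }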
zig_sub_i128(zig_as_i128(0, 0), zig_as_i128(0, 1)) : zig_as_i128(0, 0); - return zig_xor_i128(zig_bitcast_i128(zig_shr_u128(zig_bitcast_u128(zig_xor_i128(lhs, sign_mask)), rhs)), sign_mask); +static inline zig_u128 zig_wrap_u128(zig_u128 val, uint8_t bits) { + return zig_and_u128(val, zig_maxInt_u(128, bits)); } -static inline zig_u128 zig_wrap_u128(zig_u128 val, zig_u8 bits) { - return zig_and_u128(val, zig_maxInt(u128, bits)); +static inline zig_i128 zig_wrap_i128(zig_i128 val, uint8_t bits) { + return zig_make_i128(zig_wrap_i64(zig_hi_i128(val), bits - UINT8_C(64)), zig_lo_i128(val)); } -static inline zig_i128 zig_wrap_i128(zig_i128 val, zig_u8 bits) { - return zig_as_i128(zig_wrap_i64(zig_hi_i128(val), bits - zig_as_u8(64)), zig_lo_i128(val)); -} - -static inline zig_u128 zig_shlw_u128(zig_u128 lhs, zig_u8 rhs, zig_u8 bits) { +static inline zig_u128 zig_shlw_u128(zig_u128 lhs, uint8_t rhs, uint8_t bits) { return zig_wrap_u128(zig_shl_u128(lhs, rhs), bits); } -static inline zig_i128 zig_shlw_i128(zig_i128 lhs, zig_u8 rhs, zig_u8 bits) { +static inline zig_i128 zig_shlw_i128(zig_i128 lhs, uint8_t rhs, uint8_t bits) { return zig_wrap_i128(zig_bitcast_i128(zig_shl_u128(zig_bitcast_u128(lhs), rhs)), bits); } -static inline zig_u128 zig_addw_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_addw_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { return zig_wrap_u128(zig_add_u128(lhs, rhs), bits); } -static inline zig_i128 zig_addw_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_addw_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { return zig_wrap_i128(zig_bitcast_i128(zig_add_u128(zig_bitcast_u128(lhs), zig_bitcast_u128(rhs))), bits); } -static inline zig_u128 zig_subw_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_subw_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { return zig_wrap_u128(zig_sub_u128(lhs, rhs), bits); } -static inline zig_i128 zig_subw_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_subw_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { return zig_wrap_i128(zig_bitcast_i128(zig_sub_u128(zig_bitcast_u128(lhs), zig_bitcast_u128(rhs))), bits); } -static inline zig_u128 zig_mulw_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_mulw_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { return zig_wrap_u128(zig_mul_u128(lhs, rhs), bits); } -static inline zig_i128 zig_mulw_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_mulw_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { return zig_wrap_i128(zig_bitcast_i128(zig_mul_u128(zig_bitcast_u128(lhs), zig_bitcast_u128(rhs))), bits); } #if zig_has_int128 -static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) zig_u128 full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); *res = zig_wrap_u128(full_res, bits); - return overflow || full_res < zig_minInt(u128, bits) || full_res > zig_maxInt(u128, bits); + return overflow || full_res < zig_minInt_u(128, bits) || full_res > zig_maxInt_u(128, bits); #else *res = zig_addw_u128(lhs, rhs, bits); return *res < lhs; #endif } -zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); -static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow); 
+static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) zig_i128 full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; + int overflow_int; zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i128(full_res, bits); - return overflow || full_res < zig_minInt(i128, bits) || full_res > zig_maxInt(i128, bits); + return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits); } -static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) zig_u128 full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); *res = zig_wrap_u128(full_res, bits); - return overflow || full_res < zig_minInt(u128, bits) || full_res > zig_maxInt(u128, bits); + return overflow || full_res < zig_minInt_u(128, bits) || full_res > zig_maxInt_u(128, bits); #else *res = zig_subw_u128(lhs, rhs, bits); return *res > lhs; #endif } -zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); -static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow); +static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) zig_i128 full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; + int overflow_int; zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i128(full_res, bits); - return overflow || full_res < zig_minInt(i128, bits) || full_res > zig_maxInt(i128, bits); + return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits); } -static inline bool zig_mulo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline bool zig_mulo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) zig_u128 full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); *res = zig_wrap_u128(full_res, bits); - return overflow || full_res < zig_minInt(u128, bits) || full_res > zig_maxInt(u128, bits); + return overflow || full_res < zig_minInt_u(128, bits) || full_res > zig_maxInt_u(128, bits); #else *res = zig_mulw_u128(lhs, rhs, bits); - return rhs != zig_as_u128(0, 0) && lhs > zig_maxInt(u128, bits) / rhs; + return rhs != zig_make_u128(0, 0) && lhs > zig_maxInt_u(128, bits) / rhs; #endif } -zig_extern zig_i128 __muloti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); -static inline bool zig_mulo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +zig_extern zig_i128 __muloti4(zig_i128 lhs, zig_i128 rhs, int *overflow); +static inline bool zig_mulo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(mul_overflow) zig_i128 full_res; bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); #else - zig_c_int overflow_int; + int overflow_int; zig_i128 full_res = __muloti4(lhs, rhs, &overflow_int); bool overflow = overflow_int != 0; #endif *res = zig_wrap_i128(full_res, bits); - return overflow || full_res < zig_minInt(i128, bits) || full_res > zig_maxInt(i128, bits); + return overflow || full_res < zig_minInt_i(128, bits) || full_res > 
zig_maxInt_i(128, bits); } #else /* zig_has_int128 */ -static inline bool zig_overflow_u128(bool overflow, zig_u128 full_res, zig_u8 bits) { - return overflow || - zig_cmp_u128(full_res, zig_minInt(u128, bits)) < zig_as_i32(0) || - zig_cmp_u128(full_res, zig_maxInt(u128, bits)) > zig_as_i32(0); +static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint8_t bits) { + uint64_t hi; + bool overflow = zig_addo_u64(&hi, lhs.hi, rhs.hi, bits - 64); + return overflow ^ zig_addo_u64(&res->hi, hi, zig_addo_u64(&res->lo, lhs.lo, rhs.lo, 64), bits - 64); } -static inline bool zig_overflow_i128(bool overflow, zig_i128 full_res, zig_u8 bits) { - return overflow || - zig_cmp_i128(full_res, zig_minInt(i128, bits)) < zig_as_i32(0) || - zig_cmp_i128(full_res, zig_maxInt(i128, bits)) > zig_as_i32(0); +static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { + int64_t hi; + bool overflow = zig_addo_i64(&hi, lhs.hi, rhs.hi, bits - 64); + return overflow ^ zig_addo_i64(&res->hi, hi, zig_addo_u64(&res->lo, lhs.lo, rhs.lo, 64), bits - 64); } -static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { - zig_u128 full_res; - bool overflow = - zig_addo_u64(&full_res.hi, lhs.hi, rhs.hi, 64) | - zig_addo_u64(&full_res.hi, full_res.hi, zig_addo_u64(&full_res.lo, lhs.lo, rhs.lo, 64), 64); - *res = zig_wrap_u128(full_res, bits); - return zig_overflow_u128(overflow, full_res, bits); +static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint8_t bits) { + uint64_t hi; + bool overflow = zig_subo_u64(&hi, lhs.hi, rhs.hi, bits - 64); + return overflow ^ zig_subo_u64(&res->hi, hi, zig_subo_u64(&res->lo, lhs.lo, rhs.lo, 64), bits - 64); } -zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); -static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { - zig_c_int overflow_int; - zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); - *res = zig_wrap_i128(full_res, bits); - return zig_overflow_i128(overflow_int, full_res, bits); +static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { + int64_t hi; + bool overflow = zig_subo_i64(&hi, lhs.hi, rhs.hi, bits - 64); + return overflow ^ zig_subo_i64(&res->hi, hi, zig_subo_u64(&res->lo, lhs.lo, rhs.lo, 64), bits - 64); } -static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { - zig_u128 full_res; - bool overflow = - zig_subo_u64(&full_res.hi, lhs.hi, rhs.hi, 64) | - zig_subo_u64(&full_res.hi, full_res.hi, zig_subo_u64(&full_res.lo, lhs.lo, rhs.lo, 64), 64); - *res = zig_wrap_u128(full_res, bits); - return zig_overflow_u128(overflow, full_res, bits); -} - -zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); -static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { - zig_c_int overflow_int; - zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int); - *res = zig_wrap_i128(full_res, bits); - return zig_overflow_i128(overflow_int, full_res, bits); -} - -static inline bool zig_mulo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline bool zig_mulo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint8_t bits) { *res = zig_mulw_u128(lhs, rhs, bits); - return zig_cmp_u128(*res, zig_as_u128(0, 0)) != zig_as_i32(0) && - zig_cmp_u128(lhs, zig_div_trunc_u128(zig_maxInt(u128, bits), rhs)) > zig_as_i32(0); + return zig_cmp_u128(*res, zig_make_u128(0, 0)) != INT32_C(0) && + 
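Note: the rewritten emulated paths above chain two 64-bit checked adds, feeding the low half's carry (a bool) into the high half. The two high-half overflow flags can never both fire — a wrapped high sum is at most UINT64_MAX - 1, so adding a carry of 1 cannot wrap again — which is why combining them with `^` is safe. A fixed-width (bits == 128) sketch with hypothetical names:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct { uint64_t lo; uint64_t hi; } u128;

    /* 64-bit add with carry out. */
    static bool addo_u64(uint64_t *res, uint64_t a, uint64_t b) {
        *res = a + b;    /* wraps mod 2^64 */
        return *res < a; /* wrapped means carry */
    }

    static bool addo_u128(u128 *res, u128 a, u128 b) {
        bool carry = addo_u64(&res->lo, a.lo, b.lo);
        bool ov1 = addo_u64(&res->hi, a.hi, b.hi);
        bool ov2 = addo_u64(&res->hi, res->hi, carry);
        return ov1 ^ ov2; /* mutually exclusive, so ^ is equivalent to | */
    }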
zig_cmp_u128(lhs, zig_div_trunc_u128(zig_maxInt_u(128, bits), rhs)) > INT32_C(0); } -zig_extern zig_i128 __muloti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); -static inline bool zig_mulo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { - zig_c_int overflow_int; +zig_extern zig_i128 __muloti4(zig_i128 lhs, zig_i128 rhs, int *overflow); +static inline bool zig_mulo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { + int overflow_int; zig_i128 full_res = __muloti4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0 || + zig_cmp_i128(full_res, zig_minInt_i(128, bits)) < INT32_C(0) || + zig_cmp_i128(full_res, zig_maxInt_i(128, bits)) > INT32_C(0); *res = zig_wrap_i128(full_res, bits); - return zig_overflow_i128(overflow_int, full_res, bits); + return overflow; } #endif /* zig_has_int128 */ -static inline bool zig_shlo_u128(zig_u128 *res, zig_u128 lhs, zig_u8 rhs, zig_u8 bits) { +static inline bool zig_shlo_u128(zig_u128 *res, zig_u128 lhs, uint8_t rhs, uint8_t bits) { *res = zig_shlw_u128(lhs, rhs, bits); - return zig_cmp_u128(lhs, zig_shr_u128(zig_maxInt(u128, bits), rhs)) > zig_as_i32(0); + return zig_cmp_u128(lhs, zig_shr_u128(zig_maxInt_u(128, bits), rhs)) > INT32_C(0); } -static inline bool zig_shlo_i128(zig_i128 *res, zig_i128 lhs, zig_u8 rhs, zig_u8 bits) { +static inline bool zig_shlo_i128(zig_i128 *res, zig_i128 lhs, uint8_t rhs, uint8_t bits) { *res = zig_shlw_i128(lhs, rhs, bits); - zig_i128 mask = zig_bitcast_i128(zig_shl_u128(zig_maxInt_u128, bits - rhs - zig_as_u8(1))); - return zig_cmp_i128(zig_and_i128(lhs, mask), zig_as_i128(0, 0)) != zig_as_i32(0) && - zig_cmp_i128(zig_and_i128(lhs, mask), mask) != zig_as_i32(0); + zig_i128 mask = zig_bitcast_i128(zig_shl_u128(zig_maxInt_u128, bits - rhs - UINT8_C(1))); + return zig_cmp_i128(zig_and_i128(lhs, mask), zig_make_i128(0, 0)) != INT32_C(0) && + zig_cmp_i128(zig_and_i128(lhs, mask), mask) != INT32_C(0); } -static inline zig_u128 zig_shls_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_shls_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { zig_u128 res; - if (zig_cmp_u128(rhs, zig_as_u128(0, bits)) >= zig_as_i32(0)) - return zig_cmp_u128(lhs, zig_as_u128(0, 0)) != zig_as_i32(0) ? zig_maxInt(u128, bits) : lhs; - -#if zig_has_int128 - return zig_shlo_u128(&res, lhs, (zig_u8)rhs, bits) ? zig_maxInt(u128, bits) : res; -#else - return zig_shlo_u128(&res, lhs, (zig_u8)rhs.lo, bits) ? zig_maxInt(u128, bits) : res; -#endif + if (zig_cmp_u128(rhs, zig_make_u128(0, bits)) >= INT32_C(0)) + return zig_cmp_u128(lhs, zig_make_u128(0, 0)) != INT32_C(0) ? zig_maxInt_u(128, bits) : lhs; + return zig_shlo_u128(&res, lhs, (uint8_t)zig_lo_u128(rhs), bits) ? zig_maxInt_u(128, bits) : res; } -static inline zig_i128 zig_shls_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_shls_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { zig_i128 res; - if (zig_cmp_u128(zig_bitcast_u128(rhs), zig_as_u128(0, bits)) < zig_as_i32(0) && !zig_shlo_i128(&res, lhs, zig_lo_i128(rhs), bits)) return res; - return zig_cmp_i128(lhs, zig_as_i128(0, 0)) < zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); + if (zig_cmp_u128(zig_bitcast_u128(rhs), zig_make_u128(0, bits)) < INT32_C(0) && !zig_shlo_i128(&res, lhs, (uint8_t)zig_lo_i128(rhs), bits)) return res; + return zig_cmp_i128(lhs, zig_make_i128(0, 0)) < INT32_C(0) ? 
zig_minInt_i(128, bits) : zig_maxInt_i(128, bits); } -static inline zig_u128 zig_adds_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_adds_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { zig_u128 res; - return zig_addo_u128(&res, lhs, rhs, bits) ? zig_maxInt(u128, bits) : res; + return zig_addo_u128(&res, lhs, rhs, bits) ? zig_maxInt_u(128, bits) : res; } -static inline zig_i128 zig_adds_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_adds_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { zig_i128 res; if (!zig_addo_i128(&res, lhs, rhs, bits)) return res; - return zig_cmp_i128(res, zig_as_i128(0, 0)) >= zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); + return zig_cmp_i128(res, zig_make_i128(0, 0)) >= INT32_C(0) ? zig_minInt_i(128, bits) : zig_maxInt_i(128, bits); } -static inline zig_u128 zig_subs_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_subs_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { zig_u128 res; - return zig_subo_u128(&res, lhs, rhs, bits) ? zig_minInt(u128, bits) : res; + return zig_subo_u128(&res, lhs, rhs, bits) ? zig_minInt_u(128, bits) : res; } -static inline zig_i128 zig_subs_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_subs_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { zig_i128 res; if (!zig_subo_i128(&res, lhs, rhs, bits)) return res; - return zig_cmp_i128(res, zig_as_i128(0, 0)) >= zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); + return zig_cmp_i128(res, zig_make_i128(0, 0)) >= INT32_C(0) ? zig_minInt_i(128, bits) : zig_maxInt_i(128, bits); } -static inline zig_u128 zig_muls_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +static inline zig_u128 zig_muls_u128(zig_u128 lhs, zig_u128 rhs, uint8_t bits) { zig_u128 res; - return zig_mulo_u128(&res, lhs, rhs, bits) ? zig_maxInt(u128, bits) : res; + return zig_mulo_u128(&res, lhs, rhs, bits) ? zig_maxInt_u(128, bits) : res; } -static inline zig_i128 zig_muls_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +static inline zig_i128 zig_muls_i128(zig_i128 lhs, zig_i128 rhs, uint8_t bits) { zig_i128 res; if (!zig_mulo_i128(&res, lhs, rhs, bits)) return res; - return zig_cmp_i128(zig_xor_i128(lhs, rhs), zig_as_i128(0, 0)) < zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); + return zig_cmp_i128(zig_xor_i128(lhs, rhs), zig_make_i128(0, 0)) < INT32_C(0) ? 
zig_minInt_i(128, bits) : zig_maxInt_i(128, bits); } -static inline zig_u8 zig_clz_u128(zig_u128 val, zig_u8 bits) { - if (bits <= zig_as_u8(64)) return zig_clz_u64(zig_lo_u128(val), bits); - if (zig_hi_u128(val) != 0) return zig_clz_u64(zig_hi_u128(val), bits - zig_as_u8(64)); - return zig_clz_u64(zig_lo_u128(val), zig_as_u8(64)) + (bits - zig_as_u8(64)); +static inline uint8_t zig_clz_u128(zig_u128 val, uint8_t bits) { + if (bits <= UINT8_C(64)) return zig_clz_u64(zig_lo_u128(val), bits); + if (zig_hi_u128(val) != 0) return zig_clz_u64(zig_hi_u128(val), bits - UINT8_C(64)); + return zig_clz_u64(zig_lo_u128(val), UINT8_C(64)) + (bits - UINT8_C(64)); } -static inline zig_u8 zig_clz_i128(zig_i128 val, zig_u8 bits) { +static inline uint8_t zig_clz_i128(zig_i128 val, uint8_t bits) { return zig_clz_u128(zig_bitcast_u128(val), bits); } -static inline zig_u8 zig_ctz_u128(zig_u128 val, zig_u8 bits) { - if (zig_lo_u128(val) != 0) return zig_ctz_u64(zig_lo_u128(val), zig_as_u8(64)); - return zig_ctz_u64(zig_hi_u128(val), bits - zig_as_u8(64)) + zig_as_u8(64); +static inline uint8_t zig_ctz_u128(zig_u128 val, uint8_t bits) { + if (zig_lo_u128(val) != 0) return zig_ctz_u64(zig_lo_u128(val), UINT8_C(64)); + return zig_ctz_u64(zig_hi_u128(val), bits - UINT8_C(64)) + UINT8_C(64); } -static inline zig_u8 zig_ctz_i128(zig_i128 val, zig_u8 bits) { +static inline uint8_t zig_ctz_i128(zig_i128 val, uint8_t bits) { return zig_ctz_u128(zig_bitcast_u128(val), bits); } -static inline zig_u8 zig_popcount_u128(zig_u128 val, zig_u8 bits) { - return zig_popcount_u64(zig_hi_u128(val), bits - zig_as_u8(64)) + - zig_popcount_u64(zig_lo_u128(val), zig_as_u8(64)); +static inline uint8_t zig_popcount_u128(zig_u128 val, uint8_t bits) { + return zig_popcount_u64(zig_hi_u128(val), bits - UINT8_C(64)) + + zig_popcount_u64(zig_lo_u128(val), UINT8_C(64)); } -static inline zig_u8 zig_popcount_i128(zig_i128 val, zig_u8 bits) { +static inline uint8_t zig_popcount_i128(zig_i128 val, uint8_t bits) { return zig_popcount_u128(zig_bitcast_u128(val), bits); } -static inline zig_u128 zig_byte_swap_u128(zig_u128 val, zig_u8 bits) { +static inline zig_u128 zig_byte_swap_u128(zig_u128 val, uint8_t bits) { zig_u128 full_res; #if zig_has_builtin(bswap128) full_res = __builtin_bswap128(val); #else - full_res = zig_as_u128(zig_byte_swap_u64(zig_lo_u128(val), zig_as_u8(64)), - zig_byte_swap_u64(zig_hi_u128(val), zig_as_u8(64))); + full_res = zig_make_u128(zig_byte_swap_u64(zig_lo_u128(val), UINT8_C(64)), + zig_byte_swap_u64(zig_hi_u128(val), UINT8_C(64))); #endif - return zig_shr_u128(full_res, zig_as_u8(128) - bits); + return zig_shr_u128(full_res, UINT8_C(128) - bits); } -static inline zig_i128 zig_byte_swap_i128(zig_i128 val, zig_u8 bits) { +static inline zig_i128 zig_byte_swap_i128(zig_i128 val, uint8_t bits) { return zig_bitcast_i128(zig_byte_swap_u128(zig_bitcast_u128(val), bits)); } -static inline zig_u128 zig_bit_reverse_u128(zig_u128 val, zig_u8 bits) { - return zig_shr_u128(zig_as_u128(zig_bit_reverse_u64(zig_lo_u128(val), zig_as_u8(64)), - zig_bit_reverse_u64(zig_hi_u128(val), zig_as_u8(64))), - zig_as_u8(128) - bits); +static inline zig_u128 zig_bit_reverse_u128(zig_u128 val, uint8_t bits) { + return zig_shr_u128(zig_make_u128(zig_bit_reverse_u64(zig_lo_u128(val), UINT8_C(64)), + zig_bit_reverse_u64(zig_hi_u128(val), UINT8_C(64))), + UINT8_C(128) - bits); } -static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, zig_u8 bits) { +static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, uint8_t bits) { return 
zig_bitcast_i128(zig_bit_reverse_u128(zig_bitcast_u128(val), bits)); } @@ -1810,85 +1922,87 @@ static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, zig_u8 bits) { #if (zig_has_builtin(nan) && zig_has_builtin(nans) && zig_has_builtin(inf)) || defined(zig_gnuc) #define zig_has_float_builtins 1 -#define zig_as_special_f16(sign, name, arg, repr) sign zig_as_f16(__builtin_##name, )(arg) -#define zig_as_special_f32(sign, name, arg, repr) sign zig_as_f32(__builtin_##name, )(arg) -#define zig_as_special_f64(sign, name, arg, repr) sign zig_as_f64(__builtin_##name, )(arg) -#define zig_as_special_f80(sign, name, arg, repr) sign zig_as_f80(__builtin_##name, )(arg) -#define zig_as_special_f128(sign, name, arg, repr) sign zig_as_f128(__builtin_##name, )(arg) -#define zig_as_special_c_longdouble(sign, name, arg, repr) sign zig_as_c_longdouble(__builtin_##name, )(arg) +#define zig_make_special_f16(sign, name, arg, repr) sign zig_make_f16(__builtin_##name, )(arg) +#define zig_make_special_f32(sign, name, arg, repr) sign zig_make_f32(__builtin_##name, )(arg) +#define zig_make_special_f64(sign, name, arg, repr) sign zig_make_f64(__builtin_##name, )(arg) +#define zig_make_special_f80(sign, name, arg, repr) sign zig_make_f80(__builtin_##name, )(arg) +#define zig_make_special_f128(sign, name, arg, repr) sign zig_make_f128(__builtin_##name, )(arg) +#define zig_make_special_c_longdouble(sign, name, arg, repr) sign zig_make_c_longdouble(__builtin_##name, )(arg) #else #define zig_has_float_builtins 0 -#define zig_as_special_f16(sign, name, arg, repr) zig_float_from_repr_f16(repr) -#define zig_as_special_f32(sign, name, arg, repr) zig_float_from_repr_f32(repr) -#define zig_as_special_f64(sign, name, arg, repr) zig_float_from_repr_f64(repr) -#define zig_as_special_f80(sign, name, arg, repr) zig_float_from_repr_f80(repr) -#define zig_as_special_f128(sign, name, arg, repr) zig_float_from_repr_f128(repr) -#define zig_as_special_c_longdouble(sign, name, arg, repr) zig_float_from_repr_c_longdouble(repr) +#define zig_make_special_f16(sign, name, arg, repr) zig_float_from_repr_f16(repr) +#define zig_make_special_f32(sign, name, arg, repr) zig_float_from_repr_f32(repr) +#define zig_make_special_f64(sign, name, arg, repr) zig_float_from_repr_f64(repr) +#define zig_make_special_f80(sign, name, arg, repr) zig_float_from_repr_f80(repr) +#define zig_make_special_f128(sign, name, arg, repr) zig_float_from_repr_f128(repr) +#define zig_make_special_c_longdouble(sign, name, arg, repr) zig_float_from_repr_c_longdouble(repr) #endif #define zig_has_f16 1 #define zig_bitSizeOf_f16 16 #define zig_libc_name_f16(name) __##name##h -#define zig_as_special_constant_f16(sign, name, arg, repr) zig_as_special_f16(sign, name, arg, repr) +#define zig_make_special_constant_f16(sign, name, arg, repr) zig_make_special_f16(sign, name, arg, repr) #if FLT_MANT_DIG == 11 typedef float zig_f16; -#define zig_as_f16(fp, repr) fp##f +#define zig_make_f16(fp, repr) fp##f #elif DBL_MANT_DIG == 11 typedef double zig_f16; -#define zig_as_f16(fp, repr) fp +#define zig_make_f16(fp, repr) fp #elif LDBL_MANT_DIG == 11 #define zig_bitSizeOf_c_longdouble 16 +typedef uint16_t zig_repr_c_longdouble; typedef long double zig_f16; -#define zig_as_f16(fp, repr) fp##l +#define zig_make_f16(fp, repr) fp##l #elif FLT16_MANT_DIG == 11 && (zig_has_builtin(inff16) || defined(zig_gnuc)) typedef _Float16 zig_f16; -#define zig_as_f16(fp, repr) fp##f16 +#define zig_make_f16(fp, repr) fp##f16 #elif defined(__SIZEOF_FP16__) typedef __fp16 zig_f16; -#define zig_as_f16(fp, repr) 
fp##f16
+#define zig_make_f16(fp, repr) fp##f16
 #else
 #undef zig_has_f16
 #define zig_has_f16 0
-#define zig_repr_f16 i16
-typedef zig_i16 zig_f16;
-#define zig_as_f16(fp, repr) repr
-#undef zig_as_special_f16
-#define zig_as_special_f16(sign, name, arg, repr) repr
-#undef zig_as_special_constant_f16
-#define zig_as_special_constant_f16(sign, name, arg, repr) repr
+#define zig_bitSizeOf_repr_f16 16
+typedef int16_t zig_f16;
+#define zig_make_f16(fp, repr) repr
+#undef zig_make_special_f16
+#define zig_make_special_f16(sign, name, arg, repr) repr
+#undef zig_make_special_constant_f16
+#define zig_make_special_constant_f16(sign, name, arg, repr) repr
 #endif

 #define zig_has_f32 1
 #define zig_bitSizeOf_f32 32
 #define zig_libc_name_f32(name) name##f
 #if _MSC_VER
-#define zig_as_special_constant_f32(sign, name, arg, repr) sign zig_as_f32(zig_msvc_flt_##name, )
+#define zig_make_special_constant_f32(sign, name, arg, repr) sign zig_make_f32(zig_msvc_flt_##name, )
 #else
-#define zig_as_special_constant_f32(sign, name, arg, repr) zig_as_special_f32(sign, name, arg, repr)
+#define zig_make_special_constant_f32(sign, name, arg, repr) zig_make_special_f32(sign, name, arg, repr)
 #endif
 #if FLT_MANT_DIG == 24
 typedef float zig_f32;
-#define zig_as_f32(fp, repr) fp##f
+#define zig_make_f32(fp, repr) fp##f
 #elif DBL_MANT_DIG == 24
 typedef double zig_f32;
-#define zig_as_f32(fp, repr) fp
+#define zig_make_f32(fp, repr) fp
 #elif LDBL_MANT_DIG == 24
 #define zig_bitSizeOf_c_longdouble 32
+typedef uint32_t zig_repr_c_longdouble;
 typedef long double zig_f32;
-#define zig_as_f32(fp, repr) fp##l
+#define zig_make_f32(fp, repr) fp##l
 #elif FLT32_MANT_DIG == 24
 typedef _Float32 zig_f32;
-#define zig_as_f32(fp, repr) fp##f32
+#define zig_make_f32(fp, repr) fp##f32
 #else
 #undef zig_has_f32
 #define zig_has_f32 0
-#define zig_repr_f32 i32
-typedef zig_i32 zig_f32;
-#define zig_as_f32(fp, repr) repr
-#undef zig_as_special_f32
-#define zig_as_special_f32(sign, name, arg, repr) repr
-#undef zig_as_special_constant_f32
-#define zig_as_special_constant_f32(sign, name, arg, repr) repr
+#define zig_bitSizeOf_repr_f32 32
+typedef int32_t zig_f32;
+#define zig_make_f32(fp, repr) repr
+#undef zig_make_special_f32
+#define zig_make_special_f32(sign, name, arg, repr) repr
+#undef zig_make_special_constant_f32
+#define zig_make_special_constant_f32(sign, name, arg, repr) repr
 #endif

 #define zig_has_f64 1
@@ -1897,109 +2011,113 @@ typedef zig_i32 zig_f32;
 #if _MSC_VER
 #ifdef ZIG_TARGET_ABI_MSVC
 #define zig_bitSizeOf_c_longdouble 64
+typedef uint64_t zig_repr_c_longdouble;
 #endif
-#define zig_as_special_constant_f64(sign, name, arg, repr) sign zig_as_f64(zig_msvc_flt_##name, )
+#define zig_make_special_constant_f64(sign, name, arg, repr) sign zig_make_f64(zig_msvc_flt_##name, )
 #else /* _MSC_VER */
-#define zig_as_special_constant_f64(sign, name, arg, repr) zig_as_special_f64(sign, name, arg, repr)
+#define zig_make_special_constant_f64(sign, name, arg, repr) zig_make_special_f64(sign, name, arg, repr)
 #endif /* _MSC_VER */
 #if FLT_MANT_DIG == 53
 typedef float zig_f64;
-#define zig_as_f64(fp, repr) fp##f
+#define zig_make_f64(fp, repr) fp##f
 #elif DBL_MANT_DIG == 53
 typedef double zig_f64;
-#define zig_as_f64(fp, repr) fp
+#define zig_make_f64(fp, repr) fp
 #elif LDBL_MANT_DIG == 53
 #define zig_bitSizeOf_c_longdouble 64
+typedef uint64_t zig_repr_c_longdouble;
 typedef long double zig_f64;
-#define zig_as_f64(fp, repr) fp##l
+#define zig_make_f64(fp, repr) fp##l
 #elif FLT64_MANT_DIG == 53
 typedef _Float64 zig_f64;
-#define zig_as_f64(fp, repr) fp##f64
+#define zig_make_f64(fp, repr) fp##f64
 #elif FLT32X_MANT_DIG == 53
 typedef _Float32x zig_f64;
-#define zig_as_f64(fp, repr) fp##f32x
+#define zig_make_f64(fp, repr) fp##f32x
 #else
 #undef zig_has_f64
 #define zig_has_f64 0
-#define zig_repr_f64 i64
-typedef zig_i64 zig_f64;
-#define zig_as_f64(fp, repr) repr
-#undef zig_as_special_f64
-#define zig_as_special_f64(sign, name, arg, repr) repr
-#undef zig_as_special_constant_f64
-#define zig_as_special_constant_f64(sign, name, arg, repr) repr
+#define zig_bitSizeOf_repr_f64 64
+typedef int64_t zig_f64;
+#define zig_make_f64(fp, repr) repr
+#undef zig_make_special_f64
+#define zig_make_special_f64(sign, name, arg, repr) repr
+#undef zig_make_special_constant_f64
+#define zig_make_special_constant_f64(sign, name, arg, repr) repr
 #endif

 #define zig_has_f80 1
 #define zig_bitSizeOf_f80 80
 #define zig_libc_name_f80(name) __##name##x
-#define zig_as_special_constant_f80(sign, name, arg, repr) zig_as_special_f80(sign, name, arg, repr)
+#define zig_make_special_constant_f80(sign, name, arg, repr) zig_make_special_f80(sign, name, arg, repr)
 #if FLT_MANT_DIG == 64
 typedef float zig_f80;
-#define zig_as_f80(fp, repr) fp##f
+#define zig_make_f80(fp, repr) fp##f
 #elif DBL_MANT_DIG == 64
 typedef double zig_f80;
-#define zig_as_f80(fp, repr) fp
+#define zig_make_f80(fp, repr) fp
 #elif LDBL_MANT_DIG == 64
 #define zig_bitSizeOf_c_longdouble 80
+typedef zig_u128 zig_repr_c_longdouble;
 typedef long double zig_f80;
-#define zig_as_f80(fp, repr) fp##l
+#define zig_make_f80(fp, repr) fp##l
 #elif FLT80_MANT_DIG == 64
 typedef _Float80 zig_f80;
-#define zig_as_f80(fp, repr) fp##f80
+#define zig_make_f80(fp, repr) fp##f80
 #elif FLT64X_MANT_DIG == 64
 typedef _Float64x zig_f80;
-#define zig_as_f80(fp, repr) fp##f64x
+#define zig_make_f80(fp, repr) fp##f64x
 #elif defined(__SIZEOF_FLOAT80__)
 typedef __float80 zig_f80;
-#define zig_as_f80(fp, repr) fp##l
+#define zig_make_f80(fp, repr) fp##l
 #else
 #undef zig_has_f80
 #define zig_has_f80 0
-#define zig_repr_f80 i128
+#define zig_bitSizeOf_repr_f80 128
 typedef zig_i128 zig_f80;
-#define zig_as_f80(fp, repr) repr
-#undef zig_as_special_f80
-#define zig_as_special_f80(sign, name, arg, repr) repr
-#undef zig_as_special_constant_f80
-#define zig_as_special_constant_f80(sign, name, arg, repr) repr
+#define zig_make_f80(fp, repr) repr
+#undef zig_make_special_f80
+#define zig_make_special_f80(sign, name, arg, repr) repr
+#undef zig_make_special_constant_f80
+#define zig_make_special_constant_f80(sign, name, arg, repr) repr
 #endif

 #define zig_has_f128 1
 #define zig_bitSizeOf_f128 128
 #define zig_libc_name_f128(name) name##q
-#define zig_as_special_constant_f128(sign, name, arg, repr) zig_as_special_f128(sign, name, arg, repr)
+#define zig_make_special_constant_f128(sign, name, arg, repr) zig_make_special_f128(sign, name, arg, repr)
 #if FLT_MANT_DIG == 113
 typedef float zig_f128;
-#define zig_as_f128(fp, repr) fp##f
+#define zig_make_f128(fp, repr) fp##f
 #elif DBL_MANT_DIG == 113
 typedef double zig_f128;
-#define zig_as_f128(fp, repr) fp
+#define zig_make_f128(fp, repr) fp
 #elif LDBL_MANT_DIG == 113
 #define zig_bitSizeOf_c_longdouble 128
+typedef zig_u128 zig_repr_c_longdouble;
 typedef long double zig_f128;
-#define zig_as_f128(fp, repr) fp##l
+#define zig_make_f128(fp, repr) fp##l
 #elif FLT128_MANT_DIG == 113
 typedef _Float128 zig_f128;
-#define zig_as_f128(fp, repr) fp##f128
+#define zig_make_f128(fp, repr) fp##f128
 #elif FLT64X_MANT_DIG == 113
 typedef _Float64x zig_f128;
-#define zig_as_f128(fp, repr) fp##f64x
+#define zig_make_f128(fp, repr) fp##f64x
 #elif defined(__SIZEOF_FLOAT128__)
 typedef __float128 zig_f128;
-#define zig_as_f128(fp, repr) fp##q
-#undef zig_as_special_f128
-#define zig_as_special_f128(sign, name, arg, repr) sign __builtin_##name##f128(arg)
+#define zig_make_f128(fp, repr) fp##q
+#undef zig_make_special_f128
+#define zig_make_special_f128(sign, name, arg, repr) sign __builtin_##name##f128(arg)
 #else
 #undef zig_has_f128
 #define zig_has_f128 0
-#define zig_repr_f128 i128
+#define zig_bitSizeOf_repr_f128 128
 typedef zig_i128 zig_f128;
-#define zig_as_f128(fp, repr) repr
-#undef zig_as_special_f128
-#define zig_as_special_f128(sign, name, arg, repr) repr
-#undef zig_as_special_constant_f128
-#define zig_as_special_constant_f128(sign, name, arg, repr) repr
+#define zig_make_f128(fp, repr) repr
+#undef zig_make_special_f128
+#define zig_make_special_f128(sign, name, arg, repr) repr
+#undef zig_make_special_constant_f128
+#define zig_make_special_constant_f128(sign, name, arg, repr) repr
 #endif

 #define zig_has_c_longdouble 1
@@ -2010,17 +2128,18 @@ typedef zig_i128 zig_f128;
 #define zig_libc_name_c_longdouble(name) name##l
 #endif

-#define zig_as_special_constant_c_longdouble(sign, name, arg, repr) zig_as_special_c_longdouble(sign, name, arg, repr)
+#define zig_make_special_constant_c_longdouble(sign, name, arg, repr) zig_make_special_c_longdouble(sign, name, arg, repr)

 #ifdef zig_bitSizeOf_c_longdouble
 #ifdef ZIG_TARGET_ABI_MSVC
-typedef double zig_c_longdouble;
 #undef zig_bitSizeOf_c_longdouble
 #define zig_bitSizeOf_c_longdouble 64
-#define zig_as_c_longdouble(fp, repr) fp
+typedef uint64_t zig_repr_c_longdouble;
+typedef zig_f64 zig_c_longdouble;
+#define zig_make_c_longdouble(fp, repr) fp
 #else
 typedef long double zig_c_longdouble;
-#define zig_as_c_longdouble(fp, repr) fp##l
+#define zig_make_c_longdouble(fp, repr) fp##l
 #endif

 #else /* zig_bitSizeOf_c_longdouble */
@@ -2028,34 +2147,32 @@ typedef long double zig_c_longdouble;
 #undef zig_has_c_longdouble
 #define zig_has_c_longdouble 0
 #define zig_bitSizeOf_c_longdouble 80
+typedef zig_u128 zig_repr_c_longdouble;
 #define zig_compiler_rt_abbrev_c_longdouble zig_compiler_rt_abbrev_f80
-#define zig_repr_c_longdouble i128
+#define zig_bitSizeOf_repr_c_longdouble 128
 typedef zig_i128 zig_c_longdouble;
-#define zig_as_c_longdouble(fp, repr) repr
-#undef zig_as_special_c_longdouble
-#define zig_as_special_c_longdouble(sign, name, arg, repr) repr
-#undef zig_as_special_constant_c_longdouble
-#define zig_as_special_constant_c_longdouble(sign, name, arg, repr) repr
+#define zig_make_c_longdouble(fp, repr) repr
+#undef zig_make_special_c_longdouble
+#define zig_make_special_c_longdouble(sign, name, arg, repr) repr
+#undef zig_make_special_constant_c_longdouble
+#define zig_make_special_constant_c_longdouble(sign, name, arg, repr) repr

 #endif /* zig_bitSizeOf_c_longdouble */

 #if !zig_has_float_builtins
 #define zig_float_from_repr(Type, ReprType) \
-    static inline zig_##Type zig_float_from_repr_##Type(zig_##ReprType repr) { \
-        return *((zig_##Type*)&repr); \
+    static inline zig_##Type zig_float_from_repr_##Type(ReprType repr) { \
+        zig_##Type result; \
+        memcpy(&result, &repr, sizeof(result)); \
+        return result; \
     }

-zig_float_from_repr(f16, u16)
-zig_float_from_repr(f32, u32)
-zig_float_from_repr(f64, u64)
-zig_float_from_repr(f80, u128)
-zig_float_from_repr(f128, u128)
-#if zig_bitSizeOf_c_longdouble == 80
-zig_float_from_repr(c_longdouble, u128)
-#else
-#define zig_expand_float_from_repr(Type, ReprType) zig_float_from_repr(Type, ReprType)
-zig_expand_float_from_repr(c_longdouble, zig_expand_concat(u, zig_bitSizeOf_c_longdouble))
-#endif
+zig_float_from_repr(f16, uint16_t)
+zig_float_from_repr(f32, uint32_t)
+zig_float_from_repr(f64, uint64_t)
+zig_float_from_repr(f80, zig_u128)
+zig_float_from_repr(f128, zig_u128)
+zig_float_from_repr(c_longdouble, zig_repr_c_longdouble)
 #endif
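The rewritten zig_float_from_repr above replaces pointer-cast type punning (*((zig_##Type*)&repr)) with a memcpy into a local. A minimal standalone sketch of why, assuming only C99 and an IEEE-754 float (the helper name is illustrative, not part of zig.h):

#include <stdint.h>
#include <string.h>

/* Reinterpret the bits of a 32-bit integer as a float without breaking
 * strict aliasing: copy the bytes instead of casting the pointer. */
static inline float f32_from_bits(uint32_t bits) {
    float result;
    memcpy(&result, &bits, sizeof(result));
    return result;
}

/* Example: 0x3F800000 is the IEEE-754 single-precision encoding of 1.0,
 * so f32_from_bits(0x3F800000u) == 1.0f. */

The pointer cast reads a float object through an integer lvalue, which is undefined behavior under C's aliasing rules; the memcpy form is well defined, and compilers lower it to a plain register move.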

 #define zig_cast_f16 (zig_f16)
@@ -2073,32 +2190,35 @@ zig_expand_float_from_repr(c_longdouble, zig_expand_concat(u, zig_bitSizeOf_c_lo
 #endif

 #define zig_convert_builtin(ResType, operation, ArgType, version) \
-    zig_extern zig_##ResType zig_expand_concat(zig_expand_concat(zig_expand_concat(__##operation, \
-        zig_compiler_rt_abbrev_##ArgType), zig_compiler_rt_abbrev_##ResType), version)(zig_##ArgType);
-zig_convert_builtin(f16, trunc, f32, 2)
-zig_convert_builtin(f16, trunc, f64, 2)
-zig_convert_builtin(f16, trunc, f80, 2)
-zig_convert_builtin(f16, trunc, f128, 2)
-zig_convert_builtin(f32, extend, f16, 2)
-zig_convert_builtin(f32, trunc, f64, 2)
-zig_convert_builtin(f32, trunc, f80, 2)
-zig_convert_builtin(f32, trunc, f128, 2)
-zig_convert_builtin(f64, extend, f16, 2)
-zig_convert_builtin(f64, extend, f32, 2)
-zig_convert_builtin(f64, trunc, f80, 2)
-zig_convert_builtin(f64, trunc, f128, 2)
-zig_convert_builtin(f80, extend, f16, 2)
-zig_convert_builtin(f80, extend, f32, 2)
-zig_convert_builtin(f80, extend, f64, 2)
-zig_convert_builtin(f80, trunc, f128, 2)
-zig_convert_builtin(f128, extend, f16, 2)
-zig_convert_builtin(f128, extend, f32, 2)
-zig_convert_builtin(f128, extend, f64, 2)
-zig_convert_builtin(f128, extend, f80, 2)
+    zig_extern ResType zig_expand_concat(zig_expand_concat(zig_expand_concat(__##operation, \
+        zig_compiler_rt_abbrev_##ArgType), zig_compiler_rt_abbrev_##ResType), version)(ArgType);
+zig_convert_builtin(zig_f16, trunc, zig_f32, 2)
+zig_convert_builtin(zig_f16, trunc, zig_f64, 2)
+zig_convert_builtin(zig_f16, trunc, zig_f80, 2)
+zig_convert_builtin(zig_f16, trunc, zig_f128, 2)
+zig_convert_builtin(zig_f32, extend, zig_f16, 2)
+zig_convert_builtin(zig_f32, trunc, zig_f64, 2)
+zig_convert_builtin(zig_f32, trunc, zig_f80, 2)
+zig_convert_builtin(zig_f32, trunc, zig_f128, 2)
+zig_convert_builtin(zig_f64, extend, zig_f16, 2)
+zig_convert_builtin(zig_f64, extend, zig_f32, 2)
+zig_convert_builtin(zig_f64, trunc, zig_f80, 2)
+zig_convert_builtin(zig_f64, trunc, zig_f128, 2)
+zig_convert_builtin(zig_f80, extend, zig_f16, 2)
+zig_convert_builtin(zig_f80, extend, zig_f32, 2)
+zig_convert_builtin(zig_f80, extend, zig_f64, 2)
+zig_convert_builtin(zig_f80, trunc, zig_f128, 2)
+zig_convert_builtin(zig_f128, extend, zig_f16, 2)
+zig_convert_builtin(zig_f128, extend, zig_f32, 2)
+zig_convert_builtin(zig_f128, extend, zig_f64, 2)
+zig_convert_builtin(zig_f128, extend, zig_f80, 2)

 #define zig_float_negate_builtin_0(Type) \
     static inline zig_##Type zig_neg_##Type(zig_##Type arg) { \
-        return zig_expand_concat(zig_xor_, zig_repr_##Type)(arg, zig_expand_minInt(zig_repr_##Type, zig_bitSizeOf_##Type)); \
+        return zig_expand_concat(zig_xor_i, zig_bitSizeOf_repr_##Type)( \
+            arg, \
+            zig_minInt_i(zig_bitSizeOf_repr_##Type, zig_bitSizeOf_##Type) \
+        ); \
     }
 #define zig_float_negate_builtin_1(Type) \
     static inline zig_##Type zig_neg_##Type(zig_##Type arg) { \
@@ -2106,28 +2226,28 @@ zig_convert_builtin(f128, extend, f80, 2)
     }
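For float types without native support (zig_has_* == 0), zig_float_negate_builtin_0 above negates on the stored integer representation: an XOR against a mask whose only set bit is the sign bit of the emulated width. A hedged standalone sketch of the same trick for a 32-bit representation (illustrative, not zig.h's code):

#include <stdint.h>

/* IEEE-754 negation only flips the sign bit, so a soft-float negate is a
 * single XOR against a mask with just the top bit set. */
static inline uint32_t f32_repr_neg(uint32_t repr) {
    return repr ^ UINT32_C(0x80000000);
}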

 #define zig_float_less_builtin_0(Type, operation) \
-    zig_extern zig_i32 zig_expand_concat(zig_expand_concat(__##operation, \
-        zig_compiler_rt_abbrev_##Type), 2)(zig_##Type, zig_##Type); \
-    static inline zig_i32 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
-        return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_##Type), 2)(lhs, rhs); \
+    zig_extern int32_t zig_expand_concat(zig_expand_concat(__##operation, \
+        zig_compiler_rt_abbrev_zig_##Type), 2)(zig_##Type, zig_##Type); \
+    static inline int32_t zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+        return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_zig_##Type), 2)(lhs, rhs); \
     }
 #define zig_float_less_builtin_1(Type, operation) \
-    static inline zig_i32 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+    static inline int32_t zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
         return (!(lhs <= rhs) - (lhs < rhs)); \
     }

 #define zig_float_greater_builtin_0(Type, operation) \
     zig_float_less_builtin_0(Type, operation)
 #define zig_float_greater_builtin_1(Type, operation) \
-    static inline zig_i32 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+    static inline int32_t zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
         return ((lhs > rhs) - !(lhs >= rhs)); \
     }

 #define zig_float_binary_builtin_0(Type, operation, operator) \
     zig_extern zig_##Type zig_expand_concat(zig_expand_concat(__##operation, \
-        zig_compiler_rt_abbrev_##Type), 3)(zig_##Type, zig_##Type); \
+        zig_compiler_rt_abbrev_zig_##Type), 3)(zig_##Type, zig_##Type); \
     static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
-        return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_##Type), 3)(lhs, rhs); \
+        return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_zig_##Type), 3)(lhs, rhs); \
     }
 #define zig_float_binary_builtin_1(Type, operation, operator) \
     static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
@@ -2135,18 +2255,18 @@ zig_convert_builtin(f128, extend, f80, 2)
     }

 #define zig_float_builtins(Type) \
-    zig_convert_builtin(i32, fix, Type, ) \
-    zig_convert_builtin(u32, fixuns, Type, ) \
-    zig_convert_builtin(i64, fix, Type, ) \
-    zig_convert_builtin(u64, fixuns, Type, ) \
-    zig_convert_builtin(i128, fix, Type, ) \
-    zig_convert_builtin(u128, fixuns, Type, ) \
-    zig_convert_builtin(Type, float, i32, ) \
-    zig_convert_builtin(Type, floatun, u32, ) \
-    zig_convert_builtin(Type, float, i64, ) \
-    zig_convert_builtin(Type, floatun, u64, ) \
-    zig_convert_builtin(Type, float, i128, ) \
-    zig_convert_builtin(Type, floatun, u128, ) \
+    zig_convert_builtin( int32_t, fix, zig_##Type, ) \
+    zig_convert_builtin(uint32_t, fixuns, zig_##Type, ) \
+    zig_convert_builtin( int64_t, fix, zig_##Type, ) \
+    zig_convert_builtin(uint64_t, fixuns, zig_##Type, ) \
+    zig_convert_builtin(zig_i128, fix, zig_##Type, ) \
+    zig_convert_builtin(zig_u128, fixuns, zig_##Type, ) \
+    zig_convert_builtin(zig_##Type, float, int32_t, ) \
+    zig_convert_builtin(zig_##Type, floatun, uint32_t, ) \
+    zig_convert_builtin(zig_##Type, float, int64_t, ) \
+    zig_convert_builtin(zig_##Type, floatun, uint64_t, ) \
+    zig_convert_builtin(zig_##Type, float, zig_i128, ) \
+    zig_convert_builtin(zig_##Type, floatun, zig_u128, ) \
     zig_expand_concat(zig_float_negate_builtin_, zig_has_##Type)(Type) \
     zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, cmp) \
     zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, ne) \
@@ -2200,149 +2320,157 @@ zig_float_builtins(c_longdouble)

 // TODO: zig_msvc_atomic_load should load 32 bit without interlocked on x86, and load 64 bit without interlocked on x64

-#define zig_msvc_atomics(Type, suffix) \
-    static inline bool zig_msvc_cmpxchg_##Type(zig_##Type volatile* obj, zig_##Type* expected, zig_##Type desired) { \
-        zig_##Type comparand = *expected; \
-        zig_##Type initial = _InterlockedCompareExchange##suffix(obj, desired, comparand); \
+#define zig_msvc_atomics(ZigType, Type, suffix) \
+    static inline bool zig_msvc_cmpxchg_##ZigType(Type volatile* obj, Type* expected, Type desired) { \
+        Type comparand = *expected; \
+        Type initial = _InterlockedCompareExchange##suffix(obj, desired, comparand); \
         bool exchanged = initial == comparand; \
         if (!exchanged) { \
            *expected = initial; \
         } \
         return exchanged; \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_xchg_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_xchg_##ZigType(Type volatile* obj, Type value) { \
         return _InterlockedExchange##suffix(obj, value); \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_add_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_add_##ZigType(Type volatile* obj, Type value) { \
        return _InterlockedExchangeAdd##suffix(obj, value); \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_sub_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_sub_##ZigType(Type volatile* obj, Type value) { \
         bool success = false; \
-        zig_##Type new; \
-        zig_##Type prev; \
+        Type new; \
+        Type prev; \
         while (!success) { \
             prev = *obj; \
             new = prev - value; \
-            success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \
+            success = zig_msvc_cmpxchg_##ZigType(obj, &prev, new); \
         } \
         return prev; \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_or_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_or_##ZigType(Type volatile* obj, Type value) { \
         return _InterlockedOr##suffix(obj, value); \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_xor_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_xor_##ZigType(Type volatile* obj, Type value) { \
         return _InterlockedXor##suffix(obj, value); \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_and_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_and_##ZigType(Type volatile* obj, Type value) { \
         return _InterlockedAnd##suffix(obj, value); \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_nand_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_nand_##ZigType(Type volatile* obj, Type value) { \
         bool success = false; \
-        zig_##Type new; \
-        zig_##Type prev; \
+        Type new; \
+        Type prev; \
         while (!success) { \
             prev = *obj; \
             new = ~(prev & value); \
-            success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \
+            success = zig_msvc_cmpxchg_##ZigType(obj, &prev, new); \
         } \
         return prev; \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_min_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_min_##ZigType(Type volatile* obj, Type value) { \
         bool success = false; \
-        zig_##Type new; \
-        zig_##Type prev; \
+        Type new; \
+        Type prev; \
         while (!success) { \
             prev = *obj; \
             new = value < prev ? value : prev; \
-            success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \
+            success = zig_msvc_cmpxchg_##ZigType(obj, &prev, new); \
         } \
         return prev; \
     } \
-    static inline zig_##Type zig_msvc_atomicrmw_max_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline Type zig_msvc_atomicrmw_max_##ZigType(Type volatile* obj, Type value) { \
         bool success = false; \
-        zig_##Type new; \
-        zig_##Type prev; \
+        Type new; \
+        Type prev; \
         while (!success) { \
             prev = *obj; \
             new = value > prev ? value : prev; \
-            success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \
+            success = zig_msvc_cmpxchg_##ZigType(obj, &prev, new); \
         } \
         return prev; \
     } \
-    static inline void zig_msvc_atomic_store_##Type(zig_##Type volatile* obj, zig_##Type value) { \
+    static inline void zig_msvc_atomic_store_##ZigType(Type volatile* obj, Type value) { \
         _InterlockedExchange##suffix(obj, value); \
     } \
-    static inline zig_##Type zig_msvc_atomic_load_##Type(zig_##Type volatile* obj) { \
+    static inline Type zig_msvc_atomic_load_##ZigType(Type volatile* obj) { \
         return _InterlockedOr##suffix(obj, 0); \
     }

-zig_msvc_atomics(u8, 8)
-zig_msvc_atomics(i8, 8)
-zig_msvc_atomics(u16, 16)
-zig_msvc_atomics(i16, 16)
-zig_msvc_atomics(u32, )
-zig_msvc_atomics(i32, )
+zig_msvc_atomics( u8, uint8_t, 8)
+zig_msvc_atomics( i8, int8_t, 8)
+zig_msvc_atomics(u16, uint16_t, 16)
+zig_msvc_atomics(i16, int16_t, 16)
+zig_msvc_atomics(u32, uint32_t, )
+zig_msvc_atomics(i32, int32_t, )
 #if _M_X64
-zig_msvc_atomics(u64, 64)
-zig_msvc_atomics(i64, 64)
+zig_msvc_atomics(u64, uint64_t, 64)
+zig_msvc_atomics(i64, int64_t, 64)
 #endif
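In the zig_msvc_atomics expansion above, operations with a dedicated _Interlocked* intrinsic map directly, while sub, nand, min, and max fall back to a compare-exchange retry loop. A standalone sketch of that loop shape (an illustrative helper, not part of zig.h; MSVC-only):

#include <intrin.h>

/* Atomic max built from a CAS retry loop: recompute against the freshest
 * observed value until our exchange wins. Returns the previous value. */
static inline long atomic_max_long(long volatile* obj, long value) {
    long prev = *obj;
    for (;;) {
        long desired = value > prev ? value : prev;
        long initial = _InterlockedCompareExchange(obj, desired, prev);
        if (initial == prev) return prev; /* exchange succeeded */
        prev = initial; /* lost a race; retry with the new value */
    }
}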

 #define zig_msvc_flt_atomics(Type, ReprType, suffix) \
     static inline bool zig_msvc_cmpxchg_##Type(zig_##Type volatile* obj, zig_##Type* expected, zig_##Type desired) { \
-        zig_##ReprType comparand = *((zig_##ReprType*)expected); \
-        zig_##ReprType initial = _InterlockedCompareExchange##suffix((zig_##ReprType volatile*)obj, *((zig_##ReprType*)&desired), comparand); \
-        bool exchanged = initial == comparand; \
-        if (!exchanged) { \
-            *expected = *((zig_##Type*)&initial); \
-        } \
-        return exchanged; \
+        ReprType exchange; \
+        ReprType comparand; \
+        ReprType initial; \
+        bool success; \
+        memcpy(&comparand, expected, sizeof(comparand)); \
+        memcpy(&exchange, &desired, sizeof(exchange)); \
+        initial = _InterlockedCompareExchange##suffix((ReprType volatile*)obj, exchange, comparand); \
+        success = initial == comparand; \
+        if (!success) memcpy(expected, &initial, sizeof(*expected)); \
+        return success; \
     } \
     static inline zig_##Type zig_msvc_atomicrmw_xchg_##Type(zig_##Type volatile* obj, zig_##Type value) { \
-        zig_##ReprType initial = _InterlockedExchange##suffix((zig_##ReprType volatile*)obj, *((zig_##ReprType*)&value)); \
-        return *((zig_##Type*)&initial); \
+        ReprType repr; \
+        ReprType initial; \
+        zig_##Type result; \
+        memcpy(&repr, &value, sizeof(repr)); \
+        initial = _InterlockedExchange##suffix((ReprType volatile*)obj, repr); \
+        memcpy(&result, &initial, sizeof(result)); \
+        return result; \
     } \
     static inline zig_##Type zig_msvc_atomicrmw_add_##Type(zig_##Type volatile* obj, zig_##Type value) { \
-        bool success = false; \
-        zig_##ReprType new; \
-        zig_##Type prev; \
-        while (!success) { \
-            prev = *obj; \
-            new = prev + value; \
-            success = zig_msvc_cmpxchg_##Type(obj, &prev, *((zig_##ReprType*)&new)); \
-        } \
-        return prev; \
+        ReprType repr; \
+        zig_##Type expected; \
+        zig_##Type desired; \
+        repr = *(ReprType volatile*)obj; \
+        memcpy(&expected, &repr, sizeof(expected)); \
+        do { \
+            desired = expected + value; \
+        } while (!zig_msvc_cmpxchg_##Type(obj, &expected, desired)); \
+        return expected; \
     } \
     static inline zig_##Type zig_msvc_atomicrmw_sub_##Type(zig_##Type volatile* obj, zig_##Type value) { \
-        bool success = false; \
-        zig_##ReprType new; \
-        zig_##Type prev; \
-        while (!success) { \
-            prev = *obj; \
-            new = prev - value; \
-            success = zig_msvc_cmpxchg_##Type(obj, &prev, *((zig_##ReprType*)&new)); \
-        } \
-        return prev; \
+        ReprType repr; \
+        zig_##Type expected; \
+        zig_##Type desired; \
+        repr = *(ReprType volatile*)obj; \
+        memcpy(&expected, &repr, sizeof(expected)); \
+        do { \
+            desired = expected - value; \
+        } while (!zig_msvc_cmpxchg_##Type(obj, &expected, desired)); \
+        return expected; \
     }

-zig_msvc_flt_atomics(f32, u32, )
+zig_msvc_flt_atomics(f32, uint32_t, )
 #if _M_X64
-zig_msvc_flt_atomics(f64, u64, 64)
+zig_msvc_flt_atomics(f64, uint64_t, 64)
 #endif
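zig_msvc_flt_atomics above combines both earlier ideas: the float round-trips through its integer representation via memcpy, and the read-modify-write retries through the compare-exchange. A rough sketch of an atomic float add in that style (assumes 32-bit long as on Windows; illustrative names, not the header's code):

#include <string.h>
#include <intrin.h>

static inline float atomic_add_f32(float volatile* obj, float value) {
    long repr = *(long volatile*)obj; /* racy snapshot; the CAS validates it */
    for (;;) {
        float expected;
        float desired;
        long want;
        memcpy(&expected, &repr, sizeof(expected));
        desired = expected + value;
        memcpy(&want, &desired, sizeof(want));
        long found = _InterlockedCompareExchange((long volatile*)obj, want, repr);
        if (found == repr) return expected; /* swap landed */
        repr = found; /* retry against the value another thread stored */
    }
}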

 #if _M_IX86
 static inline void zig_msvc_atomic_barrier() {
-    zig_i32 barrier;
+    int32_t barrier;
     __asm {
         xchg barrier, eax
     }
 }

-static inline void* zig_msvc_atomicrmw_xchg_p32(void** obj, zig_u32* arg) {
+static inline void* zig_msvc_atomicrmw_xchg_p32(void** obj, void* arg) {
     return _InterlockedExchangePointer(obj, arg);
 }

-static inline void zig_msvc_atomic_store_p32(void** obj, zig_u32* arg) {
+static inline void zig_msvc_atomic_store_p32(void** obj, void* arg) {
     _InterlockedExchangePointer(obj, arg);
 }

@@ -2360,11 +2488,11 @@ static inline bool zig_msvc_cmpxchg_p32(void** obj, void** expected, void* desir
     return exchanged;
 }
 #else /* _M_IX86 */
-static inline void* zig_msvc_atomicrmw_xchg_p64(void** obj, zig_u64* arg) {
+static inline void* zig_msvc_atomicrmw_xchg_p64(void** obj, void* arg) {
     return _InterlockedExchangePointer(obj, arg);
 }

-static inline void zig_msvc_atomic_store_p64(void** obj, zig_u64* arg) {
+static inline void zig_msvc_atomic_store_p64(void** obj, void* arg) {
     _InterlockedExchangePointer(obj, arg);
 }

@@ -2383,11 +2511,11 @@ static inline bool zig_msvc_cmpxchg_p64(void** obj, void** expected, void* desir
 }

 static inline bool zig_msvc_cmpxchg_u128(zig_u128 volatile* obj, zig_u128* expected, zig_u128 desired) {
-    return _InterlockedCompareExchange128((zig_i64 volatile*)obj, desired.hi, desired.lo, (zig_i64*)expected);
+    return _InterlockedCompareExchange128((int64_t volatile*)obj, desired.hi, desired.lo, (int64_t*)expected);
 }

 static inline bool zig_msvc_cmpxchg_i128(zig_i128 volatile* obj, zig_i128* expected, zig_i128 desired) {
-    return _InterlockedCompareExchange128((zig_i64 volatile*)obj, desired.hi, desired.lo, (zig_u64*)expected);
+    return _InterlockedCompareExchange128((int64_t volatile*)obj, desired.hi, desired.lo, (uint64_t*)expected);
 }

 #define zig_msvc_atomics_128xchg(Type) \
@@ -2429,7 +2557,7 @@ zig_msvc_atomics_128op(u128, max)

 #endif /* _MSC_VER && (_M_IX86 || _M_X64) */

-/* ========================= Special Case Intrinsics ========================= */
+/* ======================== Special Case Intrinsics ========================= */

 #if (_MSC_VER && _M_X64) || defined(__x86_64__)

@@ -2459,8 +2587,8 @@ static inline void* zig_x86_windows_teb(void) {

 #if (_MSC_VER && (_M_IX86 || _M_X64)) || defined(__i386__) || defined(__x86_64__)

-static inline void zig_x86_cpuid(zig_u32 leaf_id, zig_u32 subid, zig_u32* eax, zig_u32* ebx, zig_u32* ecx, zig_u32* edx) {
-    zig_u32 cpu_info[4];
+static inline void zig_x86_cpuid(uint32_t leaf_id, uint32_t subid, uint32_t* eax, uint32_t* ebx, uint32_t* ecx, uint32_t* edx) {
+    uint32_t cpu_info[4];
 #if _MSC_VER
     __cpuidex(cpu_info, leaf_id, subid);
 #else
@@ -2472,12 +2600,12 @@ static inline void zig_x86_cpuid(zig_u32 leaf_id, zig_u32 subid, zig_u32* eax, z
     *edx = cpu_info[3];
 }

-static inline zig_u32 zig_x86_get_xcr0(void) {
+static inline uint32_t zig_x86_get_xcr0(void) {
 #if _MSC_VER
-    return (zig_u32)_xgetbv(0);
+    return (uint32_t)_xgetbv(0);
 #else
-    zig_u32 eax;
-    zig_u32 edx;
+    uint32_t eax;
+    uint32_t edx;
     __asm__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
     return eax;
 #endif
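The zig_x86_cpuid and zig_x86_get_xcr0 helpers above give emitted C code one portable entry point to CPUID and XCR0 across MSVC and GCC-style compilers. As a usage illustration, a hypothetical caller (not part of the diff) could detect OS-supported AVX like this:

#include <stdbool.h>
#include <stdint.h>

/* AVX is usable only when the CPU reports AVX and OSXSAVE (CPUID leaf 1,
 * ECX bits 28 and 27) and XCR0 shows the OS saving XMM+YMM state (0x6). */
static bool cpu_supports_avx(void) {
    uint32_t eax, ebx, ecx, edx;
    zig_x86_cpuid(1, 0, &eax, &ebx, &ecx, &edx);
    if (((ecx >> 28) & 1) == 0) return false; /* no AVX */
    if (((ecx >> 27) & 1) == 0) return false; /* no OSXSAVE */
    return (zig_x86_get_xcr0() & 0x6) == 0x6;
}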
This - // should be changed as part of the resolution of #12201 const test_pkg = if (options.test_runner_path) |test_runner| test_pkg: { const test_dir = std.fs.path.dirname(test_runner); const basename = std.fs.path.basename(test_runner); - const pkg = try Package.create(gpa, "root", test_dir, basename); + const pkg = try Package.create(gpa, test_dir, basename); // copy package table from main_pkg to root_pkg pkg.table = try main_pkg.table.clone(gpa); break :test_pkg pkg; } else try Package.createWithDir( gpa, - "root", options.zig_lib_directory, null, "test_runner.zig", @@ -1639,7 +1656,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { const compiler_rt_pkg = if (include_compiler_rt and options.output_mode == .Obj) compiler_rt_pkg: { break :compiler_rt_pkg try Package.createWithDir( gpa, - "compiler_rt", options.zig_lib_directory, null, "compiler_rt.zig", @@ -1647,28 +1663,14 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { } else null; errdefer if (compiler_rt_pkg) |p| p.destroy(gpa); - try main_pkg.addAndAdopt(gpa, builtin_pkg); - try main_pkg.add(gpa, root_pkg); - try main_pkg.addAndAdopt(gpa, std_pkg); + try main_pkg.add(gpa, "builtin", builtin_pkg); + try main_pkg.add(gpa, "root", root_pkg); + try main_pkg.add(gpa, "std", std_pkg); if (compiler_rt_pkg) |p| { - try main_pkg.addAndAdopt(gpa, p); + try main_pkg.add(gpa, "compiler_rt", p); } - const main_pkg_is_std = m: { - const std_path = try std.fs.path.resolve(arena, &[_][]const u8{ - std_pkg.root_src_directory.path orelse ".", - std_pkg.root_src_path, - }); - defer arena.free(std_path); - const main_path = try std.fs.path.resolve(arena, &[_][]const u8{ - main_pkg.root_src_directory.path orelse ".", - main_pkg.root_src_path, - }); - defer arena.free(main_path); - break :m mem.eql(u8, main_path, std_path); - }; - // Pre-open the directory handles for cached ZIR code so that it does not need // to redundantly happen for each AstGen operation. const zir_sub_dir = "z"; @@ -1705,7 +1707,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .gpa = gpa, .comp = comp, .main_pkg = main_pkg, - .main_pkg_is_std = main_pkg_is_std, .root_pkg = root_pkg, .zig_cache_artifact_directory = zig_cache_artifact_directory, .global_zir_cache = global_zir_cache, @@ -2772,6 +2773,111 @@ fn emitOthers(comp: *Compilation) void { } } +fn reportMultiModuleErrors(mod: *Module) !void { + // Some cases can give you a whole bunch of multi-module errors, which it's not helpful to + // print all of, so we'll cap the number of these to emit. + var num_errors: u32 = 0; + const max_errors = 5; + // Attach the "some omitted" note to the final error message + var last_err: ?*Module.ErrorMsg = null; + + for (mod.import_table.values()) |file| { + if (!file.multi_pkg) continue; + + num_errors += 1; + if (num_errors > max_errors) continue; + + const err = err_blk: { + // Like with errors, let's cap the number of notes to prevent a huge error spew. + const max_notes = 5; + const omitted = file.references.items.len -| max_notes; + const num_notes = file.references.items.len - omitted; + + const notes = try mod.gpa.alloc(Module.ErrorMsg, if (omitted > 0) num_notes + 1 else num_notes); + errdefer mod.gpa.free(notes); + + for (notes[0..num_notes], file.references.items[0..num_notes], 0..) 
|*note, ref, i| { + errdefer for (notes[0..i]) |*n| n.deinit(mod.gpa); + note.* = switch (ref) { + .import => |loc| blk: { + const name = try loc.file_scope.pkg.getName(mod.gpa, mod.*); + defer mod.gpa.free(name); + break :blk try Module.ErrorMsg.init( + mod.gpa, + loc, + "imported from module {s}", + .{name}, + ); + }, + .root => |pkg| blk: { + const name = try pkg.getName(mod.gpa, mod.*); + defer mod.gpa.free(name); + break :blk try Module.ErrorMsg.init( + mod.gpa, + .{ .file_scope = file, .parent_decl_node = 0, .lazy = .entire_file }, + "root of module {s}", + .{name}, + ); + }, + }; + } + errdefer for (notes[0..num_notes]) |*n| n.deinit(mod.gpa); + + if (omitted > 0) { + notes[num_notes] = try Module.ErrorMsg.init( + mod.gpa, + .{ .file_scope = file, .parent_decl_node = 0, .lazy = .entire_file }, + "{} more references omitted", + .{omitted}, + ); + } + errdefer if (omitted > 0) notes[num_notes].deinit(mod.gpa); + + const err = try Module.ErrorMsg.create( + mod.gpa, + .{ .file_scope = file, .parent_decl_node = 0, .lazy = .entire_file }, + "file exists in multiple modules", + .{}, + ); + err.notes = notes; + break :err_blk err; + }; + errdefer err.destroy(mod.gpa); + try mod.failed_files.putNoClobber(mod.gpa, file, err); + last_err = err; + } + + // If we omitted any errors, add a note saying that + if (num_errors > max_errors) { + const err = last_err.?; + + // There isn't really any meaningful place to put this note, so just attach it to the + // last failed file + var note = try Module.ErrorMsg.init( + mod.gpa, + err.src_loc, + "{} more errors omitted", + .{num_errors - max_errors}, + ); + errdefer note.deinit(mod.gpa); + + const i = err.notes.len; + err.notes = try mod.gpa.realloc(err.notes, i + 1); + err.notes[i] = note; + } + + // Now that we've reported the errors, we need to deal with + // dependencies. Any file referenced by a multi_pkg file should also be + // marked multi_pkg and have its status set to astgen_failure, as it's + // ambiguous which package they should be analyzed as a part of. We need + // to add this flag after reporting the errors however, as otherwise + // we'd get an error for every single downstream file, which wouldn't be + // very useful. + for (mod.import_table.values()) |file| { + if (file.multi_pkg) file.recursiveMarkMultiPkg(mod); + } +} + /// Having the file open for writing is problematic as far as executing the /// binary is concerned. This will remove the write flag, or close the file, /// or whatever is needed so that it can be executed. @@ -3098,54 +3204,7 @@ pub fn performAllTheWork( } if (comp.bin_file.options.module) |mod| { - for (mod.import_table.values()) |file| { - if (!file.multi_pkg) continue; - const err = err_blk: { - const notes = try mod.gpa.alloc(Module.ErrorMsg, file.references.items.len); - errdefer mod.gpa.free(notes); - - for (notes, 0..) 
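reportMultiModuleErrors above caps the output at five errors with five notes each, then appends "{} more ... omitted" notes rather than flooding the terminal. The reporting pattern in isolation (hypothetical diagnostics list; C used for illustration):

#include <stdio.h>

/* Print at most `max` diagnostics and summarize the rest, so one root
 * cause cannot drown the build output in repeated errors. */
static void report_capped(const char* const* msgs, int count, int max) {
    int shown = count < max ? count : max;
    for (int i = 0; i < shown; i++) {
        fprintf(stderr, "error: %s\n", msgs[i]);
    }
    if (count > shown) {
        fprintf(stderr, "note: %d more errors omitted\n", count - shown);
    }
}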

 /// Having the file open for writing is problematic as far as executing the
 /// binary is concerned. This will remove the write flag, or close the file,
 /// or whatever is needed so that it can be executed.
@@ -3098,54 +3204,7 @@ pub fn performAllTheWork(
     }

     if (comp.bin_file.options.module) |mod| {
-        for (mod.import_table.values()) |file| {
-            if (!file.multi_pkg) continue;
-            const err = err_blk: {
-                const notes = try mod.gpa.alloc(Module.ErrorMsg, file.references.items.len);
-                errdefer mod.gpa.free(notes);
-
-                for (notes, 0..) |*note, i| {
-                    errdefer for (notes[0..i]) |*n| n.deinit(mod.gpa);
-                    note.* = switch (file.references.items[i]) {
-                        .import => |loc| try Module.ErrorMsg.init(
-                            mod.gpa,
-                            loc,
-                            "imported from package {s}",
-                            .{loc.file_scope.pkg.name},
-                        ),
-                        .root => |pkg| try Module.ErrorMsg.init(
-                            mod.gpa,
-                            .{ .file_scope = file, .parent_decl_node = 0, .lazy = .entire_file },
-                            "root of package {s}",
-                            .{pkg.name},
-                        ),
-                    };
-                }
-                errdefer for (notes) |*n| n.deinit(mod.gpa);
-
-                const err = try Module.ErrorMsg.create(
-                    mod.gpa,
-                    .{ .file_scope = file, .parent_decl_node = 0, .lazy = .entire_file },
-                    "file exists in multiple packages",
-                    .{},
-                );
-                err.notes = notes;
-                break :err_blk err;
-            };
-            errdefer err.destroy(mod.gpa);
-            try mod.failed_files.putNoClobber(mod.gpa, file, err);
-        }
-
-        // Now that we've reported the errors, we need to deal with
-        // dependencies. Any file referenced by a multi_pkg file should also be
-        // marked multi_pkg and have its status set to astgen_failure, as it's
-        // ambiguous which package they should be analyzed as a part of. We need
-        // to add this flag after reporting the errors however, as otherwise
-        // we'd get an error for every single downstream file, which wouldn't be
-        // very useful.
-        for (mod.import_table.values()) |file| {
-            if (file.multi_pkg) file.recursiveMarkMultiPkg(mod);
-        }
+        try reportMultiModuleErrors(mod);
     }

     {
@@ -3266,24 +3325,20 @@ fn processOneJob(comp: *Compilation, job: Job) !void {
                 const decl_emit_h = emit_h.declPtr(decl_index);
                 const fwd_decl = &decl_emit_h.fwd_decl;
                 fwd_decl.shrinkRetainingCapacity(0);
-                var typedefs_arena = std.heap.ArenaAllocator.init(gpa);
-                defer typedefs_arena.deinit();
+                var ctypes_arena = std.heap.ArenaAllocator.init(gpa);
+                defer ctypes_arena.deinit();

                 var dg: c_codegen.DeclGen = .{
                     .gpa = gpa,
                     .module = module,
                     .error_msg = null,
-                    .decl_index = decl_index,
+                    .decl_index = decl_index.toOptional(),
                     .decl = decl,
                     .fwd_decl = fwd_decl.toManaged(gpa),
-                    .typedefs = c_codegen.TypedefMap.initContext(gpa, .{ .mod = module }),
-                    .typedefs_arena = typedefs_arena.allocator(),
+                    .ctypes = .{},
                 };
                 defer {
-                    for (dg.typedefs.values()) |typedef| {
-                        module.gpa.free(typedef.rendered);
-                    }
-                    dg.typedefs.deinit();
+                    dg.ctypes.deinit(gpa);
                     dg.fwd_decl.deinit();
                 }

@@ -5415,7 +5470,6 @@ fn buildOutputFromZig(
     var main_pkg: Package = .{
         .root_src_directory = comp.zig_lib_directory,
         .root_src_path = src_basename,
-        .name = "root",
     };
     defer main_pkg.deinitTable(comp.gpa);
     const root_name = src_basename[0 .. src_basename.len - std.fs.path.extension(src_basename).len];

diff --git a/src/Module.zig b/src/Module.zig
index 76777532ab..a2502d36d3 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -144,10 +144,6 @@ stage1_flags: packed struct {
 } = .{},

 job_queued_update_builtin_zig: bool = true,
-/// This makes it so that we can run `zig test` on the standard library.
-/// Otherwise, the logic for scanning test decls skips all of them because
-/// `main_pkg != std_pkg`.
-main_pkg_is_std: bool,

 compile_log_text: ArrayListUnmanaged(u8) = .{},

@@ -1950,7 +1946,7 @@ pub const File = struct {
     prev_zir: ?*Zir = null,

     /// A single reference to a file.
-    const Reference = union(enum) {
+    pub const Reference = union(enum) {
         /// The file is imported directly (i.e. not as a package) with @import.
         import: SrcLoc,
         /// The file is the root of a package.
@@ -2113,7 +2109,27 @@ pub const File = struct {

     /// Add a reference to this file during AstGen.
     pub fn addReference(file: *File, mod: Module, ref: Reference) !void {
-        try file.references.append(mod.gpa, ref);
+        // Don't add the same module root twice. Note that since we always add module roots at the
+        // front of the references array (see below), this loop is actually O(1) on valid code.
+        if (ref == .root) {
+            for (file.references.items) |other| {
+                switch (other) {
+                    .root => |r| if (ref.root == r) return,
+                    else => break, // reached the end of the "is-root" references
+                }
+            }
+        }
+
+        switch (ref) {
+            // We put root references at the front of the list both to make the above loop fast and
+            // to make multi-module errors more helpful (since "root-of" notes are generally more
+            // informative than "imported-from" notes). This path is hit very rarely, so the speed
+            // of the insert operation doesn't matter too much.
+            .root => try file.references.insert(mod.gpa, 0, ref),
+
+            // Other references we'll just put at the end.
+            else => try file.references.append(mod.gpa, ref),
+        }

         const pkg = switch (ref) {
             .import => |loc| loc.file_scope.pkg,
@@ -2128,7 +2144,10 @@ pub const File = struct {
         file.multi_pkg = true;
         file.status = .astgen_failure;

-        std.debug.assert(file.zir_loaded);
+        // We can only mark children as failed if the ZIR is loaded, which may not
+        // be the case if there were other astgen failures in this file
+        if (!file.zir_loaded) return;
+
         const imports_index = file.zir.extra[@enumToInt(Zir.ExtraIndex.imports)];
         if (imports_index == 0) return;
         const extra = file.zir.extraData(Zir.Inst.Imports, imports_index);
@@ -3323,10 +3342,19 @@ pub fn deinit(mod: *Module) void {
     // The callsite of `Compilation.create` owns the `main_pkg`, however
     // Module owns the builtin and std packages that it adds.
     if (mod.main_pkg.table.fetchRemove("builtin")) |kv| {
+        gpa.free(kv.key);
         kv.value.destroy(gpa);
     }
     if (mod.main_pkg.table.fetchRemove("std")) |kv| {
-        kv.value.destroy(gpa);
+        gpa.free(kv.key);
+        // It's possible for main_pkg to be std when running 'zig test'! In this case, we must not
+        // destroy it, since it would lead to a double-free.
+        if (kv.value != mod.main_pkg) {
+            kv.value.destroy(gpa);
+        }
+    }
+    if (mod.main_pkg.table.fetchRemove("root")) |kv| {
+        gpa.free(kv.key);
     }
     if (mod.root_pkg != mod.main_pkg) {
         mod.root_pkg.destroy(gpa);
@@ -4808,11 +4836,14 @@ pub fn importPkg(mod: *Module, pkg: *Package) !ImportFileResult {
     const gop = try mod.import_table.getOrPut(gpa, resolved_path);
     errdefer _ = mod.import_table.pop();
-    if (gop.found_existing) return ImportFileResult{
-        .file = gop.value_ptr.*,
-        .is_new = false,
-        .is_pkg = true,
-    };
+    if (gop.found_existing) {
+        try gop.value_ptr.*.addReference(mod.*, .{ .root = pkg });
+        return ImportFileResult{
+            .file = gop.value_ptr.*,
+            .is_new = false,
+            .is_pkg = true,
+        };
+    }

     const sub_file_path = try gpa.dupe(u8, pkg.root_src_path);
     errdefer gpa.free(sub_file_path);
@@ -5208,22 +5239,14 @@ fn scanDecl(iter: *ScanDeclIter, decl_sub_index: usize, flags: u4) Allocator.Err
                 // test decl with no name. Skip the part where we check against
                 // the test name filter.
                 if (!comp.bin_file.options.is_test) break :blk false;
-                if (decl_pkg != mod.main_pkg) {
-                    if (!mod.main_pkg_is_std) break :blk false;
-                    const std_pkg = mod.main_pkg.table.get("std").?;
-                    if (std_pkg != decl_pkg) break :blk false;
-                }
+                if (decl_pkg != mod.main_pkg) break :blk false;
                 try mod.test_functions.put(gpa, new_decl_index, {});
                 break :blk true;
             },
             else => blk: {
                 if (!is_named_test) break :blk false;
                 if (!comp.bin_file.options.is_test) break :blk false;
-                if (decl_pkg != mod.main_pkg) {
-                    if (!mod.main_pkg_is_std) break :blk false;
-                    const std_pkg = mod.main_pkg.table.get("std").?;
-                    if (std_pkg != decl_pkg) break :blk false;
-                }
+                if (decl_pkg != mod.main_pkg) break :blk false;
                 if (comp.test_filter) |test_filter| {
                     if (mem.indexOf(u8, decl_name, test_filter) == null) {
                         break :blk false;
diff --git a/src/Package.zig b/src/Package.zig
index 5878e7bad6..f0e389e7ef 100644
--- a/src/Package.zig
+++ b/src/Package.zig
@@ -22,17 +22,16 @@ pub const Table = std.StringHashMapUnmanaged(*Package);
 root_src_directory: Compilation.Directory,
 /// Relative to `root_src_directory`. May contain path separators.
 root_src_path: []const u8,
+/// The dependency table of this module. Shared dependencies such as 'std', 'builtin', and 'root'
+/// are not specified in every dependency table, but instead only in the table of `main_pkg`.
+/// `Module.importFile` is responsible for detecting these names and using the correct package.
 table: Table = .{},
-parent: ?*Package = null,
 /// Whether to free `root_src_directory` on `destroy`.
 root_src_directory_owned: bool = false,
-/// This information can be recovered from 'table', but it's more convenient to store on the package.
-name: []const u8,

 /// Allocate a Package. No references to the slices passed are kept.
 pub fn create(
     gpa: Allocator,
-    name: []const u8,
     /// Null indicates the current working directory
     root_src_dir_path: ?[]const u8,
     /// Relative to root_src_dir_path
@@ -47,9 +46,6 @@ pub fn create(
     const owned_src_path = try gpa.dupe(u8, root_src_path);
     errdefer gpa.free(owned_src_path);

-    const owned_name = try gpa.dupe(u8, name);
-    errdefer gpa.free(owned_name);
-
     ptr.* = .{
         .root_src_directory = .{
             .path = owned_dir_path,
@@ -57,7 +53,6 @@ pub fn create(
         },
         .root_src_path = owned_src_path,
         .root_src_directory_owned = true,
-        .name = owned_name,
     };

     return ptr;
@@ -65,7 +60,6 @@ pub fn create(

 pub fn createWithDir(
     gpa: Allocator,
-    name: []const u8,
     directory: Compilation.Directory,
     /// Relative to `directory`. If null, means `directory` is the root src dir
     /// and is owned externally.
@@ -79,9 +73,6 @@ pub fn createWithDir(
     const owned_src_path = try gpa.dupe(u8, root_src_path);
     errdefer gpa.free(owned_src_path);

-    const owned_name = try gpa.dupe(u8, name);
-    errdefer gpa.free(owned_name);
-
     if (root_src_dir_path) |p| {
         const owned_dir_path = try directory.join(gpa, &[1][]const u8{p});
         errdefer gpa.free(owned_dir_path);
@@ -93,14 +84,12 @@ pub fn createWithDir(
             },
             .root_src_directory_owned = true,
             .root_src_path = owned_src_path,
-            .name = owned_name,
         };
     } else {
         ptr.* = .{
             .root_src_directory = directory,
             .root_src_directory_owned = false,
             .root_src_path = owned_src_path,
-            .name = owned_name,
         };
     }
     return ptr;
@@ -110,7 +99,6 @@ pub fn createWithDir(
 /// inside its table; the caller is responsible for calling destroy() on them.
 pub fn destroy(pkg: *Package, gpa: Allocator) void {
     gpa.free(pkg.root_src_path);
-    gpa.free(pkg.name);

     if (pkg.root_src_directory_owned) {
         // If root_src_directory.path is null then the handle is the cwd()
@@ -130,15 +118,97 @@ pub fn deinitTable(pkg: *Package, gpa: Allocator) void {
     pkg.table.deinit(gpa);
 }

-pub fn add(pkg: *Package, gpa: Allocator, package: *Package) !void {
+pub fn add(pkg: *Package, gpa: Allocator, name: []const u8, package: *Package) !void {
     try pkg.table.ensureUnusedCapacity(gpa, 1);
-    pkg.table.putAssumeCapacityNoClobber(package.name, package);
+    const name_dupe = try gpa.dupe(u8, name);
+    pkg.table.putAssumeCapacityNoClobber(name_dupe, package);
 }

-pub fn addAndAdopt(parent: *Package, gpa: Allocator, child: *Package) !void {
-    assert(child.parent == null); // make up your mind, who is the parent??
-    child.parent = parent;
-    return parent.add(gpa, child);
+/// Compute a readable name for the package. The returned name should be freed with gpa. This
+/// function is very slow, as it traverses the whole package hierarchy to find a path to this
+/// package. It should only be used for error output.
+pub fn getName(target: *const Package, gpa: Allocator, mod: Module) ![]const u8 {
+    // we'll do a breadth-first search from the root module to try and find a short name for this
+    // module, using a TailQueue of module/parent pairs. note that the "parent" there is just the
+    // first-found shortest path - a module may be a child of arbitrarily many other modules.
+    // also, this path may vary between executions due to hashmap iteration order, but that doesn't
+    // matter too much.
+    var node_arena = std.heap.ArenaAllocator.init(gpa);
+    defer node_arena.deinit();
+    const Parented = struct {
+        parent: ?*const @This(),
+        mod: *const Package,
+    };
+    const Queue = std.TailQueue(Parented);
+    var to_check: Queue = .{};
+
+    {
+        const new = try node_arena.allocator().create(Queue.Node);
+        new.* = .{ .data = .{ .parent = null, .mod = mod.root_pkg } };
+        to_check.prepend(new);
+    }
+
+    if (mod.main_pkg != mod.root_pkg) {
+        const new = try node_arena.allocator().create(Queue.Node);
+        // TODO: once #12201 is resolved, we may want a way of indicating a different name for this
+        new.* = .{ .data = .{ .parent = null, .mod = mod.main_pkg } };
+        to_check.prepend(new);
+    }
+
+    // set of modules we've already checked to prevent loops
+    var checked = std.AutoHashMap(*const Package, void).init(gpa);
+    defer checked.deinit();
+
+    const linked = while (to_check.pop()) |node| {
+        const check = &node.data;
+
+        if (checked.contains(check.mod)) continue;
+        try checked.put(check.mod, {});
+
+        if (check.mod == target) break check;
+
+        var it = check.mod.table.iterator();
+        while (it.next()) |kv| {
+            var new = try node_arena.allocator().create(Queue.Node);
+            new.* = .{ .data = .{
+                .parent = check,
+                .mod = kv.value_ptr.*,
+            } };
+            to_check.prepend(new);
+        }
+    } else {
+        // this can happen for e.g. @cImport packages
+        return gpa.dupe(u8, "");
+    };
+
+    // we found a path to the module! unfortunately, we can only traverse *up* it, so we have to put
+    // all the names into a buffer so we can then print them in order.
+    var names = std.ArrayList([]const u8).init(gpa);
+    defer names.deinit();
+
+    var cur: *const Parented = linked;
+    while (cur.parent) |parent| : (cur = parent) {
+        // find cur's name in parent
+        var it = parent.mod.table.iterator();
+        const name = while (it.next()) |kv| {
+            if (kv.value_ptr.* == cur.mod) {
+                break kv.key_ptr.*;
+            }
+        } else unreachable;
+        try names.append(name);
+    }
+
+    // finally, print the names into a buffer!
+    var buf = std.ArrayList(u8).init(gpa);
+    defer buf.deinit();
+    try buf.writer().writeAll("root");
+    var i: usize = names.items.len;
+    while (i > 0) {
+        i -= 1;
+        try buf.writer().print(".{s}", .{names.items[i]});
+    }
+
+    return buf.toOwnedSlice();
 }

 pub const build_zig_basename = "build.zig";
@@ -236,7 +306,7 @@ pub fn fetchAndAddDependencies(
             color,
         );

-        try addAndAdopt(pkg, gpa, sub_pkg);
+        try add(pkg, gpa, fqn, sub_pkg);

         try dependencies_source.writer().print("    pub const {s} = @import(\"{}\");\n", .{
             std.zig.fmtId(fqn), std.zig.fmtEscapes(fqn),
@@ -248,7 +318,6 @@ pub fn fetchAndAddDependencies(

 pub fn createFilePkg(
     gpa: Allocator,
-    name: []const u8,
     cache_directory: Compilation.Directory,
     basename: []const u8,
     contents: []const u8,
@@ -269,7 +338,7 @@ pub fn createFilePkg(
     const o_dir_sub_path = "o" ++ fs.path.sep_str ++ hex_digest;
     try renameTmpIntoCache(cache_directory.handle, tmp_dir_sub_path, o_dir_sub_path);

-    return createWithDir(gpa, name, cache_directory, o_dir_sub_path, basename);
+    return createWithDir(gpa, cache_directory, o_dir_sub_path, basename);
 }

 const Report = struct {
@@ -363,9 +432,6 @@ fn fetchAndUnpack(
         const owned_src_path = try gpa.dupe(u8, build_zig_basename);
         errdefer gpa.free(owned_src_path);

-        const owned_name = try gpa.dupe(u8, fqn);
-        errdefer gpa.free(owned_name);
-
         const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path});
         errdefer gpa.free(build_root);

@@ -380,7 +446,6 @@ fn fetchAndUnpack(
             },
             .root_src_directory_owned = true,
             .root_src_path = owned_src_path,
-            .name = owned_name,
         };

         return ptr;
@@ -428,6 +493,11 @@ fn fetchAndUnpack(
         // apply those rules directly to the filesystem right here. This ensures that files
         // not protected by the hash are not present on the file system.

+        // TODO: raise an error for files that have illegal paths on some operating systems.
+        // For example, on Linux a path with a backslash should raise an error here.
+        // Of course, if the ignore rules above omit the file from the package, then everything
+        // is fine and no error should be raised.
+
         break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
     };

@@ -455,7 +525,7 @@ fn fetchAndUnpack(
         std.zig.fmtId(fqn), std.zig.fmtEscapes(build_root),
     });

-    return createWithDir(gpa, fqn, global_cache_directory, pkg_dir_sub_path, build_zig_basename);
+    return createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, build_zig_basename);
 }

 fn unpackTarball(
@@ -481,7 +551,8 @@ fn unpackTarball(
 }

 const HashedFile = struct {
-    path: []const u8,
+    fs_path: []const u8,
+    normalized_path: []const u8,
     hash: [Manifest.Hash.digest_length]u8,
     failure: Error!void,

@@ -489,7 +560,7 @@ const HashedFile = struct {
     fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
         _ = context;
-        return mem.lessThan(u8, lhs.path, rhs.path);
+        return mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
     }
 };

@@ -525,8 +596,10 @@ fn computePackageHash(
                 else => return error.IllegalFileTypeInPackage,
             }
             const hashed_file = try arena.create(HashedFile);
+            const fs_path = try arena.dupe(u8, entry.path);
             hashed_file.* = .{
-                .path = try arena.dupe(u8, entry.path),
+                .fs_path = fs_path,
+                .normalized_path = try normalizePath(arena, fs_path),
                 .hash = undefined, // to be populated by the worker
                 .failure = undefined, // to be populated by the worker
             };
@@ -544,7 +617,7 @@ fn computePackageHash(
     for (all_files.items) |hashed_file| {
         hashed_file.failure catch |err| {
             any_failures = true;
-            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.path, @errorName(err) });
+            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
         };
         hasher.update(&hashed_file.hash);
     }
@@ -552,6 +625,24 @@ fn computePackageHash(
     return hasher.finalResult();
 }

+/// Normalize a file system path so that it is identical across operating systems,
+/// converting backslashes into forward slashes.
+fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 {
+    const canonical_sep = '/';
+
+    if (fs.path.sep == canonical_sep)
+        return fs_path;
+
+    const normalized = try arena.dupe(u8, fs_path);
+    for (normalized) |*byte| {
+        switch (byte.*) {
+            fs.path.sep => byte.* = canonical_sep,
+            else => continue,
+        }
+    }
+    return normalized;
+}
+
 fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void {
     defer wg.finish();
     hashed_file.failure = hashFileFallible(dir, hashed_file);
@@ -559,9 +650,10 @@ fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void {

 fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
     var buf: [8000]u8 = undefined;
-    var file = try dir.openFile(hashed_file.path, .{});
+    var file = try dir.openFile(hashed_file.fs_path, .{});
+    defer file.close();
     var hasher = Manifest.Hash.init(.{});
-    hasher.update(hashed_file.path);
+    hasher.update(hashed_file.normalized_path);
     hasher.update(&.{ 0, @boolToInt(try isExecutable(file)) });
     while (true) {
         const bytes_read = try file.read(&buf);
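The hashing changes above split HashedFile.path into the native fs_path (used to open the file) and a normalized_path (fed to the hasher and the sort), so a package hashes identically on Windows and POSIX. A standalone sketch of the normalization step (C for illustration; not the compiler's Zig code):

/* Canonicalize separators in place so hashes are OS-independent:
 * every '\' becomes '/'; POSIX paths pass through unchanged. */
static void normalize_path(char* path) {
    for (char* p = path; *p != '\0'; p++) {
        if (*p == '\\') *p = '/';
    }
}

Sorting and hashing by the normalized name while opening by the native name keeps the package hash stable without breaking file access on either family of systems.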

diff --git a/src/Sema.zig b/src/Sema.zig
index fcdb1ce518..46b47cd23d 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -2529,7 +2529,7 @@ fn coerceResultPtr(
                 _ = try block.addBinOp(.store, new_ptr, null_inst);
                 return Air.Inst.Ref.void_value;
             }
-            return sema.bitCast(block, ptr_ty, new_ptr, src);
+            return sema.bitCast(block, ptr_ty, new_ptr, src, null);
         }

         const trash_inst = trash_block.instructions.pop();
@@ -2545,7 +2545,7 @@ fn coerceResultPtr(
             if (try sema.resolveDefinedValue(block, src, new_ptr)) |ptr_val| {
                 new_ptr = try sema.addConstant(ptr_operand_ty, ptr_val);
             } else {
-                new_ptr = try sema.bitCast(block, ptr_operand_ty, new_ptr, src);
+                new_ptr = try sema.bitCast(block, ptr_operand_ty, new_ptr, src, null);
             }
         },
         .wrap_optional => {
@@ -5311,7 +5311,6 @@ fn zirCImport(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileEr
     }
     const c_import_pkg = Package.create(
         sema.gpa,
-        "c_import", // TODO: should we make this unique?
         null,
         c_import_res.out_zig_path,
     ) catch |err| switch (err) {
@@ -9655,7 +9654,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air
         .Vector,
         => {},
     }
-    return sema.bitCast(block, dest_ty, operand, operand_src);
+    return sema.bitCast(block, dest_ty, operand, inst_data.src(), operand_src);
 }

 fn zirFloatCast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -9888,7 +9887,7 @@ fn zirSwitchCapture(

     switch (operand_ty.zigTypeTag()) {
         .ErrorSet => if (block.switch_else_err_ty) |some| {
-            return sema.bitCast(block, some, operand, operand_src);
+            return sema.bitCast(block, some, operand, operand_src, null);
         } else {
             try block.addUnreachable(false);
             return Air.Inst.Ref.unreachable_value;
@@ -9988,14 +9987,14 @@ fn zirSwitchCapture(
             Module.ErrorSet.sortNames(&names);

             const else_error_ty = try Type.Tag.error_set_merged.create(sema.arena, names);
-            return sema.bitCast(block, else_error_ty, operand, operand_src);
+            return sema.bitCast(block, else_error_ty, operand, operand_src, null);
         } else {
             const item_ref = try sema.resolveInst(items[0]);
             // Previous switch validation ensured this will succeed
             const item_val = sema.resolveConstValue(block, .unneeded, item_ref, "") catch unreachable;

             const item_ty = try Type.Tag.error_set_single.create(sema.arena, item_val.getError().?);
-            return sema.bitCast(block, item_ty, operand, operand_src);
+            return sema.bitCast(block, item_ty, operand, operand_src, null);
         }
     },
     else => {
@@ -11793,8 +11792,9 @@ fn zirImport(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
             return sema.fail(block, operand_src, "import of file outside package path: '{s}'", .{operand});
         },
         error.PackageNotFound => {
-            const cur_pkg = block.getFileScope().pkg;
-            return sema.fail(block, operand_src, "no package named '{s}' available within package '{s}'", .{ operand, cur_pkg.name });
+            const name = try block.getFileScope().pkg.getName(sema.gpa, mod.*);
+            defer sema.gpa.free(name);
+            return sema.fail(block, operand_src, "no package named '{s}' available within package '{s}'", .{ operand, name });
         },
         else => {
             // TODO: these errors are file system errors; make sure an update() will
@@ -19953,7 +19953,7 @@ fn zirAlignCast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
         } else is_aligned;
         try sema.addSafetyCheck(block, ok, .incorrect_alignment);
     }
-    return sema.bitCast(block, dest_ty, ptr, ptr_src);
+    return sema.bitCast(block, dest_ty, ptr, ptr_src, null);
 }

 fn zirBitCount(
@@ -21482,24 +21482,32 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr
     const name_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
     const ptr_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node };

-    const struct_ty = try sema.resolveType(block, ty_src, extra.parent_type);
+    const parent_ty = try sema.resolveType(block, ty_src, extra.parent_type);
     const field_name = try sema.resolveConstString(block, name_src, extra.field_name, "field name must be comptime-known");
     const field_ptr = try sema.resolveInst(extra.field_ptr);
     const field_ptr_ty = sema.typeOf(field_ptr);

-    if (struct_ty.zigTypeTag() != .Struct) {
-        return sema.fail(block, ty_src, "expected struct type, found '{}'", .{struct_ty.fmt(sema.mod)});
+    if (parent_ty.zigTypeTag() != .Struct and parent_ty.zigTypeTag() != .Union) {
+        return sema.fail(block, ty_src, "expected struct or union type, found '{}'", .{parent_ty.fmt(sema.mod)});
     }
-    try sema.resolveTypeLayout(struct_ty);
+    try sema.resolveTypeLayout(parent_ty);

-    const field_index = if (struct_ty.isTuple()) blk: {
-        if (mem.eql(u8, field_name, "len")) {
-            return sema.fail(block, src, "cannot get @fieldParentPtr of 'len' field of tuple", .{});
-        }
-        break :blk try sema.tupleFieldIndex(block, struct_ty, field_name, name_src);
-    } else try sema.structFieldIndex(block, struct_ty, field_name, name_src);
+    const field_index = switch (parent_ty.zigTypeTag()) {
+        .Struct => blk: {
+            if (parent_ty.isTuple()) {
+                if (mem.eql(u8, field_name, "len")) {
+                    return sema.fail(block, src, "cannot get @fieldParentPtr of 'len' field of tuple", .{});
+                }
+                break :blk try sema.tupleFieldIndex(block, parent_ty, field_name, name_src);
+            } else {
+                break :blk try sema.structFieldIndex(block, parent_ty, field_name, name_src);
+            }
+        },
+        .Union => try sema.unionFieldIndex(block, parent_ty, field_name, name_src),
+        else => unreachable,
+    };

-    if (struct_ty.structFieldIsComptime(field_index)) {
+    if (parent_ty.zigTypeTag() == .Struct and parent_ty.structFieldIsComptime(field_index)) {
         return sema.fail(block, src, "cannot get @fieldParentPtr of a comptime field", .{});
     }

@@ -21507,23 +21515,29 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr
     const field_ptr_ty_info = field_ptr_ty.ptrInfo().data;

     var ptr_ty_data: Type.Payload.Pointer.Data = .{
-        .pointee_type = struct_ty.structFieldType(field_index),
+        .pointee_type = parent_ty.structFieldType(field_index),
         .mutable = field_ptr_ty_info.mutable,
         .@"addrspace" = field_ptr_ty_info.@"addrspace",
     };

-    if (struct_ty.containerLayout() == .Packed) {
-        return sema.fail(block, src, "TODO handle packed structs with @fieldParentPtr", .{});
+    if (parent_ty.containerLayout() == .Packed) {
+        return sema.fail(block, src, "TODO handle packed structs/unions with @fieldParentPtr", .{});
     } else {
-        ptr_ty_data.@"align" = if (struct_ty.castTag(.@"struct")) |struct_obj| b: {
-            break :b struct_obj.data.fields.values()[field_index].abi_align;
-        } else 0;
+        ptr_ty_data.@"align" = blk: {
+            if (parent_ty.castTag(.@"struct")) |struct_obj| {
+                break :blk struct_obj.data.fields.values()[field_index].abi_align;
+            } else if (parent_ty.cast(Type.Payload.Union)) |union_obj| {
+                break :blk union_obj.data.fields.values()[field_index].abi_align;
+            } else {
+                break :blk 0;
+            }
+        };
     }

     const actual_field_ptr_ty = try Type.ptr(sema.arena, sema.mod, ptr_ty_data);
     const casted_field_ptr = try sema.coerce(block, actual_field_ptr_ty, field_ptr, ptr_src);

-    ptr_ty_data.pointee_type = struct_ty;
+    ptr_ty_data.pointee_type = parent_ty;
     const result_ptr = try Type.ptr(sema.arena, sema.mod, ptr_ty_data);

     if (try sema.resolveDefinedValue(block, src, casted_field_ptr)) |field_ptr_val| {
@@ -21540,11 +21554,11 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr
                         field_name,
                         field_index,
                         payload.data.field_index,
-                        struct_ty.fmt(sema.mod),
+                        parent_ty.fmt(sema.mod),
                     },
                 );
                 errdefer msg.destroy(sema.gpa);
-                try sema.addDeclaredHereNote(msg, struct_ty);
+                try sema.addDeclaredHereNote(msg, parent_ty);
                 break :msg msg;
             };
             return sema.failWithOwnedErrorMsg(msg);
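With the change above, @fieldParentPtr accepts union parents as well as structs. The pointer arithmetic being verified is the same as C's classic container_of idiom, sketched here for intuition (a C illustration, not the compiler's implementation):

#include <stddef.h>

/* Recover the enclosing object from a pointer to one of its fields by
 * stepping back the field's offset within the parent type. */
#define container_of(ptr, Type, member) \
    ((Type*)((char*)(ptr) - offsetof(Type, member)))

struct node { int value; struct node* next; };
/* For any `struct node n`: container_of(&n.next, struct node, next) == &n. */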
new_val); + } } }, } @@ -26514,8 +26529,12 @@ fn storePtrVal( const abi_size = try sema.usizeCast(block, src, mut_kit.ty.abiSize(target)); const buffer = try sema.gpa.alloc(u8, abi_size); defer sema.gpa.free(buffer); - reinterpret.val_ptr.*.writeToMemory(mut_kit.ty, sema.mod, buffer); - operand_val.writeToMemory(operand_ty, sema.mod, buffer[reinterpret.byte_offset..]); + reinterpret.val_ptr.*.writeToMemory(mut_kit.ty, sema.mod, buffer) catch |err| switch (err) { + error.ReinterpretDeclRef => unreachable, + }; + operand_val.writeToMemory(operand_ty, sema.mod, buffer[reinterpret.byte_offset..]) catch |err| switch (err) { + error.ReinterpretDeclRef => unreachable, + }; const arena = mut_kit.beginArena(sema.mod); defer mut_kit.finishArena(sema.mod); @@ -27398,6 +27417,7 @@ fn bitCast( dest_ty_unresolved: Type, inst: Air.Inst.Ref, inst_src: LazySrcLoc, + operand_src: ?LazySrcLoc, ) CompileError!Air.Inst.Ref { const dest_ty = try sema.resolveTypeFields(dest_ty_unresolved); try sema.resolveTypeLayout(dest_ty); @@ -27419,10 +27439,11 @@ fn bitCast( } if (try sema.resolveMaybeUndefVal(inst)) |val| { - const result_val = try sema.bitCastVal(block, inst_src, val, old_ty, dest_ty, 0); - return sema.addConstant(dest_ty, result_val); + if (try sema.bitCastVal(block, inst_src, val, old_ty, dest_ty, 0)) |result_val| { + return sema.addConstant(dest_ty, result_val); + } } - try sema.requireRuntimeBlock(block, inst_src, null); + try sema.requireRuntimeBlock(block, inst_src, operand_src); return block.addBitCast(dest_ty, inst); } @@ -27434,7 +27455,7 @@ fn bitCastVal( old_ty: Type, new_ty: Type, buffer_offset: usize, -) !Value { +) !?Value { const target = sema.mod.getTarget(); if (old_ty.eql(new_ty, sema.mod)) return val; @@ -27443,8 +27464,10 @@ fn bitCastVal( const abi_size = try sema.usizeCast(block, src, old_ty.abiSize(target)); const buffer = try sema.gpa.alloc(u8, abi_size); defer sema.gpa.free(buffer); - val.writeToMemory(old_ty, sema.mod, buffer); - return Value.readFromMemory(new_ty, sema.mod, buffer[buffer_offset..], sema.arena); + val.writeToMemory(old_ty, sema.mod, buffer) catch |err| switch (err) { + error.ReinterpretDeclRef => return null, + }; + return try Value.readFromMemory(new_ty, sema.mod, buffer[buffer_offset..], sema.arena); } fn coerceArrayPtrToSlice( @@ -27551,7 +27574,7 @@ fn coerceCompatiblePtrs( } else is_non_zero; try sema.addSafetyCheck(block, ok, .cast_to_null); } - return sema.bitCast(block, dest_ty, inst, inst_src); + return sema.bitCast(block, dest_ty, inst, inst_src, null); } fn coerceEnumToUnion( @@ -28291,7 +28314,7 @@ fn analyzeRef( try sema.storePtr(block, src, alloc, operand); // TODO: Replace with sema.coerce when that supports adding pointer constness. 
- return sema.bitCast(block, ptr_type, alloc, src); + return sema.bitCast(block, ptr_type, alloc, src, null); } fn analyzeLoad( @@ -32327,11 +32350,11 @@ fn pointerDerefExtra(sema: *Sema, block: *Block, src: LazySrcLoc, ptr_val: Value // Try the smaller bit-cast first, since that's more efficient than using the larger `parent` if (deref.pointee) |tv| if (load_sz <= try sema.typeAbiSize(tv.ty)) - return DerefResult{ .val = try sema.bitCastVal(block, src, tv.val, tv.ty, load_ty, 0) }; + return DerefResult{ .val = (try sema.bitCastVal(block, src, tv.val, tv.ty, load_ty, 0)) orelse return .runtime_load }; // If that fails, try to bit-cast from the largest parent value with a well-defined layout if (deref.parent) |parent| if (load_sz + parent.byte_offset <= try sema.typeAbiSize(parent.tv.ty)) - return DerefResult{ .val = try sema.bitCastVal(block, src, parent.tv.val, parent.tv.ty, load_ty, parent.byte_offset) }; + return DerefResult{ .val = (try sema.bitCastVal(block, src, parent.tv.val, parent.tv.ty, load_ty, parent.byte_offset)) orelse return .runtime_load }; if (deref.ty_without_well_defined_layout) |bad_ty| { // We got no parent for bit-casting, or the parent we got was too small. Either way, the problem diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 5b0db30757..e7fef20a4f 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -3958,7 +3958,9 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type switch (value) { .dead => unreachable, - .undef => unreachable, + .undef => { + try self.genSetReg(value_ty, addr_reg, value); + }, .register => |value_reg| { log.debug("store: register {} to {}", .{ value_reg, addr_reg }); try self.genStrRegister(value_reg, addr_reg, value_ty); @@ -5870,7 +5872,22 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const result = try self.resolveInst(ty_op.operand); + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + if (self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + + const operand_lock = switch (operand) { + .register => |reg| self.register_manager.lockReg(reg), + .register_with_overflow => |rwo| self.register_manager.lockReg(rwo.reg), + else => null, + }; + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + + const dest_ty = self.air.typeOfIndex(inst); + const dest = try self.allocRegOrMem(dest_ty, true, inst); + try self.setRegOrMem(dest_ty, dest, operand); + break :result dest; + }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 0fbf1ee984..01a1d6b7eb 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -2765,7 +2765,9 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type switch (value) { .dead => unreachable, - .undef => unreachable, + .undef => { + try self.genSetReg(value_ty, addr_reg, value); + }, .register => |value_reg| { try self.genStrRegister(value_reg, addr_reg, value_ty); }, @@ -2971,6 +2973,11 @@ fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const field_ptr = try self.resolveInst(extra.field_ptr); const struct_ty = self.air.getRefType(ty_pl.ty).childType(); + + if 
(struct_ty.zigTypeTag() == .Union) { + return self.fail("TODO implement @fieldParentPtr codegen for unions", .{}); + } + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(extra.field_index, self.target.*)); switch (field_ptr) { .ptr_stack_offset => |off| { @@ -5816,7 +5823,24 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const result = try self.resolveInst(ty_op.operand); + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + if (self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + + const operand_lock = switch (operand) { + .register, + .register_c_flag, + .register_v_flag, + => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + + const dest_ty = self.air.typeOfIndex(inst); + const dest = try self.allocRegOrMem(dest_ty, true, inst); + try self.setRegOrMem(dest_ty, dest, operand); + break :result dest; + }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index b97ac727c1..afcf4b0bb7 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -2338,7 +2338,20 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const result = try self.resolveInst(ty_op.operand); + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + if (self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + + const operand_lock = switch (operand) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + + const dest = try self.allocRegOrMem(inst, true); + try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand); + break :result dest; + }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 8344b6e0cc..c8f77fe702 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -1091,7 +1091,21 @@ fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const result = try self.resolveInst(ty_op.operand); + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + if (self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + + const operand_lock = switch (operand) { + .register => |reg| self.register_manager.lockReg(reg), + .register_with_overflow => |rwo| self.register_manager.lockReg(rwo.reg), + else => null, + }; + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + + const dest = try self.allocRegOrMem(inst, true); + try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand); + break :result dest; + }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 53dc28626c..2f191fd834 100644 --- 
a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2896,7 +2896,7 @@ fn lowerConstant(func: *CodeGen, arg_val: Value, ty: Type) InnerError!WValue { const struct_obj = ty.castTag(.@"struct").?.data; assert(struct_obj.layout == .Packed); var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer - val.writeToPackedMemory(ty, func.bin_file.base.options.module.?, &buf, 0); + val.writeToPackedMemory(ty, func.bin_file.base.options.module.?, &buf, 0) catch unreachable; var payload: Value.Payload.U64 = .{ .base = .{ .tag = .int_u64 }, .data = std.mem.readIntLittle(u64, &buf), @@ -2907,7 +2907,7 @@ fn lowerConstant(func: *CodeGen, arg_val: Value, ty: Type) InnerError!WValue { .Vector => { assert(determineSimdStoreStrategy(ty, target) == .direct); var buf: [16]u8 = undefined; - val.writeToMemory(ty, func.bin_file.base.options.module.?, &buf); + val.writeToMemory(ty, func.bin_file.base.options.module.?, &buf) catch unreachable; return func.storeSimdImmd(buf); }, else => |zig_type| return func.fail("Wasm TODO: LowerConstant for zigTypeTag {}", .{zig_type}), @@ -4944,8 +4944,8 @@ fn airFieldParentPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { if (func.liveness.isUnused(inst)) return func.finishAir(inst, .none, &.{extra.field_ptr}); const field_ptr = try func.resolveInst(extra.field_ptr); - const struct_ty = func.air.getRefType(ty_pl.ty).childType(); - const field_offset = struct_ty.structFieldOffset(extra.field_index, func.target); + const parent_ty = func.air.getRefType(ty_pl.ty).childType(); + const field_offset = parent_ty.structFieldOffset(extra.field_index, func.target); const result = if (field_offset != 0) result: { const base = try func.buildPointerOffset(field_ptr, 0, .new); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f63d80486e..20e443b83c 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -920,9 +920,6 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { const mod = self.bin_file.options.module.?; return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; - const abi_align = elem_ty.abiAlignment(self.target.*); - if (abi_align > self.stack_align) - self.stack_align = abi_align; if (reg_ok) { switch (elem_ty.zigTypeTag()) { @@ -951,6 +948,10 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { }, } } + + const abi_align = elem_ty.abiAlignment(self.target.*); + if (abi_align > self.stack_align) + self.stack_align = abi_align; const stack_offset = try self.allocMem(inst, abi_size, abi_align); return MCValue{ .stack_offset = @intCast(i32, stack_offset) }; } @@ -990,7 +991,7 @@ fn revertState(self: *Self, state: State) void { pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void { const stack_mcv = try self.allocRegOrMem(inst, false); - log.debug("spilling {d} to stack mcv {any}", .{ inst, stack_mcv }); + log.debug("spilling %{d} to stack mcv {any}", .{ inst, stack_mcv }); const reg_mcv = self.getResolvedInstValue(inst); switch (reg_mcv) { .register => |other| { @@ -1016,7 +1017,7 @@ pub fn spillEflagsIfOccupied(self: *Self) !void { }; try self.setRegOrMem(self.air.typeOfIndex(inst_to_save), new_mcv, mcv); - log.debug("spilling {d} to mcv {any}", .{ inst_to_save, new_mcv }); + log.debug("spilling %{d} to mcv {any}", .{ inst_to_save, new_mcv }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.inst_table.put(self.gpa, inst_to_save, new_mcv); @@ 
-2114,6 +2115,7 @@ fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { }; break :result dst_mcv; }; + log.debug("airSliceLen(%{d}): {}", .{ inst, result }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -2641,6 +2643,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo fn airLoad(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const elem_ty = self.air.typeOfIndex(inst); + const elem_size = elem_ty.abiSize(self.target.*); const result: MCValue = result: { if (!elem_ty.hasRuntimeBitsIgnoreComptime()) break :result MCValue.none; @@ -2651,13 +2654,14 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { break :result MCValue.dead; const dst_mcv: MCValue = blk: { - if (self.reuseOperand(inst, ty_op.operand, 0, ptr)) { + if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr)) { // The MCValue that holds the pointer can be re-used as the value. break :blk ptr; } else { break :blk try self.allocRegOrMem(inst, true); } }; + log.debug("airLoad(%{d}): {} <- {}", .{ inst, dst_mcv, ptr }); try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand)); break :result dst_mcv; }; @@ -2728,10 +2732,12 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type switch (value) { .none => unreachable, - .undef => unreachable, .dead => unreachable, .unreach => unreachable, .eflags => unreachable, + .undef => { + try self.genSetReg(value_ty, reg, value); + }, .immediate => |imm| { switch (abi_size) { 1, 2, 4 => { @@ -2773,6 +2779,30 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .register => |src_reg| { try self.genInlineMemcpyRegisterRegister(value_ty, reg, src_reg, 0); }, + .register_overflow => |ro| { + const ro_reg_lock = self.register_manager.lockReg(ro.reg); + defer if (ro_reg_lock) |lock| self.register_manager.unlockReg(lock); + + const wrapped_ty = value_ty.structFieldType(0); + try self.genInlineMemcpyRegisterRegister(wrapped_ty, reg, ro.reg, 0); + + const overflow_bit_ty = value_ty.structFieldType(1); + const overflow_bit_offset = value_ty.structFieldOffset(1, self.target.*); + const tmp_reg = try self.register_manager.allocReg(null, gp); + _ = try self.addInst(.{ + .tag = .cond_set_byte, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = tmp_reg.to8(), + }), + .data = .{ .cc = ro.eflags }, + }); + try self.genInlineMemcpyRegisterRegister( + overflow_bit_ty, + reg, + tmp_reg, + -@intCast(i32, overflow_bit_offset), + ); + }, .linker_load, .memory, .stack_offset, @@ -2787,8 +2817,9 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .dest_stack_base = reg.to64(), }); }, - else => |other| { - return self.fail("TODO implement set pointee with {}", .{other}); + .ptr_stack_offset => { + const tmp_reg = try self.copyToTmpRegister(value_ty, value); + return self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); }, } }, @@ -2902,6 +2933,7 @@ fn airStore(self: *Self, inst: Air.Inst.Index) !void { const ptr_ty = self.air.typeOf(bin_op.lhs); const value = try self.resolveInst(bin_op.rhs); const value_ty = self.air.typeOf(bin_op.rhs); + log.debug("airStore(%{d}): {} <- {}", .{ inst, ptr, value }); try self.store(ptr, value, ptr_ty, value_ty); return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -6321,7 +6353,22 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const ty_op = 
self.air.instructions.items(.data)[inst].ty_op; - const result = try self.resolveInst(ty_op.operand); + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + if (self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + + const operand_lock = switch (operand) { + .register => |reg| self.register_manager.lockReg(reg), + .register_overflow => |ro| self.register_manager.lockReg(ro.reg), + else => null, + }; + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + + const dest = try self.allocRegOrMem(inst, true); + try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand); + break :result dest; + }; + log.debug("airBitCast(%{d}): {}", .{ inst, result }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } diff --git a/src/codegen.zig b/src/codegen.zig index 9eea1c667d..df7ceff1f0 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -527,7 +527,7 @@ pub fn generateSymbol( .fail => |em| return Result{ .fail = em }, } } else { - field_val.writeToPackedMemory(field_ty, mod, code.items[current_pos..], bits); + field_val.writeToPackedMemory(field_ty, mod, code.items[current_pos..], bits) catch unreachable; } bits += @intCast(u16, field_ty.bitSize(target)); } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 0beb00b236..cf428d4bd6 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -23,12 +23,14 @@ const libcFloatSuffix = target_util.libcFloatSuffix; const compilerRtFloatAbbrev = target_util.compilerRtFloatAbbrev; const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev; -const Mutability = enum { Const, ConstArgument, Mut }; const BigIntLimb = std.math.big.Limb; const BigInt = std.math.big.int; +pub const CType = @import("c/type.zig").CType; + pub const CValue = union(enum) { none: void, + new_local: LocalIndex, local: LocalIndex, /// Address of a local. local_ref: LocalIndex, @@ -36,6 +38,8 @@ pub const CValue = union(enum) { constant: Air.Inst.Ref, /// Index into the parameters arg: usize, + /// The array field of a parameter + arg_array: usize, /// Index into a tuple's fields field: usize, /// By-value @@ -45,6 +49,8 @@ pub const CValue = union(enum) { undef: Type, /// Render the slice as an identifier (using fmtIdent) identifier: []const u8, + /// Render the slice as a payload.identifier (using fmtIdent) + payload_identifier: []const u8, /// Render these bytes literally. /// TODO make this a [*:0]const u8 to save memory bytes: []const u8, @@ -55,37 +61,43 @@ const BlockData = struct { result: CValue, }; -const TypedefKind = enum { - Forward, - Complete, -}; - pub const CValueMap = std.AutoHashMap(Air.Inst.Ref, CValue); -pub const TypedefMap = std.ArrayHashMap( - Type, - struct { name: []const u8, rendered: []u8 }, - Type.HashContext32, - true, -); + +pub const LazyFnKey = union(enum) { + tag_name: Decl.Index, + never_tail: Decl.Index, + never_inline: Decl.Index, +}; +pub const LazyFnValue = struct { + fn_name: []const u8, + data: Data, + + pub const Data = union { + tag_name: Type, + never_tail: void, + never_inline: void, + }; +}; +pub const LazyFnMap = std.AutoArrayHashMapUnmanaged(LazyFnKey, LazyFnValue); const LoopDepth = u16; const Local = struct { - ty: Type, - alignment: u32, + cty_idx: CType.Index, /// How many loops the last definition was nested in. 
loop_depth: LoopDepth, + alignas: CType.AlignAs, + + pub fn getType(local: Local) LocalType { + return .{ .cty_idx = local.cty_idx, .alignas = local.alignas }; + } }; const LocalIndex = u16; -const LocalsList = std.ArrayListUnmanaged(LocalIndex); -const LocalsMap = std.ArrayHashMapUnmanaged(Type, LocalsList, Type.HashContext32, true); +const LocalType = struct { cty_idx: CType.Index, alignas: CType.AlignAs }; +const LocalsList = std.AutoArrayHashMapUnmanaged(LocalIndex, void); +const LocalsMap = std.AutoArrayHashMapUnmanaged(LocalType, LocalsList); const LocalsStack = std.ArrayListUnmanaged(LocalsMap); -const FormatTypeAsCIdentContext = struct { - ty: Type, - mod: *Module, -}; - const ValueRenderLocation = enum { FunctionArgument, Initializer, @@ -106,26 +118,6 @@ const BuiltinInfo = enum { Bits, }; -fn formatTypeAsCIdentifier( - data: FormatTypeAsCIdentContext, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, -) !void { - var stack = std.heap.stackFallback(128, data.mod.gpa); - const allocator = stack.get(); - const str = std.fmt.allocPrint(allocator, "{}", .{data.ty.fmt(data.mod)}) catch ""; - defer allocator.free(str); - return formatIdent(str, fmt, options, writer); -} - -pub fn typeToCIdentifier(ty: Type, mod: *Module) std.fmt.Formatter(formatTypeAsCIdentifier) { - return .{ .data = .{ - .ty = ty, - .mod = mod, - } }; -} - const reserved_idents = std.ComptimeStringMap(void, .{ // C language .{ "alignas", {} }, @@ -224,6 +216,15 @@ const reserved_idents = std.ComptimeStringMap(void, .{ .{ "volatile", {} }, .{ "while", {} }, + // stdarg.h + .{ "va_start", {} }, + .{ "va_arg", {} }, + .{ "va_end", {} }, + .{ "va_copy", {} }, + + // stddef.h + .{ "offsetof", {} }, + // windows.h .{ "max", {} }, .{ "min", {} }, @@ -281,6 +282,7 @@ pub const Function = struct { next_arg_index: usize = 0, next_block_index: usize = 0, object: Object, + lazy_fns: LazyFnMap, func: *Module.Fn, /// All the locals, to be emitted at the top of the function. locals: std.ArrayListUnmanaged(Local) = .{}, @@ -299,10 +301,6 @@ pub const Function = struct { /// Needed for memory used by the keys of free_locals_stack entries. arena: std.heap.ArenaAllocator, - fn tyHashCtx(f: Function) Type.HashContext32 { - return .{ .mod = f.object.dg.module }; - } - fn resolveInst(f: *Function, inst: Air.Inst.Ref) !CValue { const gop = try f.value_map.getOrPut(inst); if (gop.found_existing) return gop.value_ptr.*; @@ -310,19 +308,19 @@ pub const Function = struct { const val = f.air.value(inst).?; const ty = f.air.typeOf(inst); - const result = if (lowersToArray(ty, f.object.dg.module.getTarget())) result: { + const result: CValue = if (lowersToArray(ty, f.object.dg.module.getTarget())) result: { const writer = f.object.code_header.writer(); const alignment = 0; const decl_c_value = try f.allocLocalValue(ty, alignment); const gpa = f.object.dg.gpa; - try f.allocs.put(gpa, decl_c_value.local, true); + try f.allocs.put(gpa, decl_c_value.new_local, true); try writer.writeAll("static "); - try f.object.dg.renderTypeAndName(writer, ty, decl_c_value, .Const, alignment, .Complete); + try f.object.dg.renderTypeAndName(writer, ty, decl_c_value, Const, alignment, .complete); try writer.writeAll(" = "); try f.object.dg.renderValue(writer, ty, val, .StaticInitializer); try writer.writeAll(";\n "); break :result decl_c_value; - } else CValue{ .constant = inst }; + } else .{ .constant = inst }; gop.value_ptr.* = result; return result; @@ -342,32 +340,32 @@ pub const Function = struct { /// Skips the reuse logic. 
fn allocLocalValue(f: *Function, ty: Type, alignment: u32) !CValue { const gpa = f.object.dg.gpa; + const target = f.object.dg.module.getTarget(); try f.locals.append(gpa, .{ - .ty = ty, - .alignment = alignment, + .cty_idx = try f.typeToIndex(ty, .complete), .loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1), + .alignas = CType.AlignAs.init(alignment, ty.abiAlignment(target)), }); - return CValue{ .local = @intCast(LocalIndex, f.locals.items.len - 1) }; + return .{ .new_local = @intCast(LocalIndex, f.locals.items.len - 1) }; } fn allocLocal(f: *Function, inst: Air.Inst.Index, ty: Type) !CValue { - const result = try f.allocAlignedLocal(ty, .Mut, 0); - log.debug("%{d}: allocating t{d}", .{ inst, result.local }); + const result = try f.allocAlignedLocal(ty, .{}, 0); + log.debug("%{d}: allocating t{d}", .{ inst, result.new_local }); return result; } /// Only allocates the local; does not print anything. - fn allocAlignedLocal(f: *Function, ty: Type, mutability: Mutability, alignment: u32) !CValue { - _ = mutability; - - if (f.getFreeLocals().getPtrContext(ty, f.tyHashCtx())) |locals_list| { - for (locals_list.items, 0..) |local_index, i| { - const local = &f.locals.items[local_index]; - if (local.alignment >= alignment) { - local.loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1); - _ = locals_list.swapRemove(i); - return CValue{ .local = local_index }; - } + fn allocAlignedLocal(f: *Function, ty: Type, _: CQualifiers, alignment: u32) !CValue { + const target = f.object.dg.module.getTarget(); + if (f.getFreeLocals().getPtr(.{ + .cty_idx = try f.typeToIndex(ty, .complete), + .alignas = CType.AlignAs.init(alignment, ty.abiAlignment(target)), + })) |locals_list| { + if (locals_list.popOrNull()) |local_entry| { + const local = &f.locals.items[local_entry.key]; + local.loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1); + return .{ .new_local = local_entry.key }; } } @@ -430,12 +428,20 @@ pub const Function = struct { return f.object.dg.fail(format, args); } - fn renderType(f: *Function, w: anytype, t: Type) !void { - return f.object.dg.renderType(w, t, .Complete); + fn indexToCType(f: *Function, idx: CType.Index) CType { + return f.object.dg.indexToCType(idx); } - fn renderTypecast(f: *Function, w: anytype, t: Type) !void { - return f.object.dg.renderTypecast(w, t); + fn typeToIndex(f: *Function, ty: Type, kind: CType.Kind) !CType.Index { + return f.object.dg.typeToIndex(ty, kind); + } + + fn typeToCType(f: *Function, ty: Type, kind: CType.Kind) !CType { + return f.object.dg.typeToCType(ty, kind); + } + + fn renderType(f: *Function, w: anytype, t: Type) !void { + return f.object.dg.renderType(w, t); } fn renderIntCast(f: *Function, w: anytype, dest_ty: Type, src: CValue, src_ty: Type, location: ValueRenderLocation) !void { @@ -446,7 +452,39 @@ pub const Function = struct { return f.object.dg.fmtIntLiteral(ty, val); } - pub fn deinit(f: *Function, gpa: mem.Allocator) void { + fn getLazyFnName(f: *Function, key: LazyFnKey, data: LazyFnValue.Data) ![]const u8 { + const gpa = f.object.dg.gpa; + const gop = try f.lazy_fns.getOrPut(gpa, key); + if (!gop.found_existing) { + errdefer _ = f.lazy_fns.pop(); + + var promoted = f.object.dg.ctypes.promote(gpa); + defer f.object.dg.ctypes.demote(promoted); + const arena = promoted.arena.allocator(); + + gop.value_ptr.* = .{ + .fn_name = switch (key) { + .tag_name, + .never_tail, + .never_inline, + => |owner_decl| try std.fmt.allocPrint(arena, "zig_{s}_{}__{d}", .{ + @tagName(key), + 
fmtIdent(mem.span(f.object.dg.module.declPtr(owner_decl).name)), + @enumToInt(owner_decl), + }), + }, + .data = switch (key) { + .tag_name => .{ .tag_name = try data.tag_name.copy(arena) }, + .never_tail => .{ .never_tail = data.never_tail }, + .never_inline => .{ .never_inline = data.never_inline }, + }, + }; + } + return gop.value_ptr.fn_name; + } + + pub fn deinit(f: *Function) void { + const gpa = f.object.dg.gpa; f.allocs.deinit(gpa); f.locals.deinit(gpa); for (f.free_locals_stack.items) |*free_locals| { @@ -455,11 +493,9 @@ pub const Function = struct { f.free_locals_stack.deinit(gpa); f.blocks.deinit(gpa); f.value_map.deinit(); + f.lazy_fns.deinit(gpa); f.object.code.deinit(); - for (f.object.dg.typedefs.values()) |typedef| { - gpa.free(typedef.rendered); - } - f.object.dg.typedefs.deinit(); + f.object.dg.ctypes.deinit(gpa); f.object.dg.fwd_decl.deinit(); f.arena.deinit(); } @@ -483,30 +519,20 @@ pub const Object = struct { pub const DeclGen = struct { gpa: std.mem.Allocator, module: *Module, - decl: *Decl, - decl_index: Decl.Index, + decl: ?*Decl, + decl_index: Decl.OptionalIndex, fwd_decl: std.ArrayList(u8), error_msg: ?*Module.ErrorMsg, - /// The key of this map is Type which has references to typedefs_arena. - typedefs: TypedefMap, - typedefs_arena: std.mem.Allocator, + ctypes: CType.Store, fn fail(dg: *DeclGen, comptime format: []const u8, args: anytype) error{ AnalysisFail, OutOfMemory } { @setCold(true); const src = LazySrcLoc.nodeOffset(0); - const src_loc = src.toSrcLoc(dg.decl); + const src_loc = src.toSrcLoc(dg.decl.?); dg.error_msg = try Module.ErrorMsg.create(dg.gpa, src_loc, format, args); return error.AnalysisFail; } - fn getTypedefName(dg: *DeclGen, t: Type) ?[]const u8 { - if (dg.typedefs.get(t)) |typedef| { - return typedef.name; - } else { - return null; - } - } - fn renderDeclValue( dg: *DeclGen, writer: anytype, @@ -520,7 +546,7 @@ pub const DeclGen = struct { // Render an undefined pointer if we have a pointer to a zero-bit or comptime type. if (ty.isPtrAtRuntime() and !decl.ty.isFnOrHasRuntimeBits()) { - return dg.writeCValue(writer, CValue{ .undef = ty }); + return dg.writeCValue(writer, .{ .undef = ty }); } // Chase function values in order to be able to reference the original function. 
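A minimal sketch of the symbol scheme used by getLazyFnName above, assuming a hypothetical decl named "Color" with owner-decl index 42. Only the "zig_{s}_{}__{d}" format string comes from the patch; "{s}" below stands in for the "{}"-formatted fmtIdent wrapper, which passes plain ASCII names through unchanged:

    const std = @import("std");

    test "lazy fn symbol shape" {
        var buf: [64]u8 = undefined;
        const name = try std.fmt.bufPrint(&buf, "zig_{s}_{s}__{d}", .{
            "tag_name", // @tagName(key) for a .tag_name LazyFnKey
            "Color", // the owner decl's name, after fmtIdent escaping
            42, // @enumToInt(owner_decl)
        });
        try std.testing.expectEqualStrings("zig_tag_name_Color__42", name);
    }

The numeric suffix keeps lazily emitted helpers distinct even when two decls produce the same escaped identifier.
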
@@ -534,7 +560,7 @@ pub const DeclGen = struct { try writer.writeByte('{'); } else { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeAll("){ .ptr = "); } @@ -561,7 +587,7 @@ pub const DeclGen = struct { const need_typecast = if (ty.castPtrToFn()) |_| false else !ty.eql(decl.ty, dg.module); if (need_typecast) { try writer.writeAll("(("); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } try writer.writeByte('&'); @@ -576,7 +602,7 @@ pub const DeclGen = struct { fn renderParentPtr(dg: *DeclGen, writer: anytype, ptr_val: Value, ptr_ty: Type, location: ValueRenderLocation) error{ OutOfMemory, AnalysisFail }!void { if (!ptr_ty.isSlice()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ptr_ty); + try dg.renderType(writer, ptr_ty); try writer.writeByte(')'); } switch (ptr_val.tag()) { @@ -591,90 +617,71 @@ pub const DeclGen = struct { try dg.renderDeclValue(writer, ptr_ty, ptr_val, decl_index, location); }, .field_ptr => { - const ptr_info = ptr_ty.ptrInfo(); + const target = dg.module.getTarget(); const field_ptr = ptr_val.castTag(.field_ptr).?.data; - const container_ty = field_ptr.container_ty; - const index = field_ptr.field_index; - var container_ptr_ty_pl: Type.Payload.ElemType = .{ - .base = .{ .tag = .c_mut_pointer }, - .data = field_ptr.container_ty, - }; - const container_ptr_ty = Type.initPayload(&container_ptr_ty_pl.base); + // Ensure complete type definition is visible before accessing fields. + _ = try dg.typeToIndex(field_ptr.container_ty, .complete); - const FieldInfo = struct { name: []const u8, ty: Type }; - const field_info: FieldInfo = switch (container_ty.zigTypeTag()) { - .Struct => switch (container_ty.containerLayout()) { - .Auto, .Extern => FieldInfo{ - .name = container_ty.structFields().keys()[index], - .ty = container_ty.structFields().values()[index].ty, - }, - .Packed => if (ptr_info.data.host_size == 0) { - const target = dg.module.getTarget(); + var container_ptr_pl = ptr_ty.ptrInfo(); + container_ptr_pl.data.pointee_type = field_ptr.container_ty; + const container_ptr_ty = Type.initPayload(&container_ptr_pl.base); - const byte_offset = container_ty.packedStructFieldByteOffset(index, target); - var byte_offset_pl = Value.Payload.U64{ - .base = .{ .tag = .int_u64 }, - .data = byte_offset, - }; - const byte_offset_val = Value.initPayload(&byte_offset_pl.base); - - var u8_ptr_pl = ptr_info; - u8_ptr_pl.data.pointee_type = Type.u8; - const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base); - - try writer.writeAll("&(("); - try dg.renderTypecast(writer, u8_ptr_ty); - try writer.writeByte(')'); - try dg.renderParentPtr(writer, field_ptr.container_ptr, container_ptr_ty, location); - return writer.print(")[{}]", .{try dg.fmtIntLiteral(Type.usize, byte_offset_val)}); - } else { - var host_pl = Type.Payload.Bits{ - .base = .{ .tag = .int_unsigned }, - .data = ptr_info.data.host_size * 8, - }; - const host_ty = Type.initPayload(&host_pl.base); - - try writer.writeByte('('); - try dg.renderTypecast(writer, ptr_ty); - try writer.writeByte(')'); - return dg.renderParentPtr(writer, field_ptr.container_ptr, host_ty, location); - }, + switch (fieldLocation( + field_ptr.container_ty, + ptr_ty, + @intCast(u32, field_ptr.field_index), + target, + )) { + .begin => try dg.renderParentPtr( + writer, + field_ptr.container_ptr, + container_ptr_ty, + location, + ), + .field => |field| { + try writer.writeAll("&("); + try dg.renderParentPtr( + writer, + 
field_ptr.container_ptr, + container_ptr_ty, + location, + ); + try writer.writeAll(")->"); + try dg.writeCValue(writer, field); }, - .Union => switch (container_ty.containerLayout()) { - .Auto, .Extern => FieldInfo{ - .name = container_ty.unionFields().keys()[index], - .ty = container_ty.unionFields().values()[index].ty, - }, - .Packed => { - return dg.renderParentPtr(writer, field_ptr.container_ptr, ptr_ty, location); - }, - }, - .Pointer => field_info: { - assert(container_ty.isSlice()); - break :field_info switch (index) { - 0 => FieldInfo{ .name = "ptr", .ty = container_ty.childType() }, - 1 => FieldInfo{ .name = "len", .ty = Type.usize }, - else => unreachable, + .byte_offset => |byte_offset| { + var u8_ptr_pl = ptr_ty.ptrInfo(); + u8_ptr_pl.data.pointee_type = Type.u8; + const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base); + + var byte_offset_pl = Value.Payload.U64{ + .base = .{ .tag = .int_u64 }, + .data = byte_offset, }; + const byte_offset_val = Value.initPayload(&byte_offset_pl.base); + + try writer.writeAll("(("); + try dg.renderType(writer, u8_ptr_ty); + try writer.writeByte(')'); + try dg.renderParentPtr( + writer, + field_ptr.container_ptr, + container_ptr_ty, + location, + ); + try writer.print(" + {})", .{try dg.fmtIntLiteral(Type.usize, byte_offset_val)}); + }, + .end => { + try writer.writeAll("(("); + try dg.renderParentPtr( + writer, + field_ptr.container_ptr, + container_ptr_ty, + location, + ); + try writer.print(") + {})", .{try dg.fmtIntLiteral(Type.usize, Value.one)}); }, - else => unreachable, - }; - - if (field_info.ty.hasRuntimeBitsIgnoreComptime()) { - // Ensure complete type definition is visible before accessing fields. - try dg.renderType(std.io.null_writer, field_ptr.container_ty, .Complete); - - try writer.writeAll("&("); - try dg.renderParentPtr(writer, field_ptr.container_ptr, container_ptr_ty, location); - try writer.writeAll(")->"); - switch (field_ptr.container_ty.tag()) { - .union_tagged, .union_safety_tagged => try writer.writeAll("payload."), - else => {}, - } - try writer.print("{ }", .{fmtIdent(field_info.name)}); - } else { - try dg.renderParentPtr(writer, field_ptr.container_ptr, container_ptr_ty, location); } }, .elem_ptr => { @@ -698,7 +705,7 @@ pub const DeclGen = struct { const container_ptr_ty = Type.initPayload(&container_ptr_ty_pl.base); // Ensure complete type definition is visible before accessing fields. 
- try dg.renderType(std.io.null_writer, payload_ptr.container_ty, .Complete); + _ = try dg.typeToIndex(payload_ptr.container_ty, .complete); try writer.writeAll("&("); try dg.renderParentPtr(writer, payload_ptr.container_ptr, container_ptr_ty, location); @@ -747,7 +754,7 @@ pub const DeclGen = struct { try writer.writeAll("zig_cast_"); try dg.renderTypeForBuiltinFnName(writer, ty); - try writer.writeAll(" zig_as_"); + try writer.writeAll(" zig_make_"); try dg.renderTypeForBuiltinFnName(writer, ty); try writer.writeByte('('); switch (bits) { @@ -765,18 +772,18 @@ pub const DeclGen = struct { .Pointer => if (ty.isSlice()) { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } try writer.writeAll("{("); var buf: Type.SlicePtrFieldTypeBuffer = undefined; const ptr_ty = ty.slicePtrFieldType(&buf); - try dg.renderTypecast(writer, ptr_ty); + try dg.renderType(writer, ptr_ty); return writer.print("){x}, {0x}}}", .{try dg.fmtIntLiteral(Type.usize, val)}); } else { try writer.writeAll("(("); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); return writer.print("){x})", .{try dg.fmtIntLiteral(Type.usize, val)}); }, .Optional => { @@ -793,7 +800,7 @@ pub const DeclGen = struct { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -807,7 +814,7 @@ pub const DeclGen = struct { .Auto, .Extern => { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -821,7 +828,7 @@ pub const DeclGen = struct { empty = false; } - if (empty) try writer.print("{x}", .{try dg.fmtIntLiteral(Type.u8, Value.undef)}); + return writer.writeByte('}'); }, .Packed => return writer.print("{x}", .{try dg.fmtIntLiteral(ty, Value.undef)}), @@ -829,7 +836,7 @@ pub const DeclGen = struct { .Union => { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -854,7 +861,7 @@ pub const DeclGen = struct { .ErrorUnion => { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -867,7 +874,7 @@ pub const DeclGen = struct { .Array, .Vector => { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -876,14 +883,14 @@ pub const DeclGen = struct { var literal = stringLiteral(writer); try literal.start(); const c_len = ty.arrayLenIncludingSentinel(); - var index: usize = 0; + var index: u64 = 0; while (index < c_len) : (index += 1) try literal.writeChar(0xaa); return literal.end(); } else { try writer.writeByte('{'); const c_len = ty.arrayLenIncludingSentinel(); - var index: usize = 0; + var index: u64 = 0; while (index < c_len) : (index += 1) { if (index > 0) try writer.writeAll(", "); try dg.renderValue(writer, ty.childType(), val, initializer_type); @@ -957,7 +964,7 @@ pub const DeclGen = struct { try writer.writeByte(' '); var empty = true; if (std.math.isFinite(f128_val)) { - try writer.writeAll("zig_as_"); + try writer.writeAll("zig_make_"); try dg.renderTypeForBuiltinFnName(writer, ty); try writer.writeByte('('); switch (bits) { @@ -992,7 +999,7 @@ pub const DeclGen = struct { // return 
dg.fail("Only quiet nans are supported in global variable initializers", .{}); } - try writer.writeAll("zig_as_special_"); + try writer.writeAll("zig_make_special_"); if (location == .StaticInitializer) try writer.writeAll("constant_"); try dg.renderTypeForBuiltinFnName(writer, ty); try writer.writeByte('('); @@ -1028,7 +1035,7 @@ pub const DeclGen = struct { return dg.renderValue(writer, ty, slice_val, location); } else { try writer.writeAll("(("); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeAll(")NULL)"); }, .variable => { @@ -1038,7 +1045,7 @@ pub const DeclGen = struct { .slice => { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -1061,7 +1068,7 @@ pub const DeclGen = struct { }, .int_u64, .one => { try writer.writeAll("(("); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); return writer.print("){x})", .{try dg.fmtIntLiteral(Type.usize, val)}); }, .field_ptr, @@ -1076,15 +1083,15 @@ pub const DeclGen = struct { .Array, .Vector => { if (location == .FunctionArgument) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } // First try specific tag representations for more efficiency. switch (val.tag()) { .undef, .empty_struct_value, .empty_array => { - try writer.writeByte('{'); const ai = ty.arrayInfo(); + try writer.writeByte('{'); if (ai.sentinel) |s| { try dg.renderValue(writer, ai.elem_type, s, initializer_type); } else { @@ -1092,13 +1099,19 @@ pub const DeclGen = struct { } try writer.writeByte('}'); }, - .bytes => { - try writer.print("{s}", .{fmtStringLiteral(val.castTag(.bytes).?.data)}); - }, - .str_lit => { - const str_lit = val.castTag(.str_lit).?.data; - const bytes = dg.module.string_literal_bytes.items[str_lit.index..][0..str_lit.len]; - try writer.print("{s}", .{fmtStringLiteral(bytes)}); + .bytes, .str_lit => |t| { + const bytes = switch (t) { + .bytes => val.castTag(.bytes).?.data, + .str_lit => bytes: { + const str_lit = val.castTag(.str_lit).?.data; + break :bytes dg.module.string_literal_bytes.items[str_lit.index..][0..str_lit.len]; + }, + else => unreachable, + }; + const sentinel = if (ty.sentinel()) |sentinel| @intCast(u8, sentinel.toUnsignedInt(target)) else null; + try writer.print("{s}", .{ + fmtStringLiteral(bytes[0..@intCast(usize, ty.arrayLen())], sentinel), + }); }, else => { // Fall back to generic implementation. 
@@ -1122,7 +1135,7 @@ pub const DeclGen = struct { } if (ai.sentinel) |s| { const s_u8 = @intCast(u8, s.toUnsignedInt(target)); - try literal.writeChar(s_u8); + if (s_u8 != 0) try literal.writeChar(s_u8); } try literal.end(); } else { @@ -1179,7 +1192,7 @@ pub const DeclGen = struct { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -1213,7 +1226,7 @@ pub const DeclGen = struct { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -1277,7 +1290,7 @@ pub const DeclGen = struct { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -1292,7 +1305,6 @@ pub const DeclGen = struct { empty = false; } - if (empty) try writer.print("{}", .{try dg.fmtIntLiteral(Type.u8, Value.zero)}); try writer.writeByte('}'); }, .Packed => { @@ -1309,7 +1321,7 @@ pub const DeclGen = struct { const bit_offset_val = Value.initPayload(&bit_offset_val_pl.base); var eff_num_fields: usize = 0; - for (field_vals, 0..) |_, index| { + for (0..field_vals.len) |index| { const field_ty = ty.structFieldType(index); if (!field_ty.hasRuntimeBitsIgnoreComptime()) continue; @@ -1365,7 +1377,7 @@ pub const DeclGen = struct { if (!empty) try writer.writeAll(" | "); try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); if (bit_offset_val_pl.data != 0) { @@ -1388,7 +1400,7 @@ pub const DeclGen = struct { if (!location.isInitializer()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } @@ -1399,11 +1411,11 @@ pub const DeclGen = struct { if (field_ty.hasRuntimeBits()) { if (field_ty.isPtrAtRuntime()) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } else if (field_ty.zigTypeTag() == .Float) { try writer.writeByte('('); - try dg.renderTypecast(writer, ty); + try dg.renderType(writer, ty); try writer.writeByte(')'); } try dg.renderValue(writer, field_ty, union_obj.val, initializer_type); @@ -1413,6 +1425,7 @@ pub const DeclGen = struct { return; } + var has_payload_init = false; try writer.writeByte('{'); if (ty.unionTagTypeSafety()) |tag_ty| { const layout = ty.unionGetLayout(target); @@ -1421,7 +1434,10 @@ pub const DeclGen = struct { try dg.renderValue(writer, tag_ty, union_obj.tag, initializer_type); try writer.writeAll(", "); } - try writer.writeAll(".payload = {"); + if (!ty.unionHasAllZeroBitFieldTypes()) { + try writer.writeAll(".payload = {"); + has_payload_init = true; + } } var it = ty.unionFields().iterator(); @@ -1433,8 +1449,8 @@ pub const DeclGen = struct { try writer.print(".{ } = ", .{fmtIdent(field.key_ptr.*)}); try dg.renderValue(writer, field.value_ptr.ty, Value.undef, initializer_type); break; - } else try writer.writeAll(".empty_union = 0"); - if (ty.unionTagTypeSafety()) |_| try writer.writeByte('}'); + } + if (has_payload_init) try writer.writeByte('}'); try writer.writeByte('}'); }, @@ -1456,497 +1472,93 @@ pub const DeclGen = struct { } } - fn renderFunctionSignature(dg: *DeclGen, w: anytype, kind: TypedefKind, export_index: u32) !void { - const fn_info = dg.decl.ty.fnInfo(); + fn renderFunctionSignature( + dg: *DeclGen, + w: anytype, + fn_decl_index: Decl.Index, + 
kind: CType.Kind, + name: union(enum) { + export_index: u32, + string: []const u8, + }, + ) !void { + const store = &dg.ctypes.set; + const module = dg.module; + + const fn_decl = module.declPtr(fn_decl_index); + const fn_cty_idx = try dg.typeToIndex(fn_decl.ty, kind); + + const fn_info = fn_decl.ty.fnInfo(); if (fn_info.cc == .Naked) { switch (kind) { - .Forward => try w.writeAll("zig_naked_decl "), - .Complete => try w.writeAll("zig_naked "), + .forward => try w.writeAll("zig_naked_decl "), + .complete => try w.writeAll("zig_naked "), + else => unreachable, } } - if (dg.decl.val.castTag(.function)) |func_payload| + if (fn_decl.val.castTag(.function)) |func_payload| if (func_payload.data.is_cold) try w.writeAll("zig_cold "); + if (fn_info.return_type.tag() == .noreturn) try w.writeAll("zig_noreturn "); - const target = dg.module.getTarget(); - var ret_buf: LowerFnRetTyBuffer = undefined; - const ret_ty = lowerFnRetTy(fn_info.return_type, &ret_buf, target); - - try dg.renderType(w, ret_ty, kind); - try w.writeByte(' '); + const trailing = try renderTypePrefix( + dg.decl_index, + store.*, + module, + w, + fn_cty_idx, + .suffix, + .{}, + ); + try w.print("{}", .{trailing}); if (toCallingConvention(fn_info.cc)) |call_conv| { try w.print("zig_callconv({s}) ", .{call_conv}); } - if (fn_info.alignment > 0 and kind == .Complete) try w.print(" zig_align_fn({})", .{fn_info.alignment}); - - try dg.renderDeclName(w, dg.decl_index, export_index); - try w.writeByte('('); - - var index: usize = 0; - for (fn_info.param_types) |param_type| { - if (!param_type.hasRuntimeBitsIgnoreComptime()) continue; - if (index > 0) try w.writeAll(", "); - const name = CValue{ .arg = index }; - try dg.renderTypeAndName(w, param_type, name, .ConstArgument, 0, kind); - index += 1; - } - - if (fn_info.is_var_args) { - if (index > 0) try w.writeAll(", "); - try w.writeAll("..."); - } else if (index == 0) { - try dg.renderType(w, Type.void, kind); - } - try w.writeByte(')'); - if (fn_info.alignment > 0 and kind == .Forward) try w.print(" zig_align_fn({})", .{fn_info.alignment}); - } - - fn renderPtrToFnTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - const bw = buffer.writer(); - - const fn_info = t.fnInfo(); - - const target = dg.module.getTarget(); - var ret_buf: LowerFnRetTyBuffer = undefined; - const ret_ty = lowerFnRetTy(fn_info.return_type, &ret_buf, target); - - try bw.writeAll("typedef "); - try dg.renderType(bw, ret_ty, .Forward); - try bw.writeAll(" (*"); - const name_begin = buffer.items.len; - try bw.print("zig_F_{}", .{typeToCIdentifier(t, dg.module)}); - const name_end = buffer.items.len; - try bw.writeAll(")("); - - const param_len = fn_info.param_types.len; - - var params_written: usize = 0; - var index: usize = 0; - while (index < param_len) : (index += 1) { - const param_ty = fn_info.param_types[index]; - if (!param_ty.hasRuntimeBitsIgnoreComptime()) continue; - if (params_written > 0) { - try bw.writeAll(", "); - } - try dg.renderTypeAndName(bw, param_ty, .{ .bytes = "" }, .Mut, 0, .Forward); - params_written += 1; - } - - if (fn_info.is_var_args) { - if (params_written != 0) try bw.writeAll(", "); - try bw.writeAll("..."); - } else if (params_written == 0) { - try dg.renderType(bw, Type.void, .Forward); - } - try bw.writeAll(");\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try 
dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn renderSliceTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - std.debug.assert(t.sentinel() == null); // expected canonical type - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - const bw = buffer.writer(); - - var ptr_ty_buf: Type.SlicePtrFieldTypeBuffer = undefined; - const ptr_ty = t.slicePtrFieldType(&ptr_ty_buf); - const ptr_name = CValue{ .identifier = "ptr" }; - const len_ty = Type.usize; - const len_name = CValue{ .identifier = "len" }; - - try bw.writeAll("typedef struct {\n "); - try dg.renderTypeAndName(bw, ptr_ty, ptr_name, .Mut, 0, .Complete); - try bw.writeAll(";\n "); - try dg.renderTypeAndName(bw, len_ty, len_name, .Mut, 0, .Complete); - - try bw.writeAll(";\n} "); - const name_begin = buffer.items.len; - try bw.print("zig_{c}_{}", .{ - @as(u8, if (t.isConstPtr()) 'L' else 'M'), - typeToCIdentifier(t.childType(), dg.module), - }); - const name_end = buffer.items.len; - try bw.writeAll(";\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn renderFwdTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - // The forward declaration for T is stored with a key of *const T. - const child_ty = t.childType(); - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - const bw = buffer.writer(); - - const tag = switch (child_ty.zigTypeTag()) { - .Struct, .ErrorUnion, .Optional => "struct", - .Union => if (child_ty.unionTagTypeSafety()) |_| "struct" else "union", - else => unreachable, - }; - try bw.writeAll("typedef "); - try bw.writeAll(tag); - const name_begin = buffer.items.len + " ".len; - try bw.writeAll(" zig_"); - switch (child_ty.zigTypeTag()) { - .Struct, .Union => { - var fqn_buf = std.ArrayList(u8).init(dg.typedefs.allocator); - defer fqn_buf.deinit(); - - const owner_decl_index = child_ty.getOwnerDecl(); - const owner_decl = dg.module.declPtr(owner_decl_index); - try owner_decl.renderFullyQualifiedName(dg.module, fqn_buf.writer()); - - try bw.print("S_{}__{d}", .{ fmtIdent(fqn_buf.items), @enumToInt(owner_decl_index) }); - }, - .ErrorUnion => { - try bw.print("E_{}", .{typeToCIdentifier(child_ty.errorUnionPayload(), dg.module)}); - }, - .Optional => { - var opt_buf: Type.Payload.ElemType = undefined; - try bw.print("Q_{}", .{typeToCIdentifier(child_ty.optionalChild(&opt_buf), dg.module)}); - }, + switch (kind) { + .forward => {}, + .complete => if (fn_info.alignment > 0) + try w.print(" zig_align_fn({})", .{fn_info.alignment}), else => unreachable, } - const name_end = buffer.items.len; - try buffer.ensureUnusedCapacity(" ".len + (name_end - name_begin) + ";\n".len); - buffer.appendAssumeCapacity(' '); - buffer.appendSliceAssumeCapacity(buffer.items[name_begin..name_end]); - buffer.appendSliceAssumeCapacity(";\n"); - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - 
.{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn renderStructTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - var ptr_pl = Type.Payload.ElemType{ .base = .{ .tag = .single_const_pointer }, .data = t }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - - try buffer.appendSlice("struct "); - - var needs_pack_attr = false; - { - var it = t.structFields().iterator(); - while (it.next()) |field| { - const field_ty = field.value_ptr.ty; - if (!field_ty.hasRuntimeBits()) continue; - const alignment = field.value_ptr.abi_align; - if (alignment != 0 and alignment < field_ty.abiAlignment(dg.module.getTarget())) { - needs_pack_attr = true; - try buffer.appendSlice("zig_packed("); - break; - } - } + switch (name) { + .export_index => |export_index| try dg.renderDeclName(w, fn_decl_index, export_index), + .string => |string| try w.writeAll(string), } - try buffer.appendSlice(name); - try buffer.appendSlice(" {\n"); - { - var it = t.structFields().iterator(); - var empty = true; - while (it.next()) |field| { - const field_ty = field.value_ptr.ty; - if (!field_ty.hasRuntimeBits()) continue; + try renderTypeSuffix( + dg.decl_index, + store.*, + module, + w, + fn_cty_idx, + .suffix, + CQualifiers.init(.{ .@"const" = switch (kind) { + .forward => false, + .complete => true, + else => unreachable, + } }), + ); - const alignment = field.value_ptr.alignment(dg.module.getTarget(), t.containerLayout()); - const field_name = CValue{ .identifier = field.key_ptr.* }; - try buffer.append(' '); - try dg.renderTypeAndName(buffer.writer(), field_ty, field_name, .Mut, alignment, .Complete); - try buffer.appendSlice(";\n"); - - empty = false; - } - if (empty) try buffer.appendSlice(" char empty_struct;\n"); + switch (kind) { + .forward => if (fn_info.alignment > 0) + try w.print(" zig_align_fn({})", .{fn_info.alignment}), + .complete => {}, + else => unreachable, } - if (needs_pack_attr) try buffer.appendSlice("});\n") else try buffer.appendSlice("};\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; } - fn renderTupleTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - - try buffer.appendSlice("typedef struct {\n"); - { - const fields = t.tupleFields(); - var field_id: usize = 0; - for (fields.types, 0..) 
|field_ty, i| { - if (!field_ty.hasRuntimeBits() or fields.values[i].tag() != .unreachable_value) continue; - - try buffer.append(' '); - try dg.renderTypeAndName(buffer.writer(), field_ty, .{ .field = field_id }, .Mut, 0, .Complete); - try buffer.appendSlice(";\n"); - - field_id += 1; - } - if (field_id == 0) try buffer.appendSlice(" char empty_tuple;\n"); - } - const name_begin = buffer.items.len + "} ".len; - try buffer.writer().print("}} zig_T_{}_{d};\n", .{ typeToCIdentifier(t, dg.module), @truncate(u16, t.hash(dg.module)) }); - const name_end = buffer.items.len - ";\n".len; - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; + fn indexToCType(dg: *DeclGen, idx: CType.Index) CType { + return dg.ctypes.indexToCType(idx); } - fn renderUnionTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - var ptr_pl = Type.Payload.ElemType{ .base = .{ .tag = .single_const_pointer }, .data = t }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - - try buffer.appendSlice(if (t.unionTagTypeSafety()) |_| "struct " else "union "); - try buffer.appendSlice(name); - try buffer.appendSlice(" {\n"); - - const indent = if (t.unionTagTypeSafety()) |tag_ty| indent: { - const target = dg.module.getTarget(); - const layout = t.unionGetLayout(target); - if (layout.tag_size != 0) { - try buffer.append(' '); - try dg.renderTypeAndName(buffer.writer(), tag_ty, .{ .identifier = "tag" }, .Mut, 0, .Complete); - try buffer.appendSlice(";\n"); - } - try buffer.appendSlice(" union {\n"); - break :indent " "; - } else " "; - - { - var it = t.unionFields().iterator(); - var empty = true; - while (it.next()) |field| { - const field_ty = field.value_ptr.ty; - if (!field_ty.hasRuntimeBits()) continue; - - const alignment = field.value_ptr.abi_align; - const field_name = CValue{ .identifier = field.key_ptr.* }; - try buffer.appendSlice(indent); - try dg.renderTypeAndName(buffer.writer(), field_ty, field_name, .Mut, alignment, .Complete); - try buffer.appendSlice(";\n"); - - empty = false; - } - if (empty) { - try buffer.appendSlice(indent); - try buffer.appendSlice("char empty_union;\n"); - } - } - - if (t.unionTagTypeSafety()) |_| try buffer.appendSlice(" } payload;\n"); - try buffer.appendSlice("};\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; + fn typeToIndex(dg: *DeclGen, ty: Type, kind: CType.Kind) !CType.Index { + return dg.ctypes.typeToIndex(dg.gpa, ty, dg.module, kind); } - fn renderErrorUnionTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - assert(t.errorUnionSet().tag() == .anyerror); - - var ptr_pl = Type.Payload.ElemType{ .base = .{ .tag = .single_const_pointer }, .data = t }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer 
buffer.deinit(); - const bw = buffer.writer(); - - const payload_ty = t.errorUnionPayload(); - const payload_name = CValue{ .identifier = "payload" }; - const error_ty = t.errorUnionSet(); - const error_name = CValue{ .identifier = "error" }; - - const target = dg.module.getTarget(); - const payload_align = payload_ty.abiAlignment(target); - const error_align = error_ty.abiAlignment(target); - try bw.writeAll("struct "); - try bw.writeAll(name); - try bw.writeAll(" {\n "); - if (error_align > payload_align) { - try dg.renderTypeAndName(bw, payload_ty, payload_name, .Mut, 0, .Complete); - try bw.writeAll(";\n "); - try dg.renderTypeAndName(bw, error_ty, error_name, .Mut, 0, .Complete); - } else { - try dg.renderTypeAndName(bw, error_ty, error_name, .Mut, 0, .Complete); - try bw.writeAll(";\n "); - try dg.renderTypeAndName(bw, payload_ty, payload_name, .Mut, 0, .Complete); - } - try bw.writeAll(";\n};\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn renderArrayTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - const info = t.arrayInfo(); - std.debug.assert(info.sentinel == null); // expected canonical type - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - const bw = buffer.writer(); - - try bw.writeAll("typedef "); - try dg.renderType(bw, info.elem_type, .Complete); - - const name_begin = buffer.items.len + " ".len; - try bw.print(" zig_A_{}_{d}", .{ typeToCIdentifier(info.elem_type, dg.module), info.len }); - const name_end = buffer.items.len; - - const c_len = if (info.len > 0) info.len else 1; - var c_len_pl: Value.Payload.U64 = .{ .base = .{ .tag = .int_u64 }, .data = c_len }; - const c_len_val = Value.initPayload(&c_len_pl.base); - try bw.print("[{}];\n", .{try dg.fmtIntLiteral(Type.usize, c_len_val)}); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn renderOptionalTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - var ptr_pl = Type.Payload.ElemType{ .base = .{ .tag = .single_const_pointer }, .data = t }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - const bw = buffer.writer(); - - var opt_buf: Type.Payload.ElemType = undefined; - const child_ty = t.optionalChild(&opt_buf); - - try bw.writeAll("struct "); - try bw.writeAll(name); - try bw.writeAll(" {\n"); - try dg.renderTypeAndName(bw, child_ty, .{ .identifier = "payload" }, .Mut, 0, .Complete); - try bw.writeAll(";\n "); - try dg.renderTypeAndName(bw, Type.bool, .{ .identifier = "is_null" }, .Mut, 0, .Complete); - try bw.writeAll(";\n};\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn 
renderOpaqueTypedef(dg: *DeclGen, t: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - const opaque_ty = t.cast(Type.Payload.Opaque).?.data; - const unqualified_name = dg.module.declPtr(opaque_ty.owner_decl).name; - const fqn = try opaque_ty.getFullyQualifiedName(dg.module); - defer dg.typedefs.allocator.free(fqn); - - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - - try buffer.writer().print("typedef struct { } ", .{fmtIdent(std.mem.span(unqualified_name))}); - - const name_begin = buffer.items.len; - try buffer.writer().print("zig_O_{}", .{fmtIdent(fqn)}); - const name_end = buffer.items.len; - try buffer.appendSlice(";\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try t.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; + fn typeToCType(dg: *DeclGen, ty: Type, kind: CType.Kind) !CType { + return dg.ctypes.typeToCType(dg.gpa, ty, dg.module, kind); } /// Renders a type as a single identifier, generating intermediate typedefs @@ -1957,277 +1569,15 @@ pub const DeclGen = struct { /// There are three type formats in total that we support rendering: /// | Function | Example 1 (*u8) | Example 2 ([10]*u8) | /// |---------------------|-----------------|---------------------| - /// | `renderTypecast` | "uint8_t *" | "uint8_t *[10]" | /// | `renderTypeAndName` | "uint8_t *name" | "uint8_t *name[10]" | - /// | `renderType` | "uint8_t *" | "zig_A_uint8_t_10" | + /// | `renderType` | "uint8_t *" | "uint8_t *[10]" | /// - fn renderType( - dg: *DeclGen, - w: anytype, - t: Type, - kind: TypedefKind, - ) error{ OutOfMemory, AnalysisFail }!void { - const target = dg.module.getTarget(); - - switch (t.zigTypeTag()) { - .Void => try w.writeAll("void"), - .Bool => try w.writeAll("bool"), - .NoReturn, .Float => { - try w.writeAll("zig_"); - try t.print(w, dg.module); - }, - .Int => { - if (t.isNamedInt()) { - try w.writeAll("zig_"); - try t.print(w, dg.module); - } else { - return renderTypeUnnamed(dg, w, t, kind); - } - }, - .ErrorSet => { - return renderTypeUnnamed(dg, w, t, kind); - }, - .Pointer => { - const ptr_info = t.ptrInfo().data; - if (ptr_info.size == .Slice) { - var slice_pl = Type.Payload.ElemType{ - .base = .{ .tag = if (t.ptrIsMutable()) .mut_slice else .const_slice }, - .data = ptr_info.pointee_type, - }; - const slice_ty = Type.initPayload(&slice_pl.base); - - const name = dg.getTypedefName(slice_ty) orelse - try dg.renderSliceTypedef(slice_ty); - - return w.writeAll(name); - } - - if (ptr_info.pointee_type.zigTypeTag() == .Fn) { - const name = dg.getTypedefName(ptr_info.pointee_type) orelse - try dg.renderPtrToFnTypedef(ptr_info.pointee_type); - - return w.writeAll(name); - } - - if (ptr_info.host_size != 0) { - var host_pl = Type.Payload.Bits{ - .base = .{ .tag = .int_unsigned }, - .data = ptr_info.host_size * 8, - }; - const host_ty = Type.initPayload(&host_pl.base); - - try dg.renderType(w, host_ty, .Forward); - } else if (t.isCPtr() and ptr_info.pointee_type.eql(Type.u8, dg.module) and - (dg.decl.val.tag() == .extern_fn or - std.mem.eql(u8, std.mem.span(dg.decl.name), "main"))) - { - // This is a hack, since the c compiler expects a lot of external - // library functions to have char pointers in their signatures, but - // u8 and i8 produce unsigned char and signed char respectively, - // which in C are (not very 
usefully) different than char. - try w.writeAll("char"); - } else try dg.renderType(w, switch (ptr_info.pointee_type.tag()) { - .anyopaque => Type.void, - else => ptr_info.pointee_type, - }, .Forward); - if (t.isConstPtr()) try w.writeAll(" const"); - if (t.isVolatilePtr()) try w.writeAll(" volatile"); - return w.writeAll(" *"); - }, - .Array, .Vector => { - var array_pl = Type.Payload.Array{ .base = .{ .tag = .array }, .data = .{ - .len = t.arrayLenIncludingSentinel(), - .elem_type = t.childType(), - } }; - const array_ty = Type.initPayload(&array_pl.base); - - const name = dg.getTypedefName(array_ty) orelse - try dg.renderArrayTypedef(array_ty); - - return w.writeAll(name); - }, - .Optional => { - var opt_buf: Type.Payload.ElemType = undefined; - const child_ty = t.optionalChild(&opt_buf); - - if (!child_ty.hasRuntimeBitsIgnoreComptime()) - return dg.renderType(w, Type.bool, kind); - - if (t.optionalReprIsPayload()) - return dg.renderType(w, child_ty, kind); - - switch (kind) { - .Complete => { - const name = dg.getTypedefName(t) orelse - try dg.renderOptionalTypedef(t); - - try w.writeAll(name); - }, - .Forward => { - var ptr_pl = Type.Payload.ElemType{ - .base = .{ .tag = .single_const_pointer }, - .data = t, - }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - try w.writeAll(name); - }, - } - }, - .ErrorUnion => { - const payload_ty = t.errorUnionPayload(); - - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) - return dg.renderType(w, Type.anyerror, kind); - - var error_union_pl = Type.Payload.ErrorUnion{ - .data = .{ .error_set = Type.anyerror, .payload = payload_ty }, - }; - const error_union_ty = Type.initPayload(&error_union_pl.base); - - switch (kind) { - .Complete => { - const name = dg.getTypedefName(error_union_ty) orelse - try dg.renderErrorUnionTypedef(error_union_ty); - - try w.writeAll(name); - }, - .Forward => { - var ptr_pl = Type.Payload.ElemType{ - .base = .{ .tag = .single_const_pointer }, - .data = error_union_ty, - }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - try w.writeAll(name); - }, - } - }, - .Struct, .Union => |tag| if (t.containerLayout() == .Packed) { - if (t.castTag(.@"struct")) |struct_obj| { - try dg.renderType(w, struct_obj.data.backing_int_ty, kind); - } else { - var buf: Type.Payload.Bits = .{ - .base = .{ .tag = .int_unsigned }, - .data = @intCast(u16, t.bitSize(target)), - }; - try dg.renderType(w, Type.initPayload(&buf.base), kind); - } - } else if (t.isSimpleTupleOrAnonStruct()) { - const ExpectedContents = struct { types: [8]Type, values: [8]Value }; - var stack align(@alignOf(ExpectedContents)) = - std.heap.stackFallback(@sizeOf(ExpectedContents), dg.gpa); - const allocator = stack.get(); - - var tuple_storage = std.MultiArrayList(struct { type: Type, value: Value }){}; - defer tuple_storage.deinit(allocator); - try tuple_storage.ensureTotalCapacity(allocator, t.structFieldCount()); - - const fields = t.tupleFields(); - for (fields.values, 0..) 
|value, index| - if (value.tag() == .unreachable_value) - tuple_storage.appendAssumeCapacity(.{ - .type = fields.types[index], - .value = value, - }); - - const tuple_slice = tuple_storage.slice(); - var tuple_pl = Type.Payload.Tuple{ .data = .{ - .types = tuple_slice.items(.type), - .values = tuple_slice.items(.value), - } }; - const tuple_ty = Type.initPayload(&tuple_pl.base); - - const name = dg.getTypedefName(tuple_ty) orelse - try dg.renderTupleTypedef(tuple_ty); - - try w.writeAll(name); - } else switch (kind) { - .Complete => { - const name = dg.getTypedefName(t) orelse switch (tag) { - .Struct => try dg.renderStructTypedef(t), - .Union => try dg.renderUnionTypedef(t), - else => unreachable, - }; - - try w.writeAll(name); - }, - .Forward => { - var ptr_pl = Type.Payload.ElemType{ - .base = .{ .tag = .single_const_pointer }, - .data = t, - }; - const ptr_ty = Type.initPayload(&ptr_pl.base); - - const name = dg.getTypedefName(ptr_ty) orelse - try dg.renderFwdTypedef(ptr_ty); - - try w.writeAll(name); - }, - }, - .Enum => { - // For enums, we simply use the integer tag type. - var int_tag_buf: Type.Payload.Bits = undefined; - const int_tag_ty = t.intTagType(&int_tag_buf); - - try dg.renderType(w, int_tag_ty, kind); - }, - .Opaque => switch (t.tag()) { - .@"opaque" => { - const name = dg.getTypedefName(t) orelse - try dg.renderOpaqueTypedef(t); - - try w.writeAll(name); - }, - else => unreachable, - }, - - .Frame, - .AnyFrame, - => |tag| return dg.fail("TODO: C backend: implement value of type {s}", .{ - @tagName(tag), - }), - - .Fn => unreachable, // This is a function body, not a function pointer. - - .Null, - .Undefined, - .EnumLiteral, - .ComptimeFloat, - .ComptimeInt, - .Type, - => unreachable, // must be const or comptime - } - } - - fn renderTypeUnnamed( - dg: *DeclGen, - w: anytype, - t: Type, - kind: TypedefKind, - ) error{ OutOfMemory, AnalysisFail }!void { - const target = dg.module.getTarget(); - const int_info = t.intInfo(target); - if (toCIntBits(int_info.bits)) |c_bits| - return w.print("zig_{c}{d}", .{ signAbbrev(int_info.signedness), c_bits }) - else if (loweredArrayInfo(t, target)) |array_info| { - assert(array_info.sentinel == null); - var array_pl = Type.Payload.Array{ - .base = .{ .tag = .array }, - .data = .{ .len = array_info.len, .elem_type = array_info.elem_type }, - }; - const array_ty = Type.initPayload(&array_pl.base); - - return dg.renderType(w, array_ty, kind); - } else return dg.fail("C backend: Unable to lower unnamed integer type {}", .{ - t.fmt(dg.module), - }); + fn renderType(dg: *DeclGen, w: anytype, t: Type) error{ OutOfMemory, AnalysisFail }!void { + const store = &dg.ctypes.set; + const module = dg.module; + const idx = try dg.typeToIndex(t, .complete); + _ = try renderTypePrefix(dg.decl_index, store.*, module, w, idx, .suffix, .{}); + try renderTypeSuffix(dg.decl_index, store.*, module, w, idx, .suffix, .{}); } const IntCastContext = union(enum) { @@ -2254,16 +1604,16 @@ pub const DeclGen = struct { /// Renders a cast to an int type, from either an int or a pointer. /// /// Some platforms don't have 128 bit integers, so we need to use - /// the zig_as_ and zig_lo_ macros in those cases. + /// the zig_make_ and zig_lo_ macros in those cases. 
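// A concrete (hypothetical) reading of the table below, for a 128-bit source
// value `src`, assuming renderTypeForBuiltinFnName yields "u128":
//
//   dest u64:            zig_lo_u128(src)
//   dest u32:            (uint32_t)zig_lo_u128(src)
//   dest u128 from u64:  zig_make_u128(0, src)
//   dest u128 from u128: zig_make_u128(zig_hi_u128(src), zig_lo_u128(src))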
/// /// | Dest type bits | Src type | Result /// |------------------|------------------|---------------------------| /// | < 64 bit integer | pointer | (zig_)(zig_size)src /// | < 64 bit integer | < 64 bit integer | (zig_)src /// | < 64 bit integer | > 64 bit integer | zig_lo(src) - /// | > 64 bit integer | pointer | zig_as_(0, (zig_size)src) - /// | > 64 bit integer | < 64 bit integer | zig_as_(0, src) - /// | > 64 bit integer | > 64 bit integer | zig_as_(zig_hi_(src), zig_lo_(src)) + /// | > 64 bit integer | pointer | zig_make_(0, (zig_size)src) + /// | > 64 bit integer | < 64 bit integer | zig_make_(0, src) + /// | > 64 bit integer | > 64 bit integer | zig_make_(zig_hi_(src), zig_lo_(src)) fn renderIntCast(dg: *DeclGen, w: anytype, dest_ty: Type, context: IntCastContext, src_ty: Type, location: ValueRenderLocation) !void { const target = dg.module.getTarget(); const dest_bits = dest_ty.bitSize(target); @@ -2284,36 +1634,41 @@ pub const DeclGen = struct { if (needs_cast) { try w.writeByte('('); - try dg.renderTypecast(w, dest_ty); + try dg.renderType(w, dest_ty); try w.writeByte(')'); } if (src_is_ptr) { try w.writeByte('('); - try dg.renderTypecast(w, src_eff_ty); + try dg.renderType(w, src_eff_ty); try w.writeByte(')'); } try context.writeValue(dg, w, src_ty, location); } else if (dest_bits <= 64 and src_bits > 64) { assert(!src_is_ptr); + if (dest_bits < 64) { + try w.writeByte('('); + try dg.renderType(w, dest_ty); + try w.writeByte(')'); + } try w.writeAll("zig_lo_"); try dg.renderTypeForBuiltinFnName(w, src_eff_ty); try w.writeByte('('); try context.writeValue(dg, w, src_ty, .FunctionArgument); try w.writeByte(')'); } else if (dest_bits > 64 and src_bits <= 64) { - try w.writeAll("zig_as_"); + try w.writeAll("zig_make_"); try dg.renderTypeForBuiltinFnName(w, dest_ty); try w.writeAll("(0, "); // TODO: Should the 0 go through fmtIntLiteral? if (src_is_ptr) { try w.writeByte('('); - try dg.renderTypecast(w, src_eff_ty); + try dg.renderType(w, src_eff_ty); try w.writeByte(')'); } try context.writeValue(dg, w, src_ty, .FunctionArgument); try w.writeByte(')'); } else { assert(!src_is_ptr); - try w.writeAll("zig_as_"); + try w.writeAll("zig_make_"); try dg.renderTypeForBuiltinFnName(w, dest_ty); try w.writeAll("(zig_hi_"); try dg.renderTypeForBuiltinFnName(w, src_eff_ty); @@ -2327,151 +1682,50 @@ pub const DeclGen = struct { } } - /// Renders a type in C typecast format. - /// - /// This is guaranteed to be valid in a typecast expression, but not - /// necessarily in a variable/field declaration. - /// - /// There are three type formats in total that we support rendering: - /// | Function | Example 1 (*u8) | Example 2 ([10]*u8) | - /// |---------------------|-----------------|---------------------| - /// | `renderTypecast` | "uint8_t *" | "uint8_t *[10]" | - /// | `renderTypeAndName` | "uint8_t *name" | "uint8_t *name[10]" | - /// | `renderType` | "uint8_t *" | "zig_A_uint8_t_10" | - /// - fn renderTypecast(dg: *DeclGen, w: anytype, ty: Type) error{ OutOfMemory, AnalysisFail }!void { - return renderTypeAndName(dg, w, ty, .{ .bytes = "" }, .Mut, 0, .Complete); - } - /// Renders a type and name in field declaration/definition format. 
/// /// There are three type formats in total that we support rendering: /// | Function | Example 1 (*u8) | Example 2 ([10]*u8) | /// |---------------------|-----------------|---------------------| - /// | `renderTypecast` | "uint8_t *" | "uint8_t *[10]" | /// | `renderTypeAndName` | "uint8_t *name" | "uint8_t *name[10]" | - /// | `renderType` | "uint8_t *" | "zig_A_uint8_t_10" | + /// | `renderType` | "uint8_t *" | "uint8_t *[10]" | /// fn renderTypeAndName( dg: *DeclGen, w: anytype, ty: Type, name: CValue, - mutability: Mutability, + qualifiers: CQualifiers, alignment: u32, - kind: TypedefKind, + kind: CType.Kind, ) error{ OutOfMemory, AnalysisFail }!void { - var suffix = std.ArrayList(u8).init(dg.gpa); - defer suffix.deinit(); - const suffix_writer = suffix.writer(); - - // Any top-level array types are rendered here as a suffix, which - // avoids creating typedefs for every array type const target = dg.module.getTarget(); - var render_ty = ty; - var depth: u32 = 0; - while (loweredArrayInfo(render_ty, target)) |array_info| { - const c_len = array_info.len + @boolToInt(array_info.sentinel != null); - var c_len_pl: Value.Payload.U64 = .{ .base = .{ .tag = .int_u64 }, .data = c_len }; - const c_len_val = Value.initPayload(&c_len_pl.base); + const alignas = CType.AlignAs.init(alignment, ty.abiAlignment(target)); + try dg.renderCTypeAndName(w, try dg.typeToIndex(ty, kind), name, qualifiers, alignas); + } - try suffix_writer.writeByte('['); - if (mutability == .ConstArgument and depth == 0) try suffix_writer.writeAll("zig_const_arr "); - try suffix.writer().print("{}]", .{try dg.fmtIntLiteral(Type.usize, c_len_val)}); - render_ty = array_info.elem_type; - depth += 1; + fn renderCTypeAndName( + dg: *DeclGen, + w: anytype, + cty_idx: CType.Index, + name: CValue, + qualifiers: CQualifiers, + alignas: CType.AlignAs, + ) error{ OutOfMemory, AnalysisFail }!void { + const store = &dg.ctypes.set; + const module = dg.module; + + switch (std.math.order(alignas.@"align", alignas.abi)) { + .lt => try w.print("zig_under_align({}) ", .{alignas.getAlign()}), + .eq => {}, + .gt => try w.print("zig_align({}) ", .{alignas.getAlign()}), } - if (alignment != 0) { - const abi_alignment = ty.abiAlignment(target); - if (alignment < abi_alignment) { - try w.print("zig_under_align({}) ", .{alignment}); - } else if (alignment > abi_alignment) { - try w.print("zig_align({}) ", .{alignment}); - } - } - try dg.renderType(w, render_ty, kind); - - const const_prefix = switch (mutability) { - .Const, .ConstArgument => "const ", - .Mut => "", - }; - try w.print(" {s}", .{const_prefix}); + const trailing = + try renderTypePrefix(dg.decl_index, store.*, module, w, cty_idx, .suffix, qualifiers); + try w.print("{}", .{trailing}); try dg.writeCValue(w, name); - try w.writeAll(suffix.items); - } - - fn renderTagNameFn(dg: *DeclGen, enum_ty: Type) error{ OutOfMemory, AnalysisFail }![]const u8 { - var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); - defer buffer.deinit(); - const bw = buffer.writer(); - - const name_slice_ty = Type.initTag(.const_slice_u8_sentinel_0); - - try buffer.appendSlice("static "); - try dg.renderType(bw, name_slice_ty, .Complete); - const name_begin = buffer.items.len + " ".len; - try bw.print(" zig_tagName_{}_{d}(", .{ typeToCIdentifier(enum_ty, dg.module), @enumToInt(enum_ty.getOwnerDecl()) }); - const name_end = buffer.items.len - "(".len; - try dg.renderTypeAndName(bw, enum_ty, .{ .identifier = "tag" }, .Const, 0, .Complete); - try buffer.appendSlice(") {\n switch (tag) {\n"); - for 
(enum_ty.enumFields().keys(), 0..) |name, index| { - const name_z = try dg.typedefs.allocator.dupeZ(u8, name); - defer dg.typedefs.allocator.free(name_z); - const name_bytes = name_z[0 .. name_z.len + 1]; - - var tag_pl: Value.Payload.U32 = .{ - .base = .{ .tag = .enum_field_index }, - .data = @intCast(u32, index), - }; - const tag_val = Value.initPayload(&tag_pl.base); - - var int_pl: Value.Payload.U64 = undefined; - const int_val = tag_val.enumToInt(enum_ty, &int_pl); - - var name_ty_pl = Type.Payload.Len{ .base = .{ .tag = .array_u8_sentinel_0 }, .data = name.len }; - const name_ty = Type.initPayload(&name_ty_pl.base); - - var name_pl = Value.Payload.Bytes{ .base = .{ .tag = .bytes }, .data = name_bytes }; - const name_val = Value.initPayload(&name_pl.base); - - var len_pl = Value.Payload.U64{ .base = .{ .tag = .int_u64 }, .data = name.len }; - const len_val = Value.initPayload(&len_pl.base); - - try bw.print(" case {}: {{\n static ", .{try dg.fmtIntLiteral(enum_ty, int_val)}); - try dg.renderTypeAndName(bw, name_ty, .{ .identifier = "name" }, .Const, 0, .Complete); - try buffer.appendSlice(" = "); - try dg.renderValue(bw, name_ty, name_val, .Initializer); - try buffer.appendSlice(";\n return ("); - try dg.renderTypecast(bw, name_slice_ty); - try bw.print("){{{}, {}}};\n", .{ - fmtIdent("name"), try dg.fmtIntLiteral(Type.usize, len_val), - }); - - try buffer.appendSlice(" }\n"); - } - try buffer.appendSlice(" }\n while ("); - try dg.renderValue(bw, Type.bool, Value.true, .Other); - try buffer.appendSlice(") "); - _ = try airBreakpoint(bw); - try buffer.appendSlice("}\n"); - - const rendered = try buffer.toOwnedSlice(); - errdefer dg.typedefs.allocator.free(rendered); - const name = rendered[name_begin..name_end]; - - try dg.typedefs.ensureUnusedCapacity(1); - dg.typedefs.putAssumeCapacityNoClobber( - try enum_ty.copy(dg.typedefs_arena), - .{ .name = name, .rendered = rendered }, - ); - - return name; - } - - fn getTagNameFn(dg: *DeclGen, enum_ty: Type) ![]const u8 { - return dg.getTypedefName(enum_ty) orelse - try dg.renderTagNameFn(enum_ty); + try renderTypeSuffix(dg.decl_index, store.*, module, w, cty_idx, .suffix, .{}); } fn declIsGlobal(dg: *DeclGen, tv: TypedValue) bool { @@ -2492,10 +1746,11 @@ pub const DeclGen = struct { fn writeCValue(dg: *DeclGen, w: anytype, c_value: CValue) !void { switch (c_value) { .none => unreachable, - .local => |i| return w.print("t{d}", .{i}), + .local, .new_local => |i| return w.print("t{d}", .{i}), .local_ref => |i| return w.print("&t{d}", .{i}), .constant => unreachable, .arg => |i| return w.print("a{d}", .{i}), + .arg_array => |i| return dg.writeCValueMember(w, .{ .arg = i }, .{ .identifier = "array" }), .field => |i| return w.print("f{d}", .{i}), .decl => |decl| return dg.renderDeclName(w, decl, 0), .decl_ref => |decl| { @@ -2504,6 +1759,10 @@ pub const DeclGen = struct { }, .undef => |ty| return dg.renderValue(w, ty, Value.undef, .Other), .identifier => |ident| return w.print("{ }", .{fmtIdent(ident)}), + .payload_identifier => |ident| return w.print("{ }.{ }", .{ + fmtIdent("payload"), + fmtIdent(ident), + }), .bytes => |bytes| return w.writeAll(bytes), } } @@ -2511,10 +1770,15 @@ pub const DeclGen = struct { fn writeCValueDeref(dg: *DeclGen, w: anytype, c_value: CValue) !void { switch (c_value) { .none => unreachable, - .local => |i| return w.print("(*t{d})", .{i}), + .local, .new_local => |i| return w.print("(*t{d})", .{i}), .local_ref => |i| return w.print("t{d}", .{i}), .constant => unreachable, .arg => |i| return w.print("(*a{d})", .{i}), 
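// For reference, the new CValue variants render as follows (index 0 and
// identifier "x" are placeholders): .new_local prints like .local ("t0"),
// .arg_array prints "a0.array" (the by-value array wrapper's member, see
// airArg), and .payload_identifier prints "payload.x" for names that live
// inside a union's payload. The deref forms below wrap the same spellings in
// "(*...)".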
+ .arg_array => |i| { + try w.writeAll("(*"); + try dg.writeCValueMember(w, .{ .arg = i }, .{ .identifier = "array" }); + return w.writeByte(')'); + }, .field => |i| return w.print("f{d}", .{i}), .decl => |decl| { try w.writeAll("(*"); @@ -2524,6 +1788,10 @@ pub const DeclGen = struct { .decl_ref => |decl| return dg.renderDeclName(w, decl, 0), .undef => unreachable, .identifier => |ident| return w.print("(*{ })", .{fmtIdent(ident)}), + .payload_identifier => |ident| return w.print("(*{ }.{ })", .{ + fmtIdent("payload"), + fmtIdent(ident), + }), .bytes => |bytes| { try w.writeAll("(*"); try w.writeAll(bytes); @@ -2541,7 +1809,7 @@ pub const DeclGen = struct { fn writeCValueDerefMember(dg: *DeclGen, writer: anytype, c_value: CValue, member: CValue) !void { switch (c_value) { .none, .constant, .field, .undef => unreachable, - .local, .arg, .decl, .identifier, .bytes => { + .new_local, .local, .arg, .arg_array, .decl, .identifier, .payload_identifier, .bytes => { try dg.writeCValue(writer, c_value); try writer.writeAll("->"); }, @@ -2668,11 +1936,464 @@ pub const DeclGen = struct { } }; -pub fn genGlobalAsm(mod: *Module, code: *std.ArrayList(u8)) !void { - var it = mod.global_assembly.valueIterator(); - while (it.next()) |asm_source| { - try code.writer().print("__asm({s});\n", .{fmtStringLiteral(asm_source.*)}); +const CTypeFix = enum { prefix, suffix }; +const CQualifiers = std.enums.EnumSet(enum { @"const", @"volatile", restrict }); +const Const = CQualifiers.init(.{ .@"const" = true }); +const RenderCTypeTrailing = enum { + no_space, + maybe_space, + + pub fn format( + self: @This(), + comptime fmt: []const u8, + _: std.fmt.FormatOptions, + w: anytype, + ) @TypeOf(w).Error!void { + if (fmt.len != 0) + @compileError("invalid format string '" ++ fmt ++ "' for type '" ++ + @typeName(@This()) ++ "'"); + comptime assert(fmt.len == 0); + switch (self) { + .no_space => {}, + .maybe_space => try w.writeByte(' '), + } } +}; +fn renderTypeName( + mod: *Module, + w: anytype, + idx: CType.Index, + cty: CType, + attributes: []const u8, +) !void { + switch (cty.tag()) { + else => unreachable, + + .fwd_anon_struct, + .fwd_anon_union, + => |tag| try w.print("{s} {s}anon__lazy_{d}", .{ + @tagName(tag)["fwd_anon_".len..], + attributes, + idx, + }), + + .fwd_struct, + .fwd_union, + => |tag| { + const owner_decl = cty.cast(CType.Payload.FwdDecl).?.data; + try w.print("{s} {s}{}__{d}", .{ + @tagName(tag)["fwd_".len..], + attributes, + fmtIdent(mem.span(mod.declPtr(owner_decl).name)), + @enumToInt(owner_decl), + }); + }, + } +} +fn renderTypePrefix( + decl: Decl.OptionalIndex, + store: CType.Store.Set, + mod: *Module, + w: anytype, + idx: CType.Index, + parent_fix: CTypeFix, + qualifiers: CQualifiers, +) @TypeOf(w).Error!RenderCTypeTrailing { + var trailing = RenderCTypeTrailing.maybe_space; + + const cty = store.indexToCType(idx); + switch (cty.tag()) { + .void, + .char, + .@"signed char", + .short, + .int, + .long, + .@"long long", + ._Bool, + .@"unsigned char", + .@"unsigned short", + .@"unsigned int", + .@"unsigned long", + .@"unsigned long long", + .float, + .double, + .@"long double", + .bool, + .size_t, + .ptrdiff_t, + .uint8_t, + .int8_t, + .uint16_t, + .int16_t, + .uint32_t, + .int32_t, + .uint64_t, + .int64_t, + .uintptr_t, + .intptr_t, + .zig_u128, + .zig_i128, + .zig_f16, + .zig_f32, + .zig_f64, + .zig_f80, + .zig_f128, + .zig_c_longdouble, + => |tag| try w.writeAll(@tagName(tag)), + + .pointer, + .pointer_const, + .pointer_volatile, + .pointer_const_volatile, + => |tag| { + const child_idx = 
cty.cast(CType.Payload.Child).?.data; + const child_trailing = try renderTypePrefix( + decl, + store, + mod, + w, + child_idx, + .prefix, + CQualifiers.init(.{ .@"const" = switch (tag) { + .pointer, .pointer_volatile => false, + .pointer_const, .pointer_const_volatile => true, + else => unreachable, + }, .@"volatile" = switch (tag) { + .pointer, .pointer_const => false, + .pointer_volatile, .pointer_const_volatile => true, + else => unreachable, + } }), + ); + try w.print("{}*", .{child_trailing}); + trailing = .no_space; + }, + + .array, + .vector, + => { + const child_idx = cty.cast(CType.Payload.Sequence).?.data.elem_type; + const child_trailing = try renderTypePrefix( + decl, + store, + mod, + w, + child_idx, + .suffix, + qualifiers, + ); + switch (parent_fix) { + .prefix => { + try w.print("{}(", .{child_trailing}); + return .no_space; + }, + .suffix => return child_trailing, + } + }, + + .fwd_anon_struct, + .fwd_anon_union, + => if (decl.unwrap()) |decl_index| + try w.print("anon__{d}_{d}", .{ @enumToInt(decl_index), idx }) + else + try renderTypeName(mod, w, idx, cty, ""), + + .fwd_struct, + .fwd_union, + => try renderTypeName(mod, w, idx, cty, ""), + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => |tag| { + try w.print("{s} {s}", .{ + @tagName(tag)["unnamed_".len..], + if (cty.isPacked()) "zig_packed(" else "", + }); + try renderAggregateFields(mod, w, store, cty, 1); + if (cty.isPacked()) try w.writeByte(')'); + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => return renderTypePrefix( + decl, + store, + mod, + w, + cty.cast(CType.Payload.Aggregate).?.data.fwd_decl, + parent_fix, + qualifiers, + ), + + .function, + .varargs_function, + => { + const child_trailing = try renderTypePrefix( + decl, + store, + mod, + w, + cty.cast(CType.Payload.Function).?.data.return_type, + .suffix, + .{}, + ); + switch (parent_fix) { + .prefix => { + try w.print("{}(", .{child_trailing}); + return .no_space; + }, + .suffix => return child_trailing, + } + }, + } + + var qualifier_it = qualifiers.iterator(); + while (qualifier_it.next()) |qualifier| { + try w.print("{}{s}", .{ trailing, @tagName(qualifier) }); + trailing = .maybe_space; + } + + return trailing; +} +fn renderTypeSuffix( + decl: Decl.OptionalIndex, + store: CType.Store.Set, + mod: *Module, + w: anytype, + idx: CType.Index, + parent_fix: CTypeFix, + qualifiers: CQualifiers, +) @TypeOf(w).Error!void { + const cty = store.indexToCType(idx); + switch (cty.tag()) { + .void, + .char, + .@"signed char", + .short, + .int, + .long, + .@"long long", + ._Bool, + .@"unsigned char", + .@"unsigned short", + .@"unsigned int", + .@"unsigned long", + .@"unsigned long long", + .float, + .double, + .@"long double", + .bool, + .size_t, + .ptrdiff_t, + .uint8_t, + .int8_t, + .uint16_t, + .int16_t, + .uint32_t, + .int32_t, + .uint64_t, + .int64_t, + .uintptr_t, + .intptr_t, + .zig_u128, + .zig_i128, + .zig_f16, + .zig_f32, + .zig_f64, + .zig_f80, + .zig_f128, + .zig_c_longdouble, + => {}, + + .pointer, + .pointer_const, + .pointer_volatile, + .pointer_const_volatile, + => try renderTypeSuffix( + decl, + store, + mod, + w, + cty.cast(CType.Payload.Child).?.data, + .prefix, + .{}, + ), + + .array, + .vector, + => { + switch (parent_fix) { + .prefix => try w.writeByte(')'), + .suffix => {}, + } + + try w.print("[{}]", .{cty.cast(CType.Payload.Sequence).?.data.len}); + try renderTypeSuffix( + decl, + store, + mod, + w, + 
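// The prefix/suffix split mirrors C declarator syntax: renderTypePrefix emits
// everything to the left of the declared name, renderTypeSuffix everything to
// its right, and parent_fix decides when parentheses are required.
// Hypothetical renderings:
//
//   [10]*u8  ->  uint8_t *name[10]     /* array of pointers, no parens     */
//   *[10]u8  ->  uint8_t (*name)[10]   /* pointer to array, "(" in prefix, */
//                                      /* ")" supplied by the suffix pass  */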
cty.cast(CType.Payload.Sequence).?.data.elem_type, + .suffix, + .{}, + ); + }, + + .fwd_anon_struct, + .fwd_anon_union, + .fwd_struct, + .fwd_union, + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => {}, + + .function, + .varargs_function, + => |tag| { + switch (parent_fix) { + .prefix => try w.writeByte(')'), + .suffix => {}, + } + + const data = cty.cast(CType.Payload.Function).?.data; + + try w.writeByte('('); + var need_comma = false; + for (data.param_types, 0..) |param_type, param_i| { + if (need_comma) try w.writeAll(", "); + need_comma = true; + const trailing = + try renderTypePrefix(decl, store, mod, w, param_type, .suffix, qualifiers); + if (qualifiers.contains(.@"const")) try w.print("{}a{d}", .{ trailing, param_i }); + try renderTypeSuffix(decl, store, mod, w, param_type, .suffix, .{}); + } + switch (tag) { + .function => {}, + .varargs_function => { + if (need_comma) try w.writeAll(", "); + need_comma = true; + try w.writeAll("..."); + }, + else => unreachable, + } + if (!need_comma) try w.writeAll("void"); + try w.writeByte(')'); + + try renderTypeSuffix(decl, store, mod, w, data.return_type, .suffix, .{}); + }, + } +} +fn renderAggregateFields( + mod: *Module, + writer: anytype, + store: CType.Store.Set, + cty: CType, + indent: usize, +) !void { + try writer.writeAll("{\n"); + const fields = cty.fields(); + for (fields) |field| { + try writer.writeByteNTimes(' ', indent + 1); + switch (std.math.order(field.alignas.@"align", field.alignas.abi)) { + .lt => try writer.print("zig_under_align({}) ", .{field.alignas.getAlign()}), + .eq => {}, + .gt => try writer.print("zig_align({}) ", .{field.alignas.getAlign()}), + } + const trailing = try renderTypePrefix(.none, store, mod, writer, field.type, .suffix, .{}); + try writer.print("{}{ }", .{ trailing, fmtIdent(mem.span(field.name)) }); + try renderTypeSuffix(.none, store, mod, writer, field.type, .suffix, .{}); + try writer.writeAll(";\n"); + } + try writer.writeByteNTimes(' ', indent); + try writer.writeByte('}'); +} + +pub fn genTypeDecl( + mod: *Module, + writer: anytype, + global_store: CType.Store.Set, + global_idx: CType.Index, + decl: Decl.OptionalIndex, + decl_store: CType.Store.Set, + decl_idx: CType.Index, + found_existing: bool, +) !void { + const global_cty = global_store.indexToCType(global_idx); + switch (global_cty.tag()) { + .fwd_anon_struct => if (decl != .none) { + try writer.writeAll("typedef "); + _ = try renderTypePrefix(.none, global_store, mod, writer, global_idx, .suffix, .{}); + try writer.writeByte(' '); + _ = try renderTypePrefix(decl, decl_store, mod, writer, decl_idx, .suffix, .{}); + try writer.writeAll(";\n"); + }, + + .fwd_struct, + .fwd_union, + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => |tag| if (!found_existing) { + switch (tag) { + .fwd_struct, + .fwd_union, + => { + const owner_decl = global_cty.cast(CType.Payload.FwdDecl).?.data; + _ = try renderTypePrefix(.none, global_store, mod, writer, global_idx, .suffix, .{}); + try writer.writeAll("; // "); + try mod.declPtr(owner_decl).renderFullyQualifiedName(mod, writer); + try writer.writeByte('\n'); + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => { + const fwd_idx = global_cty.cast(CType.Payload.Aggregate).?.data.fwd_decl; + try renderTypeName( + mod, + writer, + fwd_idx, + 
global_store.indexToCType(fwd_idx), + if (global_cty.isPacked()) "zig_packed(" else "", + ); + try writer.writeByte(' '); + try renderAggregateFields(mod, writer, global_store, global_cty, 0); + if (global_cty.isPacked()) try writer.writeByte(')'); + try writer.writeAll(";\n"); + }, + + else => unreachable, + } + }, + + else => {}, + } +} + +pub fn genGlobalAsm(mod: *Module, writer: anytype) !void { + var it = mod.global_assembly.valueIterator(); + while (it.next()) |asm_source| try writer.print("__asm({s});\n", .{fmtStringLiteral(asm_source.*, null)}); } pub fn genErrDecls(o: *Object) !void { @@ -2690,26 +2411,24 @@ pub fn genErrDecls(o: *Object) !void { o.indent_writer.popIndent(); try writer.writeAll("};\n"); - const name_prefix = "zig_errorName"; - const name_buf = try o.dg.gpa.alloc(u8, name_prefix.len + "_".len + max_name_len + 1); + const array_identifier = "zig_errorName"; + const name_prefix = array_identifier ++ "_"; + const name_buf = try o.dg.gpa.alloc(u8, name_prefix.len + max_name_len); defer o.dg.gpa.free(name_buf); - std.mem.copy(u8, name_buf, name_prefix ++ "_"); + std.mem.copy(u8, name_buf, name_prefix); for (o.dg.module.error_name_list.items) |name| { - std.mem.copy(u8, name_buf[name_prefix.len + "_".len ..], name); - name_buf[name_prefix.len + "_".len + name.len] = 0; - - const identifier = name_buf[0 .. name_prefix.len + "_".len + name.len :0]; - const name_z = identifier[name_prefix.len + "_".len ..]; + std.mem.copy(u8, name_buf[name_prefix.len..], name); + const identifier = name_buf[0 .. name_prefix.len + name.len]; var name_ty_pl = Type.Payload.Len{ .base = .{ .tag = .array_u8_sentinel_0 }, .data = name.len }; const name_ty = Type.initPayload(&name_ty_pl.base); - var name_pl = Value.Payload.Bytes{ .base = .{ .tag = .bytes }, .data = name_z }; + var name_pl = Value.Payload.Bytes{ .base = .{ .tag = .bytes }, .data = name }; const name_val = Value.initPayload(&name_pl.base); try writer.writeAll("static "); - try o.dg.renderTypeAndName(writer, name_ty, .{ .identifier = identifier }, .Const, 0, .Complete); + try o.dg.renderTypeAndName(writer, name_ty, .{ .identifier = identifier }, Const, 0, .complete); try writer.writeAll(" = "); try o.dg.renderValue(writer, name_ty, name_val, .StaticInitializer); try writer.writeAll(";\n"); @@ -2722,7 +2441,7 @@ pub fn genErrDecls(o: *Object) !void { const name_array_ty = Type.initPayload(&name_array_ty_pl.base); try writer.writeAll("static "); - try o.dg.renderTypeAndName(writer, name_array_ty, .{ .identifier = name_prefix }, .Const, 0, .Complete); + try o.dg.renderTypeAndName(writer, name_array_ty, .{ .identifier = array_identifier }, Const, 0, .complete); try writer.writeAll(" = {"); for (o.dg.module.error_name_list.items, 0..) |name, value| { if (value != 0) try writer.writeByte(','); @@ -2730,7 +2449,7 @@ pub fn genErrDecls(o: *Object) !void { var len_pl = Value.Payload.U64{ .base = .{ .tag = .int_u64 }, .data = name.len }; const len_val = Value.initPayload(&len_pl.base); - try writer.print("{{" ++ name_prefix ++ "_{}, {}}}", .{ + try writer.print("{{" ++ name_prefix ++ "{}, {}}}", .{ fmtIdent(name), try o.dg.fmtIntLiteral(Type.usize, len_val), }); } @@ -2742,14 +2461,95 @@ fn genExports(o: *Object) !void { defer tracy.end(); const fwd_decl_writer = o.dg.fwd_decl.writer(); - if (o.dg.module.decl_exports.get(o.dg.decl_index)) |exports| for (exports.items[1..], 0..) 
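// Output sketch for genTypeDecl, with hypothetical indices: a per-decl
// anonymous struct becomes a typedef onto the lazily named global form,
//
//   typedef struct anon__lazy_4 anon__2_9;
//
// while a named forward declaration is emitted once, with the fully qualified
// name trailing as a comment:
//
//   struct Bar__7; // foo.Bar
//
// and the full definition follows later via renderAggregateFields.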
|@"export", i| { - try fwd_decl_writer.writeAll("zig_export("); - try o.dg.renderFunctionSignature(fwd_decl_writer, .Forward, @intCast(u32, 1 + i)); - try fwd_decl_writer.print(", {s}, {s});\n", .{ - fmtStringLiteral(exports.items[0].options.name), - fmtStringLiteral(@"export".options.name), - }); - }; + if (o.dg.module.decl_exports.get(o.dg.decl_index.unwrap().?)) |exports| { + for (exports.items[1..], 1..) |@"export", i| { + try fwd_decl_writer.writeAll("zig_export("); + try o.dg.renderFunctionSignature(fwd_decl_writer, o.dg.decl_index.unwrap().?, .forward, .{ .export_index = @intCast(u32, i) }); + try fwd_decl_writer.print(", {s}, {s});\n", .{ + fmtStringLiteral(exports.items[0].options.name, null), + fmtStringLiteral(@"export".options.name, null), + }); + } + } +} + +pub fn genLazyFn(o: *Object, lazy_fn: LazyFnMap.Entry) !void { + const w = o.writer(); + const key = lazy_fn.key_ptr.*; + const val = lazy_fn.value_ptr; + const fn_name = val.fn_name; + switch (key) { + .tag_name => { + const enum_ty = val.data.tag_name; + + const name_slice_ty = Type.initTag(.const_slice_u8_sentinel_0); + + try w.writeAll("static "); + try o.dg.renderType(w, name_slice_ty); + try w.writeByte(' '); + try w.writeAll(fn_name); + try w.writeByte('('); + try o.dg.renderTypeAndName(w, enum_ty, .{ .identifier = "tag" }, Const, 0, .complete); + try w.writeAll(") {\n switch (tag) {\n"); + for (enum_ty.enumFields().keys(), 0..) |name, index| { + var tag_pl: Value.Payload.U32 = .{ + .base = .{ .tag = .enum_field_index }, + .data = @intCast(u32, index), + }; + const tag_val = Value.initPayload(&tag_pl.base); + + var int_pl: Value.Payload.U64 = undefined; + const int_val = tag_val.enumToInt(enum_ty, &int_pl); + + var name_ty_pl = Type.Payload.Len{ .base = .{ .tag = .array_u8_sentinel_0 }, .data = name.len }; + const name_ty = Type.initPayload(&name_ty_pl.base); + + var name_pl = Value.Payload.Bytes{ .base = .{ .tag = .bytes }, .data = name }; + const name_val = Value.initPayload(&name_pl.base); + + var len_pl = Value.Payload.U64{ .base = .{ .tag = .int_u64 }, .data = name.len }; + const len_val = Value.initPayload(&len_pl.base); + + try w.print(" case {}: {{\n static ", .{try o.dg.fmtIntLiteral(enum_ty, int_val)}); + try o.dg.renderTypeAndName(w, name_ty, .{ .identifier = "name" }, Const, 0, .complete); + try w.writeAll(" = "); + try o.dg.renderValue(w, name_ty, name_val, .Initializer); + try w.writeAll(";\n return ("); + try o.dg.renderType(w, name_slice_ty); + try w.print("){{{}, {}}};\n", .{ + fmtIdent("name"), try o.dg.fmtIntLiteral(Type.usize, len_val), + }); + + try w.writeAll(" }\n"); + } + try w.writeAll(" }\n while ("); + try o.dg.renderValue(w, Type.bool, Value.true, .Other); + try w.writeAll(") "); + _ = try airBreakpoint(w); + try w.writeAll("}\n"); + }, + .never_tail, .never_inline => |fn_decl_index| { + const fn_decl = o.dg.module.declPtr(fn_decl_index); + const fn_cty = try o.dg.typeToCType(fn_decl.ty, .complete); + const fn_info = fn_cty.cast(CType.Payload.Function).?.data; + + const fwd_decl_writer = o.dg.fwd_decl.writer(); + try fwd_decl_writer.print("static zig_{s} ", .{@tagName(key)}); + try o.dg.renderFunctionSignature(fwd_decl_writer, fn_decl_index, .forward, .{ .string = fn_name }); + try fwd_decl_writer.writeAll(";\n"); + + try w.print("static zig_{s} ", .{@tagName(key)}); + try o.dg.renderFunctionSignature(w, fn_decl_index, .complete, .{ .string = fn_name }); + try w.writeAll(" {\n return "); + try o.dg.renderDeclName(w, fn_decl_index, 0); + try w.writeByte('('); + for 
(0..fn_info.param_types.len) |arg| { + if (arg > 0) try w.writeAll(", "); + try o.dg.writeCValue(w, .{ .arg = arg }); + } + try w.writeAll(");\n}\n"); + }, + } } pub fn genFunc(f: *Function) !void { @@ -2758,9 +2558,10 @@ pub fn genFunc(f: *Function) !void { const o = &f.object; const gpa = o.dg.gpa; + const decl_index = o.dg.decl_index.unwrap().?; const tv: TypedValue = .{ - .ty = o.dg.decl.ty, - .val = o.dg.decl.val, + .ty = o.dg.decl.?.ty, + .val = o.dg.decl.?.val, }; o.code_header = std.ArrayList(u8).init(gpa); @@ -2769,13 +2570,13 @@ pub fn genFunc(f: *Function) !void { const is_global = o.dg.declIsGlobal(tv); const fwd_decl_writer = o.dg.fwd_decl.writer(); try fwd_decl_writer.writeAll(if (is_global) "zig_extern " else "static "); - try o.dg.renderFunctionSignature(fwd_decl_writer, .Forward, 0); + try o.dg.renderFunctionSignature(fwd_decl_writer, decl_index, .forward, .{ .export_index = 0 }); try fwd_decl_writer.writeAll(";\n"); try genExports(o); try o.indent_writer.insertNewline(); if (!is_global) try o.writer().writeAll("static "); - try o.dg.renderFunctionSignature(o.writer(), .Complete, 0); + try o.dg.renderFunctionSignature(o.writer(), decl_index, .complete, .{ .export_index = 0 }); try o.writer().writeByte(' '); // In case we need to use the header, populate it with a copy of the function @@ -2799,41 +2600,31 @@ pub fn genFunc(f: *Function) !void { // missing. These are added now to complete the map. Then we can sort by // alignment, descending. const free_locals = f.getFreeLocals(); - const values = f.allocs.values(); - for (f.allocs.keys(), 0..) |local_index, i| { - if (values[i]) continue; // static + for (f.allocs.keys(), f.allocs.values()) |local_index, value| { + if (value) continue; // static const local = f.locals.items[local_index]; log.debug("inserting local {d} into free_locals", .{local_index}); - const gop = try free_locals.getOrPutContext(gpa, local.ty, f.tyHashCtx()); + const gop = try free_locals.getOrPut(gpa, local.getType()); if (!gop.found_existing) gop.value_ptr.* = .{}; - try gop.value_ptr.append(gpa, local_index); + try gop.value_ptr.putNoClobber(gpa, local_index, {}); } const SortContext = struct { - target: std.Target, - keys: []const Type, + keys: []const LocalType, - pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { - const a_ty = ctx.keys[a_index]; - const b_ty = ctx.keys[b_index]; - return b_ty.abiAlignment(ctx.target) < a_ty.abiAlignment(ctx.target); + pub fn lessThan(ctx: @This(), lhs_index: usize, rhs_index: usize) bool { + const lhs_ty = ctx.keys[lhs_index]; + const rhs_ty = ctx.keys[rhs_index]; + return lhs_ty.alignas.getAlign() > rhs_ty.alignas.getAlign(); } }; - const target = o.dg.module.getTarget(); - free_locals.sort(SortContext{ .target = target, .keys = free_locals.keys() }); + free_locals.sort(SortContext{ .keys = free_locals.keys() }); const w = o.code_header.writer(); for (free_locals.values()) |list| { - for (list.items) |local_index| { + for (list.keys()) |local_index| { const local = f.locals.items[local_index]; - try o.dg.renderTypeAndName( - w, - local.ty, - .{ .local = local_index }, - .Mut, - local.alignment, - .Complete, - ); + try o.dg.renderCTypeAndName(w, local.cty_idx, .{ .local = local_index }, .{}, local.alignas); try w.writeAll(";\n "); } } @@ -2850,15 +2641,15 @@ pub fn genDecl(o: *Object) !void { const tracy = trace(@src()); defer tracy.end(); - const tv: TypedValue = .{ - .ty = o.dg.decl.ty, - .val = o.dg.decl.val, - }; + const decl = o.dg.decl.?; + const decl_c_value = .{ .decl = 
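// Locals pooling note: every non-static alloc is merged into free_locals,
// which is now keyed by C type (LocalType) instead of Zig type and sorted by
// alignment, descending, so wider-aligned slots are declared first in the
// function header. A hypothetical emitted header fragment:
//
//   zig_align(16) uint8_t t0[32];
//   uint64_t t1;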
o.dg.decl_index.unwrap().? }; + const tv: TypedValue = .{ .ty = decl.ty, .val = decl.val }; + if (!tv.ty.isFnOrHasRuntimeBitsIgnoreComptime()) return; if (tv.val.tag() == .extern_fn) { const fwd_decl_writer = o.dg.fwd_decl.writer(); try fwd_decl_writer.writeAll("zig_extern "); - try o.dg.renderFunctionSignature(fwd_decl_writer, .Forward, 0); + try o.dg.renderFunctionSignature(fwd_decl_writer, decl_c_value.decl, .forward, .{ .export_index = 0 }); try fwd_decl_writer.writeAll(";\n"); try genExports(o); } else if (tv.val.castTag(.variable)) |var_payload| { @@ -2867,11 +2658,9 @@ pub fn genDecl(o: *Object) !void { const is_global = o.dg.declIsGlobal(tv) or variable.is_extern; const fwd_decl_writer = o.dg.fwd_decl.writer(); - const decl_c_value = CValue{ .decl = o.dg.decl_index }; - try fwd_decl_writer.writeAll(if (is_global) "zig_extern " else "static "); if (variable.is_threadlocal) try fwd_decl_writer.writeAll("zig_threadlocal "); - try o.dg.renderTypeAndName(fwd_decl_writer, o.dg.decl.ty, decl_c_value, .Mut, o.dg.decl.@"align", .Complete); + try o.dg.renderTypeAndName(fwd_decl_writer, decl.ty, decl_c_value, .{}, decl.@"align", .complete); try fwd_decl_writer.writeAll(";\n"); try genExports(o); @@ -2880,27 +2669,26 @@ pub fn genDecl(o: *Object) !void { const w = o.writer(); if (!is_global) try w.writeAll("static "); if (variable.is_threadlocal) try w.writeAll("zig_threadlocal "); - if (o.dg.decl.@"linksection") |section| try w.print("zig_linksection(\"{s}\", ", .{section}); - try o.dg.renderTypeAndName(w, o.dg.decl.ty, decl_c_value, .Mut, o.dg.decl.@"align", .Complete); - if (o.dg.decl.@"linksection" != null) try w.writeAll(", read, write)"); + if (decl.@"linksection") |section| try w.print("zig_linksection(\"{s}\", ", .{section}); + try o.dg.renderTypeAndName(w, tv.ty, decl_c_value, .{}, decl.@"align", .complete); + if (decl.@"linksection" != null) try w.writeAll(", read, write)"); try w.writeAll(" = "); try o.dg.renderValue(w, tv.ty, variable.init, .StaticInitializer); try w.writeByte(';'); try o.indent_writer.insertNewline(); } else { - const is_global = o.dg.module.decl_exports.contains(o.dg.decl_index); + const is_global = o.dg.module.decl_exports.contains(decl_c_value.decl); const fwd_decl_writer = o.dg.fwd_decl.writer(); - const decl_c_value: CValue = .{ .decl = o.dg.decl_index }; try fwd_decl_writer.writeAll(if (is_global) "zig_extern " else "static "); - try o.dg.renderTypeAndName(fwd_decl_writer, tv.ty, decl_c_value, .Const, o.dg.decl.@"align", .Complete); + try o.dg.renderTypeAndName(fwd_decl_writer, tv.ty, decl_c_value, Const, decl.@"align", .complete); try fwd_decl_writer.writeAll(";\n"); const w = o.writer(); if (!is_global) try w.writeAll("static "); - if (o.dg.decl.@"linksection") |section| try w.print("zig_linksection(\"{s}\", ", .{section}); - try o.dg.renderTypeAndName(w, tv.ty, decl_c_value, .Const, o.dg.decl.@"align", .Complete); - if (o.dg.decl.@"linksection" != null) try w.writeAll(", read)"); + if (decl.@"linksection") |section| try w.print("zig_linksection(\"{s}\", ", .{section}); + try o.dg.renderTypeAndName(w, tv.ty, decl_c_value, Const, decl.@"align", .complete); + if (decl.@"linksection" != null) try w.writeAll(", read)"); try w.writeAll(" = "); try o.dg.renderValue(w, tv.ty, tv.val, .StaticInitializer); try w.writeAll(";\n"); @@ -2912,8 +2700,8 @@ pub fn genHeader(dg: *DeclGen) error{ AnalysisFail, OutOfMemory }!void { defer tracy.end(); const tv: TypedValue = .{ - .ty = dg.decl.ty, - .val = dg.decl.val, + .ty = dg.decl.?.ty, + .val = dg.decl.?.val, }; const 
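// Variable emission sketch (a hypothetical threadlocal `g: u32 = 42` placed in
// an explicit section), following the writer calls in genDecl above:
//
//   zig_extern zig_threadlocal uint32_t g;                     /* fwd decl */
//   zig_threadlocal zig_linksection(".data", uint32_t g, read, write) = 42;
//
// note that zig_linksection now wraps the declarator and takes access modes.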
writer = dg.fwd_decl.writer(); @@ -2922,7 +2710,7 @@ pub fn genHeader(dg: *DeclGen) error{ AnalysisFail, OutOfMemory }!void { const is_global = dg.declIsGlobal(tv); if (is_global) { try writer.writeAll("zig_extern "); - try dg.renderFunctionSignature(writer, .Complete, 0); + try dg.renderFunctionSignature(writer, dg.decl_index.unwrap().?, .complete, .{ .export_index = 0 }); try dg.fwd_decl.appendSlice(";\n"); } }, @@ -2951,7 +2739,7 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, // zig fmt: off .constant => unreachable, // excluded from function bodies .const_ty => unreachable, // excluded from function bodies - .arg => airArg(f), + .arg => try airArg(f, inst), .breakpoint => try airBreakpoint(f.object.writer()), .ret_addr => try airRetAddr(f, inst), @@ -3112,10 +2900,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .dbg_block_begin, .dbg_block_end, - => CValue{ .none = {} }, + => .none, .call => try airCall(f, inst, .auto), - .call_always_tail => try airCall(f, inst, .always_tail), + .call_always_tail => .none, .call_never_tail => try airCall(f, inst, .never_tail), .call_never_inline => try airCall(f, inst, .never_inline), @@ -3194,19 +2982,20 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .error_set_has_value => return f.fail("TODO: C backend: implement error_set_has_value", .{}), .vector_store_elem => return f.fail("TODO: C backend: implement vector_store_elem", .{}), - .c_va_arg => return f.fail("TODO implement c_va_arg", .{}), - .c_va_copy => return f.fail("TODO implement c_va_copy", .{}), - .c_va_end => return f.fail("TODO implement c_va_end", .{}), - .c_va_start => return f.fail("TODO implement c_va_start", .{}), + .c_va_start => try airCVaStart(f, inst), + .c_va_arg => try airCVaArg(f, inst), + .c_va_end => try airCVaEnd(f, inst), + .c_va_copy => try airCVaCopy(f, inst), // zig fmt: on }; - if (result_value == .local) { - log.debug("map %{d} to t{d}", .{ inst, result_value.local }); - } - switch (result_value) { - .none => {}, - else => try f.value_map.putNoClobber(Air.indexToRef(inst), result_value), + if (result_value == .new_local) { + log.debug("map %{d} to t{d}", .{ inst, result_value.new_local }); } + try f.value_map.putNoClobber(Air.indexToRef(inst), switch (result_value) { + .none => continue, + .new_local => |i| .{ .local = i }, + else => result_value, + }); } } @@ -3215,7 +3004,7 @@ fn airSliceField(f: *Function, inst: Air.Inst.Index, is_ptr: bool, field_name: [ if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ty_op.operand}); - return CValue.none; + return .none; } const inst_ty = f.air.typeOfIndex(inst); @@ -3241,7 +3030,7 @@ fn airPtrElemVal(f: *Function, inst: Air.Inst.Index) !CValue { !inst_ty.hasRuntimeBitsIgnoreComptime()) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } const ptr = try f.resolveInst(bin_op.lhs); @@ -3267,7 +3056,7 @@ fn airPtrElemVal(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeByte(']'); if (is_array) { try writer.writeAll(", sizeof("); - try f.renderTypecast(writer, inst_ty); + try f.renderType(writer, inst_ty); try writer.writeAll("))"); } try writer.writeAll(";\n"); @@ -3280,9 +3069,10 @@ fn airPtrElemPtr(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } + const inst_ty = f.air.typeOfIndex(inst); const ptr_ty = f.air.typeOf(bin_op.lhs); const child_ty = 
ptr_ty.childType(); @@ -3297,7 +3087,9 @@ fn airPtrElemPtr(f: *Function, inst: Air.Inst.Index) !CValue { const writer = f.object.writer(); const local = try f.allocLocal(inst, f.air.typeOfIndex(inst)); try f.writeCValue(writer, local, .Other); - try writer.writeAll(" = &("); + try writer.writeAll(" = ("); + try f.renderType(writer, inst_ty); + try writer.writeAll(")&("); if (ptr_ty.ptrSize() == .One) { // It's a pointer to an array, so we need to de-reference. try f.writeCValueDeref(writer, ptr); @@ -3318,7 +3110,7 @@ fn airSliceElemVal(f: *Function, inst: Air.Inst.Index) !CValue { !inst_ty.hasRuntimeBitsIgnoreComptime()) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } const slice = try f.resolveInst(bin_op.lhs); @@ -3344,7 +3136,7 @@ fn airSliceElemVal(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeByte(']'); if (is_array) { try writer.writeAll(", sizeof("); - try f.renderTypecast(writer, inst_ty); + try f.renderType(writer, inst_ty); try writer.writeAll("))"); } try writer.writeAll(";\n"); @@ -3357,7 +3149,7 @@ fn airSliceElemPtr(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } const slice_ty = f.air.typeOf(bin_op.lhs); @@ -3387,7 +3179,7 @@ fn airArrayElemVal(f: *Function, inst: Air.Inst.Index) !CValue { const inst_ty = f.air.typeOfIndex(inst); if (f.liveness.isUnused(inst) or !inst_ty.hasRuntimeBitsIgnoreComptime()) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } const array = try f.resolveInst(bin_op.lhs); @@ -3413,7 +3205,7 @@ fn airArrayElemVal(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeByte(']'); if (is_array) { try writer.writeAll(", sizeof("); - try f.renderTypecast(writer, inst_ty); + try f.renderType(writer, inst_ty); try writer.writeAll("))"); } try writer.writeAll(";\n"); @@ -3425,16 +3217,19 @@ fn airAlloc(f: *Function, inst: Air.Inst.Index) !CValue { const elem_type = inst_ty.elemType(); if (!elem_type.isFnOrHasRuntimeBitsIgnoreComptime()) { - return CValue{ .undef = inst_ty }; + return .{ .undef = inst_ty }; } - const mutability: Mutability = if (inst_ty.isConstPtr()) .Const else .Mut; const target = f.object.dg.module.getTarget(); - const local = try f.allocAlignedLocal(elem_type, mutability, inst_ty.ptrAlignment(target)); - log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.local }); + const local = try f.allocAlignedLocal( + elem_type, + CQualifiers.init(.{ .@"const" = inst_ty.isConstPtr() }), + inst_ty.ptrAlignment(target), + ); + log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local }); const gpa = f.object.dg.module.gpa; - try f.allocs.put(gpa, local.local, false); - return CValue{ .local_ref = local.local }; + try f.allocs.put(gpa, local.new_local, false); + return .{ .local_ref = local.new_local }; } fn airRetPtr(f: *Function, inst: Air.Inst.Index) !CValue { @@ -3442,22 +3237,31 @@ fn airRetPtr(f: *Function, inst: Air.Inst.Index) !CValue { const elem_ty = inst_ty.elemType(); if (!elem_ty.isFnOrHasRuntimeBitsIgnoreComptime()) { - return CValue{ .undef = inst_ty }; + return .{ .undef = inst_ty }; } - const mutability: Mutability = if (inst_ty.isConstPtr()) .Const else .Mut; const target = f.object.dg.module.getTarget(); - const local = try f.allocAlignedLocal(elem_ty, mutability, inst_ty.ptrAlignment(target)); - log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.local }); + const local = try 
f.allocAlignedLocal( + elem_ty, + CQualifiers.init(.{ .@"const" = inst_ty.isConstPtr() }), + inst_ty.ptrAlignment(target), + ); + log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local }); const gpa = f.object.dg.module.gpa; - try f.allocs.put(gpa, local.local, false); - return CValue{ .local_ref = local.local }; + try f.allocs.put(gpa, local.new_local, false); + return .{ .local_ref = local.new_local }; } -fn airArg(f: *Function) CValue { +fn airArg(f: *Function, inst: Air.Inst.Index) !CValue { + const inst_ty = f.air.typeOfIndex(inst); + const inst_cty = try f.typeToIndex(inst_ty, .parameter); + const i = f.next_arg_index; f.next_arg_index += 1; - return .{ .arg = i }; + return if (inst_cty != try f.typeToIndex(inst_ty, .complete)) + .{ .arg_array = i } + else + .{ .arg = i }; } fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { @@ -3469,7 +3273,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { (!ptr_info.@"volatile" and f.liveness.isUnused(inst))) { try reap(f, inst, &.{ty_op.operand}); - return CValue.none; + return .none; } const operand = try f.resolveInst(ty_op.operand); @@ -3491,7 +3295,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeAll(", (const char *)"); try f.writeCValue(writer, operand, .Other); try writer.writeAll(", sizeof("); - try f.renderTypecast(writer, src_ty); + try f.renderType(writer, src_ty); try writer.writeAll("))"); } else if (ptr_info.host_size != 0) { var host_pl = Type.Payload.Bits{ @@ -3520,11 +3324,11 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { try f.writeCValue(writer, local, .Other); try writer.writeAll(" = ("); - try f.renderTypecast(writer, src_ty); + try f.renderType(writer, src_ty); try writer.writeAll(")zig_wrap_"); try f.object.dg.renderTypeForBuiltinFnName(writer, field_ty); try writer.writeAll("(("); - try f.renderTypecast(writer, field_ty); + try f.renderType(writer, field_ty); try writer.writeByte(')'); const cant_cast = host_ty.isInt() and host_ty.bitSize(target) > 64; if (cant_cast) { @@ -3554,20 +3358,24 @@ fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue { const un_op = f.air.instructions.items(.data)[inst].un_op; const writer = f.object.writer(); const target = f.object.dg.module.getTarget(); + const op_inst = Air.refToIndex(un_op); const op_ty = f.air.typeOf(un_op); const ret_ty = if (is_ptr) op_ty.childType() else op_ty; var lowered_ret_buf: LowerFnRetTyBuffer = undefined; const lowered_ret_ty = lowerFnRetTy(ret_ty, &lowered_ret_buf, target); - if (lowered_ret_ty.hasRuntimeBitsIgnoreComptime()) { - var deref = is_ptr; + if (op_inst != null and f.air.instructions.items(.tag)[op_inst.?] 
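// Why .arg_array exists: C cannot pass arrays by value, so when a parameter's
// .parameter CType differs from its .complete CType (airArg above), the value
// arrives wrapped in a struct and is accessed through its "array" member.
// Hypothetical lowering for `fn f(x: [2]u32) void`:
//
//   void f(struct { uint32_t array[2]; } a0);  /* uses become a0.array[...] */
//
// (the real parameter type is a named CType from the store, not an inline
// struct literal).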
 fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -3469,7 +3273,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
         (!ptr_info.@"volatile" and f.liveness.isUnused(inst)))
     {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const operand = try f.resolveInst(ty_op.operand);
@@ -3491,7 +3295,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
         try writer.writeAll(", (const char *)");
         try f.writeCValue(writer, operand, .Other);
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, src_ty);
+        try f.renderType(writer, src_ty);
         try writer.writeAll("))");
     } else if (ptr_info.host_size != 0) {
         var host_pl = Type.Payload.Bits{
@@ -3520,11 +3324,11 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {

         try f.writeCValue(writer, local, .Other);
         try writer.writeAll(" = (");
-        try f.renderTypecast(writer, src_ty);
+        try f.renderType(writer, src_ty);
         try writer.writeAll(")zig_wrap_");
         try f.object.dg.renderTypeForBuiltinFnName(writer, field_ty);
         try writer.writeAll("((");
-        try f.renderTypecast(writer, field_ty);
+        try f.renderType(writer, field_ty);
         try writer.writeByte(')');
         const cant_cast = host_ty.isInt() and host_ty.bitSize(target) > 64;
         if (cant_cast) {
@@ -3554,20 +3358,24 @@ fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {
     const un_op = f.air.instructions.items(.data)[inst].un_op;
     const writer = f.object.writer();
     const target = f.object.dg.module.getTarget();
+    const op_inst = Air.refToIndex(un_op);
     const op_ty = f.air.typeOf(un_op);
     const ret_ty = if (is_ptr) op_ty.childType() else op_ty;
     var lowered_ret_buf: LowerFnRetTyBuffer = undefined;
     const lowered_ret_ty = lowerFnRetTy(ret_ty, &lowered_ret_buf, target);

-    if (lowered_ret_ty.hasRuntimeBitsIgnoreComptime()) {
-        var deref = is_ptr;
+    if (op_inst != null and f.air.instructions.items(.tag)[op_inst.?] == .call_always_tail) {
+        try reap(f, inst, &.{un_op});
+        _ = try airCall(f, op_inst.?, .always_tail);
+    } else if (lowered_ret_ty.hasRuntimeBitsIgnoreComptime()) {
         const operand = try f.resolveInst(un_op);
         try reap(f, inst, &.{un_op});
+        var deref = is_ptr;
         const is_array = lowersToArray(ret_ty, target);
         const ret_val = if (is_array) ret_val: {
             const array_local = try f.allocLocal(inst, try lowered_ret_ty.copy(f.arena.allocator()));
             try writer.writeAll("memcpy(");
-            try f.writeCValueMember(writer, array_local, .{ .field = 0 });
+            try f.writeCValueMember(writer, array_local, .{ .identifier = "array" });
             try writer.writeAll(", ");
             if (deref)
                 try f.writeCValueDeref(writer, operand)
@@ -3575,7 +3383,7 @@ fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {
             else
                 try f.writeCValue(writer, operand, .FunctionArgument);
             deref = false;
             try writer.writeAll(", sizeof(");
-            try f.renderTypecast(writer, ret_ty);
+            try f.renderType(writer, ret_ty);
             try writer.writeAll("));\n");
             break :ret_val array_local;
         } else operand;
@@ -3587,16 +3395,15 @@ fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {
         try f.writeCValue(writer, ret_val, .Other);
         try writer.writeAll(";\n");
         if (is_array) {
-            try freeLocal(f, inst, ret_val.local, 0);
+            try freeLocal(f, inst, ret_val.new_local, 0);
         }
     } else {
         try reap(f, inst, &.{un_op});
-        if (f.object.dg.decl.ty.fnCallingConvention() != .Naked) {
-            // Not even allowed to return void in a naked function.
+        // Not even allowed to return void in a naked function.
+        if (if (f.object.dg.decl) |decl| decl.ty.fnCallingConvention() != .Naked else true)
             try writer.writeAll("return;\n");
-        }
     }
-    return CValue.none;
+    return .none;
 }

 fn airIntCast(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -3604,7 +3411,7 @@ fn airIntCast(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const operand = try f.resolveInst(ty_op.operand);
@@ -3625,7 +3432,7 @@ fn airTrunc(f: *Function, inst: Air.Inst.Index) !CValue {
     const ty_op = f.air.instructions.items(.data)[inst].ty_op;
     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const operand = try f.resolveInst(ty_op.operand);
@@ -3644,15 +3451,17 @@ fn airTrunc(f: *Function, inst: Air.Inst.Index) !CValue {
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = ");

+    if (dest_c_bits < 64) {
+        try writer.writeByte('(');
+        try f.renderType(writer, inst_ty);
+        try writer.writeByte(')');
+    }
+
     const needs_lo = operand_int_info.bits > 64 and dest_bits <= 64;
     if (needs_lo) {
         try writer.writeAll("zig_lo_");
         try f.object.dg.renderTypeForBuiltinFnName(writer, operand_ty);
         try writer.writeByte('(');
-    } else if (dest_c_bits <= 64) {
-        try writer.writeByte('(');
-        try f.renderTypecast(writer, inst_ty);
-        try writer.writeByte(')');
     }

     if (dest_bits >= 8 and std.math.isPowerOfTwo(dest_bits)) {
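The reordered `airTrunc` logic casts to the destination type whenever the destination is narrower than 64 bits, and reaches for `zig_lo_` only when a wider-than-64-bit operand must first be reduced to its low word. In plain C the same three steps look roughly like this (using the gcc/clang `unsigned __int128` extension where the backend would use its own two-word emulation, so this is a sketch of the semantics, not the emitted code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        unsigned __int128 wide =
            ((unsigned __int128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;

        /* the zig_lo_ step: take the low 64-bit word of a >64-bit operand */
        uint64_t lo = (uint64_t)wide;

        /* power-of-two destination width: a plain cast truncates */
        uint32_t t32 = (uint32_t)lo;

        /* 24-bit destination: cast, then mask to the exact bit width
           (the zig_wrap_ step for widths with no exact C type) */
        uint32_t t24 = (uint32_t)lo & 0xffffffu;

        /* prints fedcba9876543210 76543210 543210 */
        printf("%llx %x %x\n", (unsigned long long)lo, t32, t24);
        return 0;
    }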
@@ -3712,7 +3521,7 @@ fn airBoolToInt(f: *Function, inst: Air.Inst.Index) !CValue {
     const un_op = f.air.instructions.items(.data)[inst].un_op;
     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{un_op});
-        return CValue.none;
+        return .none;
     }
     const operand = try f.resolveInst(un_op);
     try reap(f, inst, &.{un_op});
@@ -3732,10 +3541,10 @@ fn storeUndefined(f: *Function, lhs_child_ty: Type, dest_ptr: CValue) !CValue {
         try writer.writeAll("memset(");
         try f.writeCValue(writer, dest_ptr, .FunctionArgument);
         try writer.print(", {x}, sizeof(", .{try f.fmtIntLiteral(Type.u8, Value.undef)});
-        try f.renderTypecast(writer, lhs_child_ty);
+        try f.renderType(writer, lhs_child_ty);
         try writer.writeAll("));\n");
     }
-    return CValue.none;
+    return .none;
 }

 fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -3744,7 +3553,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
     const ptr_info = f.air.typeOf(bin_op.lhs).ptrInfo().data;
     if (!ptr_info.pointee_type.hasRuntimeBitsIgnoreComptime()) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const ptr_val = try f.resolveInst(bin_op.lhs);
@@ -3793,10 +3602,10 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
         if (!is_array) try writer.writeByte('&');
         try f.writeCValue(writer, array_src, .FunctionArgument);
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, src_ty);
+        try f.renderType(writer, src_ty);
         try writer.writeAll("))");
         if (src_val == .constant) {
-            try freeLocal(f, inst, array_src.local, 0);
+            try freeLocal(f, inst, array_src.new_local, 0);
         }
     } else if (ptr_info.host_size != 0) {
         const host_bits = ptr_info.host_size * 8;
@@ -3847,18 +3656,18 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
         const cant_cast = host_ty.isInt() and host_ty.bitSize(target) > 64;
         if (cant_cast) {
             if (src_ty.bitSize(target) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{});
-            try writer.writeAll("zig_as_");
+            try writer.writeAll("zig_make_");
             try f.object.dg.renderTypeForBuiltinFnName(writer, host_ty);
             try writer.writeAll("(0, ");
         } else {
             try writer.writeByte('(');
-            try f.renderTypecast(writer, host_ty);
+            try f.renderType(writer, host_ty);
             try writer.writeByte(')');
         }

         if (src_ty.isPtrAtRuntime()) {
             try writer.writeByte('(');
-            try f.renderTypecast(writer, Type.usize);
+            try f.renderType(writer, Type.usize);
             try writer.writeByte(')');
         }
         try f.writeCValue(writer, src_val, .Other);
@@ -3870,7 +3679,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
         try f.writeCValue(writer, src_val, .Other);
     }
     try writer.writeAll(";\n");
-    return CValue.none;
+    return .none;
 }
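The `ptr_info.host_size != 0` store path above implements a read-modify-write on the containing "host" integer: mask out the field's bits, then or in the shifted new value (`zig_make_` only enters for host integers wider than 64 bits). The shape of the emitted operation, with an arbitrary example offset and width:

    #include <stdint.h>
    #include <stdio.h>

    /* Store a 5-bit field at bit offset 6 of a 16-bit host integer. */
    static void store_field(uint16_t *host, uint16_t value) {
        const uint16_t mask = (uint16_t)(0x1fu << 6);
        *host = (uint16_t)((*host & (uint16_t)~mask) | ((uint16_t)(value << 6) & mask));
    }

    int main(void) {
        uint16_t host = 0xffff;
        store_field(&host, 0x0a);
        printf("0x%04x\n", host); /* prints 0xfabf */
        return 0;
    }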
@@ -3879,7 +3688,7 @@ fn airOverflow(f: *Function, inst: Air.Inst.Index, operation: []const u8, info: BuiltinInfo) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const lhs = try f.resolveInst(bin_op.lhs);
@@ -3935,7 +3744,7 @@ fn airNot(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const op = try f.resolveInst(ty_op.operand);
@@ -3970,7 +3779,7 @@ fn airBinOp(

     try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });

-    if (f.liveness.isUnused(inst)) return CValue.none;
+    if (f.liveness.isUnused(inst)) return .none;

     const inst_ty = f.air.typeOfIndex(inst);

@@ -3993,7 +3802,7 @@ fn airCmpOp(f: *Function, inst: Air.Inst.Index, operator: []const u8, operation: []const u8) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const operand_ty = f.air.typeOf(bin_op.lhs);
@@ -4033,7 +3842,7 @@ fn airEquality(

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const operand_ty = f.air.typeOf(bin_op.lhs);
@@ -4089,7 +3898,7 @@ fn airCmpLtErrorsLen(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{un_op});
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -4110,7 +3919,7 @@ fn airPtrAddSub(f: *Function, inst: Air.Inst.Index, operator: u8) !CValue {
     const bin_op = f.air.extraData(Air.Bin, ty_pl.payload).data;
     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const lhs = try f.resolveInst(bin_op.lhs);
@@ -4118,32 +3927,31 @@ fn airPtrAddSub(f: *Function, inst: Air.Inst.Index, operator: u8) !CValue {
     try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });

     const inst_ty = f.air.typeOfIndex(inst);
-    const elem_ty = switch (inst_ty.ptrSize()) {
-        .One => blk: {
-            const array_ty = inst_ty.childType();
-            break :blk array_ty.childType();
-        },
-        else => inst_ty.childType(),
-    };
+    const elem_ty = inst_ty.elemType2();

-    // We must convert to and from integer types to prevent UB if the operation
-    // results in a NULL pointer, or if LHS is NULL. The operation is only UB
-    // if the result is NULL and then dereferenced.
     const local = try f.allocLocal(inst, inst_ty);
     const writer = f.object.writer();
     try f.writeCValue(writer, local, .Other);
-    try writer.writeAll(" = (");
-    try f.renderTypecast(writer, inst_ty);
-    try writer.writeAll(")(((uintptr_t)");
-    try f.writeCValue(writer, lhs, .Other);
-    try writer.writeAll(") ");
-    try writer.writeByte(operator);
-    try writer.writeAll(" (");
-    try f.writeCValue(writer, rhs, .Other);
-    try writer.writeAll("*sizeof(");
-    try f.renderTypecast(writer, elem_ty);
-    try writer.writeAll(")));\n");
+    try writer.writeAll(" = ");
+    if (elem_ty.hasRuntimeBitsIgnoreComptime()) {
+        // We must convert to and from integer types to prevent UB if the operation
+        // results in a NULL pointer, or if LHS is NULL. The operation is only UB
+        // if the result is NULL and then dereferenced.
+        try writer.writeByte('(');
+        try f.renderType(writer, inst_ty);
+        try writer.writeAll(")(((uintptr_t)");
+        try f.writeCValue(writer, lhs, .Other);
+        try writer.writeAll(") ");
+        try writer.writeByte(operator);
+        try writer.writeAll(" (");
+        try f.writeCValue(writer, rhs, .Other);
+        try writer.writeAll("*sizeof(");
+        try f.renderType(writer, elem_ty);
+        try writer.writeAll(")))");
+    } else try f.writeCValue(writer, lhs, .Initializer);
+
+    try writer.writeAll(";\n");

     return local;
 }
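The comment in `airPtrAddSub` is worth unpacking: `p + n` is undefined behavior in C when `p` is a null pointer, even if the result is never dereferenced, so the backend routes the computation through `uintptr_t`, where it is ordinary integer arithmetic. A small demonstration of the emitted shape:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        int *base = NULL; /* e.g. an optional or allowzero pointer */
        size_t offset = 4;

        /* Plain `base + offset` would be UB here. Integer arithmetic on
           uintptr_t is always defined; only dereferencing a bogus result
           would be UB. */
        int *q = (int *)(((uintptr_t)base) + (offset * sizeof(int)));

        printf("%p\n", (void *)q);
        return 0;
    }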
@@ -4152,7 +3960,7 @@ fn airMinMax(f: *Function, inst: Air.Inst.Index, operator: u8, operation: []const u8) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -4191,7 +3999,7 @@ fn airSlice(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-        return CValue.none;
+        return .none;
     }

     const ptr = try f.resolveInst(bin_op.lhs);
@@ -4204,7 +4012,7 @@ fn airSlice(f: *Function, inst: Air.Inst.Index) !CValue {
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(".ptr = (");
     var buf: Type.SlicePtrFieldTypeBuffer = undefined;
-    try f.renderTypecast(writer, inst_ty.slicePtrFieldType(&buf));
+    try f.renderType(writer, inst_ty.slicePtrFieldType(&buf));
     try writer.writeByte(')');
     try f.writeCValue(writer, ptr, .Other);
     try writer.writeAll("; ");
@@ -4222,24 +4030,41 @@ fn airCall(
     modifier: std.builtin.CallModifier,
 ) !CValue {
     // Not even allowed to call panic in a naked function.
-    if (f.object.dg.decl.ty.fnCallingConvention() == .Naked) return .none;
-
-    const gpa = f.object.dg.gpa;
+    if (f.object.dg.decl) |decl| if (decl.ty.fnCallingConvention() == .Naked) return .none;
+
+    const gpa = f.object.dg.gpa;
+    const module = f.object.dg.module;
+    const target = module.getTarget();
+    const writer = f.object.writer();

-    switch (modifier) {
-        .auto => {},
-        .always_tail => return f.fail("TODO: C backend: call with always_tail attribute", .{}),
-        .never_tail => return f.fail("TODO: C backend: call with never_tail attribute", .{}),
-        .never_inline => return f.fail("TODO: C backend: call with never_inline attribute", .{}),
-        else => unreachable,
-    }
     const pl_op = f.air.instructions.items(.data)[inst].pl_op;
     const extra = f.air.extraData(Air.Call, pl_op.payload);
     const args = @ptrCast([]const Air.Inst.Ref, f.air.extra[extra.end..][0..extra.data.args_len]);

     const resolved_args = try gpa.alloc(CValue, args.len);
     defer gpa.free(resolved_args);
-    for (args, 0..) |arg, i| {
-        resolved_args[i] = try f.resolveInst(arg);
+    for (resolved_args, args) |*resolved_arg, arg| {
+        const arg_ty = f.air.typeOf(arg);
+        const arg_cty = try f.typeToIndex(arg_ty, .parameter);
+        if (f.indexToCType(arg_cty).tag() == .void) {
+            resolved_arg.* = .none;
+            continue;
+        }
+        resolved_arg.* = try f.resolveInst(arg);
+        if (arg_cty != try f.typeToIndex(arg_ty, .complete)) {
+            var lowered_arg_buf: LowerFnRetTyBuffer = undefined;
+            const lowered_arg_ty = lowerFnRetTy(arg_ty, &lowered_arg_buf, target);
+
+            const array_local = try f.allocLocal(inst, try lowered_arg_ty.copy(f.arena.allocator()));
+            try writer.writeAll("memcpy(");
+            try f.writeCValueMember(writer, array_local, .{ .identifier = "array" });
+            try writer.writeAll(", ");
+            try f.writeCValue(writer, resolved_arg.*, .FunctionArgument);
+            try writer.writeAll(", sizeof(");
+            try f.renderType(writer, lowered_arg_ty);
+            try writer.writeAll("));\n");
+            resolved_arg.* = array_local;
+        }
     }

     const callee = try f.resolveInst(pl_op.operand);
@@ -4256,18 +4081,19 @@ fn airCall(
         .Pointer => callee_ty.childType(),
         else => unreachable,
     };
-    const writer = f.object.writer();
-    const target = f.object.dg.module.getTarget();

     const ret_ty = fn_ty.fnReturnType();
     var lowered_ret_buf: LowerFnRetTyBuffer = undefined;
     const lowered_ret_ty = lowerFnRetTy(ret_ty, &lowered_ret_buf, target);

-    const result_local: CValue = if (!lowered_ret_ty.hasRuntimeBitsIgnoreComptime())
+    const result_local = if (modifier == .always_tail) r: {
+        try writer.writeAll("zig_always_tail return ");
+        break :r .none;
+    } else if (!lowered_ret_ty.hasRuntimeBitsIgnoreComptime())
         .none
     else if (f.liveness.isUnused(inst)) r: {
         try writer.writeByte('(');
-        try f.renderTypecast(writer, Type.void);
+        try f.renderType(writer, Type.void);
         try writer.writeByte(')');
         break :r .none;
     } else r: {
@@ -4277,48 +4103,44 @@ fn airCall(
         break :r local;
     };

-    var is_extern = false;
-    var name: [*:0]const u8 = "";
     callee: {
         known: {
             const fn_decl = fn_decl: {
                 const callee_val = f.air.value(pl_op.operand) orelse break :known;
                 break :fn_decl switch (callee_val.tag()) {
-                    .extern_fn => blk: {
-                        is_extern = true;
-                        break :blk callee_val.castTag(.extern_fn).?.data.owner_decl;
-                    },
+                    .extern_fn => callee_val.castTag(.extern_fn).?.data.owner_decl,
                     .function => callee_val.castTag(.function).?.data.owner_decl,
                     .decl_ref => callee_val.castTag(.decl_ref).?.data,
                     else => break :known,
                 };
             };
-            name = f.object.dg.module.declPtr(fn_decl).name;
-            try f.object.dg.renderDeclName(writer, fn_decl, 0);
+            switch (modifier) {
+                .auto, .always_tail => try f.object.dg.renderDeclName(writer, fn_decl, 0),
+                inline .never_tail, .never_inline => |mod| try writer.writeAll(try f.getLazyFnName(
+                    @unionInit(LazyFnKey, @tagName(mod), fn_decl),
+                    @unionInit(LazyFnValue.Data, @tagName(mod), {}),
+                )),
+                else => unreachable,
+            }
             break :callee;
         }

+        switch (modifier) {
+            .auto, .always_tail => {},
+            .never_tail => return f.fail("CBE: runtime callee with never_tail attribute unsupported", .{}),
+            .never_inline => return f.fail("CBE: runtime callee with never_inline attribute unsupported", .{}),
+            else => unreachable,
+        }
+
         // Fall back to function pointer call.
         try f.writeCValue(writer, callee, .Other);
     }

     try writer.writeByte('(');
     var args_written: usize = 0;
-    for (args, 0..) |arg, arg_i| {
-        const ty = f.air.typeOf(arg);
-        if (!ty.hasRuntimeBitsIgnoreComptime()) continue;
-        if (args_written != 0) {
-            try writer.writeAll(", ");
-        }
-        if ((is_extern or std.mem.eql(u8, std.mem.span(name), "main")) and
-            ty.isCPtr() and ty.childType().tag() == .u8)
-        {
-            // Corresponds with hack in renderType .Pointer case.
-            try writer.writeAll("(char");
-            if (ty.isConstPtr()) try writer.writeAll(" const");
-            if (ty.isVolatilePtr()) try writer.writeAll(" volatile");
-            try writer.writeAll(" *)");
-        }
-        try f.writeCValue(writer, resolved_args[arg_i], .FunctionArgument);
+    for (resolved_args) |resolved_arg| {
+        if (resolved_arg == .none) continue;
+        if (args_written != 0) try writer.writeAll(", ");
+        try f.writeCValue(writer, resolved_arg, .FunctionArgument);
+        if (resolved_arg == .new_local) try freeLocal(f, inst, resolved_arg.new_local, 0);
         args_written += 1;
     }
     try writer.writeAll(");\n");
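The `.always_tail` branch above prefixes the call with `zig_always_tail`, a helper from the backend's C header; presumably it expands to clang's `musttail` statement attribute where available (that mapping is an assumption about zig.h, so the attribute is spelled out directly here). A sketch of a guaranteed tail call under that assumption:

    #include <stdio.h>

    /* With musttail, the recursion below is required to reuse the caller's
       frame, so the depth cannot overflow the stack. Clang 13+ spelling;
       other compilers need a fallback. */
    static long long sum_to(long long n, long long acc) {
        if (n == 0) return acc;
        __attribute__((musttail)) return sum_to(n - 1, acc + n);
    }

    int main(void) {
        printf("%lld\n", sum_to(10000000LL, 0LL));
        return 0;
    }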
@@ -4331,11 +4153,11 @@ fn airCall(
         try writer.writeAll("memcpy(");
         try f.writeCValue(writer, array_local, .FunctionArgument);
         try writer.writeAll(", ");
-        try f.writeCValueMember(writer, result_local, .{ .field = 0 });
+        try f.writeCValueMember(writer, result_local, .{ .identifier = "array" });
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, ret_ty);
+        try f.renderType(writer, ret_ty);
         try writer.writeAll("));\n");
-        try freeLocal(f, inst, result_local.local, 0);
+        try freeLocal(f, inst, result_local.new_local, 0);
         break :r array_local;
     };

@@ -4354,7 +4176,7 @@ fn airDbgStmt(f: *Function, inst: Air.Inst.Index) !CValue {
     // Perhaps an additional compilation option is in order?
     //try writer.print("#line {d}\n", .{dbg_stmt.line + 1});
     try writer.print("/* file:{d}:{d} */\n", .{ dbg_stmt.line + 1, dbg_stmt.column + 1 });
-    return CValue.none;
+    return .none;
 }

 fn airDbgInline(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4363,7 +4185,7 @@ fn airDbgInline(f: *Function, inst: Air.Inst.Index) !CValue {
     const function = f.air.values[ty_pl.payload].castTag(.function).?.data;
     const mod = f.object.dg.module;
     try writer.print("/* dbg func:{s} */\n", .{mod.declPtr(function.owner_decl).name});
-    return CValue.none;
+    return .none;
 }

 fn airDbgVar(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4375,7 +4197,7 @@ fn airDbgVar(f: *Function, inst: Air.Inst.Index) !CValue {
     try reap(f, inst, &.{pl_op.operand});
     const writer = f.object.writer();
     try writer.print("/* var:{s} */\n", .{name});
-    return CValue.none;
+    return .none;
 }

 fn airBlock(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4391,7 +4213,7 @@ fn airBlock(f: *Function, inst: Air.Inst.Index) !CValue {
     const result = if (inst_ty.tag() != .void and !f.liveness.isUnused(inst))
         try f.allocLocal(inst, inst_ty)
     else
-        CValue{ .none = {} };
+        .none;

     try f.blocks.putNoClobber(f.object.dg.gpa, inst, .{
         .block_id = block_id,
@@ -4400,8 +4222,9 @@ fn airBlock(f: *Function, inst: Air.Inst.Index) !CValue {
     try genBodyInner(f, body);
     try f.object.indent_writer.insertNewline();
+    // label might be unused, add a dummy goto
     // label must be followed by an expression, add an empty one.
-    try writer.print("zig_block_{d}:;\n", .{block_id});
+    try writer.print("goto zig_block_{d};\nzig_block_{d}: (void)0;\n", .{ block_id, block_id });
     return result;
 }
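The two comments in the `airBlock` hunk describe real C constraints: prior to C23 a label must be attached to a statement, and a label that no `goto` references triggers `-Wunused-label`. The emitted `goto` immediately before the label satisfies the second constraint, and the `(void)0` satisfies the first:

    #include <stdio.h>

    int main(void) {
        int x = 0;
        {
            x = 42;
            goto zig_block_0; /* where a `br` out of the block would land */
        }
        /* dummy goto: the label is now always referenced */
        goto zig_block_0;
    zig_block_0: (void)0; /* empty statement for the label to attach to */
        printf("%d\n", x);
        return 0;
    }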
@@ -4460,7 +4283,7 @@ fn lowerTry(

     if (!payload_has_bits) {
         if (!operand_is_ptr) {
-            return CValue.none;
+            return .none;
         } else {
             return err_union;
         }
@@ -4469,7 +4292,7 @@ fn lowerTry(
     try reap(f, inst, &.{operand});

     if (f.liveness.isUnused(inst)) {
-        return CValue.none;
+        return .none;
     }

     const target = f.object.dg.module.getTarget();
@@ -4481,7 +4304,7 @@ fn lowerTry(
         try writer.writeAll(", ");
         try f.writeCValueMember(writer, err_union, .{ .identifier = "payload" });
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, payload_ty);
+        try f.renderType(writer, payload_ty);
         try writer.writeAll("));\n");
     } else {
         try f.writeCValue(writer, local, .Other);
@@ -4514,7 +4337,7 @@ fn airBr(f: *Function, inst: Air.Inst.Index) !CValue {
         try writer.writeAll(", ");
         try f.writeCValue(writer, operand, .FunctionArgument);
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, operand_ty);
+        try f.renderType(writer, operand_ty);
         try writer.writeAll("))");
     } else {
         try f.writeCValue(writer, result, .Other);
@@ -4525,7 +4348,7 @@ fn airBr(f: *Function, inst: Air.Inst.Index) !CValue {
     }

     try writer.print("goto zig_block_{d};\n", .{block.block_id});
-    return CValue.none;
+    return .none;
 }

 fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4535,7 +4358,7 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {
     // https://github.com/ziglang/zig/issues/13410
     if (f.liveness.isUnused(inst) or !dest_ty.hasRuntimeBits()) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const operand = try f.resolveInst(ty_op.operand);
@@ -4563,7 +4386,7 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {
     if (dest_ty.isPtrAtRuntime() and operand_ty.isPtrAtRuntime()) {
         try f.writeCValue(writer, local, .Other);
         try writer.writeAll(" = (");
-        try f.renderTypecast(writer, dest_ty);
+        try f.renderType(writer, dest_ty);
         try writer.writeByte(')');
         try f.writeCValue(writer, operand, .Other);
         try writer.writeAll(";\n");
@@ -4584,7 +4407,7 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {
     try writer.writeAll(", &");
     try f.writeCValue(writer, operand_lval, .Other);
     try writer.writeAll(", sizeof(");
-    try f.renderTypecast(writer, dest_ty);
+    try f.renderType(writer, dest_ty);
     try writer.writeAll("));\n");

     // Ensure padding bits have the expected value.
@@ -4599,7 +4422,7 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {
     }

     if (operand == .constant) {
-        try freeLocal(f, inst, operand_lval.local, 0);
+        try freeLocal(f, inst, operand_lval.new_local, 0);
     }

     return local;
 }
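The non-pointer `airBitcast` path lowers to the standard C idiom for reinterpreting bytes: `memcpy` between two objects, which avoids the strict-aliasing and alignment problems of pointer casts. The pattern in isolation:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        float f = 1.5f;
        uint32_t bits;

        /* bitcast: copy the representation instead of casting pointers */
        memcpy(&bits, &f, sizeof(bits));
        printf("0x%08x\n", bits); /* 0x3fc00000 for IEEE-754 binary32 */

        return 0;
    }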
@@ -4607,27 +4430,27 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {

 fn airBreakpoint(writer: anytype) !CValue {
     try writer.writeAll("zig_breakpoint();\n");
-    return CValue.none;
+    return .none;
 }

 fn airRetAddr(f: *Function, inst: Air.Inst.Index) !CValue {
-    if (f.liveness.isUnused(inst)) return CValue.none;
+    if (f.liveness.isUnused(inst)) return .none;

     const writer = f.object.writer();
     const local = try f.allocLocal(inst, Type.usize);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = (");
-    try f.renderTypecast(writer, Type.usize);
+    try f.renderType(writer, Type.usize);
     try writer.writeAll(")zig_return_address();\n");
     return local;
 }

 fn airFrameAddress(f: *Function, inst: Air.Inst.Index) !CValue {
-    if (f.liveness.isUnused(inst)) return CValue.none;
+    if (f.liveness.isUnused(inst)) return .none;

     const writer = f.object.writer();
     const local = try f.allocLocal(inst, Type.usize);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = (");
-    try f.renderTypecast(writer, Type.usize);
+    try f.renderType(writer, Type.usize);
     try writer.writeAll(")zig_frame_address();\n");
     return local;
 }
@@ -4640,15 +4463,15 @@ fn airFence(f: *Function, inst: Air.Inst.Index) !CValue {
     try writeMemoryOrder(writer, atomic_order);
     try writer.writeAll(");\n");

-    return CValue.none;
+    return .none;
 }

 fn airUnreach(f: *Function) !CValue {
     // Not even allowed to call unreachable in a naked function.
-    if (f.object.dg.decl.ty.fnCallingConvention() == .Naked) return .none;
+    if (f.object.dg.decl) |decl| if (decl.ty.fnCallingConvention() == .Naked) return .none;

     try f.object.writer().writeAll("zig_unreachable();\n");
-    return CValue.none;
+    return .none;
 }

 fn airLoop(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4669,18 +4492,18 @@ fn airLoop(f: *Function, inst: Air.Inst.Index) !CValue {
     const new_free_locals = f.getFreeLocals();
     var it = new_free_locals.iterator();
     while (it.next()) |entry| {
-        const gop = try old_free_locals.getOrPutContext(gpa, entry.key_ptr.*, f.tyHashCtx());
+        const gop = try old_free_locals.getOrPut(gpa, entry.key_ptr.*);
         if (gop.found_existing) {
-            try gop.value_ptr.appendSlice(gpa, entry.value_ptr.items);
-        } else {
-            gop.value_ptr.* = entry.value_ptr.*;
-            entry.value_ptr.* = .{};
-        }
+            try gop.value_ptr.ensureUnusedCapacity(gpa, entry.value_ptr.count());
+            for (entry.value_ptr.keys()) |local_index| {
+                gop.value_ptr.putAssumeCapacityNoClobber(local_index, {});
+            }
+        } else gop.value_ptr.* = entry.value_ptr.move();
     }
     deinitFreeLocalsMap(gpa, new_free_locals);
     new_free_locals.* = old_free_locals.move();

-    return CValue.none;
+    return .none;
 }

 fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4705,6 +4528,10 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue {
     // that we can notice and use them in the else branch. Any new locals must
     // necessarily be free already after the then branch is complete.
     const pre_locals_len = @intCast(LocalIndex, f.locals.items.len);
+    // Remember how many allocs there were before entering the then branch so
+    // that we can notice and make sure not to use them in the else branch.
+    // Any new allocs must be removed from the free list.
+    const pre_allocs_len = @intCast(LocalIndex, f.allocs.count());
     const pre_clone_depth = f.free_locals_clone_depth;
     f.free_locals_clone_depth = @intCast(LoopDepth, f.free_locals_stack.items.len);

@@ -4735,7 +4562,7 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue {
         try die(f, inst, Air.indexToRef(operand));
     }

-    try noticeBranchFrees(f, pre_locals_len, inst);
+    try noticeBranchFrees(f, pre_locals_len, pre_allocs_len, inst);

     if (needs_else) {
         try genBody(f, else_body);
@@ -4745,7 +4572,7 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue {

     try f.object.indent_writer.insertNewline();

-    return CValue.none;
+    return .none;
 }
@@ -4759,11 +4586,11 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
     try writer.writeAll("switch (");
     if (condition_ty.zigTypeTag() == .Bool) {
         try writer.writeByte('(');
-        try f.renderTypecast(writer, Type.u1);
+        try f.renderType(writer, Type.u1);
         try writer.writeByte(')');
     } else if (condition_ty.isPtrAtRuntime()) {
         try writer.writeByte('(');
-        try f.renderTypecast(writer, Type.usize);
+        try f.renderType(writer, Type.usize);
         try writer.writeByte(')');
     }
     try f.writeCValue(writer, condition, .Other);
@@ -4780,8 +4607,7 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
     const last_case_i = switch_br.data.cases_len - @boolToInt(switch_br.data.else_body_len == 0);

     var extra_index: usize = switch_br.end;
-    var case_i: u32 = 0;
-    while (case_i < switch_br.data.cases_len) : (case_i += 1) {
+    for (0..switch_br.data.cases_len) |case_i| {
         const case = f.air.extraData(Air.SwitchBr.Case, extra_index);
         const items = @ptrCast([]const Air.Inst.Ref, f.air.extra[case.end..][0..case.data.items_len]);
         const case_body = f.air.extra[case.end + items.len ..][0..case.data.body_len];
@@ -4792,7 +4618,7 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
             try writer.writeAll("case ");
             if (condition_ty.isPtrAtRuntime()) {
                 try writer.writeByte('(');
-                try f.renderTypecast(writer, Type.usize);
+                try f.renderType(writer, Type.usize);
                 try writer.writeByte(')');
             }
             try f.object.dg.renderValue(writer, condition_ty, f.air.value(item).?, .Other);
@@ -4811,6 +4637,10 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
         // we can notice and use them in subsequent branches. Any new locals must
         // necessarily be free already after the previous branch is complete.
         const pre_locals_len = @intCast(LocalIndex, f.locals.items.len);
+        // Remember how many allocs there were before entering each branch so that
+        // we can notice and make sure not to use them in subsequent branches.
+        // Any new allocs must be removed from the free list.
+        const pre_allocs_len = @intCast(LocalIndex, f.allocs.count());
         const pre_clone_depth = f.free_locals_clone_depth;
         f.free_locals_clone_depth = @intCast(LoopDepth, f.free_locals_stack.items.len);

@@ -4831,7 +4661,7 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
             try genBody(f, case_body);
         }

-        try noticeBranchFrees(f, pre_locals_len, inst);
+        try noticeBranchFrees(f, pre_locals_len, pre_allocs_len, inst);
     } else {
         for (liveness.deaths[case_i]) |operand| {
             try die(f, inst, Air.indexToRef(operand));
         }
@@ -4858,7 +4688,7 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
     f.object.indent_writer.popIndent();
     try writer.writeAll("}\n");

-    return CValue.none;
+    return .none;
 }
 fn asmInputNeedsLocal(constraint: []const u8, value: CValue) bool {
@@ -4880,8 +4710,8 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
     const inputs = @ptrCast([]const Air.Inst.Ref, f.air.extra[extra_i..][0..extra.data.inputs_len]);
     extra_i += inputs.len;

-    const result: CValue = r: {
-        if (!is_volatile and f.liveness.isUnused(inst)) break :r CValue.none;
+    const result = r: {
+        if (!is_volatile and f.liveness.isUnused(inst)) break :r .none;

         const writer = f.object.writer();
         const inst_ty = f.air.typeOfIndex(inst);
@@ -4918,14 +4748,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
                 try writer.writeAll("register ");
                 const alignment = 0;
                 const local_value = try f.allocLocalValue(output_ty, alignment);
-                try f.object.dg.renderTypeAndName(
-                    writer,
-                    output_ty,
-                    local_value,
-                    .Mut,
-                    alignment,
-                    .Complete,
-                );
+                try f.object.dg.renderTypeAndName(writer, output_ty, local_value, .{}, alignment, .complete);
                 try writer.writeAll(" __asm(\"");
                 try writer.writeAll(constraint["={".len .. constraint.len - "}".len]);
                 try writer.writeAll("\")");
@@ -4957,14 +4780,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
                 if (is_reg) try writer.writeAll("register ");
                 const alignment = 0;
                 const local_value = try f.allocLocalValue(input_ty, alignment);
-                try f.object.dg.renderTypeAndName(
-                    writer,
-                    input_ty,
-                    local_value,
-                    .Const,
-                    alignment,
-                    .Complete,
-                );
+                try f.object.dg.renderTypeAndName(writer, input_ty, local_value, Const, alignment, .complete);
                 if (is_reg) {
                     try writer.writeAll(" __asm(\"");
                     try writer.writeAll(constraint["{".len .. constraint.len - "}".len]);
                     try writer.writeAll("\")");
@@ -4975,14 +4791,11 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
                 try writer.writeAll(";\n");
             }
         }
-        {
-            var clobber_i: u32 = 0;
-            while (clobber_i < clobbers_len) : (clobber_i += 1) {
-                const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
-                // This equation accounts for the fact that even if we have exactly 4 bytes
-                // for the string, we still use the next u32 for the null terminator.
-                extra_i += clobber.len / 4 + 1;
-            }
+        for (0..clobbers_len) |_| {
+            const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
+            // This equation accounts for the fact that even if we have exactly 4 bytes
+            // for the string, we still use the next u32 for the null terminator.
+            extra_i += clobber.len / 4 + 1;
         }
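The `register ... __asm("...")` declarations that `renderTypeAndName` now emits on one line are GNU C register-pinned variables; combined with an ordinary `"r"` constraint they give extended asm a `{reg}`-style fixed register. A self-contained x86-64 example of the same construct (the register choices here are arbitrary for the demo):

    #include <stdio.h>

    int main(void) {
    #if defined(__GNUC__) && defined(__x86_64__)
        /* pin a local to a specific register, as for a "{rdi}" constraint */
        register long in  __asm__("rdi") = 20;
        register long out __asm__("rax");
        /* the plain "r" constraints now resolve to the pinned registers */
        __asm__ volatile("lea 22(%1), %0" : "=r"(out) : "r"(in));
        printf("%ld\n", out); /* 42 */
    #endif
        return 0;
    }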
         {
@@ -5037,7 +4850,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {

         try writer.writeAll("__asm");
         if (is_volatile) try writer.writeAll(" volatile");
-        try writer.print("({s}", .{fmtStringLiteral(fixed_asm_source[0..dst_i])});
+        try writer.print("({s}", .{fmtStringLiteral(fixed_asm_source[0..dst_i], null)});

         extra_i = constraints_extra_begin;
@@ -5055,7 +4868,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
             try writer.writeByte(' ');
             if (!std.mem.eql(u8, name, "_")) try writer.print("[{s}]", .{name});
             const is_reg = constraint[1] == '{';
-            try writer.print("{s}(", .{fmtStringLiteral(if (is_reg) "=r" else constraint)});
+            try writer.print("{s}(", .{fmtStringLiteral(if (is_reg) "=r" else constraint, null)});
             if (is_reg) {
                 try f.writeCValue(writer, .{ .local = locals_index }, .Other);
                 locals_index += 1;
@@ -5081,28 +4894,25 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
             const is_reg = constraint[0] == '{';
             const input_val = try f.resolveInst(input);
-            try writer.print("{s}(", .{fmtStringLiteral(if (is_reg) "r" else constraint)});
+            try writer.print("{s}(", .{fmtStringLiteral(if (is_reg) "r" else constraint, null)});
             try f.writeCValue(writer, if (asmInputNeedsLocal(constraint, input_val)) local: {
-                const input_local = CValue{ .local = locals_index };
+                const input_local = .{ .local = locals_index };
                 locals_index += 1;
                 break :local input_local;
             } else input_val, .Other);
             try writer.writeByte(')');
         }
         try writer.writeByte(':');
-        {
-            var clobber_i: u32 = 0;
-            while (clobber_i < clobbers_len) : (clobber_i += 1) {
-                const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
-                // This equation accounts for the fact that even if we have exactly 4 bytes
-                // for the string, we still use the next u32 for the null terminator.
-                extra_i += clobber.len / 4 + 1;
+        for (0..clobbers_len) |clobber_i| {
+            const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
+            // This equation accounts for the fact that even if we have exactly 4 bytes
+            // for the string, we still use the next u32 for the null terminator.
+            extra_i += clobber.len / 4 + 1;

-                if (clobber.len == 0) continue;
+            if (clobber.len == 0) continue;

-                if (clobber_i > 0) try writer.writeByte(',');
-                try writer.print(" {s}", .{fmtStringLiteral(clobber)});
-            }
+            if (clobber_i > 0) try writer.writeByte(',');
+            try writer.print(" {s}", .{fmtStringLiteral(clobber, null)});
         }
         try writer.writeAll(");\n");
@@ -5119,7 +4929,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
             const is_reg = constraint[1] == '{';
             if (is_reg) {
                 try f.writeCValueDeref(writer, if (output == .none)
-                    CValue{ .local_ref = local.local }
+                    .{ .local_ref = local.new_local }
                 else
                     try f.resolveInst(output));
                 try writer.writeAll(" = ");
@@ -5154,7 +4964,7 @@ fn airIsNull(

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{un_op});
-        return CValue.none;
+        return .none;
     }

     const writer = f.object.writer();
@@ -5204,7 +5014,7 @@ fn airOptionalPayload(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const operand = try f.resolveInst(ty_op.operand);
@@ -5215,7 +5025,7 @@ fn airOptionalPayload(f: *Function, inst: Air.Inst.Index) !CValue {
     const payload_ty = opt_ty.optionalChild(&buf);

     if (!payload_ty.hasRuntimeBitsIgnoreComptime()) {
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -5244,7 +5054,7 @@ fn airOptionalPayload(f: *Function, inst: Air.Inst.Index) !CValue {
     try f.writeCValueMember(writer, operand, .{ .identifier = "payload" });
     if (is_array) {
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, inst_ty);
+        try f.renderType(writer, inst_ty);
         try writer.writeAll("))");
     }
     try writer.writeAll(";\n");
@@ -5256,7 +5066,7 @@ fn airOptionalPayloadPtr(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const writer = f.object.writer();
@@ -5267,7 +5077,7 @@ fn airOptionalPayloadPtr(f: *Function, inst: Air.Inst.Index) !CValue {
     const inst_ty = f.air.typeOfIndex(inst);

     if (!inst_ty.childType().hasRuntimeBitsIgnoreComptime()) {
-        return CValue{ .undef = inst_ty };
+        return .{ .undef = inst_ty };
     }

     const local = try f.allocLocal(inst, inst_ty);
@@ -5299,7 +5109,7 @@ fn airOptionalPayloadPtrSet(f: *Function, inst: Air.Inst.Index) !CValue {

     if (opt_ty.optionalReprIsPayload()) {
         if (f.liveness.isUnused(inst)) {
-            return CValue.none;
+            return .none;
         }
         const local = try f.allocLocal(inst, inst_ty);
         // The payload and the optional are the same value.
@@ -5316,7 +5126,7 @@ fn airOptionalPayloadPtrSet(f: *Function, inst: Air.Inst.Index) !CValue {
         try writer.writeAll(";\n");

         if (f.liveness.isUnused(inst)) {
-            return CValue.none;
+            return .none;
         }

         const local = try f.allocLocal(inst, inst_ty);
@@ -5328,6 +5138,62 @@ fn airOptionalPayloadPtrSet(f: *Function, inst: Air.Inst.Index) !CValue {
     }
 }

+fn fieldLocation(
+    container_ty: Type,
+    field_ptr_ty: Type,
+    field_index: u32,
+    target: std.Target,
+) union(enum) {
+    begin: void,
+    field: CValue,
+    byte_offset: u32,
+    end: void,
+} {
+    return switch (container_ty.zigTypeTag()) {
+        .Struct => switch (container_ty.containerLayout()) {
+            .Auto, .Extern => for (field_index..container_ty.structFieldCount()) |next_field_index| {
+                if (container_ty.structFieldIsComptime(next_field_index)) continue;
+                const field_ty = container_ty.structFieldType(next_field_index);
+                if (!field_ty.hasRuntimeBitsIgnoreComptime()) continue;
+                break .{ .field = if (container_ty.isSimpleTuple())
+                    .{ .field = next_field_index }
+                else
+                    .{ .identifier = container_ty.structFieldName(next_field_index) } };
+            } else if (container_ty.hasRuntimeBitsIgnoreComptime()) .end else .begin,
+            .Packed => if (field_ptr_ty.ptrInfo().data.host_size == 0)
+                .{ .byte_offset = container_ty.packedStructFieldByteOffset(field_index, target) }
+            else
+                .begin,
+        },
+        .Union => switch (container_ty.containerLayout()) {
+            .Auto, .Extern => {
+                const field_ty = container_ty.structFieldType(field_index);
+                if (!field_ty.hasRuntimeBitsIgnoreComptime())
+                    return if (container_ty.unionTagTypeSafety() != null and
+                        !container_ty.unionHasAllZeroBitFieldTypes())
+                        .{ .field = .{ .identifier = "payload" } }
+                    else
+                        .begin;
+                const field_name = container_ty.unionFields().keys()[field_index];
+                return .{ .field = if (container_ty.unionTagTypeSafety()) |_|
+                    .{ .payload_identifier = field_name }
+                else
+                    .{ .identifier = field_name } };
+            },
+            .Packed => .begin,
+        },
+        .Pointer => switch (container_ty.ptrSize()) {
+            .Slice => switch (field_index) {
+                0 => .{ .field = .{ .identifier = "ptr" } },
+                1 => .{ .field = .{ .identifier = "len" } },
+                else => unreachable,
+            },
+            .One, .Many, .C => unreachable,
+        },
+        else => unreachable,
+    };
+}
+
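The four variants returned by `fieldLocation` correspond to three concrete addressing forms in the generated C (`.begin` reuses the container pointer unchanged). Roughly:

    #include <stddef.h>
    #include <stdio.h>

    struct S { int a; long b; };

    int main(void) {
        struct S s = { 1, 2 };
        struct S *p = &s;

        /* .field: address a named member directly */
        long *field = &p->b;

        /* .byte_offset: packed layouts address fields at a byte offset */
        unsigned char *off = (unsigned char *)p + offsetof(struct S, b);

        /* .end: zero-bit trailing fields get a one-past-the-container pointer */
        struct S *end = p + 1;

        printf("%p %p %p\n", (void *)field, (void *)off, (void *)end);
        return 0;
    }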
 fn airStructFieldPtr(f: *Function, inst: Air.Inst.Index) !CValue {
     const ty_pl = f.air.instructions.items(.data)[inst].ty_pl;
     const extra = f.air.extraData(Air.StructField, ty_pl.payload).data;
@@ -5337,10 +5203,10 @@ fn airStructFieldPtr(f: *Function, inst: Air.Inst.Index) !CValue {
         return .none;
     }

-    const struct_ptr = try f.resolveInst(extra.struct_operand);
+    const container_ptr_val = try f.resolveInst(extra.struct_operand);
     try reap(f, inst, &.{extra.struct_operand});
-    const struct_ptr_ty = f.air.typeOf(extra.struct_operand);
-    return structFieldPtr(f, inst, struct_ptr_ty, struct_ptr, extra.field_index);
+    const container_ptr_ty = f.air.typeOf(extra.struct_operand);
+    return fieldPtr(f, inst, container_ptr_ty, container_ptr_val, extra.field_index);
 }

 fn airStructFieldPtrIndex(f: *Function, inst: Air.Inst.Index, index: u8) !CValue {
@@ -5351,10 +5217,10 @@ fn airStructFieldPtrIndex(f: *Function, inst: Air.Inst.Index, index: u8) !CValue {
         return .none;
     }

-    const struct_ptr = try f.resolveInst(ty_op.operand);
+    const container_ptr_val = try f.resolveInst(ty_op.operand);
     try reap(f, inst, &.{ty_op.operand});
-    const struct_ptr_ty = f.air.typeOf(ty_op.operand);
-    return structFieldPtr(f, inst, struct_ptr_ty, struct_ptr, index);
+    const container_ptr_ty = f.air.typeOf(ty_op.operand);
+    return fieldPtr(f, inst, container_ptr_ty, container_ptr_val, index);
 }

 fn airFieldParentPtr(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -5363,137 +5229,119 @@ fn airFieldParentPtr(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{extra.field_ptr});
-        return CValue.none;
+        return .none;
     }

-    const struct_ptr_ty = f.air.typeOfIndex(inst);
+    const target = f.object.dg.module.getTarget();
+    const container_ptr_ty = f.air.typeOfIndex(inst);
+    const container_ty = container_ptr_ty.childType();
+    const field_ptr_ty = f.air.typeOf(extra.field_ptr);
     const field_ptr_val = try f.resolveInst(extra.field_ptr);
     try reap(f, inst, &.{extra.field_ptr});

-    const target = f.object.dg.module.getTarget();
-    const struct_ty = struct_ptr_ty.childType();
-    const field_offset = struct_ty.structFieldOffset(extra.field_index, target);
-
-    var field_offset_pl = Value.Payload.I64{
-        .base = .{ .tag = .int_i64 },
-        .data = -@intCast(i64, field_offset),
-    };
-    const field_offset_val = Value.initPayload(&field_offset_pl.base);
-
-    var u8_ptr_pl = field_ptr_ty.ptrInfo();
-    u8_ptr_pl.data.pointee_type = Type.u8;
-    const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base);
-
     const writer = f.object.writer();
-    const local = try f.allocLocal(inst, struct_ptr_ty);
+    const local = try f.allocLocal(inst, container_ptr_ty);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = (");
-    try f.renderTypecast(writer, struct_ptr_ty);
-    try writer.writeAll(")&((");
-    try f.renderTypecast(writer, u8_ptr_ty);
+    try f.renderType(writer, container_ptr_ty);
     try writer.writeByte(')');
-    try f.writeCValue(writer, field_ptr_val, .Other);
-    try writer.print(")[{}];\n", .{try f.fmtIntLiteral(Type.isize, field_offset_val)});
+
+    switch (fieldLocation(container_ty, field_ptr_ty, extra.field_index, target)) {
+        .begin => try f.writeCValue(writer, field_ptr_val, .Initializer),
+        .field => |field| {
+            var u8_ptr_pl = field_ptr_ty.ptrInfo();
+            u8_ptr_pl.data.pointee_type = Type.u8;
+            const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base);
+
+            try writer.writeAll("((");
+            try f.renderType(writer, u8_ptr_ty);
+            try writer.writeByte(')');
+            try f.writeCValue(writer, field_ptr_val, .Other);
+            try writer.writeAll(" - offsetof(");
+            try f.renderType(writer, container_ty);
+            try writer.writeAll(", ");
+            try f.writeCValue(writer, field, .Other);
+            try writer.writeAll("))");
+        },
+        .byte_offset => |byte_offset| {
+            var u8_ptr_pl = field_ptr_ty.ptrInfo();
+            u8_ptr_pl.data.pointee_type = Type.u8;
+            const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base);
+
+            var byte_offset_pl = Value.Payload.U64{
+                .base = .{ .tag = .int_u64 },
+                .data = byte_offset,
+            };
+            const byte_offset_val = Value.initPayload(&byte_offset_pl.base);
+
+            try writer.writeAll("((");
+            try f.renderType(writer, u8_ptr_ty);
+            try writer.writeByte(')');
+            try f.writeCValue(writer, field_ptr_val, .Other);
+            try writer.print(" - {})", .{try f.fmtIntLiteral(Type.usize, byte_offset_val)});
+        },
+        .end => {
+            try f.writeCValue(writer, field_ptr_val, .Other);
+            try writer.print(" - {}", .{try f.fmtIntLiteral(Type.usize, Value.one)});
+        },
+    }
+
+    try writer.writeAll(";\n");
     return local;
 }
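`airFieldParentPtr` previously indexed backwards by a precomputed byte offset; it now subtracts `offsetof` of the member chosen by `fieldLocation`, which is exactly the inverse of taking `&parent->field`. The classic container-recovery idiom it emits:

    #include <stddef.h>
    #include <stdio.h>

    struct Node { int value; struct Node *next; };

    /* recover the containing struct from a pointer to one of its fields */
    static struct Node *parent_of_next(struct Node **field_ptr) {
        return (struct Node *)((char *)field_ptr - offsetof(struct Node, next));
    }

    int main(void) {
        struct Node n = { 42, NULL };
        struct Node *recovered = parent_of_next(&n.next);
        printf("%d\n", recovered->value); /* 42 */
        return 0;
    }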
-fn structFieldPtr(f: *Function, inst: Air.Inst.Index, struct_ptr_ty: Type, struct_ptr: CValue, index: u32) !CValue {
-    const writer = f.object.writer();
+fn fieldPtr(
+    f: *Function,
+    inst: Air.Inst.Index,
+    container_ptr_ty: Type,
+    container_ptr_val: CValue,
+    field_index: u32,
+) !CValue {
+    const target = f.object.dg.module.getTarget();
+    const container_ty = container_ptr_ty.elemType();
     const field_ptr_ty = f.air.typeOfIndex(inst);
-    const field_ptr_info = field_ptr_ty.ptrInfo();
-    const struct_ty = struct_ptr_ty.elemType();
-    const field_ty = struct_ty.structFieldType(index);

     // Ensure complete type definition is visible before accessing fields.
-    try f.renderType(std.io.null_writer, struct_ty);
+    _ = try f.typeToIndex(container_ty, .complete);

+    const writer = f.object.writer();
     const local = try f.allocLocal(inst, field_ptr_ty);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = (");
-    try f.renderTypecast(writer, field_ptr_ty);
+    try f.renderType(writer, field_ptr_ty);
     try writer.writeByte(')');

-    const extra_name: CValue = switch (struct_ty.tag()) {
-        .union_tagged, .union_safety_tagged => .{ .identifier = "payload" },
-        else => .none,
-    };
-
-    const FieldLoc = union(enum) {
-        begin: void,
-        field: CValue,
-        end: void,
-    };
-    const field_loc = switch (struct_ty.tag()) {
-        .@"struct" => switch (struct_ty.containerLayout()) {
-            .Auto, .Extern => for (struct_ty.structFields().values()[index..], 0..) |field, offset| {
-                if (field.ty.hasRuntimeBitsIgnoreComptime()) break FieldLoc{ .field = .{
-                    .identifier = struct_ty.structFieldName(index + offset),
-                } };
-            } else @as(FieldLoc, .end),
-            .Packed => if (field_ptr_info.data.host_size == 0) {
-                const target = f.object.dg.module.getTarget();
-
-                const byte_offset = struct_ty.packedStructFieldByteOffset(index, target);
-                var byte_offset_pl = Value.Payload.U64{
-                    .base = .{ .tag = .int_u64 },
-                    .data = byte_offset,
-                };
-                const byte_offset_val = Value.initPayload(&byte_offset_pl.base);
-
-                var u8_ptr_pl = field_ptr_info;
-                u8_ptr_pl.data.pointee_type = Type.u8;
-                const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base);
-
-                if (!std.mem.isAligned(byte_offset, field_ptr_ty.ptrAlignment(target))) {
-                    return f.fail("TODO: CBE: unaligned packed struct field pointer", .{});
-                }
-
-                try writer.writeAll("&((");
-                try f.renderTypecast(writer, u8_ptr_ty);
-                try writer.writeByte(')');
-                try f.writeCValue(writer, struct_ptr, .Other);
-                try writer.print(")[{}];\n", .{try f.fmtIntLiteral(Type.usize, byte_offset_val)});
-                return local;
-            } else @as(FieldLoc, .begin),
+    switch (fieldLocation(container_ty, field_ptr_ty, field_index, target)) {
+        .begin => try f.writeCValue(writer, container_ptr_val, .Initializer),
+        .field => |field| {
+            try writer.writeByte('&');
+            try f.writeCValueDerefMember(writer, container_ptr_val, field);
         },
-        .@"union", .union_safety_tagged, .union_tagged => if (struct_ty.containerLayout() == .Packed) {
-            try f.writeCValue(writer, struct_ptr, .Other);
-            try writer.writeAll(";\n");
-            return local;
-        } else if (field_ty.hasRuntimeBitsIgnoreComptime()) FieldLoc{ .field = .{
-            .identifier = struct_ty.unionFields().keys()[index],
-        } } else @as(FieldLoc, .end),
-        .tuple, .anon_struct => field_name: {
-            const tuple = struct_ty.tupleFields();
-            if (tuple.values[index].tag() != .unreachable_value) return CValue.none;
-
-            var id: usize = 0;
-            break :field_name for (tuple.values, 0..) |value, i| {
-                if (value.tag() != .unreachable_value) continue;
-                if (!tuple.types[i].hasRuntimeBitsIgnoreComptime()) continue;
-                if (i >= index) break FieldLoc{ .field = .{ .field = id } };
-                id += 1;
-            } else @as(FieldLoc, .end);
+        .byte_offset => |byte_offset| {
+            var u8_ptr_pl = field_ptr_ty.ptrInfo();
+            u8_ptr_pl.data.pointee_type = Type.u8;
+            const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base);
+
+            var byte_offset_pl = Value.Payload.U64{
+                .base = .{ .tag = .int_u64 },
+                .data = byte_offset,
+            };
+            const byte_offset_val = Value.initPayload(&byte_offset_pl.base);
+
+            try writer.writeAll("((");
+            try f.renderType(writer, u8_ptr_ty);
+            try writer.writeByte(')');
+            try f.writeCValue(writer, container_ptr_val, .Other);
+            try writer.print(" + {})", .{try f.fmtIntLiteral(Type.usize, byte_offset_val)});
         },
-        else => unreachable,
-    };
-
-    try writer.writeByte('&');
-    switch (field_loc) {
-        .begin, .end => {
+        .end => {
             try writer.writeByte('(');
-            try f.writeCValue(writer, struct_ptr, .Other);
-            try writer.print(")[{}]", .{
-                @boolToInt(field_loc == .end and struct_ty.hasRuntimeBitsIgnoreComptime()),
-            });
+            try f.writeCValue(writer, container_ptr_val, .Other);
+            try writer.print(" + {})", .{try f.fmtIntLiteral(Type.usize, Value.one)});
         },
-        .field => |field| if (extra_name != .none) {
-            try f.writeCValueDerefMember(writer, struct_ptr, extra_name);
-            try writer.writeByte('.');
-            try f.writeCValue(writer, field, .Other);
-        } else try f.writeCValueDerefMember(writer, struct_ptr, field),
     }

+    try writer.writeAll(";\n");
     return local;
 }

@@ -5504,13 +5352,13 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{extra.struct_operand});
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
     if (!inst_ty.hasRuntimeBitsIgnoreComptime()) {
         try reap(f, inst, &.{extra.struct_operand});
-        return CValue.none;
+        return .none;
     }

     const target = f.object.dg.module.getTarget();
@@ -5520,16 +5368,14 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue {
     const writer = f.object.writer();

     // Ensure complete type definition is visible before accessing fields.
-    try f.renderType(std.io.null_writer, struct_ty);
-
-    const extra_name: CValue = switch (struct_ty.tag()) {
-        .union_tagged, .union_safety_tagged => .{ .identifier = "payload" },
-        else => .none,
-    };
+    _ = try f.typeToIndex(struct_ty, .complete);

     const field_name: CValue = switch (struct_ty.tag()) {
-        .@"struct" => switch (struct_ty.containerLayout()) {
-            .Auto, .Extern => .{ .identifier = struct_ty.structFieldName(extra.field_index) },
+        .tuple, .anon_struct, .@"struct" => switch (struct_ty.containerLayout()) {
+            .Auto, .Extern => if (struct_ty.isSimpleTuple())
+                .{ .field = extra.field_index }
+            else
+                .{ .identifier = struct_ty.structFieldName(extra.field_index) },
             .Packed => {
                 const struct_obj = struct_ty.castTag(.@"struct").?.data;
                 const int_info = struct_ty.intInfo(target);
@@ -5564,7 +5410,7 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue {
                 try writer.writeAll(" = zig_wrap_");
                 try f.object.dg.renderTypeForBuiltinFnName(writer, field_int_ty);
                 try writer.writeAll("((");
-                try f.renderTypecast(writer, field_int_ty);
+                try f.renderType(writer, field_int_ty);
                 try writer.writeByte(')');
                 const cant_cast = int_info.bits > 64;
                 if (cant_cast) {
@@ -5587,13 +5433,13 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue {

                 const local = try f.allocLocal(inst, inst_ty);
                 try writer.writeAll("memcpy(");
-                try f.writeCValue(writer, .{ .local_ref = local.local }, .FunctionArgument);
+                try f.writeCValue(writer, .{ .local_ref = local.new_local }, .FunctionArgument);
                 try writer.writeAll(", ");
-                try f.writeCValue(writer, .{ .local_ref = temp_local.local }, .FunctionArgument);
+                try f.writeCValue(writer, .{ .local_ref = temp_local.new_local }, .FunctionArgument);
                 try writer.writeAll(", sizeof(");
-                try f.renderTypecast(writer, inst_ty);
+                try f.renderType(writer, inst_ty);
                 try writer.writeAll("));\n");
-                try freeLocal(f, inst, temp_local.local, 0);
+                try freeLocal(f, inst, temp_local.new_local, 0);
                 return local;
             },
         },
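For the `.Packed` case above, the field is fished out of the host integer with a right shift by the field's bit offset followed by `zig_wrap_`, which truncates to the field's width. In plain C, using the same offset and width as the store sketch earlier:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* a 5-bit field stored at bit offset 6 of a 16-bit host integer */
        uint16_t host = 0xfabf;
        uint16_t field = (uint16_t)((host >> 6) & 0x1f); /* shift, then wrap */
        printf("0x%02x\n", field); /* prints 0x0a */
        return 0;
    }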
@@ -5613,48 +5459,37 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue {
             try writer.writeAll(", &");
             try f.writeCValue(writer, operand_lval, .FunctionArgument);
             try writer.writeAll(", sizeof(");
-            try f.renderTypecast(writer, inst_ty);
+            try f.renderType(writer, inst_ty);
             try writer.writeAll("));\n");

             if (struct_byval == .constant) {
-                try freeLocal(f, inst, operand_lval.local, 0);
+                try freeLocal(f, inst, operand_lval.new_local, 0);
             }

             return local;
-        } else .{
-            .identifier = struct_ty.unionFields().keys()[extra.field_index],
-        },
-        .tuple, .anon_struct => blk: {
-            const tuple = struct_ty.tupleFields();
-            if (tuple.values[extra.field_index].tag() != .unreachable_value) return CValue.none;
-
-            var id: usize = 0;
-            for (tuple.values[0..extra.field_index]) |value|
-                id += @boolToInt(value.tag() == .unreachable_value);
-            break :blk .{ .field = id };
+        } else field_name: {
+            const name = struct_ty.unionFields().keys()[extra.field_index];
+            break :field_name if (struct_ty.unionTagTypeSafety()) |_|
+                .{ .payload_identifier = name }
+            else
+                .{ .identifier = name };
         },
         else => unreachable,
     };

-    const is_array = lowersToArray(inst_ty, target);
     const local = try f.allocLocal(inst, inst_ty);
-    if (is_array) {
+    if (lowersToArray(inst_ty, target)) {
         try writer.writeAll("memcpy(");
         try f.writeCValue(writer, local, .FunctionArgument);
         try writer.writeAll(", ");
+        try f.writeCValueMember(writer, struct_byval, field_name);
+        try writer.writeAll(", sizeof(");
+        try f.renderType(writer, inst_ty);
+        try writer.writeAll("))");
     } else {
         try f.writeCValue(writer, local, .Other);
         try writer.writeAll(" = ");
-    }
-    if (extra_name != .none) {
-        try f.writeCValueMember(writer, struct_byval, extra_name);
-        try writer.writeByte('.');
-        try f.writeCValue(writer, field_name, .Other);
-    } else try f.writeCValueMember(writer, struct_byval, field_name);
-    if (is_array) {
-        try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, inst_ty);
-        try writer.writeAll("))");
+        try f.writeCValueMember(writer, struct_byval, field_name);
     }
     try writer.writeAll(";\n");
     return local;
 }
@@ -5667,7 +5502,7 @@ fn airUnwrapErrUnionErr(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -5704,7 +5539,7 @@ fn airUnwrapErrUnionPay(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -5715,13 +5550,13 @@ fn airUnwrapErrUnionPay(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {
     const error_union_ty = if (operand_is_ptr) operand_ty.childType() else operand_ty;

     if (!error_union_ty.errorUnionPayload().hasRuntimeBits()) {
-        if (!is_ptr) return CValue.none;
+        if (!is_ptr) return .none;

         const w = f.object.writer();
         const local = try f.allocLocal(inst, inst_ty);
         try f.writeCValue(w, local, .Other);
         try w.writeAll(" = (");
-        try f.renderTypecast(w, inst_ty);
+        try f.renderType(w, inst_ty);
         try w.writeByte(')');
         try f.writeCValue(w, operand, .Initializer);
         try w.writeAll(";\n");
@@ -5746,7 +5581,7 @@ fn airWrapOptional(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -5782,7 +5617,7 @@ fn airWrapOptional(f: *Function, inst: Air.Inst.Index) !CValue {
         try writer.writeAll(", ");
         try f.writeCValue(writer, payload, .FunctionArgument);
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, payload_ty);
+        try f.renderType(writer, payload_ty);
         try writer.writeAll("));\n");
     }
     return local;
@@ -5792,7 +5627,7 @@ fn airWrapErrUnionErr(f: *Function, inst: Air.Inst.Index) !CValue {
     const ty_op = f.air.instructions.items(.data)[inst].ty_op;
     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const writer = f.object.writer();
@@ -5846,7 +5681,7 @@ fn airErrUnionPayloadPtrSet(f: *Function, inst: Air.Inst.Index) !CValue {
     try writer.writeAll(";\n");

     // Then return the payload pointer (only if it is used)
-    if (f.liveness.isUnused(inst)) return CValue.none;
+    if (f.liveness.isUnused(inst)) return .none;

     const local = try f.allocLocal(inst, f.air.typeOfIndex(inst));
     try f.writeCValue(writer, local, .Other);
@@ -5857,7 +5692,7 @@ fn airErrUnionPayloadPtrSet(f: *Function, inst: Air.Inst.Index) !CValue {
 }

 fn airErrReturnTrace(f: *Function, inst: Air.Inst.Index) !CValue {
-    if (f.liveness.isUnused(inst)) return CValue.none;
+    if (f.liveness.isUnused(inst)) return .none;
     return f.fail("TODO: C backend: implement airErrReturnTrace", .{});
 }

@@ -5875,7 +5710,7 @@ fn airWrapErrUnionPay(f: *Function, inst: Air.Inst.Index) !CValue {
     const ty_op = f.air.instructions.items(.data)[inst].ty_op;
     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const inst_ty = f.air.typeOfIndex(inst);
@@ -5902,7 +5737,7 @@ fn airWrapErrUnionPay(f: *Function, inst: Air.Inst.Index) !CValue {
         try writer.writeAll(", ");
         try f.writeCValue(writer, payload, .FunctionArgument);
         try writer.writeAll(", sizeof(");
-        try f.renderTypecast(writer, payload_ty);
+        try f.renderType(writer, payload_ty);
         try writer.writeAll("));\n");
     }
     return local;
 }

@@ -5913,7 +5748,7 @@ fn airIsErr(f: *Function, inst: Air.Inst.Index, is_ptr: bool, operator: []const u8) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{un_op});
-        return CValue.none;
+        return .none;
     }

     const writer = f.object.writer();
@@ -5951,7 +5786,7 @@ fn airArrayToSlice(f: *Function, inst: Air.Inst.Index) !CValue {

     if (f.liveness.isUnused(inst)) {
         try reap(f, inst, &.{ty_op.operand});
-        return CValue.none;
+        return .none;
     }

     const operand = try f.resolveInst(ty_op.operand);
@@ -5959,26 +5794,28 @@ fn airArrayToSlice(f: *Function, inst: Air.Inst.Index) !CValue {
     const inst_ty = f.air.typeOfIndex(inst);
     const writer = f.object.writer();
     const local = try f.allocLocal(inst, inst_ty);
-    try f.writeCValue(writer, local, .Other);

-    const array_len = f.air.typeOf(ty_op.operand).elemType().arrayLen();
+    const array_ty = f.air.typeOf(ty_op.operand).childType();

-    try writer.writeAll(".ptr = ");
+    try f.writeCValueMember(writer, local, .{ .identifier = "ptr" });
+    try writer.writeAll(" = ");
+    // Unfortunately, C does not support any equivalent to
+    // &(*(void *)p)[0], although LLVM does via GetElementPtr
     if (operand == .undef) {
-        // Unfortunately, C does not support any equivalent to
-        // &(*(void *)p)[0], although LLVM does via GetElementPtr
         var buf: Type.SlicePtrFieldTypeBuffer = undefined;
-        try f.writeCValue(writer, CValue{ .undef = inst_ty.slicePtrFieldType(&buf) }, .Initializer);
-    } else {
+        try f.writeCValue(writer, .{ .undef = inst_ty.slicePtrFieldType(&buf) }, .Initializer);
+    } else if (array_ty.hasRuntimeBitsIgnoreComptime()) {
         try writer.writeAll("&(");
         try f.writeCValueDeref(writer, operand);
         try writer.print(")[{}]", .{try f.fmtIntLiteral(Type.usize, Value.zero)});
-    }
+    } else try f.writeCValue(writer, operand, .Initializer);
+    try writer.writeAll("; ");

+    const array_len = array_ty.arrayLen();
     var len_pl: Value.Payload.U64 = .{ .base = .{ .tag = .int_u64 }, .data = array_len };
     const len_val = Value.initPayload(&len_pl.base);
-    try writer.writeAll("; ");
-    try f.writeCValue(writer, local, .Other);
-    try writer.print(".len = {};\n", .{try f.fmtIntLiteral(Type.usize, len_val)});
+    try f.writeCValueMember(writer, local, .{ .identifier = "len" });
+    try writer.print(" = {};\n", .{try f.fmtIntLiteral(Type.usize, len_val)});
+
     return local;
 }
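As the retained comment notes, C has no general way to take element 0 of a dereferenced opaque pointer, so `airArrayToSlice` emits `&(*operand)[0]` for a pointer-to-array operand, which yields the element pointer without decaying through an intermediate cast. The resulting slice construction, sketched with an illustrative slice struct:

    #include <stddef.h>
    #include <stdio.h>

    typedef struct { int *ptr; size_t len; } slice_int;

    int main(void) {
        int arr[3] = { 7, 8, 9 };
        int (*parr)[3] = &arr;

        slice_int s;
        s.ptr = &(*parr)[0]; /* first element of the pointed-to array */
        s.len = 3;

        printf("%d %zu\n", s.ptr[2], s.len);
        return 0;
    }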
const operand = try f.resolveInst(ty_op.operand); @@ -6095,7 +5932,7 @@ fn airBinBuiltinCall( if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } const lhs = try f.resolveInst(bin_op.lhs); @@ -6168,7 +6005,7 @@ fn airCmpxchg(f: *Function, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue try writer.writeAll(";\n"); try writer.writeAll("if ("); try writer.print("zig_cmpxchg_{s}((zig_atomic(", .{flavor}); - try f.renderTypecast(writer, ptr_ty.childType()); + try f.renderType(writer, ptr_ty.childType()); try writer.writeByte(')'); if (ptr_ty.isVolatilePtr()) try writer.writeAll(" volatile"); try writer.writeAll(" *)"); @@ -6197,7 +6034,7 @@ fn airCmpxchg(f: *Function, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue try writer.writeAll(";\n"); try f.writeCValue(writer, local, .Other); try writer.print(".is_null = zig_cmpxchg_{s}((zig_atomic(", .{flavor}); - try f.renderTypecast(writer, ptr_ty.childType()); + try f.renderType(writer, ptr_ty.childType()); try writer.writeByte(')'); if (ptr_ty.isVolatilePtr()) try writer.writeAll(" volatile"); try writer.writeAll(" *)"); @@ -6217,8 +6054,8 @@ fn airCmpxchg(f: *Function, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue } if (f.liveness.isUnused(inst)) { - try freeLocal(f, inst, local.local, 0); - return CValue.none; + try freeLocal(f, inst, local.new_local, 0); + return .none; } return local; @@ -6240,12 +6077,12 @@ fn airAtomicRmw(f: *Function, inst: Air.Inst.Index) !CValue { switch (extra.op()) { else => { try writer.writeAll("zig_atomic("); - try f.renderTypecast(writer, ptr_ty.elemType()); + try f.renderType(writer, ptr_ty.elemType()); try writer.writeByte(')'); }, .Nand, .Min, .Max => { // These are missing from stdatomic.h, so no atomic types for now. 
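(The renderType call this comment explains follows below.) The cmpxchg and atomic-rmw lowerings above wrap the pointee in `zig_atomic(...)`, add `volatile` when the Zig pointer is volatile, and dispatch to the `zig_cmpxchg_*`/`zig_atomic_*` helpers that the generated C targets. For orientation, a sketch of Zig code that exercises these paths when built with `-ofmt=c`; the mapping comments are illustrative, not a spec of the exact emitted C:

    const std = @import("std");

    test "atomics that lower through zig_cmpxchg_* / zig_atomic_*" {
        var value: u32 = 0;
        // Becomes a zig_cmpxchg_weak(...) on a (zig_atomic(uint32_t) *) pointer.
        const prev = @cmpxchgWeak(u32, &value, 0, 1, .Monotonic, .Monotonic);
        try std.testing.expect(prev == null);
        // Become zig_atomic_load / zig_atomic_store calls.
        const seen = @atomicLoad(u32, &value, .SeqCst);
        @atomicStore(u32, &value, seen + 1, .SeqCst);
        try std.testing.expectEqual(@as(u32, 2), value);
    }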
- try f.renderTypecast(writer, ptr_ty.elemType()); + try f.renderType(writer, ptr_ty.elemType()); }, } if (ptr_ty.isVolatilePtr()) try writer.writeAll(" volatile"); @@ -6260,8 +6097,8 @@ fn airAtomicRmw(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeAll(");\n"); if (f.liveness.isUnused(inst)) { - try freeLocal(f, inst, local.local, 0); - return CValue.none; + try freeLocal(f, inst, local.new_local, 0); + return .none; } return local; @@ -6273,7 +6110,7 @@ fn airAtomicLoad(f: *Function, inst: Air.Inst.Index) !CValue { try reap(f, inst, &.{atomic_load.ptr}); const ptr_ty = f.air.typeOf(atomic_load.ptr); if (!ptr_ty.isVolatilePtr() and f.liveness.isUnused(inst)) { - return CValue.none; + return .none; } const inst_ty = f.air.typeOfIndex(inst); @@ -6282,7 +6119,7 @@ fn airAtomicLoad(f: *Function, inst: Air.Inst.Index) !CValue { try f.writeCValue(writer, local, .Other); try writer.writeAll(" = zig_atomic_load((zig_atomic("); - try f.renderTypecast(writer, ptr_ty.elemType()); + try f.renderType(writer, ptr_ty.elemType()); try writer.writeByte(')'); if (ptr_ty.isVolatilePtr()) try writer.writeAll(" volatile"); try writer.writeAll(" *)"); @@ -6305,7 +6142,7 @@ fn airAtomicStore(f: *Function, inst: Air.Inst.Index, order: [*:0]const u8) !CVa const writer = f.object.writer(); try writer.writeAll("zig_atomic_store((zig_atomic("); - try f.renderTypecast(writer, ptr_ty.elemType()); + try f.renderType(writer, ptr_ty.elemType()); try writer.writeByte(')'); if (ptr_ty.isVolatilePtr()) try writer.writeAll(" volatile"); try writer.writeAll(" *)"); @@ -6316,7 +6153,7 @@ fn airAtomicStore(f: *Function, inst: Air.Inst.Index, order: [*:0]const u8) !CVa try f.object.dg.renderTypeForBuiltinFnName(writer, ptr_ty.childType()); try writer.writeAll(");\n"); - return CValue.none; + return .none; } fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue { @@ -6347,7 +6184,7 @@ fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeAll(" += "); try f.object.dg.renderValue(writer, Type.usize, Value.one, .Other); try writer.writeAll(") (("); - try f.renderTypecast(writer, u8_ptr_ty); + try f.renderType(writer, u8_ptr_ty); try writer.writeByte(')'); try f.writeCValue(writer, dest_ptr, .FunctionArgument); try writer.writeAll(")["); @@ -6357,9 +6194,9 @@ fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeAll(";\n"); try reap(f, inst, &.{ pl_op.operand, extra.lhs, extra.rhs }); - try freeLocal(f, inst, index.local, 0); + try freeLocal(f, inst, index.new_local, 0); - return CValue.none; + return .none; } try reap(f, inst, &.{ pl_op.operand, extra.lhs, extra.rhs }); @@ -6371,7 +6208,7 @@ fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue { try f.writeCValue(writer, len, .FunctionArgument); try writer.writeAll(");\n"); - return CValue.none; + return .none; } fn airMemcpy(f: *Function, inst: Air.Inst.Index) !CValue { @@ -6391,7 +6228,7 @@ fn airMemcpy(f: *Function, inst: Air.Inst.Index) !CValue { try f.writeCValue(writer, len, .FunctionArgument); try writer.writeAll(");\n"); - return CValue.none; + return .none; } fn airSetUnionTag(f: *Function, inst: Air.Inst.Index) !CValue { @@ -6404,7 +6241,7 @@ fn airSetUnionTag(f: *Function, inst: Air.Inst.Index) !CValue { const union_ty = f.air.typeOf(bin_op.lhs).childType(); const target = f.object.dg.module.getTarget(); const layout = union_ty.unionGetLayout(target); - if (layout.tag_size == 0) return CValue.none; + if (layout.tag_size == 0) return .none; try writer.writeByte('('); try f.writeCValue(writer, union_ptr, 
.Other); @@ -6412,7 +6249,7 @@ fn airSetUnionTag(f: *Function, inst: Air.Inst.Index) !CValue { try f.writeCValue(writer, new_tag, .Other); try writer.writeAll(";\n"); - return CValue.none; + return .none; } fn airGetUnionTag(f: *Function, inst: Air.Inst.Index) !CValue { @@ -6420,7 +6257,7 @@ fn airGetUnionTag(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ty_op.operand}); - return CValue.none; + return .none; } const operand = try f.resolveInst(ty_op.operand); @@ -6430,7 +6267,7 @@ fn airGetUnionTag(f: *Function, inst: Air.Inst.Index) !CValue { const target = f.object.dg.module.getTarget(); const layout = un_ty.unionGetLayout(target); - if (layout.tag_size == 0) return CValue.none; + if (layout.tag_size == 0) return .none; const inst_ty = f.air.typeOfIndex(inst); const writer = f.object.writer(); @@ -6448,7 +6285,7 @@ fn airTagName(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{un_op}); - return CValue.none; + return .none; } const inst_ty = f.air.typeOfIndex(inst); @@ -6459,7 +6296,9 @@ fn airTagName(f: *Function, inst: Air.Inst.Index) !CValue { const writer = f.object.writer(); const local = try f.allocLocal(inst, inst_ty); try f.writeCValue(writer, local, .Other); - try writer.print(" = {s}(", .{try f.object.dg.getTagNameFn(enum_ty)}); + try writer.print(" = {s}(", .{ + try f.getLazyFnName(.{ .tag_name = enum_ty.getOwnerDecl() }, .{ .tag_name = enum_ty }), + }); try f.writeCValue(writer, operand, .Other); try writer.writeAll(");\n"); @@ -6471,7 +6310,7 @@ fn airErrorName(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{un_op}); - return CValue.none; + return .none; } const writer = f.object.writer(); @@ -6491,7 +6330,7 @@ fn airSplat(f: *Function, inst: Air.Inst.Index) !CValue { const ty_op = f.air.instructions.items(.data)[inst].ty_op; if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ty_op.operand}); - return CValue.none; + return .none; } const inst_ty = f.air.typeOfIndex(inst); @@ -6507,13 +6346,13 @@ fn airSplat(f: *Function, inst: Air.Inst.Index) !CValue { } fn airSelect(f: *Function, inst: Air.Inst.Index) !CValue { - if (f.liveness.isUnused(inst)) return CValue.none; + if (f.liveness.isUnused(inst)) return .none; return f.fail("TODO: C backend: implement airSelect", .{}); } fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue { - if (f.liveness.isUnused(inst)) return CValue.none; + if (f.liveness.isUnused(inst)) return .none; return f.fail("TODO: C backend: implement airShuffle", .{}); } @@ -6523,7 +6362,7 @@ fn airReduce(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{reduce.operand}); - return CValue.none; + return .none; } const target = f.object.dg.module.getTarget(); @@ -6599,10 +6438,9 @@ fn airReduce(f: *Function, inst: Air.Inst.Index) !CValue { // // Equivalent to: // reduce: { - // var i: usize = 0; // var accum: T = init; - // while (i < vec.len) : (i += 1) { - // accum = func(accum, vec[i]); + // for (vec) : (elem) { + // accum = func(accum, elem); // } // break :reduce accum; // } @@ -6674,7 +6512,7 @@ fn airReduce(f: *Function, inst: Air.Inst.Index) !CValue { try writer.writeAll(";\n"); - try freeLocal(f, inst, it.local, 0); + try freeLocal(f, inst, it.new_local, 0); return accum; } @@ -6687,8 +6525,8 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { const gpa = f.object.dg.gpa; const resolved_elements = try gpa.alloc(CValue, 
elements.len); defer gpa.free(resolved_elements); - for (elements, 0..) |element, i| { - resolved_elements[i] = try f.resolveInst(element); + for (resolved_elements, elements) |*resolved_element, element| { + resolved_element.* = try f.resolveInst(element); } { var bt = iterateBigTomb(f, inst); @@ -6697,7 +6535,7 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { } } - if (f.liveness.isUnused(inst)) return CValue.none; + if (f.liveness.isUnused(inst)) return .none; const target = f.object.dg.module.getTarget(); @@ -6723,50 +6561,51 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { .Auto, .Extern => { try f.writeCValue(writer, local, .Other); try writer.writeAll(" = ("); - try f.renderTypecast(writer, inst_ty); + try f.renderType(writer, inst_ty); try writer.writeAll(")"); try writer.writeByte('{'); var empty = true; - for (elements, 0..) |element, index| { - if (inst_ty.structFieldValueComptime(index)) |_| continue; + for (elements, resolved_elements, 0..) |element, resolved_element, field_i| { + if (inst_ty.structFieldValueComptime(field_i)) |_| continue; if (!empty) try writer.writeAll(", "); - if (!inst_ty.isTupleOrAnonStruct()) { - try writer.print(".{ } = ", .{fmtIdent(inst_ty.structFieldName(index))}); - } + + const field_name: CValue = if (inst_ty.isSimpleTuple()) + .{ .field = field_i } + else + .{ .identifier = inst_ty.structFieldName(field_i) }; + try writer.writeByte('.'); + try f.object.dg.writeCValue(writer, field_name); + try writer.writeAll(" = "); const element_ty = f.air.typeOf(element); try f.writeCValue(writer, switch (element_ty.zigTypeTag()) { - .Array => CValue{ .undef = element_ty }, - else => resolved_elements[index], + .Array => .{ .undef = element_ty }, + else => resolved_element, }, .Initializer); empty = false; } - if (empty) try writer.print("{}", .{try f.fmtIntLiteral(Type.u8, Value.zero)}); try writer.writeAll("};\n"); - var field_id: usize = 0; - for (elements, 0..) |element, index| { - if (inst_ty.structFieldValueComptime(index)) |_| continue; + for (elements, resolved_elements, 0..) |element, resolved_element, field_i| { + if (inst_ty.structFieldValueComptime(field_i)) |_| continue; const element_ty = f.air.typeOf(element); if (element_ty.zigTypeTag() != .Array) continue; - const field_name = if (inst_ty.isTupleOrAnonStruct()) - CValue{ .field = field_id } + const field_name: CValue = if (inst_ty.isSimpleTuple()) + .{ .field = field_i } else - CValue{ .identifier = inst_ty.structFieldName(index) }; + .{ .identifier = inst_ty.structFieldName(field_i) }; try writer.writeAll(";\n"); try writer.writeAll("memcpy("); try f.writeCValueMember(writer, local, field_name); try writer.writeAll(", "); - try f.writeCValue(writer, resolved_elements[index], .FunctionArgument); + try f.writeCValue(writer, resolved_element, .FunctionArgument); try writer.writeAll(", sizeof("); - try f.renderTypecast(writer, element_ty); + try f.renderType(writer, element_ty); try writer.writeAll("));\n"); - - field_id += 1; } }, .Packed => { @@ -6784,7 +6623,7 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { const bit_offset_val = Value.initPayload(&bit_offset_val_pl.base); var empty = true; - for (elements, 0..) 
|_, index| { + for (0..elements.len) |index| { const field_ty = inst_ty.structFieldType(index); if (!field_ty.hasRuntimeBitsIgnoreComptime()) continue; @@ -6810,11 +6649,11 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { try f.renderIntCast(writer, inst_ty, element, field_ty, .FunctionArgument); } else { try writer.writeByte('('); - try f.renderTypecast(writer, inst_ty); + try f.renderType(writer, inst_ty); try writer.writeByte(')'); if (field_ty.isPtrAtRuntime()) { try writer.writeByte('('); - try f.renderTypecast(writer, switch (int_info.signedness) { + try f.renderType(writer, switch (int_info.signedness) { .unsigned => Type.usize, .signed => Type.isize, }); @@ -6833,13 +6672,6 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { empty = false; } - if (empty) { - try writer.writeByte('('); - try f.renderTypecast(writer, inst_ty); - try writer.writeByte(')'); - try f.writeCValue(writer, .{ .undef = inst_ty }, .Initializer); - } - try writer.writeAll(";\n"); }, }, @@ -6855,7 +6687,7 @@ fn airUnionInit(f: *Function, inst: Air.Inst.Index) !CValue { if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{extra.init}); - return CValue.none; + return .none; } const union_ty = f.air.typeOfIndex(inst); @@ -6875,7 +6707,7 @@ fn airUnionInit(f: *Function, inst: Air.Inst.Index) !CValue { return local; } - if (union_ty.unionTagTypeSafety()) |tag_ty| { + const field: CValue = if (union_ty.unionTagTypeSafety()) |tag_ty| field: { const layout = union_ty.unionGetLayout(target); if (layout.tag_size != 0) { const field_index = tag_ty.enumFieldIndex(field_name).?; @@ -6892,18 +6724,13 @@ fn airUnionInit(f: *Function, inst: Air.Inst.Index) !CValue { try f.writeCValue(writer, local, .Other); try writer.print(".tag = {}; ", .{try f.fmtIntLiteral(tag_ty, int_val)}); } - try f.writeCValue(writer, local, .Other); - try writer.print(".payload.{ } = ", .{fmtIdent(field_name)}); - try f.writeCValue(writer, payload, .Other); - try writer.writeAll(";\n"); - return local; - } + break :field .{ .payload_identifier = field_name }; + } else .{ .identifier = field_name }; - try f.writeCValue(writer, local, .Other); - try writer.print(".{ } = ", .{fmtIdent(field_name)}); + try f.writeCValueMember(writer, local, field); + try writer.writeAll(" = "); try f.writeCValue(writer, payload, .Other); try writer.writeAll(";\n"); - return local; } @@ -6914,7 +6741,7 @@ fn airPrefetch(f: *Function, inst: Air.Inst.Index) !CValue { // The available prefetch intrinsics do not accept a cache argument; only // address, rw, and locality. So unless the cache is data, we do not lower // this instruction. 
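(The prefetch switch this comment introduces resumes just below.) Stepping back to airAggregateInit above: its index-based loops were rewritten with the multi-object `for` loops introduced during the 0.11 cycle, which iterate several equal-length sequences in lock step, with `0..` ranges replacing hand-maintained counters. A standalone sketch of the three forms used in this diff:

    const std = @import("std");

    test "multi-object and ranged for loops" {
        const elements = [_]u8{ 1, 2, 3 };
        var resolved: [3]u16 = undefined;

        // Lock-step iteration; a pointer capture allows writing through.
        for (&resolved, elements) |*dst, src| dst.* = src;
        try std.testing.expectEqual(@as(u16, 2), resolved[1]);

        // A `0..` range threads an index through alongside the elements.
        var sum: usize = 0;
        for (elements, 0..) |elem, i| sum += elem * i;
        try std.testing.expectEqual(@as(usize, 8), sum);

        // `for (0..n)` replaces `var i = 0; while (i < n) : (i += 1)`.
        var count: usize = 0;
        for (0..elements.len) |_| count += 1;
        try std.testing.expectEqual(elements.len, count);
    }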
- .instruction => return CValue.none, + .instruction => return .none, } const ptr = try f.resolveInst(prefetch.ptr); try reap(f, inst, &.{prefetch.ptr}); @@ -6924,11 +6751,11 @@ fn airPrefetch(f: *Function, inst: Air.Inst.Index) !CValue { try writer.print(", {d}, {d});\n", .{ @enumToInt(prefetch.rw), prefetch.locality, }); - return CValue.none; + return .none; } fn airWasmMemorySize(f: *Function, inst: Air.Inst.Index) !CValue { - if (f.liveness.isUnused(inst)) return CValue.none; + if (f.liveness.isUnused(inst)) return .none; const pl_op = f.air.instructions.items(.data)[inst].pl_op; @@ -6965,7 +6792,7 @@ fn airFloatNeg(f: *Function, inst: Air.Inst.Index) !CValue { const un_op = f.air.instructions.items(.data)[inst].un_op; if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{un_op}); - return CValue.none; + return .none; } const operand = try f.resolveInst(un_op); @@ -6987,7 +6814,7 @@ fn airUnFloatOp(f: *Function, inst: Air.Inst.Index, operation: []const u8) !CVal const un_op = f.air.instructions.items(.data)[inst].un_op; if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{un_op}); - return CValue.none; + return .none; } const operand = try f.resolveInst(un_op); try reap(f, inst, &.{un_op}); @@ -7009,7 +6836,7 @@ fn airBinFloatOp(f: *Function, inst: Air.Inst.Index, operation: []const u8) !CVa const bin_op = f.air.instructions.items(.data)[inst].bin_op; if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return CValue.none; + return .none; } const lhs = try f.resolveInst(bin_op.lhs); const rhs = try f.resolveInst(bin_op.rhs); @@ -7036,7 +6863,7 @@ fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue { const bin_op = f.air.extraData(Air.Bin, pl_op.payload).data; if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs, pl_op.operand }); - return CValue.none; + return .none; } const inst_ty = f.air.typeOfIndex(inst); const mulend1 = try f.resolveInst(bin_op.lhs); @@ -7058,6 +6885,81 @@ fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue { return local; } +fn airCVaStart(f: *Function, inst: Air.Inst.Index) !CValue { + if (f.liveness.isUnused(inst)) return .none; + + const inst_ty = f.air.typeOfIndex(inst); + const fn_cty = try f.typeToCType(f.object.dg.decl.?.ty, .complete); + const param_len = fn_cty.castTag(.varargs_function).?.data.param_types.len; + + const writer = f.object.writer(); + const local = try f.allocLocal(inst, inst_ty); + try writer.writeAll("va_start(*(va_list *)&"); + try f.writeCValue(writer, local, .Other); + if (param_len > 0) { + try writer.writeAll(", "); + try f.writeCValue(writer, .{ .arg = param_len - 1 }, .FunctionArgument); + } + try writer.writeAll(");\n"); + return local; +} + +fn airCVaArg(f: *Function, inst: Air.Inst.Index) !CValue { + const ty_op = f.air.instructions.items(.data)[inst].ty_op; + if (f.liveness.isUnused(inst)) { + try reap(f, inst, &.{ty_op.operand}); + return .none; + } + + const inst_ty = f.air.typeOfIndex(inst); + const va_list = try f.resolveInst(ty_op.operand); + try reap(f, inst, &.{ty_op.operand}); + + const writer = f.object.writer(); + const local = try f.allocLocal(inst, inst_ty); + try f.writeCValue(writer, local, .Other); + try writer.writeAll(" = va_arg(*(va_list *)"); + try f.writeCValue(writer, va_list, .Other); + try writer.writeAll(", "); + try f.renderType(writer, f.air.getRefType(ty_op.ty)); + try writer.writeAll(");\n"); + return local; +} + +fn airCVaEnd(f: *Function, inst: Air.Inst.Index) !CValue { + const un_op = f.air.instructions.items(.data)[inst].un_op; 
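(airCVaEnd's body resumes below, followed by airCVaCopy.) These four new handlers lower Zig's C-varargs builtins onto C's va_list macros, with airCVaStart passing the last named parameter to va_start when one exists. A minimal sketch of the Zig-side builtins they correspond to, assuming, as the builtins require, a C-calling-convention varargs function; `sumInts` is hypothetical:

    const std = @import("std");

    fn sumInts(count: usize, ...) callconv(.C) c_int {
        var ap = @cVaStart(); // lowered by airCVaStart via va_start
        defer @cVaEnd(&ap); // lowered by airCVaEnd via va_end
        var total: c_int = 0;
        var i: usize = 0;
        while (i < count) : (i += 1) {
            total += @cVaArg(&ap, c_int); // lowered by airCVaArg via va_arg
        }
        return total;
    }

    test "C varargs builtins" {
        const total = sumInts(3, @as(c_int, 1), @as(c_int, 2), @as(c_int, 3));
        try std.testing.expectEqual(@as(c_int, 6), total);
    }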
+ + const va_list = try f.resolveInst(un_op); + try reap(f, inst, &.{un_op}); + + const writer = f.object.writer(); + try writer.writeAll("va_end(*(va_list *)"); + try f.writeCValue(writer, va_list, .Other); + try writer.writeAll(");\n"); + return .none; +} + +fn airCVaCopy(f: *Function, inst: Air.Inst.Index) !CValue { + const ty_op = f.air.instructions.items(.data)[inst].ty_op; + if (f.liveness.isUnused(inst)) { + try reap(f, inst, &.{ty_op.operand}); + return .none; + } + + const inst_ty = f.air.typeOfIndex(inst); + const va_list = try f.resolveInst(ty_op.operand); + try reap(f, inst, &.{ty_op.operand}); + + const writer = f.object.writer(); + const local = try f.allocLocal(inst, inst_ty); + try writer.writeAll("va_copy(*(va_list *)&"); + try f.writeCValue(writer, local, .Other); + try writer.writeAll(", *(va_list *)"); + try f.writeCValue(writer, va_list, .Other); + try writer.writeAll(");\n"); + return local; +} + fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 { return switch (order) { // Note: unordered is actually even less atomic than relaxed @@ -7243,8 +7145,9 @@ fn stringLiteral(child_stream: anytype) StringLiteral(@TypeOf(child_stream)) { return .{ .counting_writer = std.io.countingWriter(child_stream) }; } +const FormatStringContext = struct { str: []const u8, sentinel: ?u8 }; fn formatStringLiteral( - str: []const u8, + data: FormatStringContext, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype, @@ -7253,13 +7156,13 @@ fn formatStringLiteral( var literal = stringLiteral(writer); try literal.start(); - for (str) |c| - try literal.writeChar(c); + for (data.str) |c| try literal.writeChar(c); + if (data.sentinel) |sentinel| if (sentinel != 0) try literal.writeChar(sentinel); try literal.end(); } -fn fmtStringLiteral(str: []const u8) std.fmt.Formatter(formatStringLiteral) { - return .{ .data = str }; +fn fmtStringLiteral(str: []const u8, sentinel: ?u8) std.fmt.Formatter(formatStringLiteral) { + return .{ .data = .{ .str = str, .sentinel = sentinel } }; } fn undefPattern(comptime IntType: type) IntType { @@ -7344,7 +7247,7 @@ fn formatIntLiteral( use_twos_comp = true; } else { // TODO: Use fmtIntLiteral for 0? - try writer.print("zig_sub_{c}{d}(zig_as_{c}{d}(0, 0), ", .{ signAbbrev(int_info.signedness), c_bits, signAbbrev(int_info.signedness), c_bits }); + try writer.print("zig_sub_{c}{d}(zig_make_{c}{d}(0, 0), ", .{ signAbbrev(int_info.signedness), c_bits, signAbbrev(int_info.signedness), c_bits }); } } else { try writer.writeByte('-'); @@ -7354,11 +7257,16 @@ fn formatIntLiteral( switch (data.ty.tag()) { .c_short, .c_ushort, .c_int, .c_uint, .c_long, .c_ulong, .c_longlong, .c_ulonglong => {}, else => { - if (int_info.bits > 64 and data.location != null and data.location.? == .StaticInitializer) { + if (int_info.bits <= 64) { + try writer.print("{s}INT{d}_C(", .{ switch (int_info.signedness) { + .signed => "", + .unsigned => "U", + }, c_bits }); + } else if (data.location != null and data.location.? 
== .StaticInitializer) { // MSVC treats casting the struct initializer as not constant (C2099), so an alternate form is used in global initializers - try writer.print("zig_as_constant_{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }); + try writer.print("zig_make_constant_{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }); } else { - try writer.print("zig_as_{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }); + try writer.print("zig_make_{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }); } }, } @@ -7467,17 +7375,20 @@ fn isByRef(ty: Type) bool { } const LowerFnRetTyBuffer = struct { + names: [1][]const u8, types: [1]Type, values: [1]Value, - payload: Type.Payload.Tuple, + payload: Type.Payload.AnonStruct, }; fn lowerFnRetTy(ret_ty: Type, buffer: *LowerFnRetTyBuffer, target: std.Target) Type { if (ret_ty.zigTypeTag() == .NoReturn) return Type.initTag(.noreturn); if (lowersToArray(ret_ty, target)) { + buffer.names = [1][]const u8{"array"}; buffer.types = [1]Type{ret_ty}; buffer.values = [1]Value{Value.initTag(.unreachable_value)}; buffer.payload = .{ .data = .{ + .names = &buffer.names, .types = &buffer.types, .values = &buffer.values, } }; @@ -7533,7 +7444,7 @@ fn die(f: *Function, inst: Air.Inst.Index, ref: Air.Inst.Ref) !void { if (f.air.instructions.items(.tag)[ref_inst] == .constant) return; const c_value = (f.value_map.fetchRemove(ref) orelse return).value; const local_index = switch (c_value) { - .local => |l| l, + .local, .new_local => |l| l, else => return, }; try freeLocal(f, inst, local_index, ref_inst); @@ -7544,21 +7455,16 @@ fn freeLocal(f: *Function, inst: Air.Inst.Index, local_index: LocalIndex, ref_in const local = &f.locals.items[local_index]; log.debug("%{d}: freeing t{d} (operand %{d})", .{ inst, local_index, ref_inst }); if (local.loop_depth < f.free_locals_clone_depth) return; - const gop = try f.free_locals_stack.items[local.loop_depth].getOrPutContext( - gpa, - local.ty, - f.tyHashCtx(), - ); + const gop = try f.free_locals_stack.items[local.loop_depth].getOrPut(gpa, local.getType()); if (!gop.found_existing) gop.value_ptr.* = .{}; if (std.debug.runtime_safety) { - // If this trips, it means a local is being inserted into the - // free_locals map while it already exists in the map, which is not - // allowed. - assert(mem.indexOfScalar(LocalIndex, gop.value_ptr.items, local_index) == null); // If this trips, an unfreeable allocation was attempted to be freed. assert(!f.allocs.contains(local_index)); } - try gop.value_ptr.append(gpa, local_index); + // If this trips, it means a local is being inserted into the + // free_locals map while it already exists in the map, which is not + // allowed. + try gop.value_ptr.putNoClobber(gpa, local_index, {}); } const BigTomb = struct { @@ -7607,14 +7513,36 @@ fn deinitFreeLocalsMap(gpa: mem.Allocator, map: *LocalsMap) void { map.deinit(gpa); } -fn noticeBranchFrees(f: *Function, pre_locals_len: LocalIndex, inst: Air.Inst.Index) !void { - for (f.locals.items[pre_locals_len..], 0..) |*local, local_offset| { - const local_index = pre_locals_len + @intCast(LocalIndex, local_offset); - if (f.allocs.contains(local_index)) continue; // allocs are not freeable +fn noticeBranchFrees( + f: *Function, + pre_locals_len: LocalIndex, + pre_allocs_len: LocalIndex, + inst: Air.Inst.Index, +) !void { + const free_locals = f.getFreeLocals(); + + for (f.locals.items[pre_locals_len..], pre_locals_len..) 
|*local, local_i| { + const local_index = @intCast(LocalIndex, local_i); + if (f.allocs.contains(local_index)) { + if (std.debug.runtime_safety) { + // new allocs are no longer freeable, so make sure they aren't in the free list + if (free_locals.getPtr(local.getType())) |locals_list| { + assert(!locals_list.contains(local_index)); + } + } + continue; + } // free more deeply nested locals from other branches at current depth assert(local.loop_depth >= f.free_locals_stack.items.len - 1); local.loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1); try freeLocal(f, inst, local_index, 0); } + + for (f.allocs.keys()[pre_allocs_len..]) |local_i| { + const local_index = @intCast(LocalIndex, local_i); + const local = &f.locals.items[local_index]; + // new allocs are no longer freeable, so remove them from the free list + if (free_locals.getPtr(local.getType())) |locals_list| _ = locals_list.swapRemove(local_index); + } } diff --git a/src/codegen/c/type.zig b/src/codegen/c/type.zig new file mode 100644 index 0000000000..1f1a220cd2 --- /dev/null +++ b/src/codegen/c/type.zig @@ -0,0 +1,1896 @@ +const std = @import("std"); +const cstr = std.cstr; +const mem = std.mem; +const Allocator = mem.Allocator; +const assert = std.debug.assert; +const autoHash = std.hash.autoHash; +const Target = std.Target; + +const Module = @import("../../Module.zig"); +const Type = @import("../../type.zig").Type; + +pub const CType = extern union { + /// If the tag value is less than Tag.no_payload_count, then no pointer + /// dereference is needed. + tag_if_small_enough: Tag, + ptr_otherwise: *const Payload, + + pub fn initTag(small_tag: Tag) CType { + assert(!small_tag.hasPayload()); + return .{ .tag_if_small_enough = small_tag }; + } + + pub fn initPayload(pl: anytype) CType { + const T = @typeInfo(@TypeOf(pl)).Pointer.child; + return switch (pl.base.tag) { + inline else => |t| if (comptime t.hasPayload() and t.Type() == T) .{ + .ptr_otherwise = &pl.base, + } else unreachable, + }; + } + + pub fn hasPayload(self: CType) bool { + return self.tag_if_small_enough.hasPayload(); + } + + pub fn tag(self: CType) Tag { + return if (self.hasPayload()) self.ptr_otherwise.tag else self.tag_if_small_enough; + } + + pub fn cast(self: CType, comptime T: type) ?*const T { + if (!self.hasPayload()) return null; + const pl = self.ptr_otherwise; + return switch (pl.tag) { + inline else => |t| if (comptime t.hasPayload() and t.Type() == T) + @fieldParentPtr(T, "base", pl) + else + null, + }; + } + + pub fn castTag(self: CType, comptime t: Tag) ?*const t.Type() { + return if (self.tag() == t) @fieldParentPtr(t.Type(), "base", self.ptr_otherwise) else null; + } + + pub const Tag = enum(usize) { + // The first section of this enum are tags that require no payload. + void, + + // C basic types + char, + + @"signed char", + short, + int, + long, + @"long long", + + _Bool, + @"unsigned char", + @"unsigned short", + @"unsigned int", + @"unsigned long", + @"unsigned long long", + + float, + double, + @"long double", + + // C header types + // - stdbool.h + bool, + // - stddef.h + size_t, + ptrdiff_t, + // - stdint.h + uint8_t, + int8_t, + uint16_t, + int16_t, + uint32_t, + int32_t, + uint64_t, + int64_t, + uintptr_t, + intptr_t, + + // zig.h types + zig_u128, + zig_i128, + zig_f16, + zig_f32, + zig_f64, + zig_f80, + zig_f128, + zig_c_longdouble, // Keep last_no_payload_tag updated! + + // After this, the tag requires a payload. 
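(The payload-bearing tags are listed below.) The new CType reuses the small-tag-or-pointer encoding the compiler already employs for Type and Value: a single machine word holds either a payload-free Tag (any value below `no_payload_count`) or a pointer to a Payload whose first field records the tag, distinguishable because valid pointers always compare above the small-tag range. A reduced sketch of the encoding; `Small` and its Payload are hypothetical, not the real CType:

    const std = @import("std");

    const Small = extern union {
        tag_if_small_enough: Tag,
        ptr_otherwise: *const Payload,

        const Tag = enum(usize) {
            void, // payload-free; stored inline
            pointer, // requires a Payload
            const no_payload_count: usize = 1;
        };

        const Payload = struct { tag: Tag, child: u32 };

        fn tag(self: Small) Tag {
            // Pointer bit patterns are always >= no_payload_count, so one
            // word can carry both cases without a separate discriminant.
            return if (@enumToInt(self.tag_if_small_enough) < Tag.no_payload_count)
                self.tag_if_small_enough
            else
                self.ptr_otherwise.tag;
        }
    };

    test "small-tag-or-pointer encoding" {
        const simple = Small{ .tag_if_small_enough = .void };
        try std.testing.expect(simple.tag() == .void);

        const pl = Small.Payload{ .tag = .pointer, .child = 7 };
        const boxed = Small{ .ptr_otherwise = &pl };
        try std.testing.expect(boxed.tag() == .pointer);
        try std.testing.expectEqual(@as(u32, 7), boxed.ptr_otherwise.child);
    }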
+ pointer, + pointer_const, + pointer_volatile, + pointer_const_volatile, + array, + vector, + fwd_anon_struct, + fwd_anon_union, + fwd_struct, + fwd_union, + unnamed_struct, + unnamed_union, + packed_unnamed_struct, + packed_unnamed_union, + anon_struct, + anon_union, + @"struct", + @"union", + packed_struct, + packed_union, + function, + varargs_function, + + pub const last_no_payload_tag = Tag.zig_c_longdouble; + pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1; + + pub fn hasPayload(self: Tag) bool { + return @enumToInt(self) >= no_payload_count; + } + + pub fn toIndex(self: Tag) Index { + assert(!self.hasPayload()); + return @intCast(Index, @enumToInt(self)); + } + + pub fn Type(comptime self: Tag) type { + return switch (self) { + .void, + .char, + .@"signed char", + .short, + .int, + .long, + .@"long long", + ._Bool, + .@"unsigned char", + .@"unsigned short", + .@"unsigned int", + .@"unsigned long", + .@"unsigned long long", + .float, + .double, + .@"long double", + .bool, + .size_t, + .ptrdiff_t, + .uint8_t, + .int8_t, + .uint16_t, + .int16_t, + .uint32_t, + .int32_t, + .uint64_t, + .int64_t, + .uintptr_t, + .intptr_t, + .zig_u128, + .zig_i128, + .zig_f16, + .zig_f32, + .zig_f64, + .zig_f80, + .zig_f128, + .zig_c_longdouble, + => @compileError("Type Tag " ++ @tagName(self) ++ " has no payload"), + + .pointer, + .pointer_const, + .pointer_volatile, + .pointer_const_volatile, + => Payload.Child, + + .array, + .vector, + => Payload.Sequence, + + .fwd_anon_struct, + .fwd_anon_union, + => Payload.Fields, + + .fwd_struct, + .fwd_union, + => Payload.FwdDecl, + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => Payload.Unnamed, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => Payload.Aggregate, + + .function, + .varargs_function, + => Payload.Function, + }; + } + }; + + pub const Payload = struct { + tag: Tag, + + pub const Child = struct { + base: Payload, + data: Index, + }; + + pub const Sequence = struct { + base: Payload, + data: struct { + len: u64, + elem_type: Index, + }, + }; + + pub const FwdDecl = struct { + base: Payload, + data: Module.Decl.Index, + }; + + pub const Fields = struct { + base: Payload, + data: Data, + + pub const Data = []const Field; + pub const Field = struct { + name: [*:0]const u8, + type: Index, + alignas: AlignAs, + }; + }; + + pub const Unnamed = struct { + base: Payload, + data: struct { + fields: Fields.Data, + owner_decl: Module.Decl.Index, + id: u32, + }, + }; + + pub const Aggregate = struct { + base: Payload, + data: struct { + fields: Fields.Data, + fwd_decl: Index, + }, + }; + + pub const Function = struct { + base: Payload, + data: struct { + return_type: Index, + param_types: []const Index, + }, + }; + }; + + pub const AlignAs = struct { + @"align": std.math.Log2Int(u32), + abi: std.math.Log2Int(u32), + + pub fn init(alignment: u32, abi_alignment: u32) AlignAs { + const actual_align = if (alignment != 0) alignment else abi_alignment; + assert(std.math.isPowerOfTwo(actual_align)); + assert(std.math.isPowerOfTwo(abi_alignment)); + return .{ + .@"align" = std.math.log2_int(u32, actual_align), + .abi = std.math.log2_int(u32, abi_alignment), + }; + } + pub fn abiAlign(ty: Type, target: Target) AlignAs { + const abi_align = ty.abiAlignment(target); + return init(abi_align, abi_align); + } + pub fn fieldAlign(struct_ty: Type, field_i: usize, target: Target) AlignAs { + return init( + struct_ty.structFieldAlign(field_i, target), + 
struct_ty.structFieldType(field_i).abiAlignment(target), + ); + } + pub fn unionPayloadAlign(union_ty: Type, target: Target) AlignAs { + const union_obj = union_ty.cast(Type.Payload.Union).?.data; + const union_payload_align = union_obj.abiAlignment(target, false); + return init(union_payload_align, union_payload_align); + } + + pub fn getAlign(self: AlignAs) u32 { + return @as(u32, 1) << self.@"align"; + } + }; + + pub const Index = u32; + pub const Store = struct { + arena: std.heap.ArenaAllocator.State = .{}, + set: Set = .{}, + + pub const Set = struct { + pub const Map = std.ArrayHashMapUnmanaged(CType, void, HashContext, true); + const HashContext = struct { + store: *const Set, + + pub fn hash(self: @This(), cty: CType) Map.Hash { + return @truncate(Map.Hash, cty.hash(self.store.*)); + } + pub fn eql(_: @This(), lhs: CType, rhs: CType, _: usize) bool { + return lhs.eql(rhs); + } + }; + + map: Map = .{}, + + pub fn indexToCType(self: Set, index: Index) CType { + if (index < Tag.no_payload_count) return initTag(@intToEnum(Tag, index)); + return self.map.keys()[index - Tag.no_payload_count]; + } + + pub fn indexToHash(self: Set, index: Index) Map.Hash { + if (index < Tag.no_payload_count) + return (HashContext{ .store = &self }).hash(self.indexToCType(index)); + return self.map.entries.items(.hash)[index - Tag.no_payload_count]; + } + + pub fn typeToIndex(self: Set, ty: Type, target: Target, kind: Kind) ?Index { + const lookup = Convert.Lookup{ .imm = .{ .set = &self, .target = target } }; + + var convert: Convert = undefined; + convert.initType(ty, kind, lookup) catch unreachable; + + const t = convert.tag(); + if (!t.hasPayload()) return t.toIndex(); + + return if (self.map.getIndexAdapted( + ty, + TypeAdapter32{ .kind = kind, .lookup = lookup, .convert = &convert }, + )) |idx| @intCast(Index, Tag.no_payload_count + idx) else null; + } + }; + + pub const Promoted = struct { + arena: std.heap.ArenaAllocator, + set: Set, + + pub fn gpa(self: *Promoted) Allocator { + return self.arena.child_allocator; + } + + pub fn cTypeToIndex(self: *Promoted, cty: CType) Allocator.Error!Index { + const t = cty.tag(); + if (@enumToInt(t) < Tag.no_payload_count) return @intCast(Index, @enumToInt(t)); + + const gop = try self.set.map.getOrPutContext(self.gpa(), cty, .{ .store = &self.set }); + if (!gop.found_existing) gop.key_ptr.* = cty; + if (std.debug.runtime_safety) { + const key = &self.set.map.entries.items(.key)[gop.index]; + assert(key == gop.key_ptr); + assert(cty.eql(key.*)); + assert(cty.hash(self.set) == key.hash(self.set)); + } + return @intCast(Index, Tag.no_payload_count + gop.index); + } + + pub fn typeToIndex( + self: *Promoted, + ty: Type, + mod: *Module, + kind: Kind, + ) Allocator.Error!Index { + const lookup = Convert.Lookup{ .mut = .{ .promoted = self, .mod = mod } }; + + var convert: Convert = undefined; + try convert.initType(ty, kind, lookup); + + const t = convert.tag(); + if (!t.hasPayload()) return t.toIndex(); + + const gop = try self.set.map.getOrPutContextAdapted( + self.gpa(), + ty, + TypeAdapter32{ .kind = kind, .lookup = lookup.freeze(), .convert = &convert }, + .{ .store = &self.set }, + ); + if (!gop.found_existing) { + errdefer _ = self.set.map.pop(); + gop.key_ptr.* = try createFromConvert(self, ty, lookup.getTarget(), kind, convert); + } + if (std.debug.runtime_safety) { + const adapter = TypeAdapter64{ + .kind = kind, + .lookup = lookup.freeze(), + .convert = &convert, + }; + const cty = &self.set.map.entries.items(.key)[gop.index]; + assert(cty == gop.key_ptr); + 
assert(adapter.eql(ty, cty.*)); + assert(adapter.hash(ty) == cty.hash(self.set)); + } + return @intCast(Index, Tag.no_payload_count + gop.index); + } + }; + + pub fn promote(self: Store, gpa: Allocator) Promoted { + return .{ .arena = self.arena.promote(gpa), .set = self.set }; + } + + pub fn demote(self: *Store, promoted: Promoted) void { + self.arena = promoted.arena.state; + self.set = promoted.set; + } + + pub fn indexToCType(self: Store, index: Index) CType { + return self.set.indexToCType(index); + } + + pub fn indexToHash(self: Store, index: Index) Set.Map.Hash { + return self.set.indexToHash(index); + } + + pub fn cTypeToIndex(self: *Store, gpa: Allocator, cty: CType) !Index { + var promoted = self.promote(gpa); + defer self.demote(promoted); + return promoted.cTypeToIndex(cty); + } + + pub fn typeToCType(self: *Store, gpa: Allocator, ty: Type, mod: *Module, kind: Kind) !CType { + const idx = try self.typeToIndex(gpa, ty, mod, kind); + return self.indexToCType(idx); + } + + pub fn typeToIndex(self: *Store, gpa: Allocator, ty: Type, mod: *Module, kind: Kind) !Index { + var promoted = self.promote(gpa); + defer self.demote(promoted); + return promoted.typeToIndex(ty, mod, kind); + } + + pub fn clearRetainingCapacity(self: *Store, gpa: Allocator) void { + var promoted = self.promote(gpa); + defer self.demote(promoted); + promoted.set.map.clearRetainingCapacity(); + _ = promoted.arena.reset(.retain_capacity); + } + + pub fn clearAndFree(self: *Store, gpa: Allocator) void { + var promoted = self.promote(gpa); + defer self.demote(promoted); + promoted.set.map.clearAndFree(gpa); + _ = promoted.arena.reset(.free_all); + } + + pub fn shrinkRetainingCapacity(self: *Store, gpa: Allocator, new_len: usize) void { + self.set.map.shrinkRetainingCapacity(gpa, new_len); + } + + pub fn shrinkAndFree(self: *Store, gpa: Allocator, new_len: usize) void { + self.set.map.shrinkAndFree(gpa, new_len); + } + + pub fn count(self: Store) usize { + return self.set.map.count(); + } + + pub fn move(self: *Store) Store { + const moved = self.*; + self.* = .{}; + return moved; + } + + pub fn deinit(self: *Store, gpa: Allocator) void { + var promoted = self.promote(gpa); + promoted.set.map.deinit(gpa); + _ = promoted.arena.deinit(); + self.* = undefined; + } + }; + + pub fn isPacked(self: CType) bool { + return switch (self.tag()) { + else => false, + .packed_unnamed_struct, + .packed_unnamed_union, + .packed_struct, + .packed_union, + => true, + }; + } + + pub fn fields(self: CType) Payload.Fields.Data { + return if (self.cast(Payload.Aggregate)) |pl| + pl.data.fields + else if (self.cast(Payload.Unnamed)) |pl| + pl.data.fields + else if (self.cast(Payload.Fields)) |pl| + pl.data + else + unreachable; + } + + pub fn eql(lhs: CType, rhs: CType) bool { + return lhs.eqlContext(rhs, struct { + pub fn eqlIndex(_: @This(), lhs_idx: Index, rhs_idx: Index) bool { + return lhs_idx == rhs_idx; + } + }{}); + } + + pub fn eqlContext(lhs: CType, rhs: CType, ctx: anytype) bool { + // As a shortcut, if the small tags / addresses match, we're done. 
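(eqlContext's body resumes below.) The Store above is also worth a note for its promote/demote pair: the store persists only an `ArenaAllocator.State`, rehydrates it with a gpa for a batch of mutations, and writes the state back afterwards, so the idle struct stays small and allocator-free. A standalone sketch of the idiom; `MiniStore` is a hypothetical simplification of the diff's Store:

    const std = @import("std");

    const MiniStore = struct {
        arena: std.heap.ArenaAllocator.State = .{},

        fn promote(self: MiniStore, gpa: std.mem.Allocator) std.heap.ArenaAllocator {
            return self.arena.promote(gpa);
        }

        fn demote(self: *MiniStore, promoted: std.heap.ArenaAllocator) void {
            self.arena = promoted.state;
        }
    };

    test "arena promote/demote round trip" {
        const gpa = std.testing.allocator;
        var store = MiniStore{};
        {
            var arena = store.promote(gpa);
            defer store.demote(arena); // persist the updated state
            _ = try arena.allocator().dupe(u8, "scratch");
        }
        // Rehydrate once more to free everything owned by the state.
        var arena = store.promote(gpa);
        arena.deinit();
    }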
+ if (lhs.tag_if_small_enough == rhs.tag_if_small_enough) return true; + + const lhs_tag = lhs.tag(); + const rhs_tag = rhs.tag(); + if (lhs_tag != rhs_tag) return false; + + return switch (lhs_tag) { + .void, + .char, + .@"signed char", + .short, + .int, + .long, + .@"long long", + ._Bool, + .@"unsigned char", + .@"unsigned short", + .@"unsigned int", + .@"unsigned long", + .@"unsigned long long", + .float, + .double, + .@"long double", + .bool, + .size_t, + .ptrdiff_t, + .uint8_t, + .int8_t, + .uint16_t, + .int16_t, + .uint32_t, + .int32_t, + .uint64_t, + .int64_t, + .uintptr_t, + .intptr_t, + .zig_u128, + .zig_i128, + .zig_f16, + .zig_f32, + .zig_f64, + .zig_f80, + .zig_f128, + .zig_c_longdouble, + => false, + + .pointer, + .pointer_const, + .pointer_volatile, + .pointer_const_volatile, + => ctx.eqlIndex(lhs.cast(Payload.Child).?.data, rhs.cast(Payload.Child).?.data), + + .array, + .vector, + => { + const lhs_data = lhs.cast(Payload.Sequence).?.data; + const rhs_data = rhs.cast(Payload.Sequence).?.data; + return lhs_data.len == rhs_data.len and + ctx.eqlIndex(lhs_data.elem_type, rhs_data.elem_type); + }, + + .fwd_anon_struct, + .fwd_anon_union, + => { + const lhs_data = lhs.cast(Payload.Fields).?.data; + const rhs_data = rhs.cast(Payload.Fields).?.data; + if (lhs_data.len != rhs_data.len) return false; + for (lhs_data, rhs_data) |lhs_field, rhs_field| { + if (!ctx.eqlIndex(lhs_field.type, rhs_field.type)) return false; + if (lhs_field.alignas.@"align" != rhs_field.alignas.@"align") return false; + if (cstr.cmp(lhs_field.name, rhs_field.name) != 0) return false; + } + return true; + }, + + .fwd_struct, + .fwd_union, + => lhs.cast(Payload.FwdDecl).?.data == rhs.cast(Payload.FwdDecl).?.data, + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => { + const lhs_data = lhs.cast(Payload.Unnamed).?.data; + const rhs_data = rhs.cast(Payload.Unnamed).?.data; + return lhs_data.owner_decl == rhs_data.owner_decl and lhs_data.id == rhs_data.id; + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => ctx.eqlIndex( + lhs.cast(Payload.Aggregate).?.data.fwd_decl, + rhs.cast(Payload.Aggregate).?.data.fwd_decl, + ), + + .function, + .varargs_function, + => { + const lhs_data = lhs.cast(Payload.Function).?.data; + const rhs_data = rhs.cast(Payload.Function).?.data; + if (lhs_data.param_types.len != rhs_data.param_types.len) return false; + if (!ctx.eqlIndex(lhs_data.return_type, rhs_data.return_type)) return false; + for (lhs_data.param_types, rhs_data.param_types) |lhs_param_idx, rhs_param_idx| { + if (!ctx.eqlIndex(lhs_param_idx, rhs_param_idx)) return false; + } + return true; + }, + }; + } + + pub fn hash(self: CType, store: Store.Set) u64 { + var hasher = std.hash.Wyhash.init(0); + self.updateHasher(&hasher, store); + return hasher.final(); + } + + pub fn updateHasher(self: CType, hasher: anytype, store: Store.Set) void { + const t = self.tag(); + autoHash(hasher, t); + switch (t) { + .void, + .char, + .@"signed char", + .short, + .int, + .long, + .@"long long", + ._Bool, + .@"unsigned char", + .@"unsigned short", + .@"unsigned int", + .@"unsigned long", + .@"unsigned long long", + .float, + .double, + .@"long double", + .bool, + .size_t, + .ptrdiff_t, + .uint8_t, + .int8_t, + .uint16_t, + .int16_t, + .uint32_t, + .int32_t, + .uint64_t, + .int64_t, + .uintptr_t, + .intptr_t, + .zig_u128, + .zig_i128, + .zig_f16, + .zig_f32, + .zig_f64, + .zig_f80, + .zig_f128, + .zig_c_longdouble, + => {}, + + .pointer, + 
.pointer_const, + .pointer_volatile, + .pointer_const_volatile, + => store.indexToCType(self.cast(Payload.Child).?.data).updateHasher(hasher, store), + + .array, + .vector, + => { + const data = self.cast(Payload.Sequence).?.data; + autoHash(hasher, data.len); + store.indexToCType(data.elem_type).updateHasher(hasher, store); + }, + + .fwd_anon_struct, + .fwd_anon_union, + => for (self.cast(Payload.Fields).?.data) |field| { + store.indexToCType(field.type).updateHasher(hasher, store); + hasher.update(mem.span(field.name)); + autoHash(hasher, field.alignas.@"align"); + }, + + .fwd_struct, + .fwd_union, + => autoHash(hasher, self.cast(Payload.FwdDecl).?.data), + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => { + const data = self.cast(Payload.Unnamed).?.data; + autoHash(hasher, data.owner_decl); + autoHash(hasher, data.id); + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => store.indexToCType(self.cast(Payload.Aggregate).?.data.fwd_decl) + .updateHasher(hasher, store), + + .function, + .varargs_function, + => { + const data = self.cast(Payload.Function).?.data; + store.indexToCType(data.return_type).updateHasher(hasher, store); + for (data.param_types) |param_ty| { + store.indexToCType(param_ty).updateHasher(hasher, store); + } + }, + } + } + + pub const Kind = enum { forward, forward_parameter, complete, global, parameter, payload }; + + const Convert = struct { + storage: union { + none: void, + child: Payload.Child, + seq: Payload.Sequence, + fwd: Payload.FwdDecl, + anon: struct { + fields: [2]Payload.Fields.Field, + pl: union { + forward: Payload.Fields, + complete: Payload.Aggregate, + }, + }, + }, + value: union(enum) { + tag: Tag, + cty: CType, + }, + + pub fn init(self: *@This(), t: Tag) void { + self.* = if (t.hasPayload()) .{ + .storage = .{ .none = {} }, + .value = .{ .tag = t }, + } else .{ + .storage = .{ .none = {} }, + .value = .{ .cty = initTag(t) }, + }; + } + + pub fn tag(self: @This()) Tag { + return switch (self.value) { + .tag => |t| t, + .cty => |c| c.tag(), + }; + } + + fn tagFromIntInfo(signedness: std.builtin.Signedness, bits: u16) Tag { + return switch (bits) { + 0 => .void, + 1...8 => switch (signedness) { + .unsigned => .uint8_t, + .signed => .int8_t, + }, + 9...16 => switch (signedness) { + .unsigned => .uint16_t, + .signed => .int16_t, + }, + 17...32 => switch (signedness) { + .unsigned => .uint32_t, + .signed => .int32_t, + }, + 33...64 => switch (signedness) { + .unsigned => .uint64_t, + .signed => .int64_t, + }, + 65...128 => switch (signedness) { + .unsigned => .zig_u128, + .signed => .zig_i128, + }, + else => .array, + }; + } + + pub const Lookup = union(enum) { + fail: Target, + imm: struct { + set: *const Store.Set, + target: Target, + }, + mut: struct { + promoted: *Store.Promoted, + mod: *Module, + }, + + pub fn isMutable(self: @This()) bool { + return switch (self) { + .fail, .imm => false, + .mut => true, + }; + } + + pub fn getTarget(self: @This()) Target { + return switch (self) { + .fail => |target| target, + .imm => |imm| imm.target, + .mut => |mut| mut.mod.getTarget(), + }; + } + + pub fn getSet(self: @This()) ?*const Store.Set { + return switch (self) { + .fail => null, + .imm => |imm| imm.set, + .mut => |mut| &mut.promoted.set, + }; + } + + pub fn typeToIndex(self: @This(), ty: Type, kind: Kind) !?Index { + return switch (self) { + .fail => null, + .imm => |imm| imm.set.typeToIndex(ty, imm.target, kind), + .mut => |mut| try 
mut.promoted.typeToIndex(ty, mut.mod, kind), + }; + } + + pub fn indexToCType(self: @This(), index: Index) ?CType { + return if (self.getSet()) |set| set.indexToCType(index) else null; + } + + pub fn freeze(self: @This()) @This() { + return switch (self) { + .fail, .imm => self, + .mut => |mut| .{ .imm = .{ .set = &mut.promoted.set, .target = self.getTarget() } }, + }; + } + }; + + fn sortFields(self: *@This(), fields_len: usize) []Payload.Fields.Field { + const Field = Payload.Fields.Field; + const slice = self.storage.anon.fields[0..fields_len]; + std.sort.sort(Field, slice, {}, struct { + fn before(_: void, lhs: Field, rhs: Field) bool { + return lhs.alignas.@"align" > rhs.alignas.@"align"; + } + }.before); + return slice; + } + + fn initAnon(self: *@This(), kind: Kind, fwd_idx: Index, fields_len: usize) void { + switch (kind) { + .forward, .forward_parameter => { + self.storage.anon.pl = .{ .forward = .{ + .base = .{ .tag = .fwd_anon_struct }, + .data = self.sortFields(fields_len), + } }; + self.value = .{ .cty = initPayload(&self.storage.anon.pl.forward) }; + }, + .complete, .parameter, .global => { + self.storage.anon.pl = .{ .complete = .{ + .base = .{ .tag = .anon_struct }, + .data = .{ + .fields = self.sortFields(fields_len), + .fwd_decl = fwd_idx, + }, + } }; + self.value = .{ .cty = initPayload(&self.storage.anon.pl.complete) }; + }, + .payload => unreachable, + } + } + + fn initArrayParameter(self: *@This(), ty: Type, kind: Kind, lookup: Lookup) !void { + if (switch (kind) { + .forward_parameter => @as(Index, undefined), + .parameter => try lookup.typeToIndex(ty, .forward_parameter), + .forward, .complete, .global, .payload => unreachable, + }) |fwd_idx| { + if (try lookup.typeToIndex(ty, switch (kind) { + .forward_parameter => .forward, + .parameter => .complete, + .forward, .complete, .global, .payload => unreachable, + })) |array_idx| { + self.storage = .{ .anon = undefined }; + self.storage.anon.fields[0] = .{ + .name = "array", + .type = array_idx, + .alignas = AlignAs.abiAlign(ty, lookup.getTarget()), + }; + self.initAnon(kind, fwd_idx, 1); + } else self.init(switch (kind) { + .forward_parameter => .fwd_anon_struct, + .parameter => .anon_struct, + .forward, .complete, .global, .payload => unreachable, + }); + } else self.init(.anon_struct); + } + + pub fn initType(self: *@This(), ty: Type, kind: Kind, lookup: Lookup) !void { + const target = lookup.getTarget(); + + self.* = undefined; + if (!ty.isFnOrHasRuntimeBitsIgnoreComptime()) + self.init(.void) + else if (ty.isAbiInt()) switch (ty.tag()) { + .usize => self.init(.uintptr_t), + .isize => self.init(.intptr_t), + .c_short => self.init(.short), + .c_ushort => self.init(.@"unsigned short"), + .c_int => self.init(.int), + .c_uint => self.init(.@"unsigned int"), + .c_long => self.init(.long), + .c_ulong => self.init(.@"unsigned long"), + .c_longlong => self.init(.@"long long"), + .c_ulonglong => self.init(.@"unsigned long long"), + else => { + const info = ty.intInfo(target); + const t = tagFromIntInfo(info.signedness, info.bits); + switch (t) { + .void => unreachable, + else => self.init(t), + .array => switch (kind) { + .forward, .complete, .global => { + const abi_size = ty.abiSize(target); + const abi_align = ty.abiAlignment(target); + self.storage = .{ .seq = .{ .base = .{ .tag = .array }, .data = .{ + .len = @divExact(abi_size, abi_align), + .elem_type = tagFromIntInfo( + .unsigned, + @intCast(u16, abi_align * 8), + ).toIndex(), + } } }; + self.value = .{ .cty = initPayload(&self.storage.seq) }; + }, + 
.forward_parameter, + .parameter, + => try self.initArrayParameter(ty, kind, lookup), + .payload => unreachable, + }, + } + }, + } else switch (ty.zigTypeTag()) { + .Frame => unreachable, + .AnyFrame => unreachable, + + .Int, + .Enum, + .ErrorSet, + .Type, + .Void, + .NoReturn, + .ComptimeFloat, + .ComptimeInt, + .Undefined, + .Null, + .EnumLiteral, + => unreachable, + + .Bool => self.init(.bool), + + .Float => self.init(switch (ty.tag()) { + .f16 => .zig_f16, + .f32 => .zig_f32, + .f64 => .zig_f64, + .f80 => .zig_f80, + .f128 => .zig_f128, + .c_longdouble => .zig_c_longdouble, + else => unreachable, + }), + + .Pointer => { + const info = ty.ptrInfo().data; + switch (info.size) { + .Slice => { + if (switch (kind) { + .forward, .forward_parameter => @as(Index, undefined), + .complete, .parameter, .global => try lookup.typeToIndex(ty, .forward), + .payload => unreachable, + }) |fwd_idx| { + var buf: Type.SlicePtrFieldTypeBuffer = undefined; + const ptr_ty = ty.slicePtrFieldType(&buf); + if (try lookup.typeToIndex(ptr_ty, kind)) |ptr_idx| { + self.storage = .{ .anon = undefined }; + self.storage.anon.fields[0] = .{ + .name = "ptr", + .type = ptr_idx, + .alignas = AlignAs.abiAlign(ptr_ty, target), + }; + self.storage.anon.fields[1] = .{ + .name = "len", + .type = Tag.uintptr_t.toIndex(), + .alignas = AlignAs.abiAlign(Type.usize, target), + }; + self.initAnon(kind, fwd_idx, 2); + } else self.init(switch (kind) { + .forward, .forward_parameter => .fwd_anon_struct, + .complete, .parameter, .global => .anon_struct, + .payload => unreachable, + }); + } else self.init(.anon_struct); + }, + + .One, .Many, .C => { + const t: Tag = switch (info.@"volatile") { + false => switch (info.mutable) { + true => .pointer, + false => .pointer_const, + }, + true => switch (info.mutable) { + true => .pointer_volatile, + false => .pointer_const_volatile, + }, + }; + + var host_int_pl = Type.Payload.Bits{ + .base = .{ .tag = .int_unsigned }, + .data = info.host_size * 8, + }; + const pointee_ty = if (info.host_size > 0) + Type.initPayload(&host_int_pl.base) + else + info.pointee_type; + + if (if (info.size == .C and pointee_ty.tag() == .u8) + Tag.char.toIndex() + else + try lookup.typeToIndex(pointee_ty, .forward)) |child_idx| + { + self.storage = .{ .child = .{ + .base = .{ .tag = t }, + .data = child_idx, + } }; + self.value = .{ .cty = initPayload(&self.storage.child) }; + } else self.init(t); + }, + } + }, + + .Struct, .Union => |zig_ty_tag| if (ty.containerLayout() == .Packed) { + if (ty.castTag(.@"struct")) |struct_obj| { + try self.initType(struct_obj.data.backing_int_ty, kind, lookup); + } else { + var buf: Type.Payload.Bits = .{ + .base = .{ .tag = .int_unsigned }, + .data = @intCast(u16, ty.bitSize(target)), + }; + try self.initType(Type.initPayload(&buf.base), kind, lookup); + } + } else if (ty.isTupleOrAnonStruct()) { + if (lookup.isMutable()) { + for (0..switch (zig_ty_tag) { + .Struct => ty.structFieldCount(), + .Union => ty.unionFields().count(), + else => unreachable, + }) |field_i| { + const field_ty = ty.structFieldType(field_i); + if ((zig_ty_tag == .Struct and ty.structFieldIsComptime(field_i)) or + !field_ty.hasRuntimeBitsIgnoreComptime()) continue; + _ = try lookup.typeToIndex(field_ty, switch (kind) { + .forward, .forward_parameter => .forward, + .complete, .parameter => .complete, + .global => .global, + .payload => unreachable, + }); + } + switch (kind) { + .forward, .forward_parameter => {}, + .complete, .parameter, .global => _ = try lookup.typeToIndex(ty, .forward), + .payload => 
unreachable, + } + } + self.init(switch (kind) { + .forward, .forward_parameter => switch (zig_ty_tag) { + .Struct => .fwd_anon_struct, + .Union => .fwd_anon_union, + else => unreachable, + }, + .complete, .parameter, .global => switch (zig_ty_tag) { + .Struct => .anon_struct, + .Union => .anon_union, + else => unreachable, + }, + .payload => unreachable, + }); + } else { + const tag_ty = ty.unionTagTypeSafety(); + const is_tagged_union_wrapper = kind != .payload and tag_ty != null; + const is_struct = zig_ty_tag == .Struct or is_tagged_union_wrapper; + switch (kind) { + .forward, .forward_parameter => { + self.storage = .{ .fwd = .{ + .base = .{ .tag = if (is_struct) .fwd_struct else .fwd_union }, + .data = ty.getOwnerDecl(), + } }; + self.value = .{ .cty = initPayload(&self.storage.fwd) }; + }, + .complete, .parameter, .global, .payload => if (is_tagged_union_wrapper) { + const fwd_idx = try lookup.typeToIndex(ty, .forward); + const payload_idx = try lookup.typeToIndex(ty, .payload); + const tag_idx = try lookup.typeToIndex(tag_ty.?, kind); + if (fwd_idx != null and payload_idx != null and tag_idx != null) { + self.storage = .{ .anon = undefined }; + var field_count: usize = 0; + if (payload_idx != Tag.void.toIndex()) { + self.storage.anon.fields[field_count] = .{ + .name = "payload", + .type = payload_idx.?, + .alignas = AlignAs.unionPayloadAlign(ty, target), + }; + field_count += 1; + } + if (tag_idx != Tag.void.toIndex()) { + self.storage.anon.fields[field_count] = .{ + .name = "tag", + .type = tag_idx.?, + .alignas = AlignAs.abiAlign(tag_ty.?, target), + }; + field_count += 1; + } + self.storage.anon.pl = .{ .complete = .{ + .base = .{ .tag = .@"struct" }, + .data = .{ + .fields = self.sortFields(field_count), + .fwd_decl = fwd_idx.?, + }, + } }; + self.value = .{ .cty = initPayload(&self.storage.anon.pl.complete) }; + } else self.init(.@"struct"); + } else if (kind == .payload and ty.unionHasAllZeroBitFieldTypes()) { + self.init(.void); + } else { + var is_packed = false; + for (0..switch (zig_ty_tag) { + .Struct => ty.structFieldCount(), + .Union => ty.unionFields().count(), + else => unreachable, + }) |field_i| { + const field_ty = ty.structFieldType(field_i); + if (!field_ty.hasRuntimeBitsIgnoreComptime()) continue; + + const field_align = AlignAs.fieldAlign(ty, field_i, target); + if (field_align.@"align" < field_align.abi) { + is_packed = true; + if (!lookup.isMutable()) break; + } + + if (lookup.isMutable()) { + _ = try lookup.typeToIndex(field_ty, switch (kind) { + .forward, .forward_parameter => unreachable, + .complete, .parameter, .payload => .complete, + .global => .global, + }); + } + } + switch (kind) { + .forward, .forward_parameter => unreachable, + .complete, .parameter, .global => { + _ = try lookup.typeToIndex(ty, .forward); + self.init(if (is_struct) + if (is_packed) .packed_struct else .@"struct" + else if (is_packed) .packed_union else .@"union"); + }, + .payload => self.init(if (is_packed) + .packed_unnamed_union + else + .unnamed_union), + } + }, + } + }, + + .Array, .Vector => |zig_ty_tag| { + switch (kind) { + .forward, .complete, .global => { + const t: Tag = switch (zig_ty_tag) { + .Array => .array, + .Vector => .vector, + else => unreachable, + }; + if (try lookup.typeToIndex(ty.childType(), kind)) |child_idx| { + self.storage = .{ .seq = .{ .base = .{ .tag = t }, .data = .{ + .len = ty.arrayLenIncludingSentinel(), + .elem_type = child_idx, + } } }; + self.value = .{ .cty = initPayload(&self.storage.seq) }; + } else self.init(t); + }, + 
.forward_parameter, .parameter => try self.initArrayParameter(ty, kind, lookup), + .payload => unreachable, + } + }, + + .Optional => { + var buf: Type.Payload.ElemType = undefined; + const payload_ty = ty.optionalChild(&buf); + if (payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (ty.optionalReprIsPayload()) { + try self.initType(payload_ty, kind, lookup); + } else if (switch (kind) { + .forward, .forward_parameter => @as(Index, undefined), + .complete, .parameter, .global => try lookup.typeToIndex(ty, .forward), + .payload => unreachable, + }) |fwd_idx| { + if (try lookup.typeToIndex(payload_ty, switch (kind) { + .forward, .forward_parameter => .forward, + .complete, .parameter => .complete, + .global => .global, + .payload => unreachable, + })) |payload_idx| { + self.storage = .{ .anon = undefined }; + self.storage.anon.fields[0] = .{ + .name = "payload", + .type = payload_idx, + .alignas = AlignAs.abiAlign(payload_ty, target), + }; + self.storage.anon.fields[1] = .{ + .name = "is_null", + .type = Tag.bool.toIndex(), + .alignas = AlignAs.abiAlign(Type.bool, target), + }; + self.initAnon(kind, fwd_idx, 2); + } else self.init(switch (kind) { + .forward, .forward_parameter => .fwd_anon_struct, + .complete, .parameter, .global => .anon_struct, + .payload => unreachable, + }); + } else self.init(.anon_struct); + } else self.init(.bool); + }, + + .ErrorUnion => { + if (switch (kind) { + .forward, .forward_parameter => @as(Index, undefined), + .complete, .parameter, .global => try lookup.typeToIndex(ty, .forward), + .payload => unreachable, + }) |fwd_idx| { + const payload_ty = ty.errorUnionPayload(); + if (try lookup.typeToIndex(payload_ty, switch (kind) { + .forward, .forward_parameter => .forward, + .complete, .parameter => .complete, + .global => .global, + .payload => unreachable, + })) |payload_idx| { + const error_ty = ty.errorUnionSet(); + if (payload_idx == Tag.void.toIndex()) { + try self.initType(error_ty, kind, lookup); + } else if (try lookup.typeToIndex(error_ty, kind)) |error_idx| { + self.storage = .{ .anon = undefined }; + self.storage.anon.fields[0] = .{ + .name = "payload", + .type = payload_idx, + .alignas = AlignAs.abiAlign(payload_ty, target), + }; + self.storage.anon.fields[1] = .{ + .name = "error", + .type = error_idx, + .alignas = AlignAs.abiAlign(error_ty, target), + }; + self.initAnon(kind, fwd_idx, 2); + } else self.init(switch (kind) { + .forward, .forward_parameter => .fwd_anon_struct, + .complete, .parameter, .global => .anon_struct, + .payload => unreachable, + }); + } else self.init(switch (kind) { + .forward, .forward_parameter => .fwd_anon_struct, + .complete, .parameter, .global => .anon_struct, + .payload => unreachable, + }); + } else self.init(.anon_struct); + }, + + .Opaque => switch (ty.tag()) { + .anyopaque => self.init(.void), + .@"opaque" => { + self.storage = .{ .fwd = .{ + .base = .{ .tag = .fwd_struct }, + .data = ty.getOwnerDecl(), + } }; + self.value = .{ .cty = initPayload(&self.storage.fwd) }; + }, + else => unreachable, + }, + + .Fn => { + const info = ty.fnInfo(); + if (!info.is_generic) { + if (lookup.isMutable()) { + const param_kind: Kind = switch (kind) { + .forward, .forward_parameter => .forward_parameter, + .complete, .parameter, .global => .parameter, + .payload => unreachable, + }; + _ = try lookup.typeToIndex(info.return_type, param_kind); + for (info.param_types) |param_type| { + if (!param_type.hasRuntimeBitsIgnoreComptime()) continue; + _ = try lookup.typeToIndex(param_type, param_kind); + } + } + self.init(if 
(info.is_var_args) .varargs_function else .function); + } else self.init(.void); + }, + } + } + }; + + pub fn copy(self: CType, arena: Allocator) !CType { + return self.copyContext(struct { + arena: Allocator, + pub fn copyIndex(_: @This(), idx: Index) Index { + return idx; + } + }{ .arena = arena }); + } + + fn copyFields(ctx: anytype, old_fields: Payload.Fields.Data) !Payload.Fields.Data { + const new_fields = try ctx.arena.alloc(Payload.Fields.Field, old_fields.len); + for (new_fields, old_fields) |*new_field, old_field| { + new_field.name = try ctx.arena.dupeZ(u8, mem.span(old_field.name)); + new_field.type = ctx.copyIndex(old_field.type); + new_field.alignas = old_field.alignas; + } + return new_fields; + } + + fn copyParams(ctx: anytype, old_param_types: []const Index) ![]const Index { + const new_param_types = try ctx.arena.alloc(Index, old_param_types.len); + for (new_param_types, old_param_types) |*new_param_type, old_param_type| + new_param_type.* = ctx.copyIndex(old_param_type); + return new_param_types; + } + + pub fn copyContext(self: CType, ctx: anytype) !CType { + switch (self.tag()) { + .void, + .char, + .@"signed char", + .short, + .int, + .long, + .@"long long", + ._Bool, + .@"unsigned char", + .@"unsigned short", + .@"unsigned int", + .@"unsigned long", + .@"unsigned long long", + .float, + .double, + .@"long double", + .bool, + .size_t, + .ptrdiff_t, + .uint8_t, + .int8_t, + .uint16_t, + .int16_t, + .uint32_t, + .int32_t, + .uint64_t, + .int64_t, + .uintptr_t, + .intptr_t, + .zig_u128, + .zig_i128, + .zig_f16, + .zig_f32, + .zig_f64, + .zig_f80, + .zig_f128, + .zig_c_longdouble, + => return self, + + .pointer, + .pointer_const, + .pointer_volatile, + .pointer_const_volatile, + => { + const pl = self.cast(Payload.Child).?; + const new_pl = try ctx.arena.create(Payload.Child); + new_pl.* = .{ .base = .{ .tag = pl.base.tag }, .data = ctx.copyIndex(pl.data) }; + return initPayload(new_pl); + }, + + .array, + .vector, + => { + const pl = self.cast(Payload.Sequence).?; + const new_pl = try ctx.arena.create(Payload.Sequence); + new_pl.* = .{ + .base = .{ .tag = pl.base.tag }, + .data = .{ .len = pl.data.len, .elem_type = ctx.copyIndex(pl.data.elem_type) }, + }; + return initPayload(new_pl); + }, + + .fwd_anon_struct, + .fwd_anon_union, + => { + const pl = self.cast(Payload.Fields).?; + const new_pl = try ctx.arena.create(Payload.Fields); + new_pl.* = .{ + .base = .{ .tag = pl.base.tag }, + .data = try copyFields(ctx, pl.data), + }; + return initPayload(new_pl); + }, + + .fwd_struct, + .fwd_union, + => { + const pl = self.cast(Payload.FwdDecl).?; + const new_pl = try ctx.arena.create(Payload.FwdDecl); + new_pl.* = .{ .base = .{ .tag = pl.base.tag }, .data = pl.data }; + return initPayload(new_pl); + }, + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => { + const pl = self.cast(Payload.Unnamed).?; + const new_pl = try ctx.arena.create(Payload.Unnamed); + new_pl.* = .{ .base = .{ .tag = pl.base.tag }, .data = .{ + .fields = try copyFields(ctx, pl.data.fields), + .owner_decl = pl.data.owner_decl, + .id = pl.data.id, + } }; + return initPayload(new_pl); + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => { + const pl = self.cast(Payload.Aggregate).?; + const new_pl = try ctx.arena.create(Payload.Aggregate); + new_pl.* = .{ .base = .{ .tag = pl.base.tag }, .data = .{ + .fields = try copyFields(ctx, pl.data.fields), + .fwd_decl = ctx.copyIndex(pl.data.fwd_decl), + } }; + return 
initPayload(new_pl); + }, + + .function, + .varargs_function, + => { + const pl = self.cast(Payload.Function).?; + const new_pl = try ctx.arena.create(Payload.Function); + new_pl.* = .{ .base = .{ .tag = pl.base.tag }, .data = .{ + .return_type = ctx.copyIndex(pl.data.return_type), + .param_types = try copyParams(ctx, pl.data.param_types), + } }; + return initPayload(new_pl); + }, + } + } + + fn createFromType(store: *Store.Promoted, ty: Type, target: Target, kind: Kind) !CType { + var convert: Convert = undefined; + try convert.initType(ty, kind, .{ .imm = .{ .set = &store.set, .target = target } }); + return createFromConvert(store, ty, target, kind, &convert); + } + + fn createFromConvert( + store: *Store.Promoted, + ty: Type, + target: Target, + kind: Kind, + convert: Convert, + ) !CType { + const arena = store.arena.allocator(); + switch (convert.value) { + .cty => |c| return c.copy(arena), + .tag => |t| switch (t) { + .fwd_anon_struct, + .fwd_anon_union, + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => { + const zig_ty_tag = ty.zigTypeTag(); + const fields_len = switch (zig_ty_tag) { + .Struct => ty.structFieldCount(), + .Union => ty.unionFields().count(), + else => unreachable, + }; + + var c_fields_len: usize = 0; + for (0..fields_len) |field_i| { + const field_ty = ty.structFieldType(field_i); + if ((zig_ty_tag == .Struct and ty.structFieldIsComptime(field_i)) or + !field_ty.hasRuntimeBitsIgnoreComptime()) continue; + c_fields_len += 1; + } + + const fields_pl = try arena.alloc(Payload.Fields.Field, c_fields_len); + var c_field_i: usize = 0; + for (0..fields_len) |field_i| { + const field_ty = ty.structFieldType(field_i); + if ((zig_ty_tag == .Struct and ty.structFieldIsComptime(field_i)) or + !field_ty.hasRuntimeBitsIgnoreComptime()) continue; + + defer c_field_i += 1; + fields_pl[c_field_i] = .{ + .name = try if (ty.isSimpleTuple()) + std.fmt.allocPrintZ(arena, "f{}", .{field_i}) + else + arena.dupeZ(u8, switch (zig_ty_tag) { + .Struct => ty.structFieldName(field_i), + .Union => ty.unionFields().keys()[field_i], + else => unreachable, + }), + .type = store.set.typeToIndex(field_ty, target, switch (kind) { + .forward, .forward_parameter => .forward, + .complete, .parameter, .payload => .complete, + .global => .global, + }).?, + .alignas = AlignAs.fieldAlign(ty, field_i, target), + }; + } + + switch (t) { + .fwd_anon_struct, + .fwd_anon_union, + => { + const anon_pl = try arena.create(Payload.Fields); + anon_pl.* = .{ .base = .{ .tag = t }, .data = fields_pl }; + return initPayload(anon_pl); + }, + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => { + const unnamed_pl = try arena.create(Payload.Unnamed); + unnamed_pl.* = .{ .base = .{ .tag = t }, .data = .{ + .fields = fields_pl, + .owner_decl = ty.getOwnerDecl(), + .id = if (ty.unionTagTypeSafety()) |_| 0 else unreachable, + } }; + return initPayload(unnamed_pl); + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => { + const struct_pl = try arena.create(Payload.Aggregate); + struct_pl.* = .{ .base = .{ .tag = t }, .data = .{ + .fields = fields_pl, + .fwd_decl = store.set.typeToIndex(ty, target, .forward).?, + } }; + return initPayload(struct_pl); + }, + + else => unreachable, + } + }, + + .function, + .varargs_function, + => { + const info = ty.fnInfo(); + assert(!info.is_generic); + const param_kind: 
Kind = switch (kind) { + .forward, .forward_parameter => .forward_parameter, + .complete, .parameter, .global => .parameter, + .payload => unreachable, + }; + + var c_params_len: usize = 0; + for (info.param_types) |param_type| { + if (!param_type.hasRuntimeBitsIgnoreComptime()) continue; + c_params_len += 1; + } + + const params_pl = try arena.alloc(Index, c_params_len); + var c_param_i: usize = 0; + for (info.param_types) |param_type| { + if (!param_type.hasRuntimeBitsIgnoreComptime()) continue; + params_pl[c_param_i] = store.set.typeToIndex(param_type, target, param_kind).?; + c_param_i += 1; + } + + const fn_pl = try arena.create(Payload.Function); + fn_pl.* = .{ .base = .{ .tag = t }, .data = .{ + .return_type = store.set.typeToIndex(info.return_type, target, param_kind).?, + .param_types = params_pl, + } }; + return initPayload(fn_pl); + }, + + else => unreachable, + }, + } + } + + pub const TypeAdapter64 = struct { + kind: Kind, + lookup: Convert.Lookup, + convert: *const Convert, + + fn eqlRecurse(self: @This(), ty: Type, cty: Index, kind: Kind) bool { + assert(!self.lookup.isMutable()); + + var convert: Convert = undefined; + convert.initType(ty, kind, self.lookup) catch unreachable; + + const self_recurse = @This(){ .kind = kind, .lookup = self.lookup, .convert = &convert }; + return self_recurse.eql(ty, self.lookup.indexToCType(cty).?); + } + + pub fn eql(self: @This(), ty: Type, cty: CType) bool { + switch (self.convert.value) { + .cty => |c| return c.eql(cty), + .tag => |t| { + if (t != cty.tag()) return false; + + const target = self.lookup.getTarget(); + switch (t) { + .fwd_anon_struct, + .fwd_anon_union, + => { + if (!ty.isTupleOrAnonStruct()) return false; + + var name_buf: [ + std.fmt.count("f{}", .{std.math.maxInt(usize)}) + ]u8 = undefined; + const c_fields = cty.cast(Payload.Fields).?.data; + + const zig_ty_tag = ty.zigTypeTag(); + var c_field_i: usize = 0; + for (0..switch (zig_ty_tag) { + .Struct => ty.structFieldCount(), + .Union => ty.unionFields().count(), + else => unreachable, + }) |field_i| { + const field_ty = ty.structFieldType(field_i); + if ((zig_ty_tag == .Struct and ty.structFieldIsComptime(field_i)) or + !field_ty.hasRuntimeBitsIgnoreComptime()) continue; + + defer c_field_i += 1; + const c_field = &c_fields[c_field_i]; + + if (!self.eqlRecurse(field_ty, c_field.type, switch (self.kind) { + .forward, .forward_parameter => .forward, + .complete, .parameter => .complete, + .global => .global, + .payload => unreachable, + }) or !mem.eql( + u8, + if (ty.isSimpleTuple()) + std.fmt.bufPrint(&name_buf, "f{}", .{field_i}) catch unreachable + else switch (zig_ty_tag) { + .Struct => ty.structFieldName(field_i), + .Union => ty.unionFields().keys()[field_i], + else => unreachable, + }, + mem.span(c_field.name), + ) or AlignAs.fieldAlign(ty, field_i, target).@"align" != + c_field.alignas.@"align") return false; + } + return true; + }, + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => switch (self.kind) { + .forward, .forward_parameter, .complete, .parameter, .global => unreachable, + .payload => if (ty.unionTagTypeSafety()) |_| { + const data = cty.cast(Payload.Unnamed).?.data; + return ty.getOwnerDecl() == data.owner_decl and data.id == 0; + } else unreachable, + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => return self.eqlRecurse( + ty, + cty.cast(Payload.Aggregate).?.data.fwd_decl, + .forward, + ), + + .function, + .varargs_function, + => { + if (ty.zigTypeTag() 
!= .Fn) return false; + + const info = ty.fnInfo(); + assert(!info.is_generic); + const data = cty.cast(Payload.Function).?.data; + const param_kind: Kind = switch (self.kind) { + .forward, .forward_parameter => .forward_parameter, + .complete, .parameter, .global => .parameter, + .payload => unreachable, + }; + + if (!self.eqlRecurse(info.return_type, data.return_type, param_kind)) + return false; + + var c_param_i: usize = 0; + for (info.param_types) |param_type| { + if (!param_type.hasRuntimeBitsIgnoreComptime()) continue; + + if (c_param_i >= data.param_types.len) return false; + const param_cty = data.param_types[c_param_i]; + c_param_i += 1; + + if (!self.eqlRecurse(param_type, param_cty, param_kind)) + return false; + } + return c_param_i == data.param_types.len; + }, + + else => unreachable, + } + }, + } + } + + pub fn hash(self: @This(), ty: Type) u64 { + var hasher = std.hash.Wyhash.init(0); + self.updateHasher(&hasher, ty); + return hasher.final(); + } + + fn updateHasherRecurse(self: @This(), hasher: anytype, ty: Type, kind: Kind) void { + assert(!self.lookup.isMutable()); + + var convert: Convert = undefined; + convert.initType(ty, kind, self.lookup) catch unreachable; + + const self_recurse = @This(){ .kind = kind, .lookup = self.lookup, .convert = &convert }; + self_recurse.updateHasher(hasher, ty); + } + + pub fn updateHasher(self: @This(), hasher: anytype, ty: Type) void { + switch (self.convert.value) { + .cty => |c| return c.updateHasher(hasher, self.lookup.getSet().?.*), + .tag => |t| { + autoHash(hasher, t); + + const target = self.lookup.getTarget(); + switch (t) { + .fwd_anon_struct, + .fwd_anon_union, + => { + var name_buf: [ + std.fmt.count("f{}", .{std.math.maxInt(usize)}) + ]u8 = undefined; + + const zig_ty_tag = ty.zigTypeTag(); + for (0..switch (ty.zigTypeTag()) { + .Struct => ty.structFieldCount(), + .Union => ty.unionFields().count(), + else => unreachable, + }) |field_i| { + const field_ty = ty.structFieldType(field_i); + if ((zig_ty_tag == .Struct and ty.structFieldIsComptime(field_i)) or + !field_ty.hasRuntimeBitsIgnoreComptime()) continue; + + self.updateHasherRecurse(hasher, field_ty, switch (self.kind) { + .forward, .forward_parameter => .forward, + .complete, .parameter => .complete, + .global => .global, + .payload => unreachable, + }); + hasher.update(if (ty.isSimpleTuple()) + std.fmt.bufPrint(&name_buf, "f{}", .{field_i}) catch unreachable + else switch (zig_ty_tag) { + .Struct => ty.structFieldName(field_i), + .Union => ty.unionFields().keys()[field_i], + else => unreachable, + }); + autoHash(hasher, AlignAs.fieldAlign(ty, field_i, target).@"align"); + } + }, + + .unnamed_struct, + .unnamed_union, + .packed_unnamed_struct, + .packed_unnamed_union, + => switch (self.kind) { + .forward, .forward_parameter, .complete, .parameter, .global => unreachable, + .payload => if (ty.unionTagTypeSafety()) |_| { + autoHash(hasher, ty.getOwnerDecl()); + autoHash(hasher, @as(u32, 0)); + } else unreachable, + }, + + .anon_struct, + .anon_union, + .@"struct", + .@"union", + .packed_struct, + .packed_union, + => self.updateHasherRecurse(hasher, ty, .forward), + + .function, + .varargs_function, + => { + const info = ty.fnInfo(); + assert(!info.is_generic); + const param_kind: Kind = switch (self.kind) { + .forward, .forward_parameter => .forward_parameter, + .complete, .parameter, .global => .parameter, + .payload => unreachable, + }; + + self.updateHasherRecurse(hasher, info.return_type, param_kind); + for (info.param_types) |param_type| { + if 
(!param_type.hasRuntimeBitsIgnoreComptime()) continue; + self.updateHasherRecurse(hasher, param_type, param_kind); + } + }, + + else => unreachable, + } + }, + } + } + }; + + pub const TypeAdapter32 = struct { + kind: Kind, + lookup: Convert.Lookup, + convert: *const Convert, + + fn to64(self: @This()) TypeAdapter64 { + return .{ .kind = self.kind, .lookup = self.lookup, .convert = self.convert }; + } + + pub fn eql(self: @This(), ty: Type, cty: CType, cty_index: usize) bool { + _ = cty_index; + return self.to64().eql(ty, cty); + } + + pub fn hash(self: @This(), ty: Type) u32 { + return @truncate(u32, self.to64().hash(ty)); + } + }; +}; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 21c57b5e13..f0d5097f90 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -6025,8 +6025,8 @@ pub const FuncGen = struct { const field_ptr = try self.resolveInst(extra.field_ptr); const target = self.dg.module.getTarget(); - const struct_ty = self.air.getRefType(ty_pl.ty).childType(); - const field_offset = struct_ty.structFieldOffset(extra.field_index, target); + const parent_ty = self.air.getRefType(ty_pl.ty).childType(); + const field_offset = parent_ty.structFieldOffset(extra.field_index, target); const res_ty = try self.dg.lowerType(self.air.getRefType(ty_pl.ty)); if (field_offset == 0) { diff --git a/src/link/C.zig b/src/link/C.zig index 02e5cadfbc..5663ba71e2 100644 --- a/src/link/C.zig +++ b/src/link/C.zig @@ -22,27 +22,22 @@ base: link.File, /// Instead, it tracks all declarations in this table, and iterates over it /// in the flush function, stitching pre-rendered pieces of C code together. decl_table: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, DeclBlock) = .{}, -/// Stores Type/Value data for `typedefs` to reference. -/// Accumulates allocations and then there is a periodic garbage collection after flush(). -arena: std.heap.ArenaAllocator, /// Per-declaration data. const DeclBlock = struct { code: std.ArrayListUnmanaged(u8) = .{}, fwd_decl: std.ArrayListUnmanaged(u8) = .{}, - /// Each Decl stores a mapping of Zig Types to corresponding C types, for every - /// Zig Type used by the Decl. In flush(), we iterate over each Decl - /// and emit the typedef code for all types, making sure to not emit the same thing twice. - /// Any arena memory the Type points to lives in the `arena` field of `C`. - typedefs: codegen.TypedefMap.Unmanaged = .{}, + /// Each `Decl` stores a set of used `CType`s. In `flush()`, we iterate + /// over each `Decl` and generate the definition for each used `CType` once. + ctypes: codegen.CType.Store = .{}, + /// Key and Value storage use the ctype arena. 
+ lazy_fns: codegen.LazyFnMap = .{}, fn deinit(db: *DeclBlock, gpa: Allocator) void { - db.code.deinit(gpa); + db.lazy_fns.deinit(gpa); + db.ctypes.deinit(gpa); db.fwd_decl.deinit(gpa); - for (db.typedefs.values()) |typedef| { - gpa.free(typedef.rendered); - } - db.typedefs.deinit(gpa); + db.code.deinit(gpa); db.* = undefined; } }; @@ -64,7 +59,6 @@ pub fn openPath(gpa: Allocator, sub_path: []const u8, options: link.Options) !*C errdefer gpa.destroy(c_file); c_file.* = C{ - .arena = std.heap.ArenaAllocator.init(gpa), .base = .{ .tag = .c, .options = options, @@ -83,8 +77,6 @@ pub fn deinit(self: *C) void { db.deinit(gpa); } self.decl_table.deinit(gpa); - - self.arena.deinit(); } pub fn freeDecl(self: *C, decl_index: Module.Decl.Index) void { @@ -99,124 +91,122 @@ pub fn updateFunc(self: *C, module: *Module, func: *Module.Fn, air: Air, livenes const tracy = trace(@src()); defer tracy.end(); + const gpa = self.base.allocator; + const decl_index = func.owner_decl; - const gop = try self.decl_table.getOrPut(self.base.allocator, decl_index); + const gop = try self.decl_table.getOrPut(gpa, decl_index); if (!gop.found_existing) { gop.value_ptr.* = .{}; } + const ctypes = &gop.value_ptr.ctypes; + const lazy_fns = &gop.value_ptr.lazy_fns; const fwd_decl = &gop.value_ptr.fwd_decl; - const typedefs = &gop.value_ptr.typedefs; const code = &gop.value_ptr.code; + ctypes.clearRetainingCapacity(gpa); + lazy_fns.clearRetainingCapacity(); fwd_decl.shrinkRetainingCapacity(0); - for (typedefs.values()) |typedef| { - module.gpa.free(typedef.rendered); - } - typedefs.clearRetainingCapacity(); code.shrinkRetainingCapacity(0); var function: codegen.Function = .{ - .value_map = codegen.CValueMap.init(module.gpa), + .value_map = codegen.CValueMap.init(gpa), .air = air, .liveness = liveness, .func = func, .object = .{ .dg = .{ - .gpa = module.gpa, + .gpa = gpa, .module = module, .error_msg = null, - .decl_index = decl_index, + .decl_index = decl_index.toOptional(), .decl = module.declPtr(decl_index), - .fwd_decl = fwd_decl.toManaged(module.gpa), - .typedefs = typedefs.promoteContext(module.gpa, .{ .mod = module }), - .typedefs_arena = self.arena.allocator(), + .fwd_decl = fwd_decl.toManaged(gpa), + .ctypes = ctypes.*, }, - .code = code.toManaged(module.gpa), + .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code }, - .arena = std.heap.ArenaAllocator.init(module.gpa), + .lazy_fns = lazy_fns.*, + .arena = std.heap.ArenaAllocator.init(gpa), }; function.object.indent_writer = .{ .underlying_writer = function.object.code.writer() }; - defer function.deinit(module.gpa); + defer function.deinit(); codegen.genFunc(&function) catch |err| switch (err) { error.AnalysisFail => { - try module.failed_decls.put(module.gpa, decl_index, function.object.dg.error_msg.?); + try module.failed_decls.put(gpa, decl_index, function.object.dg.error_msg.?); return; }, else => |e| return e, }; + ctypes.* = function.object.dg.ctypes.move(); + lazy_fns.* = function.lazy_fns.move(); fwd_decl.* = function.object.dg.fwd_decl.moveToUnmanaged(); - typedefs.* = function.object.dg.typedefs.unmanaged; - function.object.dg.typedefs.unmanaged = .{}; code.* = function.object.code.moveToUnmanaged(); // Free excess allocated memory for this Decl. 
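The `move()` calls just above are the crux of the new ownership scheme: the codegen context borrows the `DeclBlock`'s unmanaged containers, and whatever it leaves behind is handed back before the excess capacity is trimmed below. A minimal sketch of that handoff pattern, using a hypothetical `Block` rather than the real `CType.Store`/`LazyFnMap` API:

```zig
const std = @import("std");

// Hypothetical miniature of the move()-style handoff: take the contents out
// of a container while resetting the source to a valid empty state, so that
// exactly one owner is responsible for deinit at any moment.
const Block = struct {
    bytes: std.ArrayListUnmanaged(u8) = .{},

    fn move(self: *Block) Block {
        const moved = self.*;
        self.* = .{};
        return moved;
    }
};

test "lend to a worker, take the result back" {
    const gpa = std.testing.allocator;
    var db: Block = .{};
    defer db.bytes.deinit(gpa);

    var working = db.move(); // the worker owns the buffer for now
    try working.bytes.appendSlice(gpa, "int x;\n");
    db = working.move(); // hand the (possibly reallocated) buffer back

    try std.testing.expectEqualStrings("int x;\n", db.bytes.items);
}
```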
- fwd_decl.shrinkAndFree(module.gpa, fwd_decl.items.len); - code.shrinkAndFree(module.gpa, code.items.len); + ctypes.shrinkAndFree(gpa, ctypes.count()); + lazy_fns.shrinkAndFree(gpa, lazy_fns.count()); + fwd_decl.shrinkAndFree(gpa, fwd_decl.items.len); + code.shrinkAndFree(gpa, code.items.len); } pub fn updateDecl(self: *C, module: *Module, decl_index: Module.Decl.Index) !void { const tracy = trace(@src()); defer tracy.end(); - const gop = try self.decl_table.getOrPut(self.base.allocator, decl_index); + const gpa = self.base.allocator; + + const gop = try self.decl_table.getOrPut(gpa, decl_index); if (!gop.found_existing) { gop.value_ptr.* = .{}; } + const ctypes = &gop.value_ptr.ctypes; const fwd_decl = &gop.value_ptr.fwd_decl; - const typedefs = &gop.value_ptr.typedefs; const code = &gop.value_ptr.code; + ctypes.clearRetainingCapacity(gpa); fwd_decl.shrinkRetainingCapacity(0); - for (typedefs.values()) |value| { - module.gpa.free(value.rendered); - } - typedefs.clearRetainingCapacity(); code.shrinkRetainingCapacity(0); const decl = module.declPtr(decl_index); var object: codegen.Object = .{ .dg = .{ - .gpa = module.gpa, + .gpa = gpa, .module = module, .error_msg = null, - .decl_index = decl_index, + .decl_index = decl_index.toOptional(), .decl = decl, - .fwd_decl = fwd_decl.toManaged(module.gpa), - .typedefs = typedefs.promoteContext(module.gpa, .{ .mod = module }), - .typedefs_arena = self.arena.allocator(), + .fwd_decl = fwd_decl.toManaged(gpa), + .ctypes = ctypes.*, }, - .code = code.toManaged(module.gpa), + .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code }; object.indent_writer = .{ .underlying_writer = object.code.writer() }; defer { object.code.deinit(); - for (object.dg.typedefs.values()) |typedef| { - module.gpa.free(typedef.rendered); - } - object.dg.typedefs.deinit(); + object.dg.ctypes.deinit(object.dg.gpa); object.dg.fwd_decl.deinit(); } codegen.genDecl(&object) catch |err| switch (err) { error.AnalysisFail => { - try module.failed_decls.put(module.gpa, decl_index, object.dg.error_msg.?); + try module.failed_decls.put(gpa, decl_index, object.dg.error_msg.?); return; }, else => |e| return e, }; + ctypes.* = object.dg.ctypes.move(); fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); - typedefs.* = object.dg.typedefs.unmanaged; - object.dg.typedefs.unmanaged = .{}; code.* = object.code.moveToUnmanaged(); // Free excess allocated memory for this Decl. - fwd_decl.shrinkAndFree(module.gpa, fwd_decl.items.len); - code.shrinkAndFree(module.gpa, code.items.len); + ctypes.shrinkAndFree(gpa, ctypes.count()); + fwd_decl.shrinkAndFree(gpa, fwd_decl.items.len); + code.shrinkAndFree(gpa, code.items.len); } pub fn updateDeclLineNumber(self: *C, module: *Module, decl_index: Module.Decl.Index) !void { @@ -246,7 +236,7 @@ pub fn flushModule(self: *C, comp: *Compilation, prog_node: *std.Progress.Node) sub_prog_node.activate(); defer sub_prog_node.end(); - const gpa = comp.gpa; + const gpa = self.base.allocator; const module = self.base.options.module.?; // This code path happens exclusively with -ofmt=c. The flush logic for @@ -257,30 +247,28 @@ pub fn flushModule(self: *C, comp: *Compilation, prog_node: *std.Progress.Node) const abi_define = abiDefine(comp); - // Covers defines, zig.h, typedef, and asm. - var buf_count: usize = 2; - if (abi_define != null) buf_count += 1; - try f.all_buffers.ensureUnusedCapacity(gpa, buf_count); + // Covers defines, zig.h, ctypes, asm, lazy fwd. 
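The five buffer slots reserved below are never concatenated; `flushModule` records one iovec per pre-rendered piece and hands the whole list to the kernel in a single gather write. A standalone sketch of that strategy (file name and contents are made up):

```zig
const std = @import("std");

// Sketch of the gather-write used by flush: each pre-rendered piece becomes
// one iovec, and writevAll submits them without copying into a single buffer.
test "write many buffers in one syscall" {
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();
    const file = try tmp.dir.createFile("out.c", .{});
    defer file.close();

    const pieces = [_][]const u8{ "#include <stdint.h>\n", "int x;\n" };
    var bufs: [pieces.len]std.os.iovec_const = undefined;
    var file_size: u64 = 0;
    for (pieces, 0..) |piece, i| {
        bufs[i] = .{ .iov_base = piece.ptr, .iov_len = piece.len };
        file_size += piece.len;
    }
    try file.setEndPos(file_size); // flushModule sizes the file up front, too
    try file.writevAll(&bufs);
}
```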
+ try f.all_buffers.ensureUnusedCapacity(gpa, 5); if (abi_define) |buf| f.appendBufAssumeCapacity(buf); f.appendBufAssumeCapacity(zig_h); - const typedef_index = f.all_buffers.items.len; + const ctypes_index = f.all_buffers.items.len; f.all_buffers.items.len += 1; { - var asm_buf = f.asm_buf.toManaged(module.gpa); - defer asm_buf.deinit(); - - try codegen.genGlobalAsm(module, &asm_buf); - - f.asm_buf = asm_buf.moveToUnmanaged(); - f.appendBufAssumeCapacity(f.asm_buf.items); + var asm_buf = f.asm_buf.toManaged(gpa); + defer f.asm_buf = asm_buf.moveToUnmanaged(); + try codegen.genGlobalAsm(module, asm_buf.writer()); + f.appendBufAssumeCapacity(asm_buf.items); } - try self.flushErrDecls(&f); + const lazy_index = f.all_buffers.items.len; + f.all_buffers.items.len += 1; - // Typedefs, forward decls, and non-functions first. + try self.flushErrDecls(&f.lazy_db); + + // `CType`s, forward decls, and non-functions first. // Unlike other backends, the .c code we are emitting is order-dependent. Therefore // we must traverse the set of Decls that we are emitting according to their dependencies. // Our strategy is to populate a set of remaining decls, pop Decls one by one, @@ -307,16 +295,33 @@ pub fn flushModule(self: *C, comp: *Compilation, prog_node: *std.Progress.Node) } } - f.all_buffers.items[typedef_index] = .{ - .iov_base = if (f.typedef_buf.items.len > 0) f.typedef_buf.items.ptr else "", - .iov_len = f.typedef_buf.items.len, + { + // We need to flush lazy ctypes after flushing all decls but before flushing any decl ctypes. + // This ensures that every lazy CType.Index exactly matches the global CType.Index. + assert(f.ctypes.count() == 0); + try self.flushCTypes(&f, .none, f.lazy_db.ctypes); + + var it = self.decl_table.iterator(); + while (it.next()) |entry| + try self.flushCTypes(&f, entry.key_ptr.toOptional(), entry.value_ptr.ctypes); + } + + f.all_buffers.items[ctypes_index] = .{ + .iov_base = if (f.ctypes_buf.items.len > 0) f.ctypes_buf.items.ptr else "", + .iov_len = f.ctypes_buf.items.len, }; - f.file_size += f.typedef_buf.items.len; + f.file_size += f.ctypes_buf.items.len; + + f.all_buffers.items[lazy_index] = .{ + .iov_base = if (f.lazy_db.fwd_decl.items.len > 0) f.lazy_db.fwd_decl.items.ptr else "", + .iov_len = f.lazy_db.fwd_decl.items.len, + }; + f.file_size += f.lazy_db.fwd_decl.items.len; // Now the code. - try f.all_buffers.ensureUnusedCapacity(gpa, decl_values.len); - for (decl_values) |decl| - f.appendBufAssumeCapacity(decl.code.items); + try f.all_buffers.ensureUnusedCapacity(gpa, 1 + decl_values.len); + f.appendBufAssumeCapacity(f.lazy_db.code.items); + for (decl_values) |decl| f.appendBufAssumeCapacity(decl.code.items); const file = self.base.file.?; try file.setEndPos(f.file_size); @@ -324,22 +329,23 @@ pub fn flushModule(self: *C, comp: *Compilation, prog_node: *std.Progress.Node) } const Flush = struct { - err_decls: DeclBlock = .{}, remaining_decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, void) = .{}, - typedefs: Typedefs = .{}, - typedef_buf: std.ArrayListUnmanaged(u8) = .{}, + + ctypes: codegen.CType.Store = .{}, + ctypes_map: std.ArrayListUnmanaged(codegen.CType.Index) = .{}, + ctypes_buf: std.ArrayListUnmanaged(u8) = .{}, + + lazy_db: DeclBlock = .{}, + lazy_fns: LazyFns = .{}, + asm_buf: std.ArrayListUnmanaged(u8) = .{}, + /// We collect a list of buffers to write, and write them all at once with pwritev 😎 all_buffers: std.ArrayListUnmanaged(std.os.iovec_const) = .{}, /// Keeps track of the total bytes of `all_buffers`. 
file_size: u64 = 0, - const Typedefs = std.HashMapUnmanaged( - Type, - void, - Type.HashContext64, - std.hash_map.default_max_load_percentage, - ); + const LazyFns = std.AutoHashMapUnmanaged(codegen.LazyFnKey, void); fn appendBufAssumeCapacity(f: *Flush, buf: []const u8) void { if (buf.len == 0) return; @@ -349,10 +355,13 @@ const Flush = struct { fn deinit(f: *Flush, gpa: Allocator) void { f.all_buffers.deinit(gpa); - f.typedef_buf.deinit(gpa); - f.typedefs.deinit(gpa); + f.asm_buf.deinit(gpa); + f.lazy_fns.deinit(gpa); + f.lazy_db.deinit(gpa); + f.ctypes_buf.deinit(gpa); + f.ctypes_map.deinit(gpa); + f.ctypes.deinit(gpa); f.remaining_decls.deinit(gpa); - f.err_decls.deinit(gpa); } }; @@ -360,53 +369,116 @@ const FlushDeclError = error{ OutOfMemory, }; -fn flushTypedefs(self: *C, f: *Flush, typedefs: codegen.TypedefMap.Unmanaged) FlushDeclError!void { - if (typedefs.count() == 0) return; +fn flushCTypes( + self: *C, + f: *Flush, + decl_index: Module.Decl.OptionalIndex, + decl_ctypes: codegen.CType.Store, +) FlushDeclError!void { const gpa = self.base.allocator; - const module = self.base.options.module.?; + const mod = self.base.options.module.?; - try f.typedefs.ensureUnusedCapacityContext(gpa, @intCast(u32, typedefs.count()), .{ - .mod = module, - }); - var it = typedefs.iterator(); - while (it.next()) |new| { - const gop = f.typedefs.getOrPutAssumeCapacityContext(new.key_ptr.*, .{ - .mod = module, + const decl_ctypes_len = decl_ctypes.count(); + f.ctypes_map.clearRetainingCapacity(); + try f.ctypes_map.ensureTotalCapacity(gpa, decl_ctypes_len); + + var global_ctypes = f.ctypes.promote(gpa); + defer f.ctypes.demote(global_ctypes); + + var ctypes_buf = f.ctypes_buf.toManaged(gpa); + defer f.ctypes_buf = ctypes_buf.moveToUnmanaged(); + const writer = ctypes_buf.writer(); + + const slice = decl_ctypes.set.map.entries.slice(); + for (slice.items(.key), 0..) 
|decl_cty, decl_i| { + const Context = struct { + arena: Allocator, + ctypes_map: []codegen.CType.Index, + cached_hash: codegen.CType.Store.Set.Map.Hash, + idx: codegen.CType.Index, + + pub fn hash(ctx: @This(), _: codegen.CType) codegen.CType.Store.Set.Map.Hash { + return ctx.cached_hash; + } + pub fn eql(ctx: @This(), lhs: codegen.CType, rhs: codegen.CType, _: usize) bool { + return lhs.eqlContext(rhs, ctx); + } + pub fn eqlIndex( + ctx: @This(), + lhs_idx: codegen.CType.Index, + rhs_idx: codegen.CType.Index, + ) bool { + if (lhs_idx < codegen.CType.Tag.no_payload_count or + rhs_idx < codegen.CType.Tag.no_payload_count) return lhs_idx == rhs_idx; + const lhs_i = lhs_idx - codegen.CType.Tag.no_payload_count; + if (lhs_i >= ctx.ctypes_map.len) return false; + return ctx.ctypes_map[lhs_i] == rhs_idx; + } + pub fn copyIndex(ctx: @This(), idx: codegen.CType.Index) codegen.CType.Index { + if (idx < codegen.CType.Tag.no_payload_count) return idx; + return ctx.ctypes_map[idx - codegen.CType.Tag.no_payload_count]; + } + }; + const decl_idx = @intCast(codegen.CType.Index, codegen.CType.Tag.no_payload_count + decl_i); + const ctx = Context{ + .arena = global_ctypes.arena.allocator(), + .ctypes_map = f.ctypes_map.items, + .cached_hash = decl_ctypes.indexToHash(decl_idx), + .idx = decl_idx, + }; + const gop = try global_ctypes.set.map.getOrPutContextAdapted(gpa, decl_cty, ctx, .{ + .store = &global_ctypes.set, }); + const global_idx = + @intCast(codegen.CType.Index, codegen.CType.Tag.no_payload_count + gop.index); + f.ctypes_map.appendAssumeCapacity(global_idx); if (!gop.found_existing) { - try f.typedef_buf.appendSlice(gpa, new.value_ptr.rendered); + errdefer _ = global_ctypes.set.map.pop(); + gop.key_ptr.* = try decl_cty.copyContext(ctx); } + if (std.debug.runtime_safety) { + const global_cty = &global_ctypes.set.map.entries.items(.key)[gop.index]; + assert(global_cty == gop.key_ptr); + assert(decl_cty.eqlContext(global_cty.*, ctx)); + assert(decl_cty.hash(decl_ctypes.set) == global_cty.hash(global_ctypes.set)); + } + try codegen.genTypeDecl( + mod, + writer, + global_ctypes.set, + global_idx, + decl_index, + decl_ctypes.set, + decl_idx, + gop.found_existing, + ); } } -fn flushErrDecls(self: *C, f: *Flush) FlushDeclError!void { - const module = self.base.options.module.?; +fn flushErrDecls(self: *C, db: *DeclBlock) FlushDeclError!void { + const gpa = self.base.allocator; - const fwd_decl = &f.err_decls.fwd_decl; - const typedefs = &f.err_decls.typedefs; - const code = &f.err_decls.code; + const fwd_decl = &db.fwd_decl; + const ctypes = &db.ctypes; + const code = &db.code; var object = codegen.Object{ .dg = .{ - .gpa = module.gpa, - .module = module, + .gpa = gpa, + .module = self.base.options.module.?, .error_msg = null, - .decl_index = undefined, - .decl = undefined, - .fwd_decl = fwd_decl.toManaged(module.gpa), - .typedefs = typedefs.promoteContext(module.gpa, .{ .mod = module }), - .typedefs_arena = self.arena.allocator(), + .decl_index = .none, + .decl = null, + .fwd_decl = fwd_decl.toManaged(gpa), + .ctypes = ctypes.*, }, - .code = code.toManaged(module.gpa), + .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code }; object.indent_writer = .{ .underlying_writer = object.code.writer() }; defer { object.code.deinit(); - for (object.dg.typedefs.values()) |typedef| { - module.gpa.free(typedef.rendered); - } - object.dg.typedefs.deinit(); + object.dg.ctypes.deinit(gpa); object.dg.fwd_decl.deinit(); } @@ -416,14 +488,58 @@ fn flushErrDecls(self: 
*C, f: *Flush) FlushDeclError!void { }; fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); - typedefs.* = object.dg.typedefs.unmanaged; - object.dg.typedefs.unmanaged = .{}; + ctypes.* = object.dg.ctypes.move(); code.* = object.code.moveToUnmanaged(); +} - try self.flushTypedefs(f, typedefs.*); - try f.all_buffers.ensureUnusedCapacity(self.base.allocator, 1); - f.appendBufAssumeCapacity(fwd_decl.items); - f.appendBufAssumeCapacity(code.items); +fn flushLazyFn(self: *C, db: *DeclBlock, lazy_fn: codegen.LazyFnMap.Entry) FlushDeclError!void { + const gpa = self.base.allocator; + + const fwd_decl = &db.fwd_decl; + const ctypes = &db.ctypes; + const code = &db.code; + + var object = codegen.Object{ + .dg = .{ + .gpa = gpa, + .module = self.base.options.module.?, + .error_msg = null, + .decl_index = .none, + .decl = null, + .fwd_decl = fwd_decl.toManaged(gpa), + .ctypes = ctypes.*, + }, + .code = code.toManaged(gpa), + .indent_writer = undefined, // set later so we can get a pointer to object.code + }; + object.indent_writer = .{ .underlying_writer = object.code.writer() }; + defer { + object.code.deinit(); + object.dg.ctypes.deinit(gpa); + object.dg.fwd_decl.deinit(); + } + + codegen.genLazyFn(&object, lazy_fn) catch |err| switch (err) { + error.AnalysisFail => unreachable, + else => |e| return e, + }; + + fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); + ctypes.* = object.dg.ctypes.move(); + code.* = object.code.moveToUnmanaged(); +} + +fn flushLazyFns(self: *C, f: *Flush, lazy_fns: codegen.LazyFnMap) FlushDeclError!void { + const gpa = self.base.allocator; + try f.lazy_fns.ensureUnusedCapacity(gpa, @intCast(Flush.LazyFns.Size, lazy_fns.count())); + + var it = lazy_fns.iterator(); + while (it.next()) |entry| { + const gop = f.lazy_fns.getOrPutAssumeCapacity(entry.key_ptr.*); + if (gop.found_existing) continue; + gop.value_ptr.* = {}; + try self.flushLazyFn(&f.lazy_db, entry); + } } /// Assumes `decl` was in the `remaining_decls` set, and has already been removed. @@ -433,8 +549,8 @@ fn flushDecl( decl_index: Module.Decl.Index, export_names: std.StringHashMapUnmanaged(void), ) FlushDeclError!void { - const module = self.base.options.module.?; - const decl = module.declPtr(decl_index); + const gpa = self.base.allocator; + const decl = self.base.options.module.?.declPtr(decl_index); // Before flushing any particular Decl we must ensure its // dependencies are already flushed, so that the order in the .c // file comes out correctly. @@ -445,10 +561,9 @@ fn flushDecl( } const decl_block = self.decl_table.getPtr(decl_index).?; - const gpa = self.base.allocator; - try self.flushTypedefs(f, decl_block.typedefs); - try f.all_buffers.ensureUnusedCapacity(gpa, 2); + try self.flushLazyFns(f, decl_block.lazy_fns); + try f.all_buffers.ensureUnusedCapacity(gpa, 1); if (!(decl.isExtern() and export_names.contains(mem.span(decl.name)))) f.appendBufAssumeCapacity(decl_block.fwd_decl.items); } diff --git a/src/main.zig b/src/main.zig index e80be06a36..d544940779 100644 --- a/src/main.zig +++ b/src/main.zig @@ -403,8 +403,11 @@ const usage_build_generic = \\ ReleaseFast Optimizations on, safety off \\ ReleaseSafe Optimizations on, safety on \\ ReleaseSmall Optimize for small binary, safety off - \\ --pkg-begin [name] [path] Make pkg available to import and push current pkg - \\ --pkg-end Pop current pkg + \\ --mod [name]:[deps]:[src] Make a module available for dependency under the given name + \\ deps: [dep],[dep],... + \\ dep: [[import=]name] + \\ --deps [dep],[dep],... 
Set dependency names for the root package + \\ dep: [[import=]name] \\ --main-pkg-path Set the directory of the root package \\ -fPIC Force-enable Position Independent Code \\ -fno-PIC Force-disable Position Independent Code @@ -858,15 +861,21 @@ fn buildOutputType( var linker_export_symbol_names = std.ArrayList([]const u8).init(gpa); defer linker_export_symbol_names.deinit(); - // This package only exists to clean up the code parsing --pkg-begin and - // --pkg-end flags. Use dummy values that are safe for the destroy call. - var pkg_tree_root: Package = .{ - .root_src_directory = .{ .path = null, .handle = fs.cwd() }, - .root_src_path = &[0]u8{}, - .name = &[0]u8{}, - }; - defer freePkgTree(gpa, &pkg_tree_root, false); - var cur_pkg: *Package = &pkg_tree_root; + // Contains every module specified via --mod. The dependencies are added + // after argument parsing is completed. We use a StringArrayHashMap to make + // error output consistent. + var modules = std.StringArrayHashMap(struct { + mod: *Package, + deps_str: []const u8, // still in CLI arg format + }).init(gpa); + defer { + var it = modules.iterator(); + while (it.next()) |kv| kv.value_ptr.mod.destroy(gpa); + modules.deinit(); + } + + // The dependency string for the root package + var root_deps_str: ?[]const u8 = null; // before arg parsing, check for the NO_COLOR environment variable // if it exists, default the color setting to .off @@ -943,34 +952,44 @@ fn buildOutputType( } else { fatal("unexpected end-of-parameter mark: --", .{}); } - } else if (mem.eql(u8, arg, "--pkg-begin")) { - const opt_pkg_name = args_iter.next(); - const opt_pkg_path = args_iter.next(); - if (opt_pkg_name == null or opt_pkg_path == null) - fatal("Expected 2 arguments after {s}", .{arg}); + } else if (mem.eql(u8, arg, "--mod")) { + const info = args_iter.nextOrFatal(); + var info_it = mem.split(u8, info, ":"); + const mod_name = info_it.next() orelse fatal("expected non-empty argument after {s}", .{arg}); + const deps_str = info_it.next() orelse fatal("expected 'name:deps:path' after {s}", .{arg}); + const root_src_orig = info_it.rest(); + if (root_src_orig.len == 0) fatal("expected 'name:deps:path' after {s}", .{arg}); + if (mod_name.len == 0) fatal("empty name for module at '{s}'", .{root_src_orig}); - const pkg_name = opt_pkg_name.?; - const pkg_path = try introspect.resolvePath(arena, opt_pkg_path.?); + const root_src = try introspect.resolvePath(arena, root_src_orig); - const new_cur_pkg = Package.create( - gpa, - pkg_name, - fs.path.dirname(pkg_path), - fs.path.basename(pkg_path), - ) catch |err| { - fatal("Failed to add package at path {s}: {s}", .{ pkg_path, @errorName(err) }); - }; - - if (mem.eql(u8, pkg_name, "std") or mem.eql(u8, pkg_name, "root") or mem.eql(u8, pkg_name, "builtin")) { - fatal("unable to add package '{s}' -> '{s}': conflicts with builtin package", .{ pkg_name, pkg_path }); - } else if (cur_pkg.table.get(pkg_name)) |prev| { - fatal("unable to add package '{s}' -> '{s}': already exists as '{s}", .{ pkg_name, pkg_path, prev.root_src_path }); + for ([_][]const u8{ "std", "root", "builtin" }) |name| { + if (mem.eql(u8, mod_name, name)) { + fatal("unable to add module '{s}' -> '{s}': conflicts with builtin module", .{ mod_name, root_src }); + } } - try cur_pkg.addAndAdopt(gpa, new_cur_pkg); - cur_pkg = new_cur_pkg; - } else if (mem.eql(u8, arg, "--pkg-end")) { - cur_pkg = cur_pkg.parent orelse - fatal("encountered --pkg-end with no matching --pkg-begin", .{}); + + var mod_it = modules.iterator(); + while (mod_it.next()) |kv| { + 
if (std.mem.eql(u8, mod_name, kv.key_ptr.*)) { + fatal("unable to add module '{s}' -> '{s}': already exists as '{s}'", .{ mod_name, root_src, kv.value_ptr.mod.root_src_path }); + } + } + + try modules.ensureUnusedCapacity(1); + modules.put(mod_name, .{ + .mod = try Package.create( + gpa, + fs.path.dirname(root_src), + fs.path.basename(root_src), + ), + .deps_str = deps_str, + }) catch unreachable; + } else if (mem.eql(u8, arg, "--deps")) { + if (root_deps_str != null) { + fatal("only one --deps argument is allowed", .{}); + } + root_deps_str = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "--main-pkg-path")) { main_pkg_path = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "-cflags")) { @@ -1955,12 +1974,15 @@ fn buildOutputType( linker_compress_debug_sections = std.meta.stringToEnum(link.CompressDebugSections, arg1) orelse { fatal("expected [none|zlib] after --compress-debug-sections, found '{s}'", .{arg1}); }; - } else if (mem.eql(u8, arg, "-z")) { - i += 1; - if (i >= linker_args.items.len) { - fatal("expected linker extension flag after '{s}'", .{arg}); + } else if (mem.startsWith(u8, arg, "-z")) { + var z_arg = arg[2..]; + if (z_arg.len == 0) { + i += 1; + if (i >= linker_args.items.len) { + fatal("expected linker extension flag after '{s}'", .{arg}); + } + z_arg = linker_args.items[i]; } - const z_arg = linker_args.items[i]; if (mem.eql(u8, z_arg, "nodelete")) { linker_z_nodelete = true; } else if (mem.eql(u8, z_arg, "notext")) { @@ -2304,6 +2326,31 @@ fn buildOutputType( }, } + { + // Resolve module dependencies + var it = modules.iterator(); + while (it.next()) |kv| { + const deps_str = kv.value_ptr.deps_str; + var deps_it = ModuleDepIterator.init(deps_str); + while (deps_it.next()) |dep| { + if (dep.expose.len == 0) { + fatal("module '{s}' depends on '{s}' with a blank name", .{ kv.key_ptr.*, dep.name }); + } + + for ([_][]const u8{ "std", "root", "builtin" }) |name| { + if (mem.eql(u8, dep.expose, name)) { + fatal("unable to add module '{s}' under name '{s}': conflicts with builtin module", .{ dep.name, dep.expose }); + } + } + + const dep_mod = modules.get(dep.name) orelse + fatal("module '{s}' depends on module '{s}' which does not exist", .{ kv.key_ptr.*, dep.name }); + + try kv.value_ptr.mod.add(gpa, dep.expose, dep_mod.mod); + } + } + } + if (arg_mode == .build and optimize_mode == .ReleaseSmall and strip == null) strip = true; @@ -2883,14 +2930,14 @@ fn buildOutputType( if (main_pkg_path) |unresolved_main_pkg_path| { const p = try introspect.resolvePath(arena, unresolved_main_pkg_path); if (p.len == 0) { - break :blk try Package.create(gpa, "root", null, src_path); + break :blk try Package.create(gpa, null, src_path); } else { const rel_src_path = try fs.path.relative(arena, p, src_path); - break :blk try Package.create(gpa, "root", p, rel_src_path); + break :blk try Package.create(gpa, p, rel_src_path); } } else { const root_src_dir_path = fs.path.dirname(src_path); - break :blk Package.create(gpa, "root", root_src_dir_path, fs.path.basename(src_path)) catch |err| { + break :blk Package.create(gpa, root_src_dir_path, fs.path.basename(src_path)) catch |err| { if (root_src_dir_path) |p| { fatal("unable to open '{s}': {s}", .{ p, @errorName(err) }); } else { @@ -2901,23 +2948,24 @@ fn buildOutputType( } else null; defer if (main_pkg) |p| p.destroy(gpa); - // Transfer packages added with --pkg-begin/--pkg-end to the root package - if (main_pkg) |pkg| { - var it = pkg_tree_root.table.valueIterator(); - while (it.next()) |p| { - if (p.*.parent == &pkg_tree_root) { - 
p.*.parent = pkg; + // Transfer packages added with --deps to the root package + if (main_pkg) |mod| { + var it = ModuleDepIterator.init(root_deps_str orelse ""); + while (it.next()) |dep| { + if (dep.expose.len == 0) { + fatal("root module depends on '{s}' with a blank name", .{dep.name}); } - } - pkg.table = pkg_tree_root.table; - pkg_tree_root.table = .{}; - } else { - // Remove any dangling pointers just in case. - var it = pkg_tree_root.table.valueIterator(); - while (it.next()) |p| { - if (p.*.parent == &pkg_tree_root) { - p.*.parent = null; + + for ([_][]const u8{ "std", "root", "builtin" }) |name| { + if (mem.eql(u8, dep.expose, name)) { + fatal("unable to add module '{s}' under name '{s}': conflicts with builtin module", .{ dep.name, dep.expose }); + } } + + const dep_mod = modules.get(dep.name) orelse + fatal("root module depends on module '{s}' which does not exist", .{dep.name}); + + try mod.add(gpa, dep.expose, dep_mod.mod); } } @@ -3397,6 +3445,32 @@ fn buildOutputType( return cleanExit(); } +const ModuleDepIterator = struct { + split: mem.SplitIterator(u8), + + fn init(deps_str: []const u8) ModuleDepIterator { + return .{ .split = mem.split(u8, deps_str, ",") }; + } + + const Dependency = struct { + expose: []const u8, + name: []const u8, + }; + + fn next(it: *ModuleDepIterator) ?Dependency { + if (it.split.buffer.len == 0) return null; // don't return "" for the first iteration on "" + const str = it.split.next() orelse return null; + if (mem.indexOfScalar(u8, str, '=')) |i| { + return .{ + .expose = str[0..i], + .name = str[i + 1 ..], + }; + } else { + return .{ .expose = str, .name = str }; + } + } +}; + fn parseCrossTargetOrReportFatalError( allocator: Allocator, opts: std.zig.CrossTarget.ParseOptions, @@ -3623,18 +3697,6 @@ fn updateModule(gpa: Allocator, comp: *Compilation, hook: AfterUpdateHook) !void } } -fn freePkgTree(gpa: Allocator, pkg: *Package, free_parent: bool) void { - { - var it = pkg.table.valueIterator(); - while (it.next()) |value| { - freePkgTree(gpa, value.*, true); - } - } - if (free_parent) { - pkg.destroy(gpa); - } -} - fn cmdTranslateC(comp: *Compilation, arena: Allocator, enable_cache: bool) !void { if (!build_options.have_llvm) fatal("cannot translate-c: compiler built without LLVM extensions", .{}); @@ -4138,7 +4200,6 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi var main_pkg: Package = .{ .root_src_directory = zig_lib_directory, .root_src_path = "build_runner.zig", - .name = "root", }; if (!build_options.omit_pkg_fetching_code) { @@ -4181,22 +4242,20 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi const deps_pkg = try Package.createFilePkg( gpa, - "@dependencies", local_cache_directory, "dependencies.zig", dependencies_source.items, ); mem.swap(Package.Table, &main_pkg.table, &deps_pkg.table); - try main_pkg.addAndAdopt(gpa, deps_pkg); + try main_pkg.add(gpa, "@dependencies", deps_pkg); } var build_pkg: Package = .{ .root_src_directory = build_directory, .root_src_path = build_zig_basename, - .name = "@build", }; - try main_pkg.addAndAdopt(gpa, &build_pkg); + try main_pkg.add(gpa, "@build", &build_pkg); const comp = Compilation.create(gpa, .{ .zig_lib_directory = zig_lib_directory, @@ -4431,7 +4490,7 @@ pub fn cmdFmt(gpa: Allocator, arena: Allocator, args: []const []const u8) !void .root_decl = .none, }; - file.pkg = try Package.create(gpa, "root", null, file.sub_file_path); + file.pkg = try Package.create(gpa, null, file.sub_file_path); defer file.pkg.destroy(gpa); 
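`ModuleDepIterator` above is the entire grammar of `--deps`: entries are comma-separated, and each is either `name` or `expose=name`. A small test re-creating its per-entry split, to show what the root package ends up importing:

```zig
const std = @import("std");

// Re-creation of ModuleDepIterator's per-entry parsing, for illustration only.
const Dep = struct { expose: []const u8, name: []const u8 };

fn parseDep(entry: []const u8) Dep {
    if (std.mem.indexOfScalar(u8, entry, '=')) |i| {
        return .{ .expose = entry[0..i], .name = entry[i + 1 ..] };
    }
    return .{ .expose = entry, .name = entry };
}

test "--deps foo,bar=baz" {
    var it = std.mem.split(u8, "foo,bar=baz", ",");

    const foo = parseDep(it.next().?);
    try std.testing.expectEqualStrings("foo", foo.expose); // imported as "foo"
    try std.testing.expectEqualStrings("foo", foo.name); // refers to module "foo"

    const bar = parseDep(it.next().?);
    try std.testing.expectEqualStrings("bar", bar.expose); // imported as "bar"
    try std.testing.expectEqualStrings("baz", bar.name); // refers to module "baz"
}
```

So `--deps foo,bar=baz` lets the root file `@import("foo")` and `@import("bar")`, the latter resolving to the module declared under the name `baz` via `--mod`.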
file.zir = try AstGen.generate(gpa, file.tree); @@ -4642,7 +4701,7 @@ fn fmtPathFile( .root_decl = .none, }; - file.pkg = try Package.create(fmt.gpa, "root", null, file.sub_file_path); + file.pkg = try Package.create(fmt.gpa, null, file.sub_file_path); defer file.pkg.destroy(fmt.gpa); if (stat.size > max_src_size) @@ -5354,7 +5413,7 @@ pub fn cmdAstCheck( file.stat.size = source.len; } - file.pkg = try Package.create(gpa, "root", null, file.sub_file_path); + file.pkg = try Package.create(gpa, null, file.sub_file_path); defer file.pkg.destroy(gpa); file.tree = try Ast.parse(gpa, file.source, .zig); @@ -5473,7 +5532,7 @@ pub fn cmdChangelist( .root_decl = .none, }; - file.pkg = try Package.create(gpa, "root", null, file.sub_file_path); + file.pkg = try Package.create(gpa, null, file.sub_file_path); defer file.pkg.destroy(gpa); const source = try arena.allocSentinel(u8, @intCast(usize, stat.size), 0); diff --git a/src/target.zig b/src/target.zig index d7b2858a4b..8cbddb4d0c 100644 --- a/src/target.zig +++ b/src/target.zig @@ -727,6 +727,7 @@ pub fn supportsFunctionAlignment(target: std.Target) bool { pub fn supportsTailCall(target: std.Target, backend: std.builtin.CompilerBackend) bool { switch (backend) { .stage1, .stage2_llvm => return @import("codegen/llvm.zig").supportsTailCall(target), + .stage2_c => return true, else => return false, } } diff --git a/src/test.zig b/src/test.zig index acc1bcdc1f..61cdb705e3 100644 --- a/src/test.zig +++ b/src/test.zig @@ -583,6 +583,11 @@ pub const TestContext = struct { path: []const u8, }; + pub const DepModule = struct { + name: []const u8, + path: []const u8, + }; + pub const Backend = enum { stage1, stage2, @@ -611,6 +616,7 @@ pub const TestContext = struct { link_libc: bool = false, files: std.ArrayList(File), + deps: std.ArrayList(DepModule), result: anyerror!void = {}, @@ -618,6 +624,13 @@ pub const TestContext = struct { case.files.append(.{ .path = name, .src = src }) catch @panic("out of memory"); } + pub fn addDepModule(case: *Case, name: []const u8, path: []const u8) void { + case.deps.append(.{ + .name = name, + .path = path, + }) catch @panic("out of memory"); + } + /// Adds a subcase in which the module is updated with `src`, and a C /// header is generated. 
pub fn addHeader(self: *Case, src: [:0]const u8, result: [:0]const u8) void { @@ -767,6 +780,7 @@ pub const TestContext = struct { .updates = std.ArrayList(Update).init(ctx.cases.allocator), .output_mode = .Exe, .files = std.ArrayList(File).init(ctx.arena), + .deps = std.ArrayList(DepModule).init(ctx.arena), }) catch @panic("out of memory"); return &ctx.cases.items[ctx.cases.items.len - 1]; } @@ -787,6 +801,7 @@ pub const TestContext = struct { .updates = std.ArrayList(Update).init(ctx.cases.allocator), .output_mode = .Exe, .files = std.ArrayList(File).init(ctx.arena), + .deps = std.ArrayList(DepModule).init(ctx.arena), .link_libc = true, }) catch @panic("out of memory"); return &ctx.cases.items[ctx.cases.items.len - 1]; @@ -801,6 +816,7 @@ pub const TestContext = struct { .updates = std.ArrayList(Update).init(ctx.cases.allocator), .output_mode = .Exe, .files = std.ArrayList(File).init(ctx.arena), + .deps = std.ArrayList(DepModule).init(ctx.arena), .backend = .llvm, .link_libc = true, }) catch @panic("out of memory"); @@ -818,6 +834,7 @@ pub const TestContext = struct { .updates = std.ArrayList(Update).init(ctx.cases.allocator), .output_mode = .Obj, .files = std.ArrayList(File).init(ctx.arena), + .deps = std.ArrayList(DepModule).init(ctx.arena), }) catch @panic("out of memory"); return &ctx.cases.items[ctx.cases.items.len - 1]; } @@ -834,6 +851,7 @@ pub const TestContext = struct { .output_mode = .Exe, .is_test = true, .files = std.ArrayList(File).init(ctx.arena), + .deps = std.ArrayList(DepModule).init(ctx.arena), }) catch @panic("out of memory"); return &ctx.cases.items[ctx.cases.items.len - 1]; } @@ -858,6 +876,7 @@ pub const TestContext = struct { .updates = std.ArrayList(Update).init(ctx.cases.allocator), .output_mode = .Obj, .files = std.ArrayList(File).init(ctx.arena), + .deps = std.ArrayList(DepModule).init(ctx.arena), }) catch @panic("out of memory"); return &ctx.cases.items[ctx.cases.items.len - 1]; } @@ -1145,6 +1164,7 @@ pub const TestContext = struct { .output_mode = output_mode, .link_libc = backend == .llvm, .files = std.ArrayList(TestContext.File).init(ctx.cases.allocator), + .deps = std.ArrayList(DepModule).init(ctx.cases.allocator), }); try cases.append(next); } @@ -1497,9 +1517,25 @@ pub const TestContext = struct { var main_pkg: Package = .{ .root_src_directory = .{ .path = tmp_dir_path, .handle = tmp.dir }, .root_src_path = tmp_src_path, - .name = "root", }; - defer main_pkg.table.deinit(allocator); + defer { + var it = main_pkg.table.iterator(); + while (it.next()) |kv| { + allocator.free(kv.key_ptr.*); + kv.value_ptr.*.destroy(allocator); + } + main_pkg.table.deinit(allocator); + } + + for (case.deps.items) |dep| { + var pkg = try Package.create( + allocator, + tmp_dir_path, + dep.path, + ); + errdefer pkg.destroy(allocator); + try main_pkg.add(allocator, dep.name, pkg); + } const bin_name = try std.zig.binNameAlloc(arena, .{ .root_name = "test_case", diff --git a/src/value.zig b/src/value.zig index 0d80bf7927..5646a837ad 100644 --- a/src/value.zig +++ b/src/value.zig @@ -1249,11 +1249,22 @@ pub const Value = extern union { }; } + fn isDeclRef(val: Value) bool { + var check = val; + while (true) switch (check.tag()) { + .variable, .decl_ref, .decl_ref_mut, .comptime_field_ptr => return true, + .field_ptr => check = check.castTag(.field_ptr).?.data.container_ptr, + .elem_ptr => check = check.castTag(.elem_ptr).?.data.array_ptr, + .eu_payload_ptr, .opt_payload_ptr => check = check.cast(Value.Payload.PayloadPtr).?.data.container_ptr, + else => return false, + }; + } + 
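The `isDeclRef` helper above feeds the `writeToMemory` changes that follow: a pointer that ultimately names a declaration has no comptime-known bit pattern, so serialization now fails with `error.ReinterpretDeclRef` instead of writing garbage. The walk itself is plain pointer-chasing; here is a toy version over invented stand-in tags (not the real `Value` payloads):

```zig
const std = @import("std");

// Toy model of the isDeclRef walk: strip wrapper pointers until the base is
// reached, then ask whether that base names a declaration. The tags here are
// hypothetical stand-ins for Value's .decl_ref/.field_ptr/.elem_ptr payloads.
const Ptr = union(enum) {
    decl_ref: u32, // names a declaration
    int: usize, // a concrete comptime-known address
    field_ptr: *const Ptr, // pointer to a field of a parent pointer
    elem_ptr: *const Ptr, // pointer to an element of a parent pointer
};

fn isDeclRef(ptr: *const Ptr) bool {
    var check = ptr;
    while (true) switch (check.*) {
        .decl_ref => return true,
        .int => return false,
        .field_ptr, .elem_ptr => |parent| check = parent,
    };
}

test "chasing wrappers down to the base" {
    const base = Ptr{ .decl_ref = 42 };
    const inner = Ptr{ .field_ptr = &base };
    const outer = Ptr{ .elem_ptr = &inner };
    try std.testing.expect(isDeclRef(&outer));
    try std.testing.expect(!isDeclRef(&Ptr{ .int = 0x1000 }));
}
```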
/// Write a Value's contents to `buffer`. /// /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past /// the end of the value in memory. - pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) void { + pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{ReinterpretDeclRef}!void { const target = mod.getTarget(); const endian = target.cpu.arch.endian(); if (val.isUndef()) { @@ -1309,7 +1320,7 @@ pub const Value = extern union { var buf_off: usize = 0; while (elem_i < len) : (elem_i += 1) { const elem_val = val.elemValueBuffer(mod, elem_i, &elem_value_buf); - elem_val.writeToMemory(elem_ty, mod, buffer[buf_off..]); + try elem_val.writeToMemory(elem_ty, mod, buffer[buf_off..]); buf_off += elem_size; } }, @@ -1317,7 +1328,7 @@ pub const Value = extern union { // We use byte_count instead of abi_size here, so that any padding bytes // follow the data bytes, on both big- and little-endian systems. const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8; - writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0); + return writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0); }, .Struct => switch (ty.containerLayout()) { .Auto => unreachable, // Sema is supposed to have emitted a compile error already @@ -1326,12 +1337,12 @@ pub const Value = extern union { const field_vals = val.castTag(.aggregate).?.data; for (fields, 0..) |field, i| { const off = @intCast(usize, ty.structFieldOffset(i, target)); - writeToMemory(field_vals[i], field.ty, mod, buffer[off..]); + try writeToMemory(field_vals[i], field.ty, mod, buffer[off..]); } }, .Packed => { const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8; - writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0); + return writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0); }, }, .ErrorSet => { @@ -1345,9 +1356,14 @@ pub const Value = extern union { .Extern => @panic("TODO implement writeToMemory for extern unions"), .Packed => { const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8; - writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0); + return writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0); }, }, + .Pointer => { + assert(!ty.isSlice()); // No well defined layout. + if (val.isDeclRef()) return error.ReinterpretDeclRef; + return val.writeToMemory(Type.usize, mod, buffer); + }, else => @panic("TODO implement writeToMemory for more types"), } } @@ -1356,7 +1372,7 @@ pub const Value = extern union { /// /// Both the start and the end of the provided buffer must be tight, since /// big-endian packed memory layouts start at the end of the buffer. 
- pub fn writeToPackedMemory(val: Value, ty: Type, mod: *Module, buffer: []u8, bit_offset: usize) void { + pub fn writeToPackedMemory(val: Value, ty: Type, mod: *Module, buffer: []u8, bit_offset: usize) error{ReinterpretDeclRef}!void { const target = mod.getTarget(); const endian = target.cpu.arch.endian(); if (val.isUndef()) { @@ -1420,7 +1436,7 @@ pub const Value = extern union { // On big-endian systems, LLVM reverses the element order of vectors by default const tgt_elem_i = if (endian == .Big) len - elem_i - 1 else elem_i; const elem_val = val.elemValueBuffer(mod, tgt_elem_i, &elem_value_buf); - elem_val.writeToPackedMemory(elem_ty, mod, buffer, bit_offset + bits); + try elem_val.writeToPackedMemory(elem_ty, mod, buffer, bit_offset + bits); bits += elem_bit_size; } }, @@ -1433,7 +1449,7 @@ pub const Value = extern union { const field_vals = val.castTag(.aggregate).?.data; for (fields, 0..) |field, i| { const field_bits = @intCast(u16, field.ty.bitSize(target)); - field_vals[i].writeToPackedMemory(field.ty, mod, buffer, bit_offset + bits); + try field_vals[i].writeToPackedMemory(field.ty, mod, buffer, bit_offset + bits); bits += field_bits; } }, @@ -1446,9 +1462,14 @@ pub const Value = extern union { const field_type = ty.unionFields().values()[field_index.?].ty; const field_val = val.fieldValue(field_type, field_index.?); - field_val.writeToPackedMemory(field_type, mod, buffer, bit_offset); + return field_val.writeToPackedMemory(field_type, mod, buffer, bit_offset); }, }, + .Pointer => { + assert(!ty.isSlice()); // No well defined layout. + if (val.isDeclRef()) return error.ReinterpretDeclRef; + return val.writeToPackedMemory(Type.usize, mod, buffer, bit_offset); + }, else => @panic("TODO implement writeToPackedMemory for more types"), } } @@ -1553,6 +1574,10 @@ pub const Value = extern union { }; return Value.initPayload(&payload.base); }, + .Pointer => { + assert(!ty.isSlice()); // No well defined layout. + return readFromMemory(Type.usize, mod, buffer, arena); + }, else => @panic("TODO implement readFromMemory for more types"), } } @@ -1640,6 +1665,10 @@ pub const Value = extern union { return Tag.aggregate.create(arena, field_vals); }, }, + .Pointer => { + assert(!ty.isSlice()); // No well defined layout. 
+                return readFromPackedMemory(Type.usize, mod, buffer, bit_offset, arena);
+            },
             else => @panic("TODO implement readFromPackedMemory for more types"),
         }
     }
diff --git a/stage1/zig.h b/stage1/zig.h
new file mode 100644
index 0000000000..0756d9f731
--- /dev/null
+++ b/stage1/zig.h
@@ -0,0 +1,2486 @@
+#undef linux
+
+#define __STDC_WANT_IEC_60559_TYPES_EXT__
+#include <float.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if _MSC_VER
+#include <intrin.h>
+#elif defined(__i386__) || defined(__x86_64__)
+#include <x86intrin.h>
+#endif
+
+#if !defined(__cplusplus) && __STDC_VERSION__ <= 201710L
+#if __STDC_VERSION__ >= 199901L
+#include <stdbool.h>
+#else
+typedef char bool;
+#define false 0
+#define true 1
+#endif
+#endif
+
+#if defined(__has_builtin)
+#define zig_has_builtin(builtin) __has_builtin(__builtin_##builtin)
+#else
+#define zig_has_builtin(builtin) 0
+#endif
+
+#if defined(__has_attribute)
+#define zig_has_attribute(attribute) __has_attribute(attribute)
+#else
+#define zig_has_attribute(attribute) 0
+#endif
+
+#if __STDC_VERSION__ >= 201112L
+#define zig_threadlocal _Thread_local
+#elif defined(__GNUC__)
+#define zig_threadlocal __thread
+#elif _MSC_VER
+#define zig_threadlocal __declspec(thread)
+#else
+#define zig_threadlocal zig_threadlocal_unavailable
+#endif
+
+#if defined(__clang__)
+#define zig_clang
+#elif defined(__GNUC__)
+#define zig_gnuc
+#endif
+
+#if _MSC_VER
+#define zig_const_arr
+#define zig_callconv(c) __##c
+#else
+#define zig_const_arr static const
+#define zig_callconv(c) __attribute__((c))
+#endif
+
+#if zig_has_attribute(naked) || defined(zig_gnuc)
+#define zig_naked_decl __attribute__((naked))
+#define zig_naked __attribute__((naked))
+#elif defined(_MSC_VER)
+#define zig_naked_decl
+#define zig_naked __declspec(naked)
+#else
+#define zig_naked_decl zig_naked_unavailable
+#define zig_naked zig_naked_unavailable
+#endif
+
+#if zig_has_attribute(cold)
+#define zig_cold __attribute__((cold))
+#else
+#define zig_cold
+#endif
+
+#if __STDC_VERSION__ >= 199901L
+#define zig_restrict restrict
+#elif defined(__GNUC__)
+#define zig_restrict __restrict
+#else
+#define zig_restrict
+#endif
+
+#if __STDC_VERSION__ >= 201112L
+#define zig_align(alignment) _Alignas(alignment)
+#elif zig_has_attribute(aligned)
+#define zig_align(alignment) __attribute__((aligned(alignment)))
+#elif _MSC_VER
+#define zig_align(alignment) __declspec(align(alignment))
+#else
+#define zig_align zig_align_unavailable
+#endif
+
+#if zig_has_attribute(aligned)
+#define zig_under_align(alignment) __attribute__((aligned(alignment)))
+#elif _MSC_VER
+#define zig_under_align(alignment) zig_align(alignment)
+#else
+#define zig_under_align zig_under_align_unavailable
+#endif
+
+#if zig_has_attribute(aligned)
+#define zig_align_fn(alignment) __attribute__((aligned(alignment)))
+#elif _MSC_VER
+#define zig_align_fn(alignment)
+#else
+#define zig_align_fn zig_align_fn_unavailable
+#endif
+
+#if zig_has_attribute(packed)
+#define zig_packed(definition) __attribute__((packed)) definition
+#elif _MSC_VER
+#define zig_packed(definition) __pragma(pack(1)) definition __pragma(pack())
+#else
+#define zig_packed(definition) zig_packed_unavailable
+#endif
+
+#if zig_has_attribute(section)
+#define zig_linksection(name, def, ...) def __attribute__((section(name)))
+#elif _MSC_VER
+#define zig_linksection(name, def, ...) __pragma(section(name, __VA_ARGS__)) __declspec(allocate(name)) def
+#else
+#define zig_linksection(name, def, ...) zig_linksection_unavailable
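+/* Illustration (not part of the header itself): zig.h picks an
+ * implementation for each language feature by probing, in order, the C
+ * standard version, __has_builtin or __has_attribute, and compiler-specific
+ * keywords, falling back to a poison token such as zig_linksection_unavailable
+ * that only errors out if the feature is actually used. A minimal sketch of
+ * the same pattern, with hypothetical names:
+ *
+ *   #if __STDC_VERSION__ >= 201112L
+ *   #define my_align(n) _Alignas(n)
+ *   #elif my_has_attribute(aligned)
+ *   #define my_align(n) __attribute__((aligned(n)))
+ *   #else
+ *   #define my_align my_align_unavailable
+ *   #endif
+ */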
+#endif
+
+#if zig_has_builtin(unreachable) || defined(zig_gnuc)
+#define zig_unreachable() __builtin_unreachable()
+#else
+#define zig_unreachable()
+#endif
+
+#if defined(__cplusplus)
+#define zig_extern extern "C"
+#else
+#define zig_extern extern
+#endif
+
+#if zig_has_attribute(alias)
+#define zig_export(sig, symbol, name) zig_extern sig __attribute__((alias(symbol)))
+#elif _MSC_VER
+#if _M_X64
+#define zig_export(sig, symbol, name) sig;\
+    __pragma(comment(linker, "/alternatename:" name "=" symbol ))
+#else /*_M_X64 */
+#define zig_export(sig, symbol, name) sig;\
+    __pragma(comment(linker, "/alternatename:_" name "=_" symbol ))
+#endif /*_M_X64 */
+#else
+#define zig_export(sig, symbol, name) __asm(name " = " symbol)
+#endif
+
+#if zig_has_builtin(debugtrap)
+#define zig_breakpoint() __builtin_debugtrap()
+#elif zig_has_builtin(trap) || defined(zig_gnuc)
+#define zig_breakpoint() __builtin_trap()
+#elif defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__)
+#define zig_breakpoint() __debugbreak()
+#elif defined(__i386__) || defined(__x86_64__)
+#define zig_breakpoint() __asm__ volatile("int $0x03");
+#else
+#define zig_breakpoint() raise(SIGTRAP)
+#endif
+
+#if zig_has_builtin(return_address) || defined(zig_gnuc)
+#define zig_return_address() __builtin_extract_return_addr(__builtin_return_address(0))
+#elif defined(_MSC_VER)
+#define zig_return_address() _ReturnAddress()
+#else
+#define zig_return_address() 0
+#endif
+
+#if zig_has_builtin(frame_address) || defined(zig_gnuc)
+#define zig_frame_address() __builtin_frame_address(0)
+#else
+#define zig_frame_address() 0
+#endif
+
+#if zig_has_builtin(prefetch) || defined(zig_gnuc)
+#define zig_prefetch(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
+#else
+#define zig_prefetch(addr, rw, locality)
+#endif
+
+#if zig_has_builtin(memory_size) && zig_has_builtin(memory_grow)
+#define zig_wasm_memory_size(index) __builtin_wasm_memory_size(index)
+#define zig_wasm_memory_grow(index, delta) __builtin_wasm_memory_grow(index, delta)
+#else
+#define zig_wasm_memory_size(index) zig_unimplemented()
+#define zig_wasm_memory_grow(index, delta) zig_unimplemented()
+#endif
+
+#define zig_concat(lhs, rhs) lhs##rhs
+#define zig_expand_concat(lhs, rhs) zig_concat(lhs, rhs)
+
+#if __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#define zig_atomic(type) _Atomic(type)
+#define zig_cmpxchg_strong(obj, expected, desired, succ, fail, type) atomic_compare_exchange_strong_explicit(obj, &(expected), desired, succ, fail)
+#define zig_cmpxchg_weak(obj, expected, desired, succ, fail, type) atomic_compare_exchange_weak_explicit (obj, &(expected), desired, succ, fail)
+#define zig_atomicrmw_xchg(obj, arg, order, type) atomic_exchange_explicit (obj, arg, order)
+#define zig_atomicrmw_add(obj, arg, order, type) atomic_fetch_add_explicit (obj, arg, order)
+#define zig_atomicrmw_sub(obj, arg, order, type) atomic_fetch_sub_explicit (obj, arg, order)
+#define zig_atomicrmw_or(obj, arg, order, type) atomic_fetch_or_explicit (obj, arg, order)
+#define zig_atomicrmw_xor(obj, arg, order, type) atomic_fetch_xor_explicit (obj, arg, order)
+#define zig_atomicrmw_and(obj, arg, order, type) atomic_fetch_and_explicit (obj, arg, order)
+#define zig_atomicrmw_nand(obj, arg, order, type) __atomic_fetch_nand (obj, arg, order)
+#define zig_atomicrmw_min(obj, arg, order, type) __atomic_fetch_min (obj, arg, order)
+#define zig_atomicrmw_max(obj, arg, order, type) __atomic_fetch_max (obj, arg, order)
+#define
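+/* Illustration (not part of the header itself): every atomic macro takes a
+ * trailing `type` argument that the C11 and GNU branches ignore; only the
+ * MSVC branch further down pastes it into a per-type helper name. A
+ * hypothetical call site emitted by the C backend might read:
+ *
+ *   zig_atomic(zig_u32) counter;
+ *   zig_u32 prev = zig_atomicrmw_add(&counter, 1, memory_order_seq_cst, u32);
+ *   // C11 branch:  atomic_fetch_add_explicit(&counter, 1, memory_order_seq_cst)
+ *   // MSVC branch: zig_msvc_atomicrmw_add_u32(&counter, 1)
+ */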
zig_atomic_store(obj, arg, order, type) atomic_store_explicit (obj, arg, order) +#define zig_atomic_load(obj, order, type) atomic_load_explicit (obj, order) +#define zig_fence(order) atomic_thread_fence(order) +#elif defined(__GNUC__) +#define memory_order_relaxed __ATOMIC_RELAXED +#define memory_order_consume __ATOMIC_CONSUME +#define memory_order_acquire __ATOMIC_ACQUIRE +#define memory_order_release __ATOMIC_RELEASE +#define memory_order_acq_rel __ATOMIC_ACQ_REL +#define memory_order_seq_cst __ATOMIC_SEQ_CST +#define zig_atomic(type) type +#define zig_cmpxchg_strong(obj, expected, desired, succ, fail, type) __atomic_compare_exchange_n(obj, &(expected), desired, false, succ, fail) +#define zig_cmpxchg_weak(obj, expected, desired, succ, fail, type) __atomic_compare_exchange_n(obj, &(expected), desired, true , succ, fail) +#define zig_atomicrmw_xchg(obj, arg, order, type) __atomic_exchange_n(obj, arg, order) +#define zig_atomicrmw_add(obj, arg, order, type) __atomic_fetch_add (obj, arg, order) +#define zig_atomicrmw_sub(obj, arg, order, type) __atomic_fetch_sub (obj, arg, order) +#define zig_atomicrmw_or(obj, arg, order, type) __atomic_fetch_or (obj, arg, order) +#define zig_atomicrmw_xor(obj, arg, order, type) __atomic_fetch_xor (obj, arg, order) +#define zig_atomicrmw_and(obj, arg, order, type) __atomic_fetch_and (obj, arg, order) +#define zig_atomicrmw_nand(obj, arg, order, type) __atomic_fetch_nand(obj, arg, order) +#define zig_atomicrmw_min(obj, arg, order, type) __atomic_fetch_min (obj, arg, order) +#define zig_atomicrmw_max(obj, arg, order, type) __atomic_fetch_max (obj, arg, order) +#define zig_atomic_store(obj, arg, order, type) __atomic_store_n (obj, arg, order) +#define zig_atomic_load(obj, order, type) __atomic_load_n (obj, order) +#define zig_fence(order) __atomic_thread_fence(order) +#elif _MSC_VER && (_M_IX86 || _M_X64) +#define memory_order_relaxed 0 +#define memory_order_consume 1 +#define memory_order_acquire 2 +#define memory_order_release 3 +#define memory_order_acq_rel 4 +#define memory_order_seq_cst 5 +#define zig_atomic(type) type +#define zig_cmpxchg_strong(obj, expected, desired, succ, fail, type) zig_expand_concat(zig_msvc_cmpxchg_, type)(obj, &(expected), desired) +#define zig_cmpxchg_weak(obj, expected, desired, succ, fail, type) zig_cmpxchg_strong(obj, expected, desired, succ, fail, type) +#define zig_atomicrmw_xchg(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_xchg_, type)(obj, arg) +#define zig_atomicrmw_add(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_add_, type)(obj, arg) +#define zig_atomicrmw_sub(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_sub_, type)(obj, arg) +#define zig_atomicrmw_or(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_or_, type)(obj, arg) +#define zig_atomicrmw_xor(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_xor_, type)(obj, arg) +#define zig_atomicrmw_and(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_and_, type)(obj, arg) +#define zig_atomicrmw_nand(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_nand_, type)(obj, arg) +#define zig_atomicrmw_min(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_min_, type)(obj, arg) +#define zig_atomicrmw_max(obj, arg, order, type) zig_expand_concat(zig_msvc_atomicrmw_max_, type)(obj, arg) +#define zig_atomic_store(obj, arg, order, type) zig_expand_concat(zig_msvc_atomic_store_, type)(obj, arg) +#define zig_atomic_load(obj, order, type) zig_expand_concat(zig_msvc_atomic_load_, type)(obj) +#if 
_M_X64 +#define zig_fence(order) __faststorefence() +#else +#define zig_fence(order) zig_msvc_atomic_barrier() +#endif + +// TODO: _MSC_VER && (_M_ARM || _M_ARM64) +#else +#define memory_order_relaxed 0 +#define memory_order_consume 1 +#define memory_order_acquire 2 +#define memory_order_release 3 +#define memory_order_acq_rel 4 +#define memory_order_seq_cst 5 +#define zig_atomic(type) type +#define zig_cmpxchg_strong(obj, expected, desired, succ, fail, type) zig_unimplemented() +#define zig_cmpxchg_weak(obj, expected, desired, succ, fail, type) zig_unimplemented() +#define zig_atomicrmw_xchg(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_add(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_sub(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_or(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_xor(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_and(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_nand(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_min(obj, arg, order, type) zig_unimplemented() +#define zig_atomicrmw_max(obj, arg, order, type) zig_unimplemented() +#define zig_atomic_store(obj, arg, order, type) zig_unimplemented() +#define zig_atomic_load(obj, order, type) zig_unimplemented() +#define zig_fence(order) zig_unimplemented() +#endif + +#if __STDC_VERSION__ >= 201112L +#define zig_noreturn _Noreturn void +#elif zig_has_attribute(noreturn) || defined(zig_gnuc) +#define zig_noreturn __attribute__((noreturn)) void +#elif _MSC_VER +#define zig_noreturn __declspec(noreturn) void +#else +#define zig_noreturn void +#endif + +#define zig_bitSizeOf(T) (CHAR_BIT * sizeof(T)) + +typedef uintptr_t zig_usize; +typedef intptr_t zig_isize; +typedef signed short int zig_c_short; +typedef unsigned short int zig_c_ushort; +typedef signed int zig_c_int; +typedef unsigned int zig_c_uint; +typedef signed long int zig_c_long; +typedef unsigned long int zig_c_ulong; +typedef signed long long int zig_c_longlong; +typedef unsigned long long int zig_c_ulonglong; + +typedef uint8_t zig_u8; +typedef int8_t zig_i8; +typedef uint16_t zig_u16; +typedef int16_t zig_i16; +typedef uint32_t zig_u32; +typedef int32_t zig_i32; +typedef uint64_t zig_u64; +typedef int64_t zig_i64; + +#define zig_as_u8(val) UINT8_C(val) +#define zig_as_i8(val) INT8_C(val) +#define zig_as_u16(val) UINT16_C(val) +#define zig_as_i16(val) INT16_C(val) +#define zig_as_u32(val) UINT32_C(val) +#define zig_as_i32(val) INT32_C(val) +#define zig_as_u64(val) UINT64_C(val) +#define zig_as_i64(val) INT64_C(val) + +#define zig_minInt_u8 zig_as_u8(0) +#define zig_maxInt_u8 UINT8_MAX +#define zig_minInt_i8 INT8_MIN +#define zig_maxInt_i8 INT8_MAX +#define zig_minInt_u16 zig_as_u16(0) +#define zig_maxInt_u16 UINT16_MAX +#define zig_minInt_i16 INT16_MIN +#define zig_maxInt_i16 INT16_MAX +#define zig_minInt_u32 zig_as_u32(0) +#define zig_maxInt_u32 UINT32_MAX +#define zig_minInt_i32 INT32_MIN +#define zig_maxInt_i32 INT32_MAX +#define zig_minInt_u64 zig_as_u64(0) +#define zig_maxInt_u64 UINT64_MAX +#define zig_minInt_i64 INT64_MIN +#define zig_maxInt_i64 INT64_MAX + +#define zig_compiler_rt_abbrev_u32 si +#define zig_compiler_rt_abbrev_i32 si +#define zig_compiler_rt_abbrev_u64 di +#define zig_compiler_rt_abbrev_i64 di +#define zig_compiler_rt_abbrev_u128 ti +#define zig_compiler_rt_abbrev_i128 ti +#define zig_compiler_rt_abbrev_f16 hf +#define zig_compiler_rt_abbrev_f32 sf +#define zig_compiler_rt_abbrev_f64 df +#define 
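+/* Illustration (not part of the header itself): the zig_compiler_rt_abbrev_
+ * definitions follow the libgcc naming scheme (si = 32-bit, di = 64-bit,
+ * ti = 128-bit, hf/sf/df/xf/tf = f16/f32/f64/f80/f128), so compiler_rt entry
+ * points can be pasted together from a Zig type name, e.g.:
+ *
+ *   zig_expand_concat(__mulo, zig_expand_concat(zig_compiler_rt_abbrev_i32, 4))
+ *   // expands to __mulosi4, the i32 multiply-with-overflow routine
+ */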
zig_compiler_rt_abbrev_f80 xf +#define zig_compiler_rt_abbrev_f128 tf + +zig_extern void *memcpy (void *zig_restrict, void const *zig_restrict, zig_usize); +zig_extern void *memset (void *, int, zig_usize); + +/* ==================== 8/16/32/64-bit Integer Routines ===================== */ + +#define zig_maxInt(Type, bits) zig_shr_##Type(zig_maxInt_##Type, (zig_bitSizeOf(zig_##Type) - bits)) +#define zig_expand_maxInt(Type, bits) zig_maxInt(Type, bits) +#define zig_minInt(Type, bits) zig_not_##Type(zig_maxInt(Type, bits), bits) +#define zig_expand_minInt(Type, bits) zig_minInt(Type, bits) + +#define zig_int_operator(Type, RhsType, operation, operator) \ + static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##RhsType rhs) { \ + return lhs operator rhs; \ + } +#define zig_int_basic_operator(Type, operation, operator) \ + zig_int_operator(Type, Type, operation, operator) +#define zig_int_shift_operator(Type, operation, operator) \ + zig_int_operator(Type, u8, operation, operator) +#define zig_int_helpers(w) \ + zig_int_basic_operator(u##w, and, &) \ + zig_int_basic_operator(i##w, and, &) \ + zig_int_basic_operator(u##w, or, |) \ + zig_int_basic_operator(i##w, or, |) \ + zig_int_basic_operator(u##w, xor, ^) \ + zig_int_basic_operator(i##w, xor, ^) \ + zig_int_shift_operator(u##w, shl, <<) \ + zig_int_shift_operator(i##w, shl, <<) \ + zig_int_shift_operator(u##w, shr, >>) \ +\ + static inline zig_i##w zig_shr_i##w(zig_i##w lhs, zig_u8 rhs) { \ + zig_i##w sign_mask = lhs < zig_as_i##w(0) ? -zig_as_i##w(1) : zig_as_i##w(0); \ + return ((lhs ^ sign_mask) >> rhs) ^ sign_mask; \ + } \ +\ + static inline zig_u##w zig_not_u##w(zig_u##w val, zig_u8 bits) { \ + return val ^ zig_maxInt(u##w, bits); \ + } \ +\ + static inline zig_i##w zig_not_i##w(zig_i##w val, zig_u8 bits) { \ + (void)bits; \ + return ~val; \ + } \ +\ + static inline zig_u##w zig_wrap_u##w(zig_u##w val, zig_u8 bits) { \ + return val & zig_maxInt(u##w, bits); \ + } \ +\ + static inline zig_i##w zig_wrap_i##w(zig_i##w val, zig_u8 bits) { \ + return (val & zig_as_u##w(1) << (bits - zig_as_u8(1))) != 0 \ + ? val | zig_minInt(i##w, bits) : val & zig_maxInt(i##w, bits); \ + } \ +\ + zig_int_basic_operator(u##w, div_floor, /) \ +\ + static inline zig_i##w zig_div_floor_i##w(zig_i##w lhs, zig_i##w rhs) { \ + return lhs / rhs - (((lhs ^ rhs) & (lhs % rhs)) < zig_as_i##w(0)); \ + } \ +\ + zig_int_basic_operator(u##w, mod, %) \ +\ + static inline zig_i##w zig_mod_i##w(zig_i##w lhs, zig_i##w rhs) { \ + zig_i##w rem = lhs % rhs; \ + return rem + (((lhs ^ rhs) & rem) < zig_as_i##w(0) ? 
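+/* Worked example (illustrative): arbitrary-width integers travel in the next
+ * power-of-two C type and are re-normalized after each operation; zig_wrap_uN
+ * masks down to `bits` bits and zig_wrap_iN additionally sign-extends. For a
+ * Zig u5 or i5 carried in 8 bits:
+ *
+ *   zig_wrap_u8(29 + 7, 5) == 4    // 36 & 0x1f, u5 wraparound
+ *   zig_wrap_i8(0x13, 5)   == -13  // bit 4 is set, so the i5 sign-extends
+ */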
rhs : zig_as_i##w(0)); \ + } \ +\ + static inline zig_u##w zig_shlw_u##w(zig_u##w lhs, zig_u8 rhs, zig_u8 bits) { \ + return zig_wrap_u##w(zig_shl_u##w(lhs, rhs), bits); \ + } \ +\ + static inline zig_i##w zig_shlw_i##w(zig_i##w lhs, zig_u8 rhs, zig_u8 bits) { \ + return zig_wrap_i##w((zig_i##w)zig_shl_u##w((zig_u##w)lhs, (zig_u##w)rhs), bits); \ + } \ +\ + static inline zig_u##w zig_addw_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + return zig_wrap_u##w(lhs + rhs, bits); \ + } \ +\ + static inline zig_i##w zig_addw_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + return zig_wrap_i##w((zig_i##w)((zig_u##w)lhs + (zig_u##w)rhs), bits); \ + } \ +\ + static inline zig_u##w zig_subw_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + return zig_wrap_u##w(lhs - rhs, bits); \ + } \ +\ + static inline zig_i##w zig_subw_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + return zig_wrap_i##w((zig_i##w)((zig_u##w)lhs - (zig_u##w)rhs), bits); \ + } \ +\ + static inline zig_u##w zig_mulw_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + return zig_wrap_u##w(lhs * rhs, bits); \ + } \ +\ + static inline zig_i##w zig_mulw_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + return zig_wrap_i##w((zig_i##w)((zig_u##w)lhs * (zig_u##w)rhs), bits); \ + } +zig_int_helpers(8) +zig_int_helpers(16) +zig_int_helpers(32) +zig_int_helpers(64) + +static inline bool zig_addo_u32(zig_u32 *res, zig_u32 lhs, zig_u32 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_u32 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u32(full_res, bits); + return overflow || full_res < zig_minInt(u32, bits) || full_res > zig_maxInt(u32, bits); +#else + *res = zig_addw_u32(lhs, rhs, bits); + return *res < lhs; +#endif +} + +static inline void zig_vaddo_u32(zig_u8 *ov, zig_u32 *res, int n, + const zig_u32 *lhs, const zig_u32 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_u32(&res[i], lhs[i], rhs[i], bits); +} + +zig_extern zig_i32 __addosi4(zig_i32 lhs, zig_i32 rhs, zig_c_int *overflow); +static inline bool zig_addo_i32(zig_i32 *res, zig_i32 lhs, zig_i32 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_i32 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i32 full_res = __addosi4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i32(full_res, bits); + return overflow || full_res < zig_minInt(i32, bits) || full_res > zig_maxInt(i32, bits); +} + +static inline void zig_vaddo_i32(zig_u8 *ov, zig_i32 *res, int n, + const zig_i32 *lhs, const zig_i32 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_i32(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_addo_u64(zig_u64 *res, zig_u64 lhs, zig_u64 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_u64 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u64(full_res, bits); + return overflow || full_res < zig_minInt(u64, bits) || full_res > zig_maxInt(u64, bits); +#else + *res = zig_addw_u64(lhs, rhs, bits); + return *res < lhs; +#endif +} + +static inline void zig_vaddo_u64(zig_u8 *ov, zig_u64 *res, int n, + const zig_u64 *lhs, const zig_u64 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_u64(&res[i], lhs[i], rhs[i], bits); +} + +zig_extern zig_i64 __addodi4(zig_i64 lhs, zig_i64 rhs, zig_c_int *overflow); +static inline bool 
zig_addo_i64(zig_i64 *res, zig_i64 lhs, zig_i64 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_i64 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i64 full_res = __addodi4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i64(full_res, bits); + return overflow || full_res < zig_minInt(i64, bits) || full_res > zig_maxInt(i64, bits); +} + +static inline void zig_vaddo_i64(zig_u8 *ov, zig_i64 *res, int n, + const zig_i64 *lhs, const zig_i64 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_i64(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_addo_u8(zig_u8 *res, zig_u8 lhs, zig_u8 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_u8 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u8(full_res, bits); + return overflow || full_res < zig_minInt(u8, bits) || full_res > zig_maxInt(u8, bits); +#else + zig_u32 full_res; + bool overflow = zig_addo_u32(&full_res, lhs, rhs, bits); + *res = (zig_u8)full_res; + return overflow; +#endif +} + +static inline void zig_vaddo_u8(zig_u8 *ov, zig_u8 *res, int n, + const zig_u8 *lhs, const zig_u8 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_u8(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_addo_i8(zig_i8 *res, zig_i8 lhs, zig_i8 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_i8 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_i8(full_res, bits); + return overflow || full_res < zig_minInt(i8, bits) || full_res > zig_maxInt(i8, bits); +#else + zig_i32 full_res; + bool overflow = zig_addo_i32(&full_res, lhs, rhs, bits); + *res = (zig_i8)full_res; + return overflow; +#endif +} + +static inline void zig_vaddo_i8(zig_u8 *ov, zig_i8 *res, int n, + const zig_i8 *lhs, const zig_i8 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_i8(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_addo_u16(zig_u16 *res, zig_u16 lhs, zig_u16 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_u16 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u16(full_res, bits); + return overflow || full_res < zig_minInt(u16, bits) || full_res > zig_maxInt(u16, bits); +#else + zig_u32 full_res; + bool overflow = zig_addo_u32(&full_res, lhs, rhs, bits); + *res = (zig_u16)full_res; + return overflow; +#endif +} + +static inline void zig_vaddo_u16(zig_u8 *ov, zig_u16 *res, int n, + const zig_u16 *lhs, const zig_u16 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_u16(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_addo_i16(zig_i16 *res, zig_i16 lhs, zig_i16 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) || defined(zig_gnuc) + zig_i16 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_i16(full_res, bits); + return overflow || full_res < zig_minInt(i16, bits) || full_res > zig_maxInt(i16, bits); +#else + zig_i32 full_res; + bool overflow = zig_addo_i32(&full_res, lhs, rhs, bits); + *res = (zig_i16)full_res; + return overflow; +#endif +} + +static inline void zig_vaddo_i16(zig_u8 *ov, zig_i16 *res, int n, + const zig_i16 *lhs, const zig_i16 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_addo_i16(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool 
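+/* Worked example (illustrative): each zig_addo_N(res, lhs, rhs, bits)
+ * reports overflow for the narrowed type, not just the carrier type: even
+ * when __builtin_add_overflow succeeds in the full C type, the result is
+ * range-checked against minInt/maxInt of `bits` bits. For a Zig u4:
+ *
+ *   zig_u8 r;
+ *   bool ov = zig_addo_u8(&r, 9, 8, 4);
+ *   // ov == true since 17 > maxInt(u4) == 15; r == 1, the wrapped result
+ */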
zig_subo_u32(zig_u32 *res, zig_u32 lhs, zig_u32 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_u32 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u32(full_res, bits); + return overflow || full_res < zig_minInt(u32, bits) || full_res > zig_maxInt(u32, bits); +#else + *res = zig_subw_u32(lhs, rhs, bits); + return *res > lhs; +#endif +} + +static inline void zig_vsubo_u32(zig_u8 *ov, zig_u32 *res, int n, + const zig_u32 *lhs, const zig_u32 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_u32(&res[i], lhs[i], rhs[i], bits); +} + +zig_extern zig_i32 __subosi4(zig_i32 lhs, zig_i32 rhs, zig_c_int *overflow); +static inline bool zig_subo_i32(zig_i32 *res, zig_i32 lhs, zig_i32 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_i32 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i32 full_res = __subosi4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i32(full_res, bits); + return overflow || full_res < zig_minInt(i32, bits) || full_res > zig_maxInt(i32, bits); +} + +static inline void zig_vsubo_i32(zig_u8 *ov, zig_i32 *res, int n, + const zig_i32 *lhs, const zig_i32 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_i32(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_subo_u64(zig_u64 *res, zig_u64 lhs, zig_u64 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_u64 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u64(full_res, bits); + return overflow || full_res < zig_minInt(u64, bits) || full_res > zig_maxInt(u64, bits); +#else + *res = zig_subw_u64(lhs, rhs, bits); + return *res > lhs; +#endif +} + +static inline void zig_vsubo_u64(zig_u8 *ov, zig_u64 *res, int n, + const zig_u64 *lhs, const zig_u64 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_u64(&res[i], lhs[i], rhs[i], bits); +} + +zig_extern zig_i64 __subodi4(zig_i64 lhs, zig_i64 rhs, zig_c_int *overflow); +static inline bool zig_subo_i64(zig_i64 *res, zig_i64 lhs, zig_i64 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_i64 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i64 full_res = __subodi4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i64(full_res, bits); + return overflow || full_res < zig_minInt(i64, bits) || full_res > zig_maxInt(i64, bits); +} + +static inline void zig_vsubo_i64(zig_u8 *ov, zig_i64 *res, int n, + const zig_i64 *lhs, const zig_i64 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_i64(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_subo_u8(zig_u8 *res, zig_u8 lhs, zig_u8 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_u8 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u8(full_res, bits); + return overflow || full_res < zig_minInt(u8, bits) || full_res > zig_maxInt(u8, bits); +#else + zig_u32 full_res; + bool overflow = zig_subo_u32(&full_res, lhs, rhs, bits); + *res = (zig_u8)full_res; + return overflow; +#endif +} + +static inline void zig_vsubo_u8(zig_u8 *ov, zig_u8 *res, int n, + const zig_u8 *lhs, const zig_u8 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_u8(&res[i], lhs[i], rhs[i], bits); 
+} + +static inline bool zig_subo_i8(zig_i8 *res, zig_i8 lhs, zig_i8 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_i8 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_i8(full_res, bits); + return overflow || full_res < zig_minInt(i8, bits) || full_res > zig_maxInt(i8, bits); +#else + zig_i32 full_res; + bool overflow = zig_subo_i32(&full_res, lhs, rhs, bits); + *res = (zig_i8)full_res; + return overflow; +#endif +} + +static inline void zig_vsubo_i8(zig_u8 *ov, zig_i8 *res, int n, + const zig_i8 *lhs, const zig_i8 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_i8(&res[i], lhs[i], rhs[i], bits); +} + + +static inline bool zig_subo_u16(zig_u16 *res, zig_u16 lhs, zig_u16 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_u16 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u16(full_res, bits); + return overflow || full_res < zig_minInt(u16, bits) || full_res > zig_maxInt(u16, bits); +#else + zig_u32 full_res; + bool overflow = zig_subo_u32(&full_res, lhs, rhs, bits); + *res = (zig_u16)full_res; + return overflow; +#endif +} + +static inline void zig_vsubo_u16(zig_u8 *ov, zig_u16 *res, int n, + const zig_u16 *lhs, const zig_u16 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_u16(&res[i], lhs[i], rhs[i], bits); +} + + +static inline bool zig_subo_i16(zig_i16 *res, zig_i16 lhs, zig_i16 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) || defined(zig_gnuc) + zig_i16 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_i16(full_res, bits); + return overflow || full_res < zig_minInt(i16, bits) || full_res > zig_maxInt(i16, bits); +#else + zig_i32 full_res; + bool overflow = zig_subo_i32(&full_res, lhs, rhs, bits); + *res = (zig_i16)full_res; + return overflow; +#endif +} + +static inline void zig_vsubo_i16(zig_u8 *ov, zig_i16 *res, int n, + const zig_i16 *lhs, const zig_i16 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_subo_i16(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_mulo_u32(zig_u32 *res, zig_u32 lhs, zig_u32 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_u32 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u32(full_res, bits); + return overflow || full_res < zig_minInt(u32, bits) || full_res > zig_maxInt(u32, bits); +#else + *res = zig_mulw_u32(lhs, rhs, bits); + return rhs != zig_as_u32(0) && lhs > zig_maxInt(u32, bits) / rhs; +#endif +} + +static inline void zig_vmulo_u32(zig_u8 *ov, zig_u32 *res, int n, + const zig_u32 *lhs, const zig_u32 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u32(&res[i], lhs[i], rhs[i], bits); +} + +zig_extern zig_i32 __mulosi4(zig_i32 lhs, zig_i32 rhs, zig_c_int *overflow); +static inline bool zig_mulo_i32(zig_i32 *res, zig_i32 lhs, zig_i32 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_i32 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i32 full_res = __mulosi4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i32(full_res, bits); + return overflow || full_res < zig_minInt(i32, bits) || full_res > zig_maxInt(i32, bits); +} + +static inline void zig_vmulo_i32(zig_u8 *ov, zig_i32 *res, int n, + const zig_i32 *lhs, const zig_i32 *rhs, zig_u8 bits) +{ + for (int i = 
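+/* Worked example (illustrative): without a wider type, unsigned multiply
+ * overflow is detected before the truncating product is formed, via
+ * `rhs != 0 && lhs > maxInt / rhs`. For u8 with bits == 8:
+ *
+ *   lhs = 20, rhs = 13: maxInt(u8) / 13 == 19, and 20 > 19, so 260 overflows
+ *   lhs = 19, rhs = 13: 19 <= 19, and 19 * 13 == 247 indeed fits in a u8
+ */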
0; i < n; ++i) ov[i] = zig_mulo_i32(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_mulo_u64(zig_u64 *res, zig_u64 lhs, zig_u64 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_u64 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u64(full_res, bits); + return overflow || full_res < zig_minInt(u64, bits) || full_res > zig_maxInt(u64, bits); +#else + *res = zig_mulw_u64(lhs, rhs, bits); + return rhs != zig_as_u64(0) && lhs > zig_maxInt(u64, bits) / rhs; +#endif +} + +static inline void zig_vmulo_u64(zig_u8 *ov, zig_u64 *res, int n, + const zig_u64 *lhs, const zig_u64 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u64(&res[i], lhs[i], rhs[i], bits); +} + +zig_extern zig_i64 __mulodi4(zig_i64 lhs, zig_i64 rhs, zig_c_int *overflow); +static inline bool zig_mulo_i64(zig_i64 *res, zig_i64 lhs, zig_i64 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_i64 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i64 full_res = __mulodi4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i64(full_res, bits); + return overflow || full_res < zig_minInt(i64, bits) || full_res > zig_maxInt(i64, bits); +} + +static inline void zig_vmulo_i64(zig_u8 *ov, zig_i64 *res, int n, + const zig_i64 *lhs, const zig_i64 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i64(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_mulo_u8(zig_u8 *res, zig_u8 lhs, zig_u8 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_u8 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u8(full_res, bits); + return overflow || full_res < zig_minInt(u8, bits) || full_res > zig_maxInt(u8, bits); +#else + zig_u32 full_res; + bool overflow = zig_mulo_u32(&full_res, lhs, rhs, bits); + *res = (zig_u8)full_res; + return overflow; +#endif +} + +static inline void zig_vmulo_u8(zig_u8 *ov, zig_u8 *res, int n, + const zig_u8 *lhs, const zig_u8 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u8(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_mulo_i8(zig_i8 *res, zig_i8 lhs, zig_i8 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_i8 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_i8(full_res, bits); + return overflow || full_res < zig_minInt(i8, bits) || full_res > zig_maxInt(i8, bits); +#else + zig_i32 full_res; + bool overflow = zig_mulo_i32(&full_res, lhs, rhs, bits); + *res = (zig_i8)full_res; + return overflow; +#endif +} + +static inline void zig_vmulo_i8(zig_u8 *ov, zig_i8 *res, int n, + const zig_i8 *lhs, const zig_i8 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i8(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_mulo_u16(zig_u16 *res, zig_u16 lhs, zig_u16 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_u16 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u16(full_res, bits); + return overflow || full_res < zig_minInt(u16, bits) || full_res > zig_maxInt(u16, bits); +#else + zig_u32 full_res; + bool overflow = zig_mulo_u32(&full_res, lhs, rhs, bits); + *res = (zig_u16)full_res; + return overflow; +#endif +} + +static inline void zig_vmulo_u16(zig_u8 *ov, zig_u16 *res, int n, + const zig_u16 *lhs, 
const zig_u16 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_u16(&res[i], lhs[i], rhs[i], bits); +} + +static inline bool zig_mulo_i16(zig_i16 *res, zig_i16 lhs, zig_i16 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) || defined(zig_gnuc) + zig_i16 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_i16(full_res, bits); + return overflow || full_res < zig_minInt(i16, bits) || full_res > zig_maxInt(i16, bits); +#else + zig_i32 full_res; + bool overflow = zig_mulo_i32(&full_res, lhs, rhs, bits); + *res = (zig_i16)full_res; + return overflow; +#endif +} + +static inline void zig_vmulo_i16(zig_u8 *ov, zig_i16 *res, int n, + const zig_i16 *lhs, const zig_i16 *rhs, zig_u8 bits) +{ + for (int i = 0; i < n; ++i) ov[i] = zig_mulo_i16(&res[i], lhs[i], rhs[i], bits); +} + +#define zig_int_builtins(w) \ + static inline bool zig_shlo_u##w(zig_u##w *res, zig_u##w lhs, zig_u8 rhs, zig_u8 bits) { \ + *res = zig_shlw_u##w(lhs, rhs, bits); \ + return lhs > zig_maxInt(u##w, bits) >> rhs; \ + } \ +\ + static inline bool zig_shlo_i##w(zig_i##w *res, zig_i##w lhs, zig_u8 rhs, zig_u8 bits) { \ + *res = zig_shlw_i##w(lhs, rhs, bits); \ + zig_i##w mask = (zig_i##w)(zig_maxInt_u##w << (bits - rhs - 1)); \ + return (lhs & mask) != zig_as_i##w(0) && (lhs & mask) != mask; \ + } \ +\ + static inline zig_u##w zig_shls_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + zig_u##w res; \ + if (rhs >= bits) return lhs != zig_as_u##w(0) ? zig_maxInt(u##w, bits) : lhs; \ + return zig_shlo_u##w(&res, lhs, (zig_u8)rhs, bits) ? zig_maxInt(u##w, bits) : res; \ + } \ +\ + static inline zig_i##w zig_shls_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + zig_i##w res; \ + if ((zig_u##w)rhs < (zig_u##w)bits && !zig_shlo_i##w(&res, lhs, rhs, bits)) return res; \ + return lhs < zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + } \ +\ + static inline zig_u##w zig_adds_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + zig_u##w res; \ + return zig_addo_u##w(&res, lhs, rhs, bits) ? zig_maxInt(u##w, bits) : res; \ + } \ +\ + static inline zig_i##w zig_adds_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + zig_i##w res; \ + if (!zig_addo_i##w(&res, lhs, rhs, bits)) return res; \ + return res >= zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + } \ +\ + static inline zig_u##w zig_subs_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + zig_u##w res; \ + return zig_subo_u##w(&res, lhs, rhs, bits) ? zig_minInt(u##w, bits) : res; \ + } \ +\ + static inline zig_i##w zig_subs_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + zig_i##w res; \ + if (!zig_subo_i##w(&res, lhs, rhs, bits)) return res; \ + return res >= zig_as_i##w(0) ? zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + } \ +\ + static inline zig_u##w zig_muls_u##w(zig_u##w lhs, zig_u##w rhs, zig_u8 bits) { \ + zig_u##w res; \ + return zig_mulo_u##w(&res, lhs, rhs, bits) ? zig_maxInt(u##w, bits) : res; \ + } \ +\ + static inline zig_i##w zig_muls_i##w(zig_i##w lhs, zig_i##w rhs, zig_u8 bits) { \ + zig_i##w res; \ + if (!zig_mulo_i##w(&res, lhs, rhs, bits)) return res; \ + return (lhs ^ rhs) < zig_as_i##w(0) ? 
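+/* Worked example (illustrative): the saturating wrappers run the matching
+ * overflow intrinsic and, on overflow, clamp toward the limit the true
+ * result crossed; for signed add and sub the sign of the wrapped result
+ * reveals which side that was. For a Zig i8:
+ *
+ *   zig_adds_i8(100, 100, 8)  ==  127  // clamped to maxInt(i8)
+ *   zig_subs_i8(-100, 100, 8) == -128  // clamped to minInt(i8)
+ */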
zig_minInt(i##w, bits) : zig_maxInt(i##w, bits); \ + } +zig_int_builtins(8) +zig_int_builtins(16) +zig_int_builtins(32) +zig_int_builtins(64) + +#define zig_builtin8(name, val) __builtin_##name(val) +typedef zig_c_uint zig_Builtin8; + +#define zig_builtin16(name, val) __builtin_##name(val) +typedef zig_c_uint zig_Builtin16; + +#if INT_MIN <= INT32_MIN +#define zig_builtin32(name, val) __builtin_##name(val) +typedef zig_c_uint zig_Builtin32; +#elif LONG_MIN <= INT32_MIN +#define zig_builtin32(name, val) __builtin_##name##l(val) +typedef zig_c_ulong zig_Builtin32; +#endif + +#if INT_MIN <= INT64_MIN +#define zig_builtin64(name, val) __builtin_##name(val) +typedef zig_c_uint zig_Builtin64; +#elif LONG_MIN <= INT64_MIN +#define zig_builtin64(name, val) __builtin_##name##l(val) +typedef zig_c_ulong zig_Builtin64; +#elif LLONG_MIN <= INT64_MIN +#define zig_builtin64(name, val) __builtin_##name##ll(val) +typedef zig_c_ulonglong zig_Builtin64; +#endif + +static inline zig_u8 zig_byte_swap_u8(zig_u8 val, zig_u8 bits) { + return zig_wrap_u8(val >> (8 - bits), bits); +} + +static inline zig_i8 zig_byte_swap_i8(zig_i8 val, zig_u8 bits) { + return zig_wrap_i8((zig_i8)zig_byte_swap_u8((zig_u8)val, bits), bits); +} + +static inline zig_u16 zig_byte_swap_u16(zig_u16 val, zig_u8 bits) { + zig_u16 full_res; +#if zig_has_builtin(bswap16) || defined(zig_gnuc) + full_res = __builtin_bswap16(val); +#else + full_res = (zig_u16)zig_byte_swap_u8((zig_u8)(val >> 0), 8) << 8 | + (zig_u16)zig_byte_swap_u8((zig_u8)(val >> 8), 8) >> 0; +#endif + return zig_wrap_u16(full_res >> (16 - bits), bits); +} + +static inline zig_i16 zig_byte_swap_i16(zig_i16 val, zig_u8 bits) { + return zig_wrap_i16((zig_i16)zig_byte_swap_u16((zig_u16)val, bits), bits); +} + +static inline zig_u32 zig_byte_swap_u32(zig_u32 val, zig_u8 bits) { + zig_u32 full_res; +#if zig_has_builtin(bswap32) || defined(zig_gnuc) + full_res = __builtin_bswap32(val); +#else + full_res = (zig_u32)zig_byte_swap_u16((zig_u16)(val >> 0), 16) << 16 | + (zig_u32)zig_byte_swap_u16((zig_u16)(val >> 16), 16) >> 0; +#endif + return zig_wrap_u32(full_res >> (32 - bits), bits); +} + +static inline zig_i32 zig_byte_swap_i32(zig_i32 val, zig_u8 bits) { + return zig_wrap_i32((zig_i32)zig_byte_swap_u32((zig_u32)val, bits), bits); +} + +static inline zig_u64 zig_byte_swap_u64(zig_u64 val, zig_u8 bits) { + zig_u64 full_res; +#if zig_has_builtin(bswap64) || defined(zig_gnuc) + full_res = __builtin_bswap64(val); +#else + full_res = (zig_u64)zig_byte_swap_u32((zig_u32)(val >> 0), 32) << 32 | + (zig_u64)zig_byte_swap_u32((zig_u32)(val >> 32), 32) >> 0; +#endif + return zig_wrap_u64(full_res >> (64 - bits), bits); +} + +static inline zig_i64 zig_byte_swap_i64(zig_i64 val, zig_u8 bits) { + return zig_wrap_i64((zig_i64)zig_byte_swap_u64((zig_u64)val, bits), bits); +} + +static inline zig_u8 zig_bit_reverse_u8(zig_u8 val, zig_u8 bits) { + zig_u8 full_res; +#if zig_has_builtin(bitreverse8) + full_res = __builtin_bitreverse8(val); +#else + static zig_u8 const lut[0x10] = { + 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, + 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf + }; + full_res = lut[val >> 0 & 0xF] << 4 | lut[val >> 4 & 0xF] << 0; +#endif + return zig_wrap_u8(full_res >> (8 - bits), bits); +} + +static inline zig_i8 zig_bit_reverse_i8(zig_i8 val, zig_u8 bits) { + return zig_wrap_i8((zig_i8)zig_bit_reverse_u8((zig_u8)val, bits), bits); +} + +static inline zig_u16 zig_bit_reverse_u16(zig_u16 val, zig_u8 bits) { + zig_u16 full_res; +#if zig_has_builtin(bitreverse16) + full_res = 
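+/* Worked example (illustrative): for widths below the carrier type,
+ * zig_byte_swap_uN swaps the full N-bit pattern first and then shifts right
+ * by (N - bits) so the swapped bytes land in the low-order bits:
+ *
+ *   zig_byte_swap_u16(0x1234, 16)     == 0x3412
+ *   zig_byte_swap_u32(0x00123456, 24) == 0x563412  // a 24-bit value
+ */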
__builtin_bitreverse16(val); +#else + full_res = (zig_u16)zig_bit_reverse_u8((zig_u8)(val >> 0), 8) << 8 | + (zig_u16)zig_bit_reverse_u8((zig_u8)(val >> 8), 8) >> 0; +#endif + return zig_wrap_u16(full_res >> (16 - bits), bits); +} + +static inline zig_i16 zig_bit_reverse_i16(zig_i16 val, zig_u8 bits) { + return zig_wrap_i16((zig_i16)zig_bit_reverse_u16((zig_u16)val, bits), bits); +} + +static inline zig_u32 zig_bit_reverse_u32(zig_u32 val, zig_u8 bits) { + zig_u32 full_res; +#if zig_has_builtin(bitreverse32) + full_res = __builtin_bitreverse32(val); +#else + full_res = (zig_u32)zig_bit_reverse_u16((zig_u16)(val >> 0), 16) << 16 | + (zig_u32)zig_bit_reverse_u16((zig_u16)(val >> 16), 16) >> 0; +#endif + return zig_wrap_u32(full_res >> (32 - bits), bits); +} + +static inline zig_i32 zig_bit_reverse_i32(zig_i32 val, zig_u8 bits) { + return zig_wrap_i32((zig_i32)zig_bit_reverse_u32((zig_u32)val, bits), bits); +} + +static inline zig_u64 zig_bit_reverse_u64(zig_u64 val, zig_u8 bits) { + zig_u64 full_res; +#if zig_has_builtin(bitreverse64) + full_res = __builtin_bitreverse64(val); +#else + full_res = (zig_u64)zig_bit_reverse_u32((zig_u32)(val >> 0), 32) << 32 | + (zig_u64)zig_bit_reverse_u32((zig_u32)(val >> 32), 32) >> 0; +#endif + return zig_wrap_u64(full_res >> (64 - bits), bits); +} + +static inline zig_i64 zig_bit_reverse_i64(zig_i64 val, zig_u8 bits) { + return zig_wrap_i64((zig_i64)zig_bit_reverse_u64((zig_u64)val, bits), bits); +} + +#define zig_builtin_popcount_common(w) \ + static inline zig_u8 zig_popcount_i##w(zig_i##w val, zig_u8 bits) { \ + return zig_popcount_u##w((zig_u##w)val, bits); \ + } +#if zig_has_builtin(popcount) || defined(zig_gnuc) +#define zig_builtin_popcount(w) \ + static inline zig_u8 zig_popcount_u##w(zig_u##w val, zig_u8 bits) { \ + (void)bits; \ + return zig_builtin##w(popcount, val); \ + } \ +\ + zig_builtin_popcount_common(w) +#else +#define zig_builtin_popcount(w) \ + static inline zig_u8 zig_popcount_u##w(zig_u##w val, zig_u8 bits) { \ + (void)bits; \ + zig_u##w temp = val - ((val >> 1) & (zig_maxInt_u##w / 3)); \ + temp = (temp & (zig_maxInt_u##w / 5)) + ((temp >> 2) & (zig_maxInt_u##w / 5)); \ + temp = (temp + (temp >> 4)) & (zig_maxInt_u##w / 17); \ + return temp * (zig_maxInt_u##w / 255) >> (w - 8); \ + } \ +\ + zig_builtin_popcount_common(w) +#endif +zig_builtin_popcount(8) +zig_builtin_popcount(16) +zig_builtin_popcount(32) +zig_builtin_popcount(64) + +#define zig_builtin_ctz_common(w) \ + static inline zig_u8 zig_ctz_i##w(zig_i##w val, zig_u8 bits) { \ + return zig_ctz_u##w((zig_u##w)val, bits); \ + } +#if zig_has_builtin(ctz) || defined(zig_gnuc) +#define zig_builtin_ctz(w) \ + static inline zig_u8 zig_ctz_u##w(zig_u##w val, zig_u8 bits) { \ + if (val == 0) return bits; \ + return zig_builtin##w(ctz, val); \ + } \ +\ + zig_builtin_ctz_common(w) +#else +#define zig_builtin_ctz(w) \ + static inline zig_u8 zig_ctz_u##w(zig_u##w val, zig_u8 bits) { \ + return zig_popcount_u##w(zig_not_u##w(val, bits) & zig_subw_u##w(val, 1, bits), bits); \ + } \ +\ + zig_builtin_ctz_common(w) +#endif +zig_builtin_ctz(8) +zig_builtin_ctz(16) +zig_builtin_ctz(32) +zig_builtin_ctz(64) + +#define zig_builtin_clz_common(w) \ + static inline zig_u8 zig_clz_i##w(zig_i##w val, zig_u8 bits) { \ + return zig_clz_u##w((zig_u##w)val, bits); \ + } +#if zig_has_builtin(clz) || defined(zig_gnuc) +#define zig_builtin_clz(w) \ + static inline zig_u8 zig_clz_u##w(zig_u##w val, zig_u8 bits) { \ + if (val == 0) return bits; \ + return zig_builtin##w(clz, val) - (zig_bitSizeOf(zig_Builtin##w) 
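+/* Worked example (illustrative): __builtin_clz operates on the promoted C
+ * type (zig_Builtin8 and zig_Builtin16 are unsigned int), so the count is
+ * corrected by the width difference. For the u8 value 0x10 with bits == 8 on
+ * a 32-bit unsigned int:
+ *
+ *   __builtin_clz(0x10) == 27; 27 - (32 - 8) == 3 leading zeros in the u8
+ */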
- bits); \ + } \ +\ + zig_builtin_clz_common(w) +#else +#define zig_builtin_clz(w) \ + static inline zig_u8 zig_clz_u##w(zig_u##w val, zig_u8 bits) { \ + return zig_ctz_u##w(zig_bit_reverse_u##w(val, bits), bits); \ + } \ +\ + zig_builtin_clz_common(w) +#endif +zig_builtin_clz(8) +zig_builtin_clz(16) +zig_builtin_clz(32) +zig_builtin_clz(64) + +/* ======================== 128-bit Integer Routines ======================== */ + +#if !defined(zig_has_int128) +# if defined(__SIZEOF_INT128__) +# define zig_has_int128 1 +# else +# define zig_has_int128 0 +# endif +#endif + +#if zig_has_int128 + +typedef unsigned __int128 zig_u128; +typedef signed __int128 zig_i128; + +#define zig_as_u128(hi, lo) ((zig_u128)(hi)<<64|(lo)) +#define zig_as_i128(hi, lo) ((zig_i128)zig_as_u128(hi, lo)) +#define zig_as_constant_u128(hi, lo) zig_as_u128(hi, lo) +#define zig_as_constant_i128(hi, lo) zig_as_i128(hi, lo) +#define zig_hi_u128(val) ((zig_u64)((val) >> 64)) +#define zig_lo_u128(val) ((zig_u64)((val) >> 0)) +#define zig_hi_i128(val) ((zig_i64)((val) >> 64)) +#define zig_lo_i128(val) ((zig_u64)((val) >> 0)) +#define zig_bitcast_u128(val) ((zig_u128)(val)) +#define zig_bitcast_i128(val) ((zig_i128)(val)) +#define zig_cmp_int128(Type) \ + static inline zig_i32 zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return (lhs > rhs) - (lhs < rhs); \ + } +#define zig_bit_int128(Type, operation, operator) \ + static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return lhs operator rhs; \ + } + +#else /* zig_has_int128 */ + +#if __LITTLE_ENDIAN__ || _MSC_VER +typedef struct { zig_align(16) zig_u64 lo; zig_u64 hi; } zig_u128; +typedef struct { zig_align(16) zig_u64 lo; zig_i64 hi; } zig_i128; +#else +typedef struct { zig_align(16) zig_u64 hi; zig_u64 lo; } zig_u128; +typedef struct { zig_align(16) zig_i64 hi; zig_u64 lo; } zig_i128; +#endif + +#define zig_as_u128(hi, lo) ((zig_u128){ .h##i = (hi), .l##o = (lo) }) +#define zig_as_i128(hi, lo) ((zig_i128){ .h##i = (hi), .l##o = (lo) }) + +#if _MSC_VER +#define zig_as_constant_u128(hi, lo) { .h##i = (hi), .l##o = (lo) } +#define zig_as_constant_i128(hi, lo) { .h##i = (hi), .l##o = (lo) } +#else +#define zig_as_constant_u128(hi, lo) zig_as_u128(hi, lo) +#define zig_as_constant_i128(hi, lo) zig_as_i128(hi, lo) +#endif +#define zig_hi_u128(val) ((val).hi) +#define zig_lo_u128(val) ((val).lo) +#define zig_hi_i128(val) ((val).hi) +#define zig_lo_i128(val) ((val).lo) +#define zig_bitcast_u128(val) zig_as_u128((zig_u64)(val).hi, (val).lo) +#define zig_bitcast_i128(val) zig_as_i128((zig_i64)(val).hi, (val).lo) +#define zig_cmp_int128(Type) \ + static inline zig_i32 zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return (lhs.hi == rhs.hi) \ + ? 
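+/* Illustration (not part of the header itself): without __int128 support,
+ * zig_u128 and zig_i128 become two-field structs whose hi/lo order follows
+ * the target's endianness, and comparison, continued below, is lexicographic:
+ * the high halves decide unless they tie, each half using the branchless
+ * (a > b) - (a < b) idiom that yields -1, 0, or 1. For example:
+ *
+ *   zig_u128 x = zig_as_u128(1, 0);          // 2 to the 64th
+ *   zig_u128 y = zig_as_u128(0, UINT64_MAX); // one less than that
+ *   // zig_cmp_u128(x, y) == 1 because x.hi > y.hi
+ */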
(lhs.lo > rhs.lo) - (lhs.lo < rhs.lo) \ + : (lhs.hi > rhs.hi) - (lhs.hi < rhs.hi); \ + } +#define zig_bit_int128(Type, operation, operator) \ + static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return (zig_##Type){ .hi = lhs.hi operator rhs.hi, .lo = lhs.lo operator rhs.lo }; \ + } + +#endif /* zig_has_int128 */ + +#define zig_minInt_u128 zig_as_u128(zig_minInt_u64, zig_minInt_u64) +#define zig_maxInt_u128 zig_as_u128(zig_maxInt_u64, zig_maxInt_u64) +#define zig_minInt_i128 zig_as_i128(zig_minInt_i64, zig_minInt_u64) +#define zig_maxInt_i128 zig_as_i128(zig_maxInt_i64, zig_maxInt_u64) + +zig_cmp_int128(u128) +zig_cmp_int128(i128) + +zig_bit_int128(u128, and, &) +zig_bit_int128(i128, and, &) + +zig_bit_int128(u128, or, |) +zig_bit_int128(i128, or, |) + +zig_bit_int128(u128, xor, ^) +zig_bit_int128(i128, xor, ^) + +static inline zig_u128 zig_shr_u128(zig_u128 lhs, zig_u8 rhs); + +#if zig_has_int128 + +static inline zig_u128 zig_not_u128(zig_u128 val, zig_u8 bits) { + return val ^ zig_maxInt(u128, bits); +} + +static inline zig_i128 zig_not_i128(zig_i128 val, zig_u8 bits) { + (void)bits; + return ~val; +} + +static inline zig_u128 zig_shr_u128(zig_u128 lhs, zig_u8 rhs) { + return lhs >> rhs; +} + +static inline zig_u128 zig_shl_u128(zig_u128 lhs, zig_u8 rhs) { + return lhs << rhs; +} + +static inline zig_i128 zig_shl_i128(zig_i128 lhs, zig_u8 rhs) { + return lhs << rhs; +} + +static inline zig_u128 zig_add_u128(zig_u128 lhs, zig_u128 rhs) { + return lhs + rhs; +} + +static inline zig_i128 zig_add_i128(zig_i128 lhs, zig_i128 rhs) { + return lhs + rhs; +} + +static inline zig_u128 zig_sub_u128(zig_u128 lhs, zig_u128 rhs) { + return lhs - rhs; +} + +static inline zig_i128 zig_sub_i128(zig_i128 lhs, zig_i128 rhs) { + return lhs - rhs; +} + +static inline zig_u128 zig_mul_u128(zig_u128 lhs, zig_u128 rhs) { + return lhs * rhs; +} + +static inline zig_i128 zig_mul_i128(zig_i128 lhs, zig_i128 rhs) { + return lhs * rhs; +} + +static inline zig_u128 zig_div_trunc_u128(zig_u128 lhs, zig_u128 rhs) { + return lhs / rhs; +} + +static inline zig_i128 zig_div_trunc_i128(zig_i128 lhs, zig_i128 rhs) { + return lhs / rhs; +} + +static inline zig_u128 zig_rem_u128(zig_u128 lhs, zig_u128 rhs) { + return lhs % rhs; +} + +static inline zig_i128 zig_rem_i128(zig_i128 lhs, zig_i128 rhs) { + return lhs % rhs; +} + +static inline zig_i128 zig_div_floor_i128(zig_i128 lhs, zig_i128 rhs) { + return zig_div_trunc_i128(lhs, rhs) - (((lhs ^ rhs) & zig_rem_i128(lhs, rhs)) < zig_as_i128(0, 0)); +} + +static inline zig_i128 zig_mod_i128(zig_i128 lhs, zig_i128 rhs) { + zig_i128 rem = zig_rem_i128(lhs, rhs); + return rem + (((lhs ^ rhs) & rem) < zig_as_i128(0, 0) ? 
rhs : zig_as_i128(0, 0)); +} + +#else /* zig_has_int128 */ + +static inline zig_u128 zig_not_u128(zig_u128 val, zig_u8 bits) { + return (zig_u128){ .hi = zig_not_u64(val.hi, bits - zig_as_u8(64)), .lo = zig_not_u64(val.lo, zig_as_u8(64)) }; +} + +static inline zig_i128 zig_not_i128(zig_i128 val, zig_u8 bits) { + return (zig_i128){ .hi = zig_not_i64(val.hi, bits - zig_as_u8(64)), .lo = zig_not_u64(val.lo, zig_as_u8(64)) }; +} + +static inline zig_u128 zig_shr_u128(zig_u128 lhs, zig_u8 rhs) { + if (rhs == zig_as_u8(0)) return lhs; + if (rhs >= zig_as_u8(64)) return (zig_u128){ .hi = zig_minInt_u64, .lo = lhs.hi >> (rhs - zig_as_u8(64)) }; + return (zig_u128){ .hi = lhs.hi >> rhs, .lo = lhs.hi << (zig_as_u8(64) - rhs) | lhs.lo >> rhs }; +} + +static inline zig_u128 zig_shl_u128(zig_u128 lhs, zig_u8 rhs) { + if (rhs == zig_as_u8(0)) return lhs; + if (rhs >= zig_as_u8(64)) return (zig_u128){ .hi = lhs.lo << (rhs - zig_as_u8(64)), .lo = zig_minInt_u64 }; + return (zig_u128){ .hi = lhs.hi << rhs | lhs.lo >> (zig_as_u8(64) - rhs), .lo = lhs.lo << rhs }; +} + +static inline zig_i128 zig_shl_i128(zig_i128 lhs, zig_u8 rhs) { + if (rhs == zig_as_u8(0)) return lhs; + if (rhs >= zig_as_u8(64)) return (zig_i128){ .hi = lhs.lo << (rhs - zig_as_u8(64)), .lo = zig_minInt_u64 }; + return (zig_i128){ .hi = lhs.hi << rhs | lhs.lo >> (zig_as_u8(64) - rhs), .lo = lhs.lo << rhs }; +} + +static inline zig_u128 zig_add_u128(zig_u128 lhs, zig_u128 rhs) { + zig_u128 res; + res.hi = lhs.hi + rhs.hi + zig_addo_u64(&res.lo, lhs.lo, rhs.lo, 64); + return res; +} + +static inline zig_i128 zig_add_i128(zig_i128 lhs, zig_i128 rhs) { + zig_i128 res; + res.hi = lhs.hi + rhs.hi + zig_addo_u64(&res.lo, lhs.lo, rhs.lo, 64); + return res; +} + +static inline zig_u128 zig_sub_u128(zig_u128 lhs, zig_u128 rhs) { + zig_u128 res; + res.hi = lhs.hi - rhs.hi - zig_subo_u64(&res.lo, lhs.lo, rhs.lo, 64); + return res; +} + +static inline zig_i128 zig_sub_i128(zig_i128 lhs, zig_i128 rhs) { + zig_i128 res; + res.hi = lhs.hi - rhs.hi - zig_subo_u64(&res.lo, lhs.lo, rhs.lo, 64); + return res; +} + +zig_extern zig_i128 __multi3(zig_i128 lhs, zig_i128 rhs); +static zig_u128 zig_mul_u128(zig_u128 lhs, zig_u128 rhs) { + return zig_bitcast_u128(__multi3(zig_bitcast_i128(lhs), zig_bitcast_i128(rhs))); +} + +static zig_i128 zig_mul_i128(zig_i128 lhs, zig_i128 rhs) { + return __multi3(lhs, rhs); +} + +zig_extern zig_u128 __udivti3(zig_u128 lhs, zig_u128 rhs); +static zig_u128 zig_div_trunc_u128(zig_u128 lhs, zig_u128 rhs) { + return __udivti3(lhs, rhs); +}; + +zig_extern zig_i128 __divti3(zig_i128 lhs, zig_i128 rhs); +static zig_i128 zig_div_trunc_i128(zig_i128 lhs, zig_i128 rhs) { + return __divti3(lhs, rhs); +}; + +zig_extern zig_u128 __umodti3(zig_u128 lhs, zig_u128 rhs); +static zig_u128 zig_rem_u128(zig_u128 lhs, zig_u128 rhs) { + return __umodti3(lhs, rhs); +} + +zig_extern zig_i128 __modti3(zig_i128 lhs, zig_i128 rhs); +static zig_i128 zig_rem_i128(zig_i128 lhs, zig_i128 rhs) { + return __modti3(lhs, rhs); +} + +static inline zig_i128 zig_mod_i128(zig_i128 lhs, zig_i128 rhs) { + zig_i128 rem = zig_rem_i128(lhs, rhs); + return zig_add_i128(rem, (((lhs.hi ^ rhs.hi) & rem.hi) < zig_as_i64(0) ? 
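+/* Worked example (illustrative): the struct-based zig_shr_u128 above splits
+ * on the shift amount: for shifts of 64 or more only the high half survives,
+ * moved into the low position; for smaller shifts both halves move and the
+ * bits crossing the 64-bit boundary are OR-ed across. Shifting right by 100:
+ *
+ *   zig_shr_u128(zig_as_u128(hi, lo), 100) == zig_as_u128(0, hi >> 36)
+ */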
rhs : zig_as_i128(0, 0))); +} + +static inline zig_i128 zig_div_floor_i128(zig_i128 lhs, zig_i128 rhs) { + return zig_sub_i128(zig_div_trunc_i128(lhs, rhs), zig_as_i128(0, zig_cmp_i128(zig_and_i128(zig_xor_i128(lhs, rhs), zig_rem_i128(lhs, rhs)), zig_as_i128(0, 0)) < zig_as_i32(0))); +} + +#endif /* zig_has_int128 */ + +#define zig_div_floor_u128 zig_div_trunc_u128 +#define zig_mod_u128 zig_rem_u128 + +static inline zig_u128 zig_nand_u128(zig_u128 lhs, zig_u128 rhs) { + return zig_not_u128(zig_and_u128(lhs, rhs), 128); +} + +static inline zig_u128 zig_min_u128(zig_u128 lhs, zig_u128 rhs) { + return zig_cmp_u128(lhs, rhs) < zig_as_i32(0) ? lhs : rhs; +} + +static inline zig_i128 zig_min_i128(zig_i128 lhs, zig_i128 rhs) { + return zig_cmp_i128(lhs, rhs) < zig_as_i32(0) ? lhs : rhs; +} + +static inline zig_u128 zig_max_u128(zig_u128 lhs, zig_u128 rhs) { + return zig_cmp_u128(lhs, rhs) > zig_as_i32(0) ? lhs : rhs; +} + +static inline zig_i128 zig_max_i128(zig_i128 lhs, zig_i128 rhs) { + return zig_cmp_i128(lhs, rhs) > zig_as_i32(0) ? lhs : rhs; +} + +static inline zig_i128 zig_shr_i128(zig_i128 lhs, zig_u8 rhs) { + zig_i128 sign_mask = zig_cmp_i128(lhs, zig_as_i128(0, 0)) < zig_as_i32(0) ? zig_sub_i128(zig_as_i128(0, 0), zig_as_i128(0, 1)) : zig_as_i128(0, 0); + return zig_xor_i128(zig_bitcast_i128(zig_shr_u128(zig_bitcast_u128(zig_xor_i128(lhs, sign_mask)), rhs)), sign_mask); +} + +static inline zig_u128 zig_wrap_u128(zig_u128 val, zig_u8 bits) { + return zig_and_u128(val, zig_maxInt(u128, bits)); +} + +static inline zig_i128 zig_wrap_i128(zig_i128 val, zig_u8 bits) { + return zig_as_i128(zig_wrap_i64(zig_hi_i128(val), bits - zig_as_u8(64)), zig_lo_i128(val)); +} + +static inline zig_u128 zig_shlw_u128(zig_u128 lhs, zig_u8 rhs, zig_u8 bits) { + return zig_wrap_u128(zig_shl_u128(lhs, rhs), bits); +} + +static inline zig_i128 zig_shlw_i128(zig_i128 lhs, zig_u8 rhs, zig_u8 bits) { + return zig_wrap_i128(zig_bitcast_i128(zig_shl_u128(zig_bitcast_u128(lhs), rhs)), bits); +} + +static inline zig_u128 zig_addw_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + return zig_wrap_u128(zig_add_u128(lhs, rhs), bits); +} + +static inline zig_i128 zig_addw_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + return zig_wrap_i128(zig_bitcast_i128(zig_add_u128(zig_bitcast_u128(lhs), zig_bitcast_u128(rhs))), bits); +} + +static inline zig_u128 zig_subw_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + return zig_wrap_u128(zig_sub_u128(lhs, rhs), bits); +} + +static inline zig_i128 zig_subw_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + return zig_wrap_i128(zig_bitcast_i128(zig_sub_u128(zig_bitcast_u128(lhs), zig_bitcast_u128(rhs))), bits); +} + +static inline zig_u128 zig_mulw_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + return zig_wrap_u128(zig_mul_u128(lhs, rhs), bits); +} + +static inline zig_i128 zig_mulw_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + return zig_wrap_i128(zig_bitcast_i128(zig_mul_u128(zig_bitcast_u128(lhs), zig_bitcast_u128(rhs))), bits); +} + +#if zig_has_int128 + +static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) + zig_u128 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u128(full_res, bits); + return overflow || full_res < zig_minInt(u128, bits) || full_res > zig_maxInt(u128, bits); +#else + *res = zig_addw_u128(lhs, rhs, bits); + return *res < lhs; +#endif +} + +zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); +static inline bool 
zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +#if zig_has_builtin(add_overflow) + zig_i128 full_res; + bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i128(full_res, bits); + return overflow || full_res < zig_minInt(i128, bits) || full_res > zig_maxInt(i128, bits); +} + +static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) + zig_u128 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u128(full_res, bits); + return overflow || full_res < zig_minInt(u128, bits) || full_res > zig_maxInt(u128, bits); +#else + *res = zig_subw_u128(lhs, rhs, bits); + return *res > lhs; +#endif +} + +zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); +static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +#if zig_has_builtin(sub_overflow) + zig_i128 full_res; + bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i128(full_res, bits); + return overflow || full_res < zig_minInt(i128, bits) || full_res > zig_maxInt(i128, bits); +} + +static inline bool zig_mulo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) + zig_u128 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); + *res = zig_wrap_u128(full_res, bits); + return overflow || full_res < zig_minInt(u128, bits) || full_res > zig_maxInt(u128, bits); +#else + *res = zig_mulw_u128(lhs, rhs, bits); + return rhs != zig_as_u128(0, 0) && lhs > zig_maxInt(u128, bits) / rhs; +#endif +} + +zig_extern zig_i128 __muloti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); +static inline bool zig_mulo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { +#if zig_has_builtin(mul_overflow) + zig_i128 full_res; + bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res); +#else + zig_c_int overflow_int; + zig_i128 full_res = __muloti4(lhs, rhs, &overflow_int); + bool overflow = overflow_int != 0; +#endif + *res = zig_wrap_i128(full_res, bits); + return overflow || full_res < zig_minInt(i128, bits) || full_res > zig_maxInt(i128, bits); +} + +#else /* zig_has_int128 */ + +static inline bool zig_overflow_u128(bool overflow, zig_u128 full_res, zig_u8 bits) { + return overflow || + zig_cmp_u128(full_res, zig_minInt(u128, bits)) < zig_as_i32(0) || + zig_cmp_u128(full_res, zig_maxInt(u128, bits)) > zig_as_i32(0); +} + +static inline bool zig_overflow_i128(bool overflow, zig_i128 full_res, zig_u8 bits) { + return overflow || + zig_cmp_i128(full_res, zig_minInt(i128, bits)) < zig_as_i32(0) || + zig_cmp_i128(full_res, zig_maxInt(i128, bits)) > zig_as_i32(0); +} + +static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + zig_u128 full_res; + bool overflow = + zig_addo_u64(&full_res.hi, lhs.hi, rhs.hi, 64) | + zig_addo_u64(&full_res.hi, full_res.hi, zig_addo_u64(&full_res.lo, lhs.lo, rhs.lo, 64), 64); + *res = zig_wrap_u128(full_res, bits); + return zig_overflow_u128(overflow, full_res, bits); +} + +zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); +static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 
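+/* Worked example (illustrative): the struct fallback of zig_addo_u128 above
+ * chains 64-bit overflow adds: the carry out of the low halves (0 or 1)
+ * feeds the high halves, and the two high-half overflow flags are OR-ed,
+ * while the signed variants defer to compiler_rt's __addoti4. Adding 1 to
+ * maxInt(u128) carries through both halves:
+ *
+ *   zig_u128 r;
+ *   bool ov = zig_addo_u128(&r, zig_maxInt_u128, zig_as_u128(0, 1), 128);
+ *   // ov == true, r == zig_as_u128(0, 0)
+ */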
rhs, zig_u8 bits) { + zig_c_int overflow_int; + zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); + *res = zig_wrap_i128(full_res, bits); + return zig_overflow_i128(overflow_int, full_res, bits); +} + +static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + zig_u128 full_res; + bool overflow = + zig_subo_u64(&full_res.hi, lhs.hi, rhs.hi, 64) | + zig_subo_u64(&full_res.hi, full_res.hi, zig_subo_u64(&full_res.lo, lhs.lo, rhs.lo, 64), 64); + *res = zig_wrap_u128(full_res, bits); + return zig_overflow_u128(overflow, full_res, bits); +} + +zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); +static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + zig_c_int overflow_int; + zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int); + *res = zig_wrap_i128(full_res, bits); + return zig_overflow_i128(overflow_int, full_res, bits); +} + +static inline bool zig_mulo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + *res = zig_mulw_u128(lhs, rhs, bits); + return zig_cmp_u128(*res, zig_as_u128(0, 0)) != zig_as_i32(0) && + zig_cmp_u128(lhs, zig_div_trunc_u128(zig_maxInt(u128, bits), rhs)) > zig_as_i32(0); +} + +zig_extern zig_i128 __muloti4(zig_i128 lhs, zig_i128 rhs, zig_c_int *overflow); +static inline bool zig_mulo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + zig_c_int overflow_int; + zig_i128 full_res = __muloti4(lhs, rhs, &overflow_int); + *res = zig_wrap_i128(full_res, bits); + return zig_overflow_i128(overflow_int, full_res, bits); +} + +#endif /* zig_has_int128 */ + +static inline bool zig_shlo_u128(zig_u128 *res, zig_u128 lhs, zig_u8 rhs, zig_u8 bits) { + *res = zig_shlw_u128(lhs, rhs, bits); + return zig_cmp_u128(lhs, zig_shr_u128(zig_maxInt(u128, bits), rhs)) > zig_as_i32(0); +} + +static inline bool zig_shlo_i128(zig_i128 *res, zig_i128 lhs, zig_u8 rhs, zig_u8 bits) { + *res = zig_shlw_i128(lhs, rhs, bits); + zig_i128 mask = zig_bitcast_i128(zig_shl_u128(zig_maxInt_u128, bits - rhs - zig_as_u8(1))); + return zig_cmp_i128(zig_and_i128(lhs, mask), zig_as_i128(0, 0)) != zig_as_i32(0) && + zig_cmp_i128(zig_and_i128(lhs, mask), mask) != zig_as_i32(0); +} + +static inline zig_u128 zig_shls_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + zig_u128 res; + if (zig_cmp_u128(rhs, zig_as_u128(0, bits)) >= zig_as_i32(0)) + return zig_cmp_u128(lhs, zig_as_u128(0, 0)) != zig_as_i32(0) ? zig_maxInt(u128, bits) : lhs; + +#if zig_has_int128 + return zig_shlo_u128(&res, lhs, (zig_u8)rhs, bits) ? zig_maxInt(u128, bits) : res; +#else + return zig_shlo_u128(&res, lhs, (zig_u8)rhs.lo, bits) ? zig_maxInt(u128, bits) : res; +#endif +} + +static inline zig_i128 zig_shls_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + zig_i128 res; + if (zig_cmp_u128(zig_bitcast_u128(rhs), zig_as_u128(0, bits)) < zig_as_i32(0) && !zig_shlo_i128(&res, lhs, zig_lo_i128(rhs), bits)) return res; + return zig_cmp_i128(lhs, zig_as_i128(0, 0)) < zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); +} + +static inline zig_u128 zig_adds_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + zig_u128 res; + return zig_addo_u128(&res, lhs, rhs, bits) ? zig_maxInt(u128, bits) : res; +} + +static inline zig_i128 zig_adds_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + zig_i128 res; + if (!zig_addo_i128(&res, lhs, rhs, bits)) return res; + return zig_cmp_i128(res, zig_as_i128(0, 0)) >= zig_as_i32(0) ? 
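/* on overflow the wrapped sum has the opposite sign of the true result, so a non-negative res means the true sum overflowed toward the negative limit */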
zig_minInt(i128, bits) : zig_maxInt(i128, bits); +} + +static inline zig_u128 zig_subs_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + zig_u128 res; + return zig_subo_u128(&res, lhs, rhs, bits) ? zig_minInt(u128, bits) : res; +} + +static inline zig_i128 zig_subs_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + zig_i128 res; + if (!zig_subo_i128(&res, lhs, rhs, bits)) return res; + return zig_cmp_i128(res, zig_as_i128(0, 0)) >= zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); +} + +static inline zig_u128 zig_muls_u128(zig_u128 lhs, zig_u128 rhs, zig_u8 bits) { + zig_u128 res; + return zig_mulo_u128(&res, lhs, rhs, bits) ? zig_maxInt(u128, bits) : res; +} + +static inline zig_i128 zig_muls_i128(zig_i128 lhs, zig_i128 rhs, zig_u8 bits) { + zig_i128 res; + if (!zig_mulo_i128(&res, lhs, rhs, bits)) return res; + return zig_cmp_i128(zig_xor_i128(lhs, rhs), zig_as_i128(0, 0)) < zig_as_i32(0) ? zig_minInt(i128, bits) : zig_maxInt(i128, bits); +} + +static inline zig_u8 zig_clz_u128(zig_u128 val, zig_u8 bits) { + if (bits <= zig_as_u8(64)) return zig_clz_u64(zig_lo_u128(val), bits); + if (zig_hi_u128(val) != 0) return zig_clz_u64(zig_hi_u128(val), bits - zig_as_u8(64)); + return zig_clz_u64(zig_lo_u128(val), zig_as_u8(64)) + (bits - zig_as_u8(64)); +} + +static inline zig_u8 zig_clz_i128(zig_i128 val, zig_u8 bits) { + return zig_clz_u128(zig_bitcast_u128(val), bits); +} + +static inline zig_u8 zig_ctz_u128(zig_u128 val, zig_u8 bits) { + if (zig_lo_u128(val) != 0) return zig_ctz_u64(zig_lo_u128(val), zig_as_u8(64)); + return zig_ctz_u64(zig_hi_u128(val), bits - zig_as_u8(64)) + zig_as_u8(64); +} + +static inline zig_u8 zig_ctz_i128(zig_i128 val, zig_u8 bits) { + return zig_ctz_u128(zig_bitcast_u128(val), bits); +} + +static inline zig_u8 zig_popcount_u128(zig_u128 val, zig_u8 bits) { + return zig_popcount_u64(zig_hi_u128(val), bits - zig_as_u8(64)) + + zig_popcount_u64(zig_lo_u128(val), zig_as_u8(64)); +} + +static inline zig_u8 zig_popcount_i128(zig_i128 val, zig_u8 bits) { + return zig_popcount_u128(zig_bitcast_u128(val), bits); +} + +static inline zig_u128 zig_byte_swap_u128(zig_u128 val, zig_u8 bits) { + zig_u128 full_res; +#if zig_has_builtin(bswap128) + full_res = __builtin_bswap128(val); +#else + full_res = zig_as_u128(zig_byte_swap_u64(zig_lo_u128(val), zig_as_u8(64)), + zig_byte_swap_u64(zig_hi_u128(val), zig_as_u8(64))); +#endif + return zig_shr_u128(full_res, zig_as_u8(128) - bits); +} + +static inline zig_i128 zig_byte_swap_i128(zig_i128 val, zig_u8 bits) { + return zig_bitcast_i128(zig_byte_swap_u128(zig_bitcast_u128(val), bits)); +} + +static inline zig_u128 zig_bit_reverse_u128(zig_u128 val, zig_u8 bits) { + return zig_shr_u128(zig_as_u128(zig_bit_reverse_u64(zig_lo_u128(val), zig_as_u8(64)), + zig_bit_reverse_u64(zig_hi_u128(val), zig_as_u8(64))), + zig_as_u8(128) - bits); +} + +static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, zig_u8 bits) { + return zig_bitcast_i128(zig_bit_reverse_u128(zig_bitcast_u128(val), bits)); +} + +/* ========================= Floating Point Support ========================= */ + +#if _MSC_VER +#define zig_msvc_flt_inf ((double)(1e+300 * 1e+300)) +#define zig_msvc_flt_inff ((float)(1e+300 * 1e+300)) +#define zig_msvc_flt_infl ((long double)(1e+300 * 1e+300)) +#define zig_msvc_flt_nan ((double)(zig_msvc_flt_inf * 0.f)) +#define zig_msvc_flt_nanf ((float)(zig_msvc_flt_inf * 0.f)) +#define zig_msvc_flt_nanl ((long double)(zig_msvc_flt_inf * 0.f)) +#define __builtin_nan(str) nan(str) +#define __builtin_nanf(str) nanf(str) +#define 
__builtin_nanl(str) nanl(str) +#define __builtin_inf() zig_msvc_flt_inf +#define __builtin_inff() zig_msvc_flt_inff +#define __builtin_infl() zig_msvc_flt_infl +#endif + +#if (zig_has_builtin(nan) && zig_has_builtin(nans) && zig_has_builtin(inf)) || defined(zig_gnuc) +#define zig_has_float_builtins 1 +#define zig_as_special_f16(sign, name, arg, repr) sign zig_as_f16(__builtin_##name, )(arg) +#define zig_as_special_f32(sign, name, arg, repr) sign zig_as_f32(__builtin_##name, )(arg) +#define zig_as_special_f64(sign, name, arg, repr) sign zig_as_f64(__builtin_##name, )(arg) +#define zig_as_special_f80(sign, name, arg, repr) sign zig_as_f80(__builtin_##name, )(arg) +#define zig_as_special_f128(sign, name, arg, repr) sign zig_as_f128(__builtin_##name, )(arg) +#define zig_as_special_c_longdouble(sign, name, arg, repr) sign zig_as_c_longdouble(__builtin_##name, )(arg) +#else +#define zig_has_float_builtins 0 +#define zig_as_special_f16(sign, name, arg, repr) zig_float_from_repr_f16(repr) +#define zig_as_special_f32(sign, name, arg, repr) zig_float_from_repr_f32(repr) +#define zig_as_special_f64(sign, name, arg, repr) zig_float_from_repr_f64(repr) +#define zig_as_special_f80(sign, name, arg, repr) zig_float_from_repr_f80(repr) +#define zig_as_special_f128(sign, name, arg, repr) zig_float_from_repr_f128(repr) +#define zig_as_special_c_longdouble(sign, name, arg, repr) zig_float_from_repr_c_longdouble(repr) +#endif + +#define zig_has_f16 1 +#define zig_bitSizeOf_f16 16 +#define zig_libc_name_f16(name) __##name##h +#define zig_as_special_constant_f16(sign, name, arg, repr) zig_as_special_f16(sign, name, arg, repr) +#if FLT_MANT_DIG == 11 +typedef float zig_f16; +#define zig_as_f16(fp, repr) fp##f +#elif DBL_MANT_DIG == 11 +typedef double zig_f16; +#define zig_as_f16(fp, repr) fp +#elif LDBL_MANT_DIG == 11 +#define zig_bitSizeOf_c_longdouble 16 +typedef long double zig_f16; +#define zig_as_f16(fp, repr) fp##l +#elif FLT16_MANT_DIG == 11 && (zig_has_builtin(inff16) || defined(zig_gnuc)) +typedef _Float16 zig_f16; +#define zig_as_f16(fp, repr) fp##f16 +#elif defined(__SIZEOF_FP16__) +typedef __fp16 zig_f16; +#define zig_as_f16(fp, repr) fp##f16 +#else +#undef zig_has_f16 +#define zig_has_f16 0 +#define zig_repr_f16 i16 +typedef zig_i16 zig_f16; +#define zig_as_f16(fp, repr) repr +#undef zig_as_special_f16 +#define zig_as_special_f16(sign, name, arg, repr) repr +#undef zig_as_special_constant_f16 +#define zig_as_special_constant_f16(sign, name, arg, repr) repr +#endif + +#define zig_has_f32 1 +#define zig_bitSizeOf_f32 32 +#define zig_libc_name_f32(name) name##f +#if _MSC_VER +#define zig_as_special_constant_f32(sign, name, arg, repr) sign zig_as_f32(zig_msvc_flt_##name, ) +#else +#define zig_as_special_constant_f32(sign, name, arg, repr) zig_as_special_f32(sign, name, arg, repr) +#endif +#if FLT_MANT_DIG == 24 +typedef float zig_f32; +#define zig_as_f32(fp, repr) fp##f +#elif DBL_MANT_DIG == 24 +typedef double zig_f32; +#define zig_as_f32(fp, repr) fp +#elif LDBL_MANT_DIG == 24 +#define zig_bitSizeOf_c_longdouble 32 +typedef long double zig_f32; +#define zig_as_f32(fp, repr) fp##l +#elif FLT32_MANT_DIG == 24 +typedef _Float32 zig_f32; +#define zig_as_f32(fp, repr) fp##f32 +#else +#undef zig_has_f32 +#define zig_has_f32 0 +#define zig_repr_f32 i32 +typedef zig_i32 zig_f32; +#define zig_as_f32(fp, repr) repr +#undef zig_as_special_f32 +#define zig_as_special_f32(sign, name, arg, repr) repr +#undef zig_as_special_constant_f32 +#define zig_as_special_constant_f32(sign, name, arg, repr) repr +#endif + 
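The preprocessor ladders above and below pick, for each Zig float width, a host C type whose mantissa width matches exactly, and fall back to an integer carrier type (with the arithmetic routed through compiler_rt) when the target has no such type. A minimal sketch of the same detection technique, with illustrative names that are not part of zig.h:

    #include <float.h>

    /* Select a host type for IEEE binary32 by mantissa width. */
    #if FLT_MANT_DIG == 24
    typedef float my_f32;        /* the common case */
    #elif DBL_MANT_DIG == 24
    typedef double my_f32;       /* unusual ABIs where double is binary32 */
    #else
    typedef unsigned int my_f32; /* no native type: operate on the raw bit pattern */
    #endif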
+#define zig_has_f64 1 +#define zig_bitSizeOf_f64 64 +#define zig_libc_name_f64(name) name +#if _MSC_VER +#ifdef ZIG_TARGET_ABI_MSVC +#define zig_bitSizeOf_c_longdouble 64 +#endif +#define zig_as_special_constant_f64(sign, name, arg, repr) sign zig_as_f64(zig_msvc_flt_##name, ) +#else /* _MSC_VER */ +#define zig_as_special_constant_f64(sign, name, arg, repr) zig_as_special_f64(sign, name, arg, repr) +#endif /* _MSC_VER */ +#if FLT_MANT_DIG == 53 +typedef float zig_f64; +#define zig_as_f64(fp, repr) fp##f +#elif DBL_MANT_DIG == 53 +typedef double zig_f64; +#define zig_as_f64(fp, repr) fp +#elif LDBL_MANT_DIG == 53 +#define zig_bitSizeOf_c_longdouble 64 +typedef long double zig_f64; +#define zig_as_f64(fp, repr) fp##l +#elif FLT64_MANT_DIG == 53 +typedef _Float64 zig_f64; +#define zig_as_f64(fp, repr) fp##f64 +#elif FLT32X_MANT_DIG == 53 +typedef _Float32x zig_f64; +#define zig_as_f64(fp, repr) fp##f32x +#else +#undef zig_has_f64 +#define zig_has_f64 0 +#define zig_repr_f64 i64 +typedef zig_i64 zig_f64; +#define zig_as_f64(fp, repr) repr +#undef zig_as_special_f64 +#define zig_as_special_f64(sign, name, arg, repr) repr +#undef zig_as_special_constant_f64 +#define zig_as_special_constant_f64(sign, name, arg, repr) repr +#endif + +#define zig_has_f80 1 +#define zig_bitSizeOf_f80 80 +#define zig_libc_name_f80(name) __##name##x +#define zig_as_special_constant_f80(sign, name, arg, repr) zig_as_special_f80(sign, name, arg, repr) +#if FLT_MANT_DIG == 64 +typedef float zig_f80; +#define zig_as_f80(fp, repr) fp##f +#elif DBL_MANT_DIG == 64 +typedef double zig_f80; +#define zig_as_f80(fp, repr) fp +#elif LDBL_MANT_DIG == 64 +#define zig_bitSizeOf_c_longdouble 80 +typedef long double zig_f80; +#define zig_as_f80(fp, repr) fp##l +#elif FLT80_MANT_DIG == 64 +typedef _Float80 zig_f80; +#define zig_as_f80(fp, repr) fp##f80 +#elif FLT64X_MANT_DIG == 64 +typedef _Float64x zig_f80; +#define zig_as_f80(fp, repr) fp##f64x +#elif defined(__SIZEOF_FLOAT80__) +typedef __float80 zig_f80; +#define zig_as_f80(fp, repr) fp##l +#else +#undef zig_has_f80 +#define zig_has_f80 0 +#define zig_repr_f80 i128 +typedef zig_i128 zig_f80; +#define zig_as_f80(fp, repr) repr +#undef zig_as_special_f80 +#define zig_as_special_f80(sign, name, arg, repr) repr +#undef zig_as_special_constant_f80 +#define zig_as_special_constant_f80(sign, name, arg, repr) repr +#endif + +#define zig_has_f128 1 +#define zig_bitSizeOf_f128 128 +#define zig_libc_name_f128(name) name##q +#define zig_as_special_constant_f128(sign, name, arg, repr) zig_as_special_f128(sign, name, arg, repr) +#if FLT_MANT_DIG == 113 +typedef float zig_f128; +#define zig_as_f128(fp, repr) fp##f +#elif DBL_MANT_DIG == 113 +typedef double zig_f128; +#define zig_as_f128(fp, repr) fp +#elif LDBL_MANT_DIG == 113 +#define zig_bitSizeOf_c_longdouble 128 +typedef long double zig_f128; +#define zig_as_f128(fp, repr) fp##l +#elif FLT128_MANT_DIG == 113 +typedef _Float128 zig_f128; +#define zig_as_f128(fp, repr) fp##f128 +#elif FLT64X_MANT_DIG == 113 +typedef _Float64x zig_f128; +#define zig_as_f128(fp, repr) fp##f64x +#elif defined(__SIZEOF_FLOAT128__) +typedef __float128 zig_f128; +#define zig_as_f128(fp, repr) fp##q +#undef zig_as_special_f128 +#define zig_as_special_f128(sign, name, arg, repr) sign __builtin_##name##f128(arg) +#else +#undef zig_has_f128 +#define zig_has_f128 0 +#define zig_repr_f128 i128 +typedef zig_i128 zig_f128; +#define zig_as_f128(fp, repr) repr +#undef zig_as_special_f128 +#define zig_as_special_f128(sign, name, arg, repr) repr +#undef 
zig_as_special_constant_f128 +#define zig_as_special_constant_f128(sign, name, arg, repr) repr +#endif + +#define zig_has_c_longdouble 1 + +#ifdef ZIG_TARGET_ABI_MSVC +#define zig_libc_name_c_longdouble(name) name +#else +#define zig_libc_name_c_longdouble(name) name##l +#endif + +#define zig_as_special_constant_c_longdouble(sign, name, arg, repr) zig_as_special_c_longdouble(sign, name, arg, repr) +#ifdef zig_bitSizeOf_c_longdouble + +#ifdef ZIG_TARGET_ABI_MSVC +typedef double zig_c_longdouble; +#undef zig_bitSizeOf_c_longdouble +#define zig_bitSizeOf_c_longdouble 64 +#define zig_as_c_longdouble(fp, repr) fp +#else +typedef long double zig_c_longdouble; +#define zig_as_c_longdouble(fp, repr) fp##l +#endif + +#else /* zig_bitSizeOf_c_longdouble */ + +#undef zig_has_c_longdouble +#define zig_has_c_longdouble 0 +#define zig_bitSizeOf_c_longdouble 80 +#define zig_compiler_rt_abbrev_c_longdouble zig_compiler_rt_abbrev_f80 +#define zig_repr_c_longdouble i128 +typedef zig_i128 zig_c_longdouble; +#define zig_as_c_longdouble(fp, repr) repr +#undef zig_as_special_c_longdouble +#define zig_as_special_c_longdouble(sign, name, arg, repr) repr +#undef zig_as_special_constant_c_longdouble +#define zig_as_special_constant_c_longdouble(sign, name, arg, repr) repr + +#endif /* zig_bitSizeOf_c_longdouble */ + +#if !zig_has_float_builtins +#define zig_float_from_repr(Type, ReprType) \ + static inline zig_##Type zig_float_from_repr_##Type(zig_##ReprType repr) { \ + return *((zig_##Type*)&repr); \ + } + +zig_float_from_repr(f16, u16) +zig_float_from_repr(f32, u32) +zig_float_from_repr(f64, u64) +zig_float_from_repr(f80, u128) +zig_float_from_repr(f128, u128) +#if zig_bitSizeOf_c_longdouble == 80 +zig_float_from_repr(c_longdouble, u128) +#else +#define zig_expand_float_from_repr(Type, ReprType) zig_float_from_repr(Type, ReprType) +zig_expand_float_from_repr(c_longdouble, zig_expand_concat(u, zig_bitSizeOf_c_longdouble)) +#endif +#endif + +#define zig_cast_f16 (zig_f16) +#define zig_cast_f32 (zig_f32) +#define zig_cast_f64 (zig_f64) + +#if _MSC_VER && !zig_has_f128 +#define zig_cast_f80 +#define zig_cast_c_longdouble +#define zig_cast_f128 +#else +#define zig_cast_f80 (zig_f80) +#define zig_cast_c_longdouble (zig_c_longdouble) +#define zig_cast_f128 (zig_f128) +#endif + +#define zig_convert_builtin(ResType, operation, ArgType, version) \ + zig_extern zig_##ResType zig_expand_concat(zig_expand_concat(zig_expand_concat(__##operation, \ + zig_compiler_rt_abbrev_##ArgType), zig_compiler_rt_abbrev_##ResType), version)(zig_##ArgType); +zig_convert_builtin(f16, trunc, f32, 2) +zig_convert_builtin(f16, trunc, f64, 2) +zig_convert_builtin(f16, trunc, f80, 2) +zig_convert_builtin(f16, trunc, f128, 2) +zig_convert_builtin(f32, extend, f16, 2) +zig_convert_builtin(f32, trunc, f64, 2) +zig_convert_builtin(f32, trunc, f80, 2) +zig_convert_builtin(f32, trunc, f128, 2) +zig_convert_builtin(f64, extend, f16, 2) +zig_convert_builtin(f64, extend, f32, 2) +zig_convert_builtin(f64, trunc, f80, 2) +zig_convert_builtin(f64, trunc, f128, 2) +zig_convert_builtin(f80, extend, f16, 2) +zig_convert_builtin(f80, extend, f32, 2) +zig_convert_builtin(f80, extend, f64, 2) +zig_convert_builtin(f80, trunc, f128, 2) +zig_convert_builtin(f128, extend, f16, 2) +zig_convert_builtin(f128, extend, f32, 2) +zig_convert_builtin(f128, extend, f64, 2) +zig_convert_builtin(f128, extend, f80, 2) + +#define zig_float_negate_builtin_0(Type) \ + static inline zig_##Type zig_neg_##Type(zig_##Type arg) { \ + return zig_expand_concat(zig_xor_, 
zig_repr_##Type)(arg, zig_expand_minInt(zig_repr_##Type, zig_bitSizeOf_##Type)); \ + } +#define zig_float_negate_builtin_1(Type) \ + static inline zig_##Type zig_neg_##Type(zig_##Type arg) { \ + return -arg; \ + } + +#define zig_float_less_builtin_0(Type, operation) \ + zig_extern zig_i32 zig_expand_concat(zig_expand_concat(__##operation, \ + zig_compiler_rt_abbrev_##Type), 2)(zig_##Type, zig_##Type); \ + static inline zig_i32 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_##Type), 2)(lhs, rhs); \ + } +#define zig_float_less_builtin_1(Type, operation) \ + static inline zig_i32 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return (!(lhs <= rhs) - (lhs < rhs)); \ + } + +#define zig_float_greater_builtin_0(Type, operation) \ + zig_float_less_builtin_0(Type, operation) +#define zig_float_greater_builtin_1(Type, operation) \ + static inline zig_i32 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return ((lhs > rhs) - !(lhs >= rhs)); \ + } + +#define zig_float_binary_builtin_0(Type, operation, operator) \ + zig_extern zig_##Type zig_expand_concat(zig_expand_concat(__##operation, \ + zig_compiler_rt_abbrev_##Type), 3)(zig_##Type, zig_##Type); \ + static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_##Type), 3)(lhs, rhs); \ + } +#define zig_float_binary_builtin_1(Type, operation, operator) \ + static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return lhs operator rhs; \ + } + +#define zig_float_builtins(Type) \ + zig_convert_builtin(i32, fix, Type, ) \ + zig_convert_builtin(u32, fixuns, Type, ) \ + zig_convert_builtin(i64, fix, Type, ) \ + zig_convert_builtin(u64, fixuns, Type, ) \ + zig_convert_builtin(i128, fix, Type, ) \ + zig_convert_builtin(u128, fixuns, Type, ) \ + zig_convert_builtin(Type, float, i32, ) \ + zig_convert_builtin(Type, floatun, u32, ) \ + zig_convert_builtin(Type, float, i64, ) \ + zig_convert_builtin(Type, floatun, u64, ) \ + zig_convert_builtin(Type, float, i128, ) \ + zig_convert_builtin(Type, floatun, u128, ) \ + zig_expand_concat(zig_float_negate_builtin_, zig_has_##Type)(Type) \ + zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, cmp) \ + zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, ne) \ + zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, eq) \ + zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, lt) \ + zig_expand_concat(zig_float_less_builtin_, zig_has_##Type)(Type, le) \ + zig_expand_concat(zig_float_greater_builtin_, zig_has_##Type)(Type, gt) \ + zig_expand_concat(zig_float_greater_builtin_, zig_has_##Type)(Type, ge) \ + zig_expand_concat(zig_float_binary_builtin_, zig_has_##Type)(Type, add, +) \ + zig_expand_concat(zig_float_binary_builtin_, zig_has_##Type)(Type, sub, -) \ + zig_expand_concat(zig_float_binary_builtin_, zig_has_##Type)(Type, mul, *) \ + zig_expand_concat(zig_float_binary_builtin_, zig_has_##Type)(Type, div, /) \ + zig_extern zig_##Type zig_libc_name_##Type(sqrt)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(sin)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(cos)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(tan)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(exp)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(exp2)(zig_##Type); \ + zig_extern 
zig_##Type zig_libc_name_##Type(log)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(log2)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(log10)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(fabs)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(floor)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(ceil)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(round)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(trunc)(zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(fmod)(zig_##Type, zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(fmin)(zig_##Type, zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(fmax)(zig_##Type, zig_##Type); \ + zig_extern zig_##Type zig_libc_name_##Type(fma)(zig_##Type, zig_##Type, zig_##Type); \ +\ + static inline zig_##Type zig_div_trunc_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return zig_libc_name_##Type(trunc)(zig_div_##Type(lhs, rhs)); \ + } \ +\ + static inline zig_##Type zig_div_floor_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return zig_libc_name_##Type(floor)(zig_div_##Type(lhs, rhs)); \ + } \ +\ + static inline zig_##Type zig_mod_##Type(zig_##Type lhs, zig_##Type rhs) { \ + return zig_sub_##Type(lhs, zig_mul_##Type(zig_div_floor_##Type(lhs, rhs), rhs)); \ + } +zig_float_builtins(f16) +zig_float_builtins(f32) +zig_float_builtins(f64) +zig_float_builtins(f80) +zig_float_builtins(f128) +zig_float_builtins(c_longdouble) + +#if _MSC_VER && (_M_IX86 || _M_X64) + +// TODO: zig_msvc_atomic_load should load 32 bit without interlocked on x86, and load 64 bit without interlocked on x64 + +#define zig_msvc_atomics(Type, suffix) \ + static inline bool zig_msvc_cmpxchg_##Type(zig_##Type volatile* obj, zig_##Type* expected, zig_##Type desired) { \ + zig_##Type comparand = *expected; \ + zig_##Type initial = _InterlockedCompareExchange##suffix(obj, desired, comparand); \ + bool exchanged = initial == comparand; \ + if (!exchanged) { \ + *expected = initial; \ + } \ + return exchanged; \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_xchg_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + return _InterlockedExchange##suffix(obj, value); \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_add_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + return _InterlockedExchangeAdd##suffix(obj, value); \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_sub_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##Type new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = prev - value; \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \ + } \ + return prev; \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_or_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + return _InterlockedOr##suffix(obj, value); \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_xor_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + return _InterlockedXor##suffix(obj, value); \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_and_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + return _InterlockedAnd##suffix(obj, value); \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_nand_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##Type new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = ~(prev & value); \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \ + } \ + return prev; \ + } \ + static inline zig_##Type 
zig_msvc_atomicrmw_min_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##Type new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = value < prev ? value : prev; \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \ + } \ + return prev; \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_max_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##Type new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = value > prev ? value : prev; \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \ + } \ + return prev; \ + } \ + static inline void zig_msvc_atomic_store_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + _InterlockedExchange##suffix(obj, value); \ + } \ + static inline zig_##Type zig_msvc_atomic_load_##Type(zig_##Type volatile* obj) { \ + return _InterlockedOr##suffix(obj, 0); \ + } + +zig_msvc_atomics(u8, 8) +zig_msvc_atomics(i8, 8) +zig_msvc_atomics(u16, 16) +zig_msvc_atomics(i16, 16) +zig_msvc_atomics(u32, ) +zig_msvc_atomics(i32, ) + +#if _M_X64 +zig_msvc_atomics(u64, 64) +zig_msvc_atomics(i64, 64) +#endif + +#define zig_msvc_flt_atomics(Type, ReprType, suffix) \ + static inline bool zig_msvc_cmpxchg_##Type(zig_##Type volatile* obj, zig_##Type* expected, zig_##Type desired) { \ + zig_##ReprType comparand = *((zig_##ReprType*)expected); \ + zig_##ReprType initial = _InterlockedCompareExchange##suffix((zig_##ReprType volatile*)obj, *((zig_##ReprType*)&desired), comparand); \ + bool exchanged = initial == comparand; \ + if (!exchanged) { \ + *expected = *((zig_##Type*)&initial); \ + } \ + return exchanged; \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_xchg_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + zig_##ReprType initial = _InterlockedExchange##suffix((zig_##ReprType volatile*)obj, *((zig_##ReprType*)&value)); \ + return *((zig_##Type*)&initial); \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_add_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##ReprType new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = prev + value; \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, *((zig_##ReprType*)&new)); \ + } \ + return prev; \ + } \ + static inline zig_##Type zig_msvc_atomicrmw_sub_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##ReprType new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = prev - value; \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, *((zig_##ReprType*)&new)); \ + } \ + return prev; \ + } + +zig_msvc_flt_atomics(f32, u32, ) +#if _M_X64 +zig_msvc_flt_atomics(f64, u64, 64) +#endif + +#if _M_IX86 +static inline void zig_msvc_atomic_barrier() { + zig_i32 barrier; + __asm { + xchg barrier, eax + } +} + +static inline void* zig_msvc_atomicrmw_xchg_p32(void** obj, zig_u32* arg) { + return _InterlockedExchangePointer(obj, arg); +} + +static inline void zig_msvc_atomic_store_p32(void** obj, zig_u32* arg) { + _InterlockedExchangePointer(obj, arg); +} + +static inline void* zig_msvc_atomic_load_p32(void** obj) { + return (void*)_InterlockedOr((void*)obj, 0); +} + +static inline bool zig_msvc_cmpxchg_p32(void** obj, void** expected, void* desired) { + void* comparand = *expected; + void* initial = _InterlockedCompareExchangePointer(obj, desired, comparand); + bool exchanged = initial == comparand; + if (!exchanged) { + *expected = initial; + } + return exchanged; +} +#else /* _M_IX86 */ +static inline 
void* zig_msvc_atomicrmw_xchg_p64(void** obj, zig_u64* arg) { + return _InterlockedExchangePointer(obj, arg); +} + +static inline void zig_msvc_atomic_store_p64(void** obj, zig_u64* arg) { + _InterlockedExchangePointer(obj, arg); +} + +static inline void* zig_msvc_atomic_load_p64(void** obj) { + return (void*)_InterlockedOr64((void*)obj, 0); +} + +static inline bool zig_msvc_cmpxchg_p64(void** obj, void** expected, void* desired) { + void* comparand = *expected; + void* initial = _InterlockedCompareExchangePointer(obj, desired, comparand); + bool exchanged = initial == comparand; + if (!exchanged) { + *expected = initial; + } + return exchanged; +} + +static inline bool zig_msvc_cmpxchg_u128(zig_u128 volatile* obj, zig_u128* expected, zig_u128 desired) { + return _InterlockedCompareExchange128((zig_i64 volatile*)obj, desired.hi, desired.lo, (zig_i64*)expected); +} + +static inline bool zig_msvc_cmpxchg_i128(zig_i128 volatile* obj, zig_i128* expected, zig_i128 desired) { + return _InterlockedCompareExchange128((zig_i64 volatile*)obj, desired.hi, desired.lo, (zig_i64*)expected); +} + +#define zig_msvc_atomics_128xchg(Type) \ + static inline zig_##Type zig_msvc_atomicrmw_xchg_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, value); \ + } \ + return prev; \ + } + +zig_msvc_atomics_128xchg(u128) +zig_msvc_atomics_128xchg(i128) + +#define zig_msvc_atomics_128op(Type, operation) \ + static inline zig_##Type zig_msvc_atomicrmw_##operation##_##Type(zig_##Type volatile* obj, zig_##Type value) { \ + bool success = false; \ + zig_##Type new; \ + zig_##Type prev; \ + while (!success) { \ + prev = *obj; \ + new = zig_##operation##_##Type(prev, value); \ + success = zig_msvc_cmpxchg_##Type(obj, &prev, new); \ + } \ + return prev; \ + } + +zig_msvc_atomics_128op(u128, add) +zig_msvc_atomics_128op(u128, sub) +zig_msvc_atomics_128op(u128, or) +zig_msvc_atomics_128op(u128, xor) +zig_msvc_atomics_128op(u128, and) +zig_msvc_atomics_128op(u128, nand) +zig_msvc_atomics_128op(u128, min) +zig_msvc_atomics_128op(u128, max) +#endif /* _M_IX86 */ + +#endif /* _MSC_VER && (_M_IX86 || _M_X64) */ + +/* ========================= Special Case Intrinsics ========================= */ + +#if (_MSC_VER && _M_X64) || defined(__x86_64__) + +static inline void* zig_x86_64_windows_teb(void) { +#if _MSC_VER + return (void*)__readgsqword(0x30); +#else + void* teb; + __asm volatile(" movq %%gs:0x30, %[ptr]": [ptr]"=r"(teb)::); + return teb; +#endif +} + +#elif (_MSC_VER && _M_IX86) || defined(__i386__) || defined(__X86__) + +static inline void* zig_x86_windows_teb(void) { +#if _MSC_VER + return (void*)__readfsdword(0x18); +#else + void* teb; + __asm volatile(" movl %%fs:0x18, %[ptr]": [ptr]"=r"(teb)::); + return teb; +#endif +} + +#endif + +#if (_MSC_VER && (_M_IX86 || _M_X64)) || defined(__i386__) || defined(__x86_64__) + +static inline void zig_x86_cpuid(zig_u32 leaf_id, zig_u32 subid, zig_u32* eax, zig_u32* ebx, zig_u32* ecx, zig_u32* edx) { + zig_u32 cpu_info[4]; +#if _MSC_VER + __cpuidex(cpu_info, leaf_id, subid); +#else + __cpuid_count(leaf_id, subid, cpu_info[0], cpu_info[1], cpu_info[2], cpu_info[3]); +#endif + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +} + +static inline zig_u32 zig_x86_get_xcr0(void) { +#if _MSC_VER + return (zig_u32)_xgetbv(0); +#else + zig_u32 eax; + zig_u32 edx; + __asm__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); + 
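/* XCR0's low bits report which extended register states the OS has enabled (bit 0 = x87, bit 1 = SSE, bit 2 = AVX); callers check these alongside the cpuid feature flags before using vector state. */ +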
return eax; +#endif +} + +#endif diff --git a/stage1/zig1.wasm b/stage1/zig1.wasm index 2fe8728cb3..d7bf519b41 100644 Binary files a/stage1/zig1.wasm and b/stage1/zig1.wasm differ diff --git a/test/behavior/align.zig b/test/behavior/align.zig index 162c798758..901ea3697a 100644 --- a/test/behavior/align.zig +++ b/test/behavior/align.zig @@ -551,7 +551,11 @@ test "align(N) on functions" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO this is not supported on MSVC + + // This is not supported on MSVC + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) { + return error.SkipZigTest; + } // function alignment is a compile error on wasm32/wasm64 if (native_arch == .wasm32 or native_arch == .wasm64) return error.SkipZigTest; diff --git a/test/behavior/asm.zig b/test/behavior/asm.zig index f041963494..b242374ef8 100644 --- a/test/behavior/asm.zig +++ b/test/behavior/asm.zig @@ -7,6 +7,7 @@ const is_x86_64_linux = builtin.cpu.arch == .x86_64 and builtin.os.tag == .linux comptime { if (builtin.zig_backend != .stage2_arm and builtin.zig_backend != .stage2_aarch64 and + !(builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) and // MSVC doesn't support inline assembly is_x86_64_linux) { asm ( @@ -23,7 +24,8 @@ test "module level assembly" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly if (is_x86_64_linux) { try expect(this_is_my_alias() == 1234); @@ -36,7 +38,8 @@ test "output constraint modifiers" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly // This is only testing compilation. var a: u32 = 3; @@ -58,7 +61,8 @@ test "alternative constraints" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly // Make sure we allow commas as a separator for alternative constraints. 
var a: u32 = 3; @@ -75,7 +79,8 @@ test "sized integer/float in asm input" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly asm volatile ("" : @@ -125,7 +130,8 @@ test "struct/array/union types as input values" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly asm volatile ("" : @@ -151,6 +157,8 @@ test "asm modifiers (AArch64)" { if (builtin.target.cpu.arch != .aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly + var x: u32 = 15; const double = asm ("add %[ret:w], %[in:w], %[in:w]" : [ret] "=r" (-> u32), diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig index b82bfab99e..5a7bb0e8e2 100644 --- a/test/behavior/basic.zig +++ b/test/behavior/basic.zig @@ -387,6 +387,7 @@ fn hereIsAnOpaqueType(ptr: *OpaqueA) *OpaqueA { } test "take address of parameter" { + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1143,3 +1144,17 @@ test "orelse coercion as function argument" { var foo = Container.init(optional orelse .{}); try expect(foo.a.?.start == -1); } + +test "runtime-known globals initialized with undefined" { + const S = struct { + var array: [10]u32 = [_]u32{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + var vp: [*]u32 = undefined; + var s: []u32 = undefined; + }; + + S.vp = &S.array; + S.s = S.vp[0..5]; + + try expect(S.s[0] == 1); + try expect(S.s[4] == 5); +} diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 16f3c6e2dd..927caa965b 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -1568,3 +1568,12 @@ test "@volatileCast without a result location" { try expect(@TypeOf(z) == *i32); try expect(z.* == 1234); } + +test "coercion from single-item pointer to @as to slice" { + var x: u32 = 1; + + // Why does the following line get a compile error? 
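+ // (&x is a *u32, which @as converts to *[1]u32; a single-item array pointer then coerces to []u32, so this must compile.)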
+ const t: []u32 = @as(*[1]u32, &x); + + try expect(t[0] == 1); +} diff --git a/test/behavior/const_slice_child.zig b/test/behavior/const_slice_child.zig index 09c6a7233d..9ce526562c 100644 --- a/test/behavior/const_slice_child.zig +++ b/test/behavior/const_slice_child.zig @@ -9,6 +9,7 @@ var argv: [*]const [*]const u8 = undefined; test "const slice child" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO const strs = [_][*]const u8{ "one", "two", "three" }; diff --git a/test/behavior/eval.zig b/test/behavior/eval.zig index 680b0576d5..8364196f94 100644 --- a/test/behavior/eval.zig +++ b/test/behavior/eval.zig @@ -1338,6 +1338,7 @@ test "lazy sizeof is resolved in division" { test "lazy value is resolved as slice operand" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO const A = struct { a: u32 }; diff --git a/test/behavior/field_parent_ptr.zig b/test/behavior/field_parent_ptr.zig index 14a2362f5d..6bbd6ad7ef 100644 --- a/test/behavior/field_parent_ptr.zig +++ b/test/behavior/field_parent_ptr.zig @@ -44,3 +44,81 @@ fn testParentFieldPtrFirst(a: *const bool) !void { try expect(base == &foo); try expect(&base.a == a); } + +test "@fieldParentPtr untagged union" { + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + + try testFieldParentPtrUnion(&bar.c); + comptime try testFieldParentPtrUnion(&bar.c); +} + +const Bar = union(enum) { + a: bool, + b: f32, + c: i32, + d: i32, +}; + +const bar = Bar{ .c = 42 }; + +fn testFieldParentPtrUnion(c: *const i32) !void { + try expect(c == &bar.c); + + const base = @fieldParentPtr(Bar, "c", c); + try expect(base == &bar); + try expect(&base.c == c); +} + +test "@fieldParentPtr tagged union" { + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + + try testFieldParentPtrTaggedUnion(&bar_tagged.c); + comptime try testFieldParentPtrTaggedUnion(&bar_tagged.c); +} + +const BarTagged = union(enum) { + a: bool, + b: f32, + c: i32, + d: i32, +}; + +const bar_tagged = BarTagged{ .c = 42 }; + +fn testFieldParentPtrTaggedUnion(c: *const i32) !void { + try expect(c == &bar_tagged.c); + + const base = @fieldParentPtr(BarTagged, "c", c); + try expect(base == &bar_tagged); + try expect(&base.c == c); +} + +test "@fieldParentPtr extern union" { + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + + try testFieldParentPtrExternUnion(&bar_extern.c); + comptime try testFieldParentPtrExternUnion(&bar_extern.c); +} + +const BarExtern = extern union { + a: bool, + b: f32, + c: i32, + d: i32, +}; + +const bar_extern = BarExtern{ .c = 42 }; + +fn testFieldParentPtrExternUnion(c: *const i32) !void { + try expect(c 
== &bar_extern.c); + + const base = @fieldParentPtr(BarExtern, "c", c); + try expect(base == &bar_extern); + try expect(&base.c == c); +} diff --git a/test/behavior/int_comparison_elision.zig b/test/behavior/int_comparison_elision.zig index 5e13e00e83..ea26f02b7e 100644 --- a/test/behavior/int_comparison_elision.zig +++ b/test/behavior/int_comparison_elision.zig @@ -13,7 +13,6 @@ test "int comparison elision" { // TODO: support int types > 128 bits wide in other backends if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO diff --git a/test/behavior/lower_strlit_to_vector.zig b/test/behavior/lower_strlit_to_vector.zig index adbca8f0df..427379636e 100644 --- a/test/behavior/lower_strlit_to_vector.zig +++ b/test/behavior/lower_strlit_to_vector.zig @@ -7,7 +7,6 @@ test "strlit to vector" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO const strlit = "0123456789abcdef0123456789ABCDEF"; const vec_from_strlit: @Vector(32, u8) = strlit.*; diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 8ab8614605..54263e1daf 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -1463,7 +1463,6 @@ test "vector integer addition" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO const S = struct { diff --git a/test/behavior/packed-struct.zig b/test/behavior/packed-struct.zig index e1237a578b..85214bd7d8 100644 --- a/test/behavior/packed-struct.zig +++ b/test/behavior/packed-struct.zig @@ -603,7 +603,6 @@ test "packed struct initialized in bitcast" { test "pointer to container level packed struct field" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; diff --git a/test/behavior/pointers.zig b/test/behavior/pointers.zig index 2d55292916..ec4ff332cf 100644 --- a/test/behavior/pointers.zig +++ b/test/behavior/pointers.zig @@ -507,7 +507,6 @@ test "ptrCast comptime known slice to C pointer" { } test "ptrToInt on a generic function" { - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig index ed3e1ce88f..348e269682 100644 --- a/test/behavior/struct.zig +++ b/test/behavior/struct.zig @@ -1330,7 
+1330,6 @@ test "struct field init value is size of the struct" { } test "under-aligned struct field" { - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO @@ -1578,3 +1577,38 @@ test "directly initiating tuple like struct" { const a = struct { u8 }{8}; try expect(a[0] == 8); } + +test "instantiate struct with comptime field" { + { + var things = struct { + comptime foo: i8 = 1, + }{}; + + comptime std.debug.assert(things.foo == 1); + } + + { + const T = struct { + comptime foo: i8 = 1, + }; + var things = T{}; + + comptime std.debug.assert(things.foo == 1); + } + + { + var things: struct { + comptime foo: i8 = 1, + } = .{}; + + comptime std.debug.assert(things.foo == 1); + } + + { + var things: struct { + comptime foo: i8 = 1, + } = undefined; // this case used to crash: segmentation fault at address 0x0 + + comptime std.debug.assert(things.foo == 1); + } +} diff --git a/test/behavior/type_info.zig b/test/behavior/type_info.zig index 419a2f231c..6f64c92006 100644 --- a/test/behavior/type_info.zig +++ b/test/behavior/type_info.zig @@ -603,3 +603,9 @@ test "@typeInfo decls ignore dependency loops" { }; _ = S.foo; } + +test "type info of tuple of string literal default value" { + const struct_field = @typeInfo(@TypeOf(.{"hi"})).Struct.fields[0]; + const value = @ptrCast(*align(1) const *const [2:0]u8, struct_field.default_value.?).*; + comptime std.debug.assert(value[0] == 'h'); +} diff --git a/test/behavior/var_args.zig b/test/behavior/var_args.zig index 97f90b559d..cdfbcc9188 100644 --- a/test/behavior/var_args.zig +++ b/test/behavior/var_args.zig @@ -96,10 +96,9 @@ fn doNothingWithFirstArg(args: anytype) void { } test "simple variadic function" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO - if (builtin.cpu.arch == .aarch64 and builtin.os.tag != .macos and builtin.zig_backend == .stage2_llvm) { + if (builtin.cpu.arch == .aarch64 and builtin.os.tag != .macos) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } @@ -112,6 +111,12 @@ test "simple variadic function" { return @cVaArg(&ap, c_int); } + fn compatible(_: c_int, ...) callconv(.C) c_int { + var ap = @cVaStart(); + defer @cVaEnd(&ap); + return @cVaArg(&ap, c_int); + } + fn add(count: c_int, ...) callconv(.C) c_int { var ap = @cVaStart(); defer @cVaEnd(&ap); @@ -124,8 +129,13 @@ } }; - try std.testing.expectEqual(@as(c_int, 0), S.simple(@as(c_int, 0))); - try std.testing.expectEqual(@as(c_int, 1024), S.simple(@as(c_int, 1024))); + if (builtin.zig_backend != .stage2_c) { + // pre-C23 C doesn't support varargs without a preceding runtime arg. 
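+ // (C23 allows declaring a variadic function with no named parameters; earlier C standards require at least one, which is why `compatible` above takes a dummy leading c_int.)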
+ try std.testing.expectEqual(@as(c_int, 0), S.simple(@as(c_int, 0))); + try std.testing.expectEqual(@as(c_int, 1024), S.simple(@as(c_int, 1024))); + } + try std.testing.expectEqual(@as(c_int, 0), S.compatible(undefined, @as(c_int, 0))); + try std.testing.expectEqual(@as(c_int, 1024), S.compatible(undefined, @as(c_int, 1024))); try std.testing.expectEqual(@as(c_int, 0), S.add(0)); try std.testing.expectEqual(@as(c_int, 1), S.add(1, @as(c_int, 1))); try std.testing.expectEqual(@as(c_int, 3), S.add(2, @as(c_int, 1), @as(c_int, 2))); @@ -134,10 +144,9 @@ test "simple variadic function" { test "variadic functions" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO - if (builtin.cpu.arch == .aarch64 and builtin.os.tag != .macos and builtin.zig_backend == .stage2_llvm) { + if (builtin.cpu.arch == .aarch64 and builtin.os.tag != .macos) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } @@ -178,10 +187,9 @@ test "variadic functions" { test "copy VaList" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO - if (builtin.cpu.arch == .aarch64 and builtin.os.tag != .macos and builtin.zig_backend == .stage2_llvm) { + if (builtin.cpu.arch == .aarch64 and builtin.os.tag != .macos) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index e983e0cfb0..50fef7f646 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -75,7 +75,6 @@ test "vector int operators" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO const S = struct { @@ -178,7 +177,6 @@ test "tuple to vector" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) { @@ -943,7 +941,6 @@ test "multiplication-assignment operator with an array operand" { if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO const S = struct { @@ -1247,10 
+1244,10 @@ test "array operands to shuffle are coerced to vectors" { test "load packed vector element" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO var x: @Vector(2, u15) = .{ 1, 4 }; try expect((&x[0]).* == 1); @@ -1260,10 +1257,10 @@ test "load packed vector element" { test "store packed vector element" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO var v = @Vector(4, u1){ 1, 1, 1, 1 }; try expectEqual(@Vector(4, u1){ 1, 1, 1, 1 }, v); diff --git a/test/cases/compile_errors/bitCast_same_size_but_bit_count_mismatch.zig b/test/cases/compile_errors/bitCast_same_size_but_bit_count_mismatch.zig index f67a5d139f..2f7bd9c9bc 100644 --- a/test/cases/compile_errors/bitCast_same_size_but_bit_count_mismatch.zig +++ b/test/cases/compile_errors/bitCast_same_size_but_bit_count_mismatch.zig @@ -7,4 +7,4 @@ export fn entry(byte: u8) void { // backend=stage2 // target=native // -// :2:29: error: @bitCast size mismatch: destination type 'u7' has 7 bits but source type 'u8' has 8 bits +// :2:16: error: @bitCast size mismatch: destination type 'u7' has 7 bits but source type 'u8' has 8 bits diff --git a/test/cases/compile_errors/bitCast_with_different_sizes_inside_an_expression.zig b/test/cases/compile_errors/bitCast_with_different_sizes_inside_an_expression.zig index 8951eee5c0..bf87ba8bc5 100644 --- a/test/cases/compile_errors/bitCast_with_different_sizes_inside_an_expression.zig +++ b/test/cases/compile_errors/bitCast_with_different_sizes_inside_an_expression.zig @@ -7,4 +7,4 @@ export fn entry() void { // backend=stage2 // target=native // -// :2:29: error: @bitCast size mismatch: destination type 'u8' has 8 bits but source type 'f32' has 32 bits +// :2:16: error: @bitCast size mismatch: destination type 'u8' has 8 bits but source type 'f32' has 32 bits diff --git a/test/cases/compile_errors/error_set_membership.zig b/test/cases/compile_errors/error_set_membership.zig new file mode 100644 index 0000000000..5683e9594b --- /dev/null +++ b/test/cases/compile_errors/error_set_membership.zig @@ -0,0 +1,31 @@ +const std = @import("std"); + +const Error = error{InvalidCharacter}; + +const Direction = enum { upside_down }; + +const Barrrr = union(enum) { + float: f64, + direction: Direction, +}; + +fn fooey(bar: std.meta.Tag(Barrrr), args: []const []const u8) !Barrrr { + return switch (bar) { + .float => .{ .float = try std.fmt.parseFloat(f64, args[0]) }, + .direction => if (std.mem.eql(u8, args[0], "upside_down")) + Barrrr{ .direction = .upside_down } + else + error.InvalidDirection, + }; +} + +pub fn main() Error!void { + std.debug.print("{}", .{try 
fooey(.direction, &[_][]const u8{ "one", "two", "three" })}); +} + +// error +// backend=llvm +// target=native +// +// :23:29: error: expected type 'error{InvalidCharacter}', found '@typeInfo(@typeInfo(@TypeOf(tmp.fooey)).Fn.return_type.?).ErrorUnion.error_set' +// :23:29: note: 'error.InvalidDirection' not a member of destination error set diff --git a/test/cases/compile_errors/fieldParentPtr-non_struct.zig b/test/cases/compile_errors/fieldParentPtr-non_struct.zig index 0a2f46e5c9..7950c88537 100644 --- a/test/cases/compile_errors/fieldParentPtr-non_struct.zig +++ b/test/cases/compile_errors/fieldParentPtr-non_struct.zig @@ -7,4 +7,4 @@ export fn foo(a: *i32) *Foo { // backend=llvm // target=native // -// :3:28: error: expected struct type, found 'i32' +// :3:28: error: expected struct or union type, found 'i32' diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 21c8822eb3..e0b78b3000 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -288,4 +288,26 @@ pub fn addCases(ctx: *TestContext) !void { //, &[_][]const u8{ // "tmp.zig:4:1: error: unable to inline function", //}); + + { + const case = ctx.obj("file in multiple modules", .{}); + case.backend = .stage2; + + case.addSourceFile("foo.zig", + \\const dummy = 0; + ); + + case.addDepModule("foo", "foo.zig"); + + case.addError( + \\comptime { + \\ _ = @import("foo"); + \\ _ = @import("foo.zig"); + \\} + , &[_][]const u8{ + ":1:1: error: file exists in multiple modules", + ":1:1: note: root of module root.foo", + ":3:17: note: imported from module root", + }); + } } diff --git a/test/link/macho/dead_strip_dylibs/build.zig b/test/link/macho/dead_strip_dylibs/build.zig index 8b62cec6e6..af2f5cf0dc 100644 --- a/test/link/macho/dead_strip_dylibs/build.zig +++ b/test/link/macho/dead_strip_dylibs/build.zig @@ -29,7 +29,7 @@ pub fn build(b: *std.Build) void { exe.dead_strip_dylibs = true; const run_cmd = exe.run(); - run_cmd.expected_exit_code = @bitCast(u8, @as(i8, -2)); // should fail + run_cmd.expected_term = .{ .Exited = @bitCast(u8, @as(i8, -2)) }; // should fail test_step.dependOn(&run_cmd.step); } } diff --git a/test/src/compare_output.zig b/test/src/compare_output.zig index edd48321c9..3bda3bdacd 100644 --- a/test/src/compare_output.zig +++ b/test/src/compare_output.zig @@ -168,7 +168,7 @@ pub const CompareOutputContext = struct { run.addArgs(case.cli_args); run.stderr_action = .ignore; run.stdout_action = .ignore; - run.expected_exit_code = 126; + run.expected_term = .{ .Exited = 126 }; self.step.dependOn(&run.step); }, diff --git a/test/stage2/cbe.zig b/test/stage2/cbe.zig index 6c0c5e03cf..e9750853a6 100644 --- a/test/stage2/cbe.zig +++ b/test/stage2/cbe.zig @@ -959,7 +959,7 @@ pub fn addCases(ctx: *TestContext) !void { \\ _ = a; \\} , - \\zig_extern void start(zig_u8 const a0); + \\zig_extern void start(uint8_t const a0); \\ ); ctx.h("header with multiple param function", linux_x64, @@ -967,19 +967,19 @@ pub fn addCases(ctx: *TestContext) !void { \\ _ = a; _ = b; _ = c; \\} , - \\zig_extern void start(zig_u8 const a0, zig_u8 const a1, zig_u8 const a2); + \\zig_extern void start(uint8_t const a0, uint8_t const a1, uint8_t const a2); \\ ); ctx.h("header with u32 param function", linux_x64, \\export fn start(a: u32) void{ _ = a; } , - \\zig_extern void start(zig_u32 const a0); + \\zig_extern void start(uint32_t const a0); \\ ); ctx.h("header with usize param function", linux_x64, \\export fn start(a: usize) void{ _ = a; } , - \\zig_extern void start(zig_usize const a0); + \\zig_extern void 
diff --git a/test/stage2/cbe.zig b/test/stage2/cbe.zig
index 6c0c5e03cf..e9750853a6 100644
--- a/test/stage2/cbe.zig
+++ b/test/stage2/cbe.zig
@@ -959,7 +959,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = a;
         \\}
     ,
-        \\zig_extern void start(zig_u8 const a0);
+        \\zig_extern void start(uint8_t const a0);
         \\
     );
     ctx.h("header with multiple param function", linux_x64,
@@ -967,19 +967,19 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = a; _ = b; _ = c;
         \\}
     ,
-        \\zig_extern void start(zig_u8 const a0, zig_u8 const a1, zig_u8 const a2);
+        \\zig_extern void start(uint8_t const a0, uint8_t const a1, uint8_t const a2);
         \\
     );
     ctx.h("header with u32 param function", linux_x64,
         \\export fn start(a: u32) void{ _ = a; }
     ,
-        \\zig_extern void start(zig_u32 const a0);
+        \\zig_extern void start(uint32_t const a0);
         \\
     );
     ctx.h("header with usize param function", linux_x64,
         \\export fn start(a: usize) void{ _ = a; }
     ,
-        \\zig_extern void start(zig_usize const a0);
+        \\zig_extern void start(uintptr_t const a0);
         \\
     );
     ctx.h("header with bool param function", linux_x64,
@@ -993,7 +993,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    unreachable;
         \\}
     ,
-        \\zig_extern zig_noreturn start(void);
+        \\zig_extern zig_noreturn void start(void);
         \\
     );
     ctx.h("header with multiple functions", linux_x64,
@@ -1009,7 +1009,7 @@ pub fn addCases(ctx: *TestContext) !void {
     ctx.h("header with multiple includes", linux_x64,
         \\export fn start(a: u32, b: usize) void{ _ = a; _ = b; }
     ,
-        \\zig_extern void start(zig_u32 const a0, zig_usize const a1);
+        \\zig_extern void start(uint32_t const a0, uintptr_t const a1);
         \\
     );
 }
diff --git a/test/stage2/nvptx.zig b/test/stage2/nvptx.zig
index c87f32add0..f08aa9fca4 100644
--- a/test/stage2/nvptx.zig
+++ b/test/stage2/nvptx.zig
@@ -97,6 +97,7 @@ pub fn addPtx(
         .updates = std.ArrayList(TestContext.Update).init(ctx.cases.allocator),
         .output_mode = .Obj,
         .files = std.ArrayList(TestContext.File).init(ctx.cases.allocator),
+        .deps = std.ArrayList(TestContext.DepModule).init(ctx.cases.allocator),
         .link_libc = false,
         .backend = .llvm,
         // Bug in Debug mode
diff --git a/test/standalone.zig b/test/standalone.zig
index 81eb1b0042..965139235c 100644
--- a/test/standalone.zig
+++ b/test/standalone.zig
@@ -84,6 +84,9 @@ pub fn addCases(cases: *tests.StandaloneContext) void {
         cases.addBuildFile("test/standalone/pie/build.zig", .{});
     }
     cases.addBuildFile("test/standalone/issue_12706/build.zig", .{});
+    if (std.os.have_sigpipe_support) {
+        cases.addBuildFile("test/standalone/sigpipe/build.zig", .{});
+    }

     // Ensure the development tools are buildable. Alphabetically sorted.
     // No need to build `tools/spirv/grammar.zig`.
@@ -104,4 +107,10 @@ pub fn addCases(cases: *tests.StandaloneContext) void {
     cases.addBuildFile("test/standalone/emit_asm_and_bin/build.zig", .{});
     cases.addBuildFile("test/standalone/issue_12588/build.zig", .{});
     cases.addBuildFile("test/standalone/embed_generated_file/build.zig", .{});
+
+    cases.addBuildFile("test/standalone/dep_diamond/build.zig", .{});
+    cases.addBuildFile("test/standalone/dep_triangle/build.zig", .{});
+    cases.addBuildFile("test/standalone/dep_recursive/build.zig", .{});
+    cases.addBuildFile("test/standalone/dep_mutually_recursive/build.zig", .{});
+    cases.addBuildFile("test/standalone/dep_shared_builtin/build.zig", .{});
 }
diff --git a/test/standalone/dep_diamond/bar.zig b/test/standalone/dep_diamond/bar.zig
new file mode 100644
index 0000000000..772d21dd58
--- /dev/null
+++ b/test/standalone/dep_diamond/bar.zig
@@ -0,0 +1 @@
+pub const shared = @import("shared");
diff --git a/test/standalone/dep_diamond/build.zig b/test/standalone/dep_diamond/build.zig
new file mode 100644
index 0000000000..b60f898f0b
--- /dev/null
+++ b/test/standalone/dep_diamond/build.zig
@@ -0,0 +1,28 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+    const optimize = b.standardOptimizeOption(.{});
+
+    const shared = b.createModule(.{
+        .source_file = .{ .path = "shared.zig" },
+    });
+
+    const exe = b.addExecutable(.{
+        .name = "test",
+        .root_source_file = .{ .path = "test.zig" },
+        .optimize = optimize,
+    });
+    exe.addAnonymousModule("foo", .{
+        .source_file = .{ .path = "foo.zig" },
+        .dependencies = &.{.{ .name = "shared", .module = shared }},
+    });
+    exe.addAnonymousModule("bar", .{
+        .source_file = .{ .path = "bar.zig" },
+        .dependencies = &.{.{ .name = "shared", .module = shared }},
+    });
+
+    const run = exe.run();
+
+    const test_step = b.step("test", "Test it");
+    test_step.dependOn(&run.step);
+}
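Both `addAnonymousModule` calls in dep_diamond hand the executable the same `*std.Build.Module`, which is what makes `foo.shared == bar.shared` hold in test.zig below: module identity follows the module instance, not the source path. Rooting two separate modules at shared.zig instead would presumably trip the new "file exists in multiple modules" diagnostic tested earlier in this diff; a hedged sketch of that invalid variant:

    const std = @import("std");

    // Hypothetical, invalid variant of dep_diamond/build.zig: two distinct
    // modules rooted at the same source file.
    pub fn build(b: *std.Build) void {
        const shared_a = b.createModule(.{ .source_file = .{ .path = "shared.zig" } });
        const shared_b = b.createModule(.{ .source_file = .{ .path = "shared.zig" } });
        const exe = b.addExecutable(.{
            .name = "test",
            .root_source_file = .{ .path = "test.zig" },
        });
        exe.addAnonymousModule("foo", .{
            .source_file = .{ .path = "foo.zig" },
            .dependencies = &.{.{ .name = "shared", .module = shared_a }},
        });
        exe.addAnonymousModule("bar", .{
            .source_file = .{ .path = "bar.zig" },
            .dependencies = &.{.{ .name = "shared", .module = shared_b }},
        });
        // expected: error: file exists in multiple modules
    }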
diff --git a/test/standalone/dep_diamond/foo.zig b/test/standalone/dep_diamond/foo.zig
new file mode 100644
index 0000000000..772d21dd58
--- /dev/null
+++ b/test/standalone/dep_diamond/foo.zig
@@ -0,0 +1 @@
+pub const shared = @import("shared");
diff --git a/test/standalone/dep_diamond/shared.zig b/test/standalone/dep_diamond/shared.zig
new file mode 100644
index 0000000000..3d771dbba8
--- /dev/null
+++ b/test/standalone/dep_diamond/shared.zig
@@ -0,0 +1 @@
+// (empty)
diff --git a/test/standalone/dep_diamond/test.zig b/test/standalone/dep_diamond/test.zig
new file mode 100644
index 0000000000..227f442943
--- /dev/null
+++ b/test/standalone/dep_diamond/test.zig
@@ -0,0 +1,7 @@
+const foo = @import("foo");
+const bar = @import("bar");
+const assert = @import("std").debug.assert;
+
+pub fn main() void {
+    assert(foo.shared == bar.shared);
+}
diff --git a/test/standalone/dep_mutually_recursive/bar.zig b/test/standalone/dep_mutually_recursive/bar.zig
new file mode 100644
index 0000000000..68957b69e4
--- /dev/null
+++ b/test/standalone/dep_mutually_recursive/bar.zig
@@ -0,0 +1,6 @@
+const assert = @import("std").debug.assert;
+pub const foo = @import("foo");
+
+comptime {
+    assert(foo.bar == @This());
+}
diff --git a/test/standalone/dep_mutually_recursive/build.zig b/test/standalone/dep_mutually_recursive/build.zig
new file mode 100644
index 0000000000..0123646a9a
--- /dev/null
+++ b/test/standalone/dep_mutually_recursive/build.zig
@@ -0,0 +1,26 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+    const optimize = b.standardOptimizeOption(.{});
+
+    const foo = b.createModule(.{
+        .source_file = .{ .path = "foo.zig" },
+    });
+    const bar = b.createModule(.{
+        .source_file = .{ .path = "bar.zig" },
+    });
+    foo.dependencies.put("bar", bar) catch @panic("OOM");
+    bar.dependencies.put("foo", foo) catch @panic("OOM");
+
+    const exe = b.addExecutable(.{
+        .name = "test",
+        .root_source_file = .{ .path = "test.zig" },
+        .optimize = optimize,
+    });
+    exe.addModule("foo", foo);
+
+    const run = exe.run();
+
+    const test_step = b.step("test", "Test it");
+    test_step.dependOn(&run.step);
+}
diff --git a/test/standalone/dep_mutually_recursive/foo.zig b/test/standalone/dep_mutually_recursive/foo.zig
new file mode 100644
index 0000000000..60107fbdf6
--- /dev/null
+++ b/test/standalone/dep_mutually_recursive/foo.zig
@@ -0,0 +1,6 @@
+const assert = @import("std").debug.assert;
+pub const bar = @import("bar");
+
+comptime {
+    assert(bar.foo == @This());
+}
diff --git a/test/standalone/dep_mutually_recursive/test.zig b/test/standalone/dep_mutually_recursive/test.zig
new file mode 100644
index 0000000000..b7273ad1aa
--- /dev/null
+++ b/test/standalone/dep_mutually_recursive/test.zig
@@ -0,0 +1,7 @@
+const foo = @import("foo");
+const assert = @import("std").debug.assert;
+
+pub fn main() void {
+    assert(foo == foo.bar.foo);
+    assert(foo == foo.bar.foo.bar.foo);
+}
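dep_mutually_recursive wires its cycle imperatively: a declarative `.dependencies` slice can only reference modules that already exist, so the loop is closed by mutating each module's `dependencies` table after both `createModule` calls. The same pattern should extend to any cyclic shape; a hedged sketch of a hypothetical three-module ring:

    const std = @import("std");

    pub fn build(b: *std.Build) void {
        const a = b.createModule(.{ .source_file = .{ .path = "a.zig" } });
        const m = b.createModule(.{ .source_file = .{ .path = "m.zig" } });
        const c = b.createModule(.{ .source_file = .{ .path = "c.zig" } });
        // Close the ring only after all three modules exist.
        a.dependencies.put("next", m) catch @panic("OOM");
        m.dependencies.put("next", c) catch @panic("OOM");
        c.dependencies.put("next", a) catch @panic("OOM");
    }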
diff --git a/test/standalone/dep_recursive/build.zig b/test/standalone/dep_recursive/build.zig
new file mode 100644
index 0000000000..32d546e283
--- /dev/null
+++ b/test/standalone/dep_recursive/build.zig
@@ -0,0 +1,22 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+    const optimize = b.standardOptimizeOption(.{});
+
+    const foo = b.createModule(.{
+        .source_file = .{ .path = "foo.zig" },
+    });
+    foo.dependencies.put("foo", foo) catch @panic("OOM");
+
+    const exe = b.addExecutable(.{
+        .name = "test",
+        .root_source_file = .{ .path = "test.zig" },
+        .optimize = optimize,
+    });
+    exe.addModule("foo", foo);
+
+    const run = exe.run();
+
+    const test_step = b.step("test", "Test it");
+    test_step.dependOn(&run.step);
+}
diff --git a/test/standalone/dep_recursive/foo.zig b/test/standalone/dep_recursive/foo.zig
new file mode 100644
index 0000000000..f4a62c2d4f
--- /dev/null
+++ b/test/standalone/dep_recursive/foo.zig
@@ -0,0 +1,6 @@
+const assert = @import("std").debug.assert;
+pub const foo = @import("foo");
+
+comptime {
+    assert(foo == @This());
+}
diff --git a/test/standalone/dep_recursive/test.zig b/test/standalone/dep_recursive/test.zig
new file mode 100644
index 0000000000..f06ac0e018
--- /dev/null
+++ b/test/standalone/dep_recursive/test.zig
@@ -0,0 +1,8 @@
+const foo = @import("foo");
+const shared = @import("shared");
+const assert = @import("std").debug.assert;
+
+pub fn main() void {
+    assert(foo == foo.foo);
+    assert(foo == foo.foo.foo);
+}
diff --git a/test/standalone/dep_shared_builtin/build.zig b/test/standalone/dep_shared_builtin/build.zig
new file mode 100644
index 0000000000..6c029b654b
--- /dev/null
+++ b/test/standalone/dep_shared_builtin/build.zig
@@ -0,0 +1,19 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+    const optimize = b.standardOptimizeOption(.{});
+
+    const exe = b.addExecutable(.{
+        .name = "test",
+        .root_source_file = .{ .path = "test.zig" },
+        .optimize = optimize,
+    });
+    exe.addAnonymousModule("foo", .{
+        .source_file = .{ .path = "foo.zig" },
+    });
+
+    const run = exe.run();
+
+    const test_step = b.step("test", "Test it");
+    test_step.dependOn(&run.step);
+}
diff --git a/test/standalone/dep_shared_builtin/foo.zig b/test/standalone/dep_shared_builtin/foo.zig
new file mode 100644
index 0000000000..3b2719146e
--- /dev/null
+++ b/test/standalone/dep_shared_builtin/foo.zig
@@ -0,0 +1,3 @@
+pub const std = @import("std");
+pub const builtin = @import("builtin");
+pub const root = @import("root");
diff --git a/test/standalone/dep_shared_builtin/test.zig b/test/standalone/dep_shared_builtin/test.zig
new file mode 100644
index 0000000000..88a11f440a
--- /dev/null
+++ b/test/standalone/dep_shared_builtin/test.zig
@@ -0,0 +1,11 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const root = @import("root");
+const foo = @import("foo");
+
+pub fn main() void {
+    std.debug.assert(root == @This());
+    std.debug.assert(std == foo.std);
+    std.debug.assert(builtin == foo.builtin);
+    std.debug.assert(root == foo.root);
+}
diff --git a/test/standalone/dep_triangle/build.zig b/test/standalone/dep_triangle/build.zig
new file mode 100644
index 0000000000..f3b73aaf35
--- /dev/null
+++ b/test/standalone/dep_triangle/build.zig
@@ -0,0 +1,25 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+    const optimize = b.standardOptimizeOption(.{});
+
+    const shared = b.createModule(.{
+        .source_file = .{ .path = "shared.zig" },
+    });
+
+    const exe = b.addExecutable(.{
+        .name = "test",
+        .root_source_file = .{ .path = "test.zig" },
+        .optimize = optimize,
+    });
+    exe.addAnonymousModule("foo", .{
+        .source_file = .{ .path = "foo.zig" },
+        .dependencies = &.{.{ .name = "shared", .module = shared }},
+    });
+    exe.addModule("shared", shared);
+
+    const run = exe.run();
+
+    const test_step = b.step("test", "Test it");
+    test_step.dependOn(&run.step);
+}
diff --git a/test/standalone/dep_triangle/foo.zig b/test/standalone/dep_triangle/foo.zig
new file mode 100644
index 0000000000..772d21dd58
--- /dev/null
+++ b/test/standalone/dep_triangle/foo.zig
@@ -0,0 +1 @@
+pub const shared = @import("shared");
diff --git a/test/standalone/dep_triangle/shared.zig b/test/standalone/dep_triangle/shared.zig
new file mode 100644
index 0000000000..3d771dbba8
--- /dev/null
+++ b/test/standalone/dep_triangle/shared.zig
@@ -0,0 +1 @@
+// (empty)
diff --git a/test/standalone/dep_triangle/test.zig b/test/standalone/dep_triangle/test.zig
new file mode 100644
index 0000000000..f208e560fa
--- /dev/null
+++ b/test/standalone/dep_triangle/test.zig
@@ -0,0 +1,7 @@
+const foo = @import("foo");
+const shared = @import("shared");
+const assert = @import("std").debug.assert;
+
+pub fn main() void {
+    assert(foo.shared == shared);
+}
diff --git a/test/standalone/install_raw_hex/build.zig b/test/standalone/install_raw_hex/build.zig
index b0f938a344..6ed515e381 100644
--- a/test/standalone/install_raw_hex/build.zig
+++ b/test/standalone/install_raw_hex/build.zig
@@ -3,6 +3,9 @@ const std = @import("std");
 const CheckFileStep = std.Build.CheckFileStep;

 pub fn build(b: *std.Build) void {
+    const test_step = b.step("test", "Test the program");
+    b.default_step.dependOn(test_step);
+
     const target = .{
         .cpu_arch = .thumb,
         .cpu_model = .{ .explicit = &std.Target.arm.cpu.cortex_m4 },
@@ -19,12 +22,14 @@ pub fn build(b: *std.Build) void {
         .optimize = optimize,
     });

-    const test_step = b.step("test", "Test the program");
-    b.default_step.dependOn(test_step);
-
-    const hex_step = b.addInstallRaw(elf, "hello.hex", .{});
+    const hex_step = elf.addObjCopy(.{
+        .basename = "hello.hex",
+    });
     test_step.dependOn(&hex_step.step);

-    const explicit_format_hex_step = b.addInstallRaw(elf, "hello.foo", .{ .format = .hex });
+    const explicit_format_hex_step = elf.addObjCopy(.{
+        .basename = "hello.foo",
+        .format = .hex,
+    });
     test_step.dependOn(&explicit_format_hex_step.step);
 }
diff --git a/test/standalone/sigpipe/breakpipe.zig b/test/standalone/sigpipe/breakpipe.zig
new file mode 100644
index 0000000000..3623451db5
--- /dev/null
+++ b/test/standalone/sigpipe/breakpipe.zig
@@ -0,0 +1,21 @@
+const std = @import("std");
+const build_options = @import("build_options");
+
+pub const std_options = if (build_options.keep_sigpipe) struct {
+    pub const keep_sigpipe = true;
+} else struct {
+    // intentionally not setting keep_sigpipe to ensure the default behavior is equivalent to false
+};
+
+pub fn main() !void {
+    const pipe = try std.os.pipe();
+    std.os.close(pipe[0]);
+    _ = std.os.write(pipe[1], "a") catch |err| switch (err) {
+        error.BrokenPipe => {
+            try std.io.getStdOut().writer().writeAll("BrokenPipe\n");
+            std.os.exit(123);
+        },
+        else => |e| return e,
+    };
+    unreachable;
+}
diff --git a/test/standalone/sigpipe/build.zig b/test/standalone/sigpipe/build.zig
new file mode 100644
index 0000000000..763df5fe46
--- /dev/null
+++ b/test/standalone/sigpipe/build.zig
@@ -0,0 +1,35 @@
+const std = @import("std");
+const os = std.os;
+
+pub fn build(b: *std.build.Builder) !void {
+    const test_step = b.step("test", "Run the tests");
+
+    // This test runs "breakpipe" as a child process and that process
+    // depends on inheriting a SIGPIPE disposition of "default".
+    {
+        const act = os.Sigaction{
+            .handler = .{ .handler = os.SIG.DFL },
+            .mask = os.empty_sigset,
+            .flags = 0,
+        };
+        try os.sigaction(os.SIG.PIPE, &act, null);
+    }
+
+    for ([_]bool{ false, true }) |keep_sigpipe| {
+        const options = b.addOptions();
+        options.addOption(bool, "keep_sigpipe", keep_sigpipe);
+        const exe = b.addExecutable(.{
+            .name = "breakpipe",
+            .root_source_file = .{ .path = "breakpipe.zig" },
+        });
+        exe.addOptions("build_options", options);
+        const run = exe.run();
+        if (keep_sigpipe) {
+            run.expected_term = .{ .Signal = std.os.SIG.PIPE };
+        } else {
+            run.stdout_action = .{ .expect_exact = "BrokenPipe\n" };
+            run.expected_term = .{ .Exited = 123 };
+        }
+        test_step.dependOn(&run.step);
+    }
+}
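The sigpipe pair exercises the new `std_options.keep_sigpipe` switch: left at the default (false), std's startup code ignores SIGPIPE, so the failed write surfaces as `error.BrokenPipe` and breakpipe exits 123; set to true, the default disposition kills the process with the signal. The `expected_term` values above encode what a parent observes via `std.ChildProcess.Term`; a hedged sketch of checking the same thing directly (binary path hypothetical):

    const std = @import("std");

    pub fn main() !void {
        var child = std.ChildProcess.init(&.{"./breakpipe"}, std.heap.page_allocator);
        switch (try child.spawnAndWait()) {
            // keep_sigpipe = true: the write to the closed pipe killed the child.
            .Signal => |sig| std.debug.assert(sig == std.os.SIG.PIPE),
            // keep_sigpipe = false: std ignored SIGPIPE, breakpipe saw
            // error.BrokenPipe and exited with status 123.
            .Exited => |code| std.debug.assert(code == 123),
            else => unreachable,
        }
    }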
diff --git a/test/tests.zig b/test/tests.zig
index 851de9f2a6..035311372f 100644
--- a/test/tests.zig
+++ b/test/tests.zig
@@ -58,14 +58,14 @@ const test_targets = blk: {
         .link_libc = true,
         .backend = .stage2_c,
     },
-    //.{
-    //    .target = .{
-    //        .cpu_arch = .x86_64,
-    //        .os_tag = .linux,
-    //        .abi = .none,
-    //    },
-    //    .backend = .stage2_x86_64,
-    //},
+    .{
+        .target = .{
+            .cpu_arch = .x86_64,
+            .os_tag = .linux,
+            .abi = .none,
+        },
+        .backend = .stage2_x86_64,
+    },
     .{
         .target = .{
             .cpu_arch = .aarch64,