zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob c04bb1d2 (458045B) - Raw


      1 const std = @import("std");
      2 const build_options = @import("build_options");
      3 const builtin = @import("builtin");
      4 const assert = std.debug.assert;
      5 const codegen = @import("../../codegen.zig");
      6 const leb128 = std.leb;
      7 const link = @import("../../link.zig");
      8 const log = std.log.scoped(.codegen);
      9 const tracking_log = std.log.scoped(.tracking);
     10 const verbose_tracking_log = std.log.scoped(.verbose_tracking);
     11 const wip_mir_log = std.log.scoped(.wip_mir);
     12 const math = std.math;
     13 const mem = std.mem;
     14 const trace = @import("../../tracy.zig").trace;
     15 
     16 const Air = @import("../../Air.zig");
     17 const Allocator = mem.Allocator;
     18 const CodeGenError = codegen.CodeGenError;
     19 const Compilation = @import("../../Compilation.zig");
     20 const DebugInfoOutput = codegen.DebugInfoOutput;
     21 const DW = std.dwarf;
     22 const ErrorMsg = Module.ErrorMsg;
     23 const Result = codegen.Result;
     24 const Emit = @import("Emit.zig");
     25 const Liveness = @import("../../Liveness.zig");
     26 const Lower = @import("Lower.zig");
     27 const Mir = @import("Mir.zig");
     28 const Module = @import("../../Module.zig");
     29 const Target = std.Target;
     30 const Type = @import("../../type.zig").Type;
     31 const TypedValue = @import("../../TypedValue.zig");
     32 const Value = @import("../../value.zig").Value;
     33 
     34 const abi = @import("abi.zig");
     35 const bits = @import("bits.zig");
     36 const encoder = @import("encoder.zig");
     37 const errUnionErrorOffset = codegen.errUnionErrorOffset;
     38 const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
     39 
     40 const Condition = bits.Condition;
     41 const Immediate = bits.Immediate;
     42 const Memory = bits.Memory;
     43 const Register = bits.Register;
     44 const RegisterManager = abi.RegisterManager;
     45 const RegisterLock = RegisterManager.RegisterLock;
     46 const FrameIndex = bits.FrameIndex;
     47 
     48 const gp = abi.RegisterClass.gp;
     49 const sse = abi.RegisterClass.sse;
     50 
     51 const InnerError = CodeGenError || error{OutOfRegisters};
     52 
     53 gpa: Allocator,
     54 air: Air,
     55 liveness: Liveness,
     56 bin_file: *link.File,
     57 debug_output: DebugInfoOutput,
     58 target: *const std.Target,
     59 owner: Owner,
     60 err_msg: ?*ErrorMsg,
     61 args: []MCValue,
     62 ret_mcv: InstTracking,
     63 fn_type: Type,
     64 arg_index: u32,
     65 src_loc: Module.SrcLoc,
     66 
     67 eflags_inst: ?Air.Inst.Index = null,
     68 
     69 /// MIR Instructions
     70 mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
     71 /// MIR extra data
     72 mir_extra: std.ArrayListUnmanaged(u32) = .{},
     73 
     74 /// Line and column in the source file of the function's closing curly brace.
     75 end_di_line: u32,
     76 end_di_column: u32,
     77 
     78 /// The value is an offset into the `Function` `code` from the beginning.
     79 /// To perform the reloc, write 32-bit signed little-endian integer
     80 /// which is a relative jump, based on the address following the reloc.
     81 exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
     82 
     83 const_tracking: InstTrackingMap = .{},
     84 inst_tracking: InstTrackingMap = .{},
     85 
     86 // Key is the block instruction
     87 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{},
     88 
     89 register_manager: RegisterManager = .{},
     90 
     91 /// Generation of the current scope, increments by 1 for every entered scope.
     92 scope_generation: u32 = 0,
     93 
     94 frame_allocs: std.MultiArrayList(FrameAlloc) = .{},
     95 free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .{},
     96 frame_locs: std.MultiArrayList(Mir.FrameLoc) = .{},
     97 
     98 /// Debug field, used to find bugs in the compiler.
     99 air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init,
    100 
    101 /// For MIR debug info, maps a MIR index to an AIR index.
    102 mir_to_air_map: @TypeOf(mir_to_air_map_init) = mir_to_air_map_init,
    103 
    104 const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {};
    105 
    106 const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged(Mir.Inst.Index, Air.Inst.Index){} else {};
    107 
    108 const FrameAddr = struct { index: FrameIndex, off: i32 = 0 };
    109 const RegisterOffset = struct { reg: Register, off: i32 = 0 };
    110 
    /// What machine code is currently being generated for: either a function
    /// belonging to the module, or a lazily generated symbol.
    const Owner = union(enum) {
        mod_fn: *const Module.Fn,
        lazy_sym: link.File.LazySymbol,

        /// Returns the index of the declaration that owns this unit of codegen.
        fn getDecl(owner: Owner) Module.Decl.Index {
            switch (owner) {
                .mod_fn => |owning_fn| return owning_fn.owner_decl,
                .lazy_sym => |lazy| return lazy.ty.getOwnerDecl(),
            }
        }

        /// Returns the symbol index of the atom backing this owner, creating the
        /// atom if necessary. Only MachO and COFF output files are handled; any
        /// other linker backend is `unreachable`.
        ///
        /// Errors from atom creation are propagated directly for `.mod_fn`, and
        /// are reported through `ctx.fail` for `.lazy_sym`.
        fn getSymbolIndex(owner: Owner, ctx: *Self) !u32 {
            switch (owner) {
                .mod_fn => |owning_fn| {
                    const decl_idx = owning_fn.owner_decl;
                    if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
                        const atom_idx = try macho_file.getOrCreateAtomForDecl(decl_idx);
                        return macho_file.getAtom(atom_idx).getSymbolIndex().?;
                    }
                    if (ctx.bin_file.cast(link.File.Coff)) |coff_file| {
                        const atom_idx = try coff_file.getOrCreateAtomForDecl(decl_idx);
                        return coff_file.getAtom(atom_idx).getSymbolIndex().?;
                    }
                    unreachable;
                },
                .lazy_sym => |lazy| {
                    if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
                        const atom_idx = macho_file.getOrCreateAtomForLazySymbol(lazy) catch |err|
                            return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                        return macho_file.getAtom(atom_idx).getSymbolIndex().?;
                    }
                    if (ctx.bin_file.cast(link.File.Coff)) |coff_file| {
                        const atom_idx = coff_file.getOrCreateAtomForLazySymbol(lazy) catch |err|
                            return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                        return coff_file.getAtom(atom_idx).getSymbolIndex().?;
                    }
                    unreachable;
                },
            }
        }
    };
    148 
    /// Describes where a value lives (or how it can be produced) at the machine level.
    pub const MCValue = union(enum) {
        /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
        /// TODO Look into deleting this tag and using `dead` instead, since every use
        /// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
        none,
        /// Control flow will not allow this value to be observed.
        unreach,
        /// No more references to this value remain.
        /// The payload is the value of scope_generation at the point where the death occurred
        dead: u32,
        /// The value is undefined.
        undef,
        /// A pointer-sized integer that fits in a register.
        /// If the type is a pointer, this is the pointer address in virtual address space.
        immediate: u64,
        /// The value resides in the EFLAGS register.
        eflags: Condition,
        /// The value is in a register.
        register: Register,
        /// The value is a constant offset from the value in a register.
        register_offset: RegisterOffset,
        /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register.
        register_overflow: struct { reg: Register, eflags: Condition },
        /// The value is in memory at a hard-coded address.
        /// If the type is a pointer, it means the pointer address is at this memory location.
        memory: u64,
        /// The value is in memory at a constant offset from the address in a register.
        indirect: RegisterOffset,
        /// The value is in memory.
        /// Payload is a symbol index.
        load_direct: u32,
        /// The value is a pointer to a value in memory.
        /// Payload is a symbol index.
        lea_direct: u32,
        /// The value is in memory referenced indirectly via GOT.
        /// Payload is a symbol index.
        load_got: u32,
        /// The value is a pointer to a value referenced indirectly via GOT.
        /// Payload is a symbol index.
        lea_got: u32,
        /// The value is a threadlocal variable.
        /// Payload is a symbol index.
        load_tlv: u32,
        /// The value is a pointer to a threadlocal variable.
        /// Payload is a symbol index.
        lea_tlv: u32,
        /// The value stored at an offset from a frame index
        /// Payload is a frame address.
        load_frame: FrameAddr,
        /// The address of an offset from a frame index
        /// Payload is a frame address.
        lea_frame: FrameAddr,
        /// This indicates that we have already allocated a frame index for this instruction,
        /// but it has not been spilled there yet in the current control flow.
        /// Payload is a frame index.
        reserved_frame: FrameIndex,

        /// Returns true for `.memory`, `.indirect` and `.load_frame`, which are
        /// exactly the tags that `mem` can turn into a `Memory` operand.
        fn isMemory(mcv: MCValue) bool {
            return switch (mcv) {
                .memory, .indirect, .load_frame => true,
                else => false,
            };
        }

        /// Returns true only for `.immediate`.
        fn isImmediate(mcv: MCValue) bool {
            return switch (mcv) {
                .immediate => true,
                else => false,
            };
        }

        /// Returns true if the value is exactly the contents of a register:
        /// either `.register`, or `.register_offset` with a zero offset.
        fn isRegister(mcv: MCValue) bool {
            return switch (mcv) {
                .register => true,
                .register_offset => |reg_off| reg_off.off == 0,
                else => false,
            };
        }

        /// Returns true for `.register` and for `.register_offset` with any offset.
        fn isRegisterOffset(mcv: MCValue) bool {
            return switch (mcv) {
                .register, .register_offset => true,
                else => false,
            };
        }

        /// Returns the register referenced by this value, if any. This covers
        /// `.register`, `.register_offset`, `.indirect` and `.register_overflow`.
        fn getReg(mcv: MCValue) ?Register {
            return switch (mcv) {
                .register => |reg| reg,
                .register_offset, .indirect => |ro| ro.reg,
                .register_overflow => |ro| ro.reg,
                else => null,
            };
        }

        /// Returns the EFLAGS condition referenced by this value, if any. This
        /// covers `.eflags` and `.register_overflow`.
        fn getCondition(mcv: MCValue) ?Condition {
            return switch (mcv) {
                .eflags => |cc| cc,
                .register_overflow => |reg_ov| reg_ov.eflags,
                else => null,
            };
        }

        /// Given a value that lives in memory, returns the value that represents
        /// its address. This is the inverse of `deref`. Calling it on a value that
        /// is not in memory is `unreachable`.
        fn address(mcv: MCValue) MCValue {
            return switch (mcv) {
                .none,
                .unreach,
                .dead,
                .undef,
                .immediate,
                .eflags,
                .register,
                .register_offset,
                .register_overflow,
                .lea_direct,
                .lea_got,
                .lea_tlv,
                .lea_frame,
                .reserved_frame,
                => unreachable, // not in memory
                .memory => |addr| .{ .immediate = addr },
                // A zero offset collapses to the plain register form.
                .indirect => |reg_off| switch (reg_off.off) {
                    0 => .{ .register = reg_off.reg },
                    else => .{ .register_offset = reg_off },
                },
                .load_direct => |sym_index| .{ .lea_direct = sym_index },
                .load_got => |sym_index| .{ .lea_got = sym_index },
                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
                .load_frame => |frame_addr| .{ .lea_frame = frame_addr },
            };
        }

        /// Given a value that represents an address, returns the value that
        /// represents the memory it points to. This is the inverse of `address`.
        /// Calling it on a value that is not an address is `unreachable`.
        fn deref(mcv: MCValue) MCValue {
            return switch (mcv) {
                .none,
                .unreach,
                .dead,
                .undef,
                .eflags,
                .register_overflow,
                .memory,
                .indirect,
                .load_direct,
                .load_got,
                .load_tlv,
                .load_frame,
                .reserved_frame,
                => unreachable, // not a dereferenceable
                .immediate => |addr| .{ .memory = addr },
                .register => |reg| .{ .indirect = .{ .reg = reg } },
                .register_offset => |reg_off| .{ .indirect = reg_off },
                .lea_direct => |sym_index| .{ .load_direct = sym_index },
                .lea_got => |sym_index| .{ .load_got = sym_index },
                .lea_tlv => |sym_index| .{ .load_tlv = sym_index },
                .lea_frame => |frame_addr| .{ .load_frame = frame_addr },
            };
        }

        /// Returns this value with `off` added to it. Only `.immediate`,
        /// `.register`, `.register_offset` and `.lea_frame` can be offset; every
        /// other tag is `unreachable`.
        ///
        /// Immediates use wrapping addition; register and frame offsets use plain
        /// `+` on `i32`.
        fn offset(mcv: MCValue, off: i32) MCValue {
            return switch (mcv) {
                .none,
                .unreach,
                .dead,
                .undef,
                .eflags,
                .register_overflow,
                .memory,
                .indirect,
                .load_direct,
                .lea_direct,
                .load_got,
                .lea_got,
                .load_tlv,
                .lea_tlv,
                .load_frame,
                .reserved_frame,
                => unreachable, // not offsettable
                .immediate => |imm| .{ .immediate = @bitCast(u64, @bitCast(i64, imm) +% off) },
                .register => |reg| .{ .register_offset = .{ .reg = reg, .off = off } },
                .register_offset => |reg_off| .{
                    .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off },
                },
                .lea_frame => |frame_addr| .{
                    .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
                },
            };
        }

        /// Builds a `Memory` operand of size `ptr_size` for this value. Only the
        /// tags accepted by `isMemory` are supported; every other tag is
        /// `unreachable`.
        fn mem(mcv: MCValue, ptr_size: Memory.PtrSize) Memory {
            return switch (mcv) {
                .none,
                .unreach,
                .dead,
                .undef,
                .immediate,
                .eflags,
                .register,
                .register_offset,
                .register_overflow,
                .load_direct,
                .lea_direct,
                .load_got,
                .lea_got,
                .load_tlv,
                .lea_tlv,
                .lea_frame,
                .reserved_frame,
                => unreachable,
                // Use a `ds`-based displacement when the address fits in an i32,
                // and fall back to the `moffs` form otherwise.
                .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr|
                    Memory.sib(ptr_size, .{ .base = .{ .reg = .ds }, .disp = small_addr })
                else
                    Memory.moffs(.ds, addr),
                .indirect => |reg_off| Memory.sib(ptr_size, .{
                    .base = .{ .reg = reg_off.reg },
                    .disp = reg_off.off,
                }),
                .load_frame => |frame_addr| Memory.sib(ptr_size, .{
                    .base = .{ .frame = frame_addr.index },
                    .disp = frame_addr.off,
                }),
            };
        }

        /// Writes a human-readable rendering of the value to `writer`. Values that
        /// are read from memory are wrapped in square brackets.
        pub fn format(
            mcv: MCValue,
            comptime _: []const u8,
            _: std.fmt.FormatOptions,
            writer: anytype,
        ) @TypeOf(writer).Error!void {
            switch (mcv) {
                .none, .unreach, .dead, .undef => try writer.print("({s})", .{@tagName(mcv)}),
                .immediate => |pl| try writer.print("0x{x}", .{pl}),
                .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}),
                inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}),
                .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }),
                .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }),
                .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }),
                .load_direct => |pl| try writer.print("[direct:{d}]", .{pl}),
                .lea_direct => |pl| try writer.print("direct:{d}", .{pl}),
                .load_got => |pl| try writer.print("[got:{d}]", .{pl}),
                .lea_got => |pl| try writer.print("got:{d}", .{pl}),
                .load_tlv => |pl| try writer.print("[tlv:{d}]", .{pl}),
                .lea_tlv => |pl| try writer.print("tlv:{d}", .{pl}),
                .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }),
                .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }),
                // NOTE(review): a reserved frame is printed with a "dead" label —
                // confirm this is the intended wording before changing it.
                .reserved_frame => |pl| try writer.print("(dead:{})", .{pl}),
            }
        }
    };
    398 
    399 const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking);
    /// Tracks the location of the result of one instruction.
    ///
    /// `short` is the location currently in use; it is what `getReg` and
    /// `getCondition` inspect and what `die` marks as dead. `long` is the
    /// location that `spill` copies the value into; it is `.none` for results
    /// that start out in a register, in EFLAGS, or behind a register.
    const InstTracking = struct {
        long: MCValue,
        short: MCValue,

        /// Creates tracking for a freshly produced `result`. Results that depend
        /// on a register or on EFLAGS get no long-term location yet; every other
        /// permitted result is its own long-term location. `.dead` and
        /// `.reserved_frame` are not valid results.
        fn init(result: MCValue) InstTracking {
            const long_mcv: MCValue = switch (result) {
                .dead,
                .reserved_frame,
                => unreachable,
                .eflags,
                .register,
                .register_offset,
                .register_overflow,
                .indirect,
                => .none,
                .none,
                .unreach,
                .undef,
                .immediate,
                .memory,
                .load_direct,
                .lea_direct,
                .load_got,
                .lea_got,
                .load_tlv,
                .lea_tlv,
                .load_frame,
                .lea_frame,
                => result,
            };
            return .{ .long = long_mcv, .short = result };
        }

        /// Register referenced by the short-term location, if any.
        fn getReg(self: InstTracking) ?Register {
            return self.short.getReg();
        }

        /// EFLAGS condition referenced by the short-term location, if any.
        fn getCondition(self: InstTracking) ?Condition {
            return self.short.getCondition();
        }

        /// Copies the value from `short` into `long`, first allocating a location
        /// or turning a reserved frame index into a frame load when needed. Does
        /// nothing when both locations are already equal.
        fn spill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void {
            const already_spilled = std.meta.eql(self.long, self.short);
            if (already_spilled) return;

            // Allocate a long-term location, or reuse the one we already have.
            switch (self.long) {
                .load_frame => {},
                .reserved_frame => |reserved_index| self.long = .{ .load_frame = .{ .index = reserved_index } },
                .none => self.long = try function.allocRegOrMem(inst, false),
                else => unreachable,
            }

            tracking_log.debug("spill %{d} from {} to {}", .{ inst, self.short, self.long });
            const inst_ty = function.air.typeOfIndex(inst);
            try function.genCopy(inst_ty, self.long, self.short);
        }

        /// Makes `short` point at the long-term location, converting a reserved
        /// frame index into a frame load first. The long-term location must not
        /// depend on a register or on EFLAGS.
        fn reuseFrame(self: *InstTracking) void {
            switch (self.long) {
                .reserved_frame => |reserved_index| self.long = .{ .load_frame = .{ .index = reserved_index } },
                else => {},
            }
            self.short = switch (self.long) {
                .dead,
                .eflags,
                .register,
                .register_offset,
                .register_overflow,
                .indirect,
                .reserved_frame,
                => unreachable,
                .none,
                .unreach,
                .undef,
                .immediate,
                .memory,
                .load_direct,
                .lea_direct,
                .load_got,
                .lea_got,
                .load_tlv,
                .lea_tlv,
                .load_frame,
                .lea_frame,
                => self.long,
            };
        }

        /// Updates the bookkeeping for a spill without emitting any copy: frees
        /// the short-term location and switches over to the long-term one.
        fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
            function.freeValue(self.short);
            self.reuseFrame();
            tracking_log.debug("%{d} => {} (spilled)", .{ inst, self.* });
        }

        /// Checks that this tracking can be turned into `target`. Long-term
        /// locations that are not frame-based must be identical in both; a
        /// frame-based one is compatible with `.none`, `.load_frame` and
        /// `.reserved_frame` in `target`.
        fn verifyMaterialize(self: *InstTracking, target: InstTracking) void {
            switch (self.long) {
                .dead,
                .eflags,
                .register,
                .register_offset,
                .register_overflow,
                .indirect,
                => unreachable,
                .load_frame,
                .reserved_frame,
                => switch (target.long) {
                    .none,
                    .load_frame,
                    .reserved_frame,
                    => {},
                    else => unreachable,
                },
                .none,
                .unreach,
                .undef,
                .immediate,
                .memory,
                .load_direct,
                .lea_direct,
                .load_got,
                .lea_got,
                .load_tlv,
                .lea_tlv,
                .lea_frame,
                => assert(std.meta.eql(self.long, target.long)),
            }
        }

        /// Verifies compatibility with `target`, then emits the copies needed to
        /// place the value where `target` expects it.
        fn materialize(
            self: *InstTracking,
            function: *Self,
            inst: Air.Inst.Index,
            target: InstTracking,
        ) !void {
            self.verifyMaterialize(target);
            try self.materializeUnsafe(function, inst, target);
        }

        /// Same as `materialize`, but without the compatibility check.
        fn materializeUnsafe(
            self: *InstTracking,
            function: *Self,
            inst: Air.Inst.Index,
            target: InstTracking,
        ) !void {
            const inst_ty = function.air.typeOfIndex(inst);
            const lacks_frame_copy = switch (self.long) {
                .none, .reserved_frame => true,
                else => false,
            };
            // The target expects a copy in its frame slot that we do not have yet.
            if (lacks_frame_copy and target.long == .load_frame) {
                try function.genCopy(inst_ty, target.long, self.short);
            }
            try function.genCopy(inst_ty, target.short, self.short);
        }

        /// Updates the bookkeeping to match `target` without emitting any copy.
        fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void {
            self.verifyMaterialize(target);
            if (target.long == .none) {
                // Don't clobber reserved frame indices
                switch (self.long) {
                    .load_frame => |frame_addr| self.long = .{ .reserved_frame = frame_addr.index },
                    .reserved_frame => {},
                    else => self.long = target.long,
                }
            } else {
                self.long = target.long;
            }
            self.short = target.short;
            tracking_log.debug("%{d} => {} (materialize)", .{ inst, self.* });
        }

        /// Brings a dead value back to life if it died in generation
        /// `scope_generation` or a later one. Live values are left untouched.
        fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
            const die_generation = switch (self.short) {
                .dead => |generation| generation,
                else => return,
            };
            if (die_generation < scope_generation) return;
            self.reuseFrame();
            tracking_log.debug("%{d} => {} (resurrect)", .{ inst, self.* });
        }

        /// Frees the short-term location and marks the value as dead in the
        /// current scope generation.
        fn die(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
            function.freeValue(self.short);
            self.short = .{ .dead = function.scope_generation };
            tracking_log.debug("%{d} => {} (death)", .{ inst, self.* });
        }

        /// Marks the value as dead in the current scope generation without
        /// freeing its short-term location.
        fn reuse(
            self: *InstTracking,
            function: *Self,
            new_inst: Air.Inst.Index,
            old_inst: Air.Inst.Index,
        ) void {
            self.short = .{ .dead = function.scope_generation };
            tracking_log.debug("%{d} => {} (reuse %{d})", .{ new_inst, self.*, old_inst });
        }

        /// Prints the short-term location, preceded by the long-term location
        /// between `|` characters when the two differ.
        pub fn format(
            self: InstTracking,
            comptime _: []const u8,
            _: std.fmt.FormatOptions,
            writer: anytype,
        ) @TypeOf(writer).Error!void {
            const locations_differ = !std.meta.eql(self.long, self.short);
            if (locations_differ) {
                try writer.print("|{}| ", .{self.long});
            }
            try writer.print("{}", .{self.short});
        }
    };
    594 
    /// Size, alignment and reference count of one frame allocation.
    const FrameAlloc = struct {
        abi_size: u31,
        /// Base-2 logarithm of the alignment.
        abi_align: u5,
        ref_count: u16,

        /// Builds a `FrameAlloc` with a zero reference count. Asserts that
        /// `alignment` is a power of two; `size` must fit in a `u31`.
        fn init(alloc_abi: struct { size: u64, alignment: u32 }) FrameAlloc {
            const byte_align = alloc_abi.alignment;
            assert(math.isPowerOfTwo(byte_align));
            const byte_size = @intCast(u31, alloc_abi.size);
            const log2_align = math.log2_int(u32, byte_align);
            return .{
                .abi_size = byte_size,
                .abi_align = log2_align,
                .ref_count = 0,
            };
        }
        /// Builds a `FrameAlloc` from the ABI size and ABI alignment of `ty`.
        fn initType(ty: Type, target: Target) FrameAlloc {
            const byte_size = ty.abiSize(target);
            const byte_align = ty.abiAlignment(target);
            return init(.{ .size = byte_size, .alignment = byte_align });
        }
    };
    612 
    /// Pairs an optional AIR instruction with the size of a stack allocation.
    const StackAllocation = struct {
        /// The associated AIR instruction, or null when there is none.
        inst: ?Air.Inst.Index,
        /// TODO do we need size? should be determined by inst.ty.abiSize(self.target.*)
        /// NOTE(review): presumably a size in bytes — confirm against the users of this type.
        size: u32,
    };
    618 
    /// Per-block codegen data, stored in `blocks` keyed by the block instruction.
    const BlockData = struct {
        /// MIR instruction indices recorded for this block.
        /// NOTE(review): presumably jumps to patch once the block ends — confirm.
        relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
        state: State,

        /// Frees the reloc list and invalidates the whole struct. `state` is not
        /// released here.
        fn deinit(block_data: *BlockData, gpa: Allocator) void {
            block_data.relocs.deinit(gpa);
            block_data.* = undefined;
        }
    };
    628 
    629 const Self = @This();
    630 
    /// Generates machine code for `module_fn` from its AIR and appends it to `code`.
    ///
    /// The work happens in three steps: the calling convention of the function is
    /// resolved and the named frame allocations are set up, `gen` lowers AIR to
    /// MIR, and `Emit` turns the MIR into bytes.
    ///
    /// Returns `Result.ok` on success. Failures of codegen, lowering or emission
    /// are returned as `Result.fail` carrying an error message; any other error
    /// is propagated to the caller.
    pub fn generate(
        bin_file: *link.File,
        src_loc: Module.SrcLoc,
        module_fn: *Module.Fn,
        air: Air,
        liveness: Liveness,
        code: *std.ArrayList(u8),
        debug_output: DebugInfoOutput,
    ) CodeGenError!Result {
        if (build_options.skip_non_native and builtin.cpu.arch != bin_file.options.target.cpu.arch) {
            @panic("Attempted to compile for architecture that was disabled by build configuration");
        }

        const mod = bin_file.options.module.?;
        const fn_owner_decl = mod.declPtr(module_fn.owner_decl);
        assert(fn_owner_decl.has_tv);
        const fn_type = fn_owner_decl.ty;

        const gpa = bin_file.allocator;
        var function = Self{
            .gpa = gpa,
            .air = air,
            .liveness = liveness,
            .target = &bin_file.options.target,
            .bin_file = bin_file,
            .debug_output = debug_output,
            .owner = .{ .mod_fn = module_fn },
            .err_msg = null,
            .args = undefined, // populated after `resolveCallingConventionValues`
            .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
            .fn_type = fn_type,
            .arg_index = 0,
            .src_loc = src_loc,
            .end_di_line = module_fn.rbrace_line,
            .end_di_column = module_fn.rbrace_column,
        };
        defer {
            function.frame_allocs.deinit(gpa);
            function.free_frame_indices.deinit(gpa);
            function.frame_locs.deinit(gpa);
            var block_it = function.blocks.valueIterator();
            while (block_it.next()) |block| block.deinit(gpa);
            function.blocks.deinit(gpa);
            function.inst_tracking.deinit(gpa);
            function.const_tracking.deinit(gpa);
            function.exitlude_jump_relocs.deinit(gpa);
            function.mir_instructions.deinit(gpa);
            function.mir_extra.deinit(gpa);
            if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa);
        }

        wip_mir_log.debug("{}:", .{function.fmtDecl(module_fn.owner_decl)});

        // Reserve the named frame indices; their entries are filled in below.
        try function.frame_allocs.resize(gpa, FrameIndex.named_count);
        function.frame_allocs.set(
            @enumToInt(FrameIndex.stack_frame),
            FrameAlloc.init(.{
                .size = 0,
                .alignment = if (mod.align_stack_fns.get(module_fn)) |set_align_stack|
                    set_align_stack.alignment
                else
                    1,
            }),
        );
        function.frame_allocs.set(
            @enumToInt(FrameIndex.call_frame),
            FrameAlloc.init(.{ .size = 0, .alignment = 1 }),
        );

        var call_info = function.resolveCallingConventionValues(fn_type, &.{}, .args_frame) catch |err| switch (err) {
            error.CodegenFail => return Result{ .fail = function.err_msg.? },
            error.OutOfRegisters => return Result{
                .fail = try ErrorMsg.create(
                    gpa,
                    src_loc,
                    "CodeGen ran out of registers. This is a bug in the Zig compiler.",
                    .{},
                ),
            },
            else => |e| return e,
        };
        defer call_info.deinit(&function);

        function.args = call_info.args;
        function.ret_mcv = call_info.return_value;
        function.frame_allocs.set(@enumToInt(FrameIndex.ret_addr), FrameAlloc.init(.{
            .size = Type.usize.abiSize(function.target.*),
            .alignment = @min(Type.usize.abiAlignment(function.target.*), call_info.stack_align),
        }));
        function.frame_allocs.set(@enumToInt(FrameIndex.base_ptr), FrameAlloc.init(.{
            .size = Type.usize.abiSize(function.target.*),
            .alignment = @min(Type.usize.abiAlignment(function.target.*) * 2, call_info.stack_align),
        }));
        function.frame_allocs.set(
            @enumToInt(FrameIndex.args_frame),
            FrameAlloc.init(.{ .size = call_info.stack_byte_count, .alignment = call_info.stack_align }),
        );

        function.gen() catch |err| switch (err) {
            error.CodegenFail => return Result{ .fail = function.err_msg.? },
            error.OutOfRegisters => return Result{
                .fail = try ErrorMsg.create(gpa, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}),
            },
            else => |e| return e,
        };

        // Converting `mir_extra` is the only step here that can fail, so do it
        // before the two `MultiArrayList.toOwnedSlice` calls detach their buffers
        // from `function`. If it ran after them, a failure would leak the
        // instruction buffer: it would no longer belong to `function`, and `mir`
        // would not exist yet to free it.
        const mir_extra_owned = try function.mir_extra.toOwnedSlice(gpa);
        var mir = Mir{
            .instructions = function.mir_instructions.toOwnedSlice(),
            .extra = mir_extra_owned,
            .frame_locs = function.frame_locs.toOwnedSlice(),
        };
        defer mir.deinit(gpa);

        var emit = Emit{
            .lower = .{
                .allocator = gpa,
                .mir = mir,
                .target = &bin_file.options.target,
                .src_loc = src_loc,
            },
            .bin_file = bin_file,
            .debug_output = debug_output,
            .code = code,
            .prev_di_pc = 0,
            .prev_di_line = module_fn.lbrace_line,
            .prev_di_column = module_fn.lbrace_column,
        };
        defer emit.deinit();
        emit.emitMir() catch |err| switch (err) {
            error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? },
            error.InvalidInstruction, error.CannotEncode => |e| {
                const msg = switch (e) {
                    error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
                    error.CannotEncode => "CodeGen failed to encode the instruction.",
                };
                return Result{
                    .fail = try ErrorMsg.create(
                        gpa,
                        src_loc,
                        "{s} This is a bug in the Zig compiler.",
                        .{msg},
                    ),
                };
            },
            else => |e| return e,
        };

        if (function.err_msg) |em| {
            return Result{ .fail = em };
        } else {
            return Result.ok;
        }
    }
    784 
    785 pub fn generateLazy(
    786     bin_file: *link.File,
    787     src_loc: Module.SrcLoc,
    788     lazy_sym: link.File.LazySymbol,
    789     code: *std.ArrayList(u8),
    790     debug_output: DebugInfoOutput,
    791 ) CodeGenError!Result {
    792     const gpa = bin_file.allocator;
    793     var function = Self{
    794         .gpa = gpa,
    795         .air = undefined,
    796         .liveness = undefined,
    797         .target = &bin_file.options.target,
    798         .bin_file = bin_file,
    799         .debug_output = debug_output,
    800         .owner = .{ .lazy_sym = lazy_sym },
    801         .err_msg = null,
    802         .args = undefined,
    803         .ret_mcv = undefined,
    804         .fn_type = undefined,
    805         .arg_index = undefined,
    806         .src_loc = src_loc,
    807         .end_di_line = undefined, // no debug info yet
    808         .end_di_column = undefined, // no debug info yet
    809     };
    810     defer {
    811         function.mir_instructions.deinit(gpa);
    812         function.mir_extra.deinit(gpa);
    813     }
    814 
    815     function.genLazy(lazy_sym) catch |err| switch (err) {
    816         error.CodegenFail => return Result{ .fail = function.err_msg.? },
    817         error.OutOfRegisters => return Result{
    818             .fail = try ErrorMsg.create(bin_file.allocator, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}),
    819         },
    820         else => |e| return e,
    821     };
    822 
    823     var mir = Mir{
    824         .instructions = function.mir_instructions.toOwnedSlice(),
    825         .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator),
    826         .frame_locs = function.frame_locs.toOwnedSlice(),
    827     };
    828     defer mir.deinit(bin_file.allocator);
    829 
    830     var emit = Emit{
    831         .lower = .{
    832             .allocator = bin_file.allocator,
    833             .mir = mir,
    834             .target = &bin_file.options.target,
    835             .src_loc = src_loc,
    836         },
    837         .bin_file = bin_file,
    838         .debug_output = debug_output,
    839         .code = code,
    840         .prev_di_pc = undefined, // no debug info yet
    841         .prev_di_line = undefined, // no debug info yet
    842         .prev_di_column = undefined, // no debug info yet
    843     };
    844     defer emit.deinit();
    845     emit.emitMir() catch |err| switch (err) {
    846         error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? },
    847         error.InvalidInstruction, error.CannotEncode => |e| {
    848             const msg = switch (e) {
    849                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
    850                 error.CannotEncode => "CodeGen failed to encode the instruction.",
    851             };
    852             return Result{
    853                 .fail = try ErrorMsg.create(
    854                     bin_file.allocator,
    855                     src_loc,
    856                     "{s} This is a bug in the Zig compiler.",
    857                     .{msg},
    858                 ),
    859             };
    860         },
    861         else => |e| return e,
    862     };
    863 
    864     if (function.err_msg) |em| {
    865         return Result{ .fail = em };
    866     } else {
    867         return Result.ok;
    868     }
    869 }
    870 
    871 const FormatDeclData = struct {
    872     mod: *Module,
    873     decl_index: Module.Decl.Index,
    874 };
    875 fn formatDecl(
    876     data: FormatDeclData,
    877     comptime _: []const u8,
    878     _: std.fmt.FormatOptions,
    879     writer: anytype,
    880 ) @TypeOf(writer).Error!void {
    881     try data.mod.declPtr(data.decl_index).renderFullyQualifiedName(data.mod, writer);
    882 }
    883 fn fmtDecl(self: *Self, decl_index: Module.Decl.Index) std.fmt.Formatter(formatDecl) {
    884     return .{ .data = .{
    885         .mod = self.bin_file.options.module.?,
    886         .decl_index = decl_index,
    887     } };
    888 }
    889 
    890 const FormatAirData = struct {
    891     self: *Self,
    892     inst: Air.Inst.Index,
    893 };
    894 fn formatAir(
    895     data: FormatAirData,
    896     comptime _: []const u8,
    897     _: std.fmt.FormatOptions,
    898     writer: anytype,
    899 ) @TypeOf(writer).Error!void {
    900     @import("../../print_air.zig").dumpInst(
    901         data.inst,
    902         data.self.bin_file.options.module.?,
    903         data.self.air,
    904         data.self.liveness,
    905     );
    906 }
    907 fn fmtAir(self: *Self, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) {
    908     return .{ .data = .{ .self = self, .inst = inst } };
    909 }
    910 
    911 const FormatWipMirData = struct {
    912     self: *Self,
    913     inst: Mir.Inst.Index,
    914 };
    915 fn formatWipMir(
    916     data: FormatWipMirData,
    917     comptime _: []const u8,
    918     _: std.fmt.FormatOptions,
    919     writer: anytype,
    920 ) @TypeOf(writer).Error!void {
    921     var lower = Lower{
    922         .allocator = data.self.gpa,
    923         .mir = .{
    924             .instructions = data.self.mir_instructions.slice(),
    925             .extra = data.self.mir_extra.items,
    926             .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(),
    927         },
    928         .target = data.self.target,
    929         .src_loc = data.self.src_loc,
    930     };
    931     for ((lower.lowerMir(data.inst) catch |err| switch (err) {
    932         error.LowerFail => {
    933             defer {
    934                 lower.err_msg.?.deinit(data.self.gpa);
    935                 lower.err_msg = null;
    936             }
    937             try writer.writeAll(lower.err_msg.?.msg);
    938             return;
    939         },
    940         error.OutOfMemory, error.InvalidInstruction, error.CannotEncode => |e| {
    941             try writer.writeAll(switch (e) {
    942                 error.OutOfMemory => "Out of memory",
    943                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
    944                 error.CannotEncode => "CodeGen failed to encode the instruction.",
    945             });
    946             return;
    947         },
    948         else => |e| return e,
    949     }).insts) |lowered_inst| try writer.print("  | {}", .{lowered_inst});
    950 }
    951 fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) {
    952     return .{ .data = .{ .self = self, .inst = inst } };
    953 }
    954 
    955 const FormatTrackingData = struct {
    956     self: *Self,
    957 };
    958 fn formatTracking(
    959     data: FormatTrackingData,
    960     comptime _: []const u8,
    961     _: std.fmt.FormatOptions,
    962     writer: anytype,
    963 ) @TypeOf(writer).Error!void {
    964     var it = data.self.inst_tracking.iterator();
    965     while (it.next()) |entry| try writer.print("\n%{d} = {}", .{ entry.key_ptr.*, entry.value_ptr.* });
    966 }
    967 fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) {
    968     return .{ .data = .{ .self = self } };
    969 }
    970 
    971 fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
    972     const gpa = self.gpa;
    973     try self.mir_instructions.ensureUnusedCapacity(gpa, 1);
    974     const result_index = @intCast(Mir.Inst.Index, self.mir_instructions.len);
    975     self.mir_instructions.appendAssumeCapacity(inst);
    976     if (inst.tag != .pseudo or switch (inst.ops) {
    977         else => true,
    978         .pseudo_dbg_prologue_end_none,
    979         .pseudo_dbg_line_line_column,
    980         .pseudo_dbg_epilogue_begin_none,
    981         .pseudo_dead_none,
    982         => false,
    983     }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)});
    984     return result_index;
    985 }
    986 
    987 fn addExtra(self: *Self, extra: anytype) Allocator.Error!u32 {
    988     const fields = std.meta.fields(@TypeOf(extra));
    989     try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len);
    990     return self.addExtraAssumeCapacity(extra);
    991 }
    992 
    993 fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
    994     const fields = std.meta.fields(@TypeOf(extra));
    995     const result = @intCast(u32, self.mir_extra.items.len);
    996     inline for (fields) |field| {
    997         self.mir_extra.appendAssumeCapacity(switch (field.type) {
    998             u32 => @field(extra, field.name),
    999             i32 => @bitCast(u32, @field(extra, field.name)),
   1000             else => @compileError("bad field type: " ++ field.name ++ ": " ++ @typeName(field.type)),
   1001         });
   1002     }
   1003     return result;
   1004 }
   1005 
   1006 /// A `cc` of `.z_and_np` clobbers `reg2`!
   1007 fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void {
   1008     _ = try self.addInst(.{
   1009         .tag = switch (cc) {
   1010             else => .cmov,
   1011             .z_and_np, .nz_or_p => .pseudo,
   1012         },
   1013         .ops = switch (cc) {
   1014             else => .rr,
   1015             .z_and_np => .pseudo_cmov_z_and_np_rr,
   1016             .nz_or_p => .pseudo_cmov_nz_or_p_rr,
   1017         },
   1018         .data = .{ .rr = .{
   1019             .fixes = switch (cc) {
   1020                 else => Mir.Inst.Fixes.fromCondition(cc),
   1021                 .z_and_np, .nz_or_p => ._,
   1022             },
   1023             .r1 = reg1,
   1024             .r2 = reg2,
   1025         } },
   1026     });
   1027 }
   1028 
   1029 /// A `cc` of `.z_and_np` is not supported by this encoding!
   1030 fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void {
   1031     _ = try self.addInst(.{
   1032         .tag = switch (cc) {
   1033             else => .cmov,
   1034             .z_and_np => unreachable,
   1035             .nz_or_p => .pseudo,
   1036         },
   1037         .ops = switch (cc) {
   1038             else => switch (m) {
   1039                 .sib => .rm_sib,
   1040                 .rip => .rm_rip,
   1041                 else => unreachable,
   1042             },
   1043             .z_and_np => unreachable,
   1044             .nz_or_p => switch (m) {
   1045                 .sib => .pseudo_cmov_nz_or_p_rm_sib,
   1046                 .rip => .pseudo_cmov_nz_or_p_rm_rip,
   1047                 else => unreachable,
   1048             },
   1049         },
   1050         .data = .{ .rx = .{
   1051             .fixes = switch (cc) {
   1052                 else => Mir.Inst.Fixes.fromCondition(cc),
   1053                 .z_and_np => unreachable,
   1054                 .nz_or_p => ._,
   1055             },
   1056             .r1 = reg,
   1057             .payload = switch (m) {
   1058                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1059                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1060                 else => unreachable,
   1061             },
   1062         } },
   1063     });
   1064 }
   1065 
   1066 fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void {
   1067     _ = try self.addInst(.{
   1068         .tag = switch (cc) {
   1069             else => .set,
   1070             .z_and_np, .nz_or_p => .pseudo,
   1071         },
   1072         .ops = switch (cc) {
   1073             else => .r,
   1074             .z_and_np => .pseudo_set_z_and_np_r,
   1075             .nz_or_p => .pseudo_set_nz_or_p_r,
   1076         },
   1077         .data = switch (cc) {
   1078             else => .{ .r = .{
   1079                 .fixes = Mir.Inst.Fixes.fromCondition(cc),
   1080                 .r1 = reg,
   1081             } },
   1082             .z_and_np, .nz_or_p => .{ .rr = .{
   1083                 .r1 = reg,
   1084                 .r2 = (try self.register_manager.allocReg(null, gp)).to8(),
   1085             } },
   1086         },
   1087     });
   1088 }
   1089 
   1090 fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void {
   1091     const payload = switch (m) {
   1092         .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1093         .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1094         else => unreachable,
   1095     };
   1096     _ = try self.addInst(.{
   1097         .tag = switch (cc) {
   1098             else => .set,
   1099             .z_and_np, .nz_or_p => .pseudo,
   1100         },
   1101         .ops = switch (cc) {
   1102             else => switch (m) {
   1103                 .sib => .m_sib,
   1104                 .rip => .m_rip,
   1105                 else => unreachable,
   1106             },
   1107             .z_and_np => switch (m) {
   1108                 .sib => .pseudo_set_z_and_np_m_sib,
   1109                 .rip => .pseudo_set_z_and_np_m_rip,
   1110                 else => unreachable,
   1111             },
   1112             .nz_or_p => switch (m) {
   1113                 .sib => .pseudo_set_nz_or_p_m_sib,
   1114                 .rip => .pseudo_set_nz_or_p_m_rip,
   1115                 else => unreachable,
   1116             },
   1117         },
   1118         .data = switch (cc) {
   1119             else => .{ .x = .{
   1120                 .fixes = Mir.Inst.Fixes.fromCondition(cc),
   1121                 .payload = payload,
   1122             } },
   1123             .z_and_np, .nz_or_p => .{ .rx = .{
   1124                 .r1 = (try self.register_manager.allocReg(null, gp)).to8(),
   1125                 .payload = payload,
   1126             } },
   1127         },
   1128     });
   1129 }
   1130 
   1131 fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index {
   1132     return self.addInst(.{
   1133         .tag = .jmp,
   1134         .ops = .inst,
   1135         .data = .{ .inst = .{
   1136             .inst = target,
   1137         } },
   1138     });
   1139 }
   1140 
   1141 fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Inst.Index {
   1142     return self.addInst(.{
   1143         .tag = switch (cc) {
   1144             else => .j,
   1145             .z_and_np, .nz_or_p => .pseudo,
   1146         },
   1147         .ops = switch (cc) {
   1148             else => .inst,
   1149             .z_and_np => .pseudo_j_z_and_np_inst,
   1150             .nz_or_p => .pseudo_j_nz_or_p_inst,
   1151         },
   1152         .data = .{ .inst = .{
   1153             .fixes = switch (cc) {
   1154                 else => Mir.Inst.Fixes.fromCondition(cc),
   1155                 .z_and_np, .nz_or_p => ._,
   1156             },
   1157             .inst = target,
   1158         } },
   1159     });
   1160 }
   1161 
   1162 fn asmPlaceholder(self: *Self) !Mir.Inst.Index {
   1163     return self.addInst(.{
   1164         .tag = .pseudo,
   1165         .ops = .pseudo_dead_none,
   1166         .data = undefined,
   1167     });
   1168 }
   1169 
   1170 fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void {
   1171     _ = try self.addInst(.{
   1172         .tag = tag[1],
   1173         .ops = .none,
   1174         .data = .{ .none = .{
   1175             .fixes = tag[0],
   1176         } },
   1177     });
   1178 }
   1179 
   1180 fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void {
   1181     _ = try self.addInst(.{
   1182         .tag = .pseudo,
   1183         .ops = ops,
   1184         .data = undefined,
   1185     });
   1186 }
   1187 
   1188 fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void {
   1189     _ = try self.addInst(.{
   1190         .tag = tag[1],
   1191         .ops = .r,
   1192         .data = .{ .r = .{
   1193             .fixes = tag[0],
   1194             .r1 = reg,
   1195         } },
   1196     });
   1197 }
   1198 
   1199 fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void {
   1200     _ = try self.addInst(.{
   1201         .tag = tag[1],
   1202         .ops = switch (imm) {
   1203             .signed => .i_s,
   1204             .unsigned => .i_u,
   1205         },
   1206         .data = .{ .i = .{
   1207             .fixes = tag[0],
   1208             .i = switch (imm) {
   1209                 .signed => |s| @bitCast(u32, s),
   1210                 .unsigned => |u| @intCast(u32, u),
   1211             },
   1212         } },
   1213     });
   1214 }
   1215 
   1216 fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void {
   1217     _ = try self.addInst(.{
   1218         .tag = tag[1],
   1219         .ops = .rr,
   1220         .data = .{ .rr = .{
   1221             .fixes = tag[0],
   1222             .r1 = reg1,
   1223             .r2 = reg2,
   1224         } },
   1225     });
   1226 }
   1227 
   1228 fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void {
   1229     const ops: Mir.Inst.Ops = switch (imm) {
   1230         .signed => .ri_s,
   1231         .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64,
   1232     };
   1233     _ = try self.addInst(.{
   1234         .tag = tag[1],
   1235         .ops = ops,
   1236         .data = switch (ops) {
   1237             .ri_s, .ri_u => .{ .ri = .{
   1238                 .fixes = tag[0],
   1239                 .r1 = reg,
   1240                 .i = switch (imm) {
   1241                     .signed => |s| @bitCast(u32, s),
   1242                     .unsigned => |u| @intCast(u32, u),
   1243                 },
   1244             } },
   1245             .ri64 => .{ .rx = .{
   1246                 .fixes = tag[0],
   1247                 .r1 = reg,
   1248                 .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)),
   1249             } },
   1250             else => unreachable,
   1251         },
   1252     });
   1253 }
   1254 
   1255 fn asmRegisterRegisterRegister(
   1256     self: *Self,
   1257     tag: Mir.Inst.FixedTag,
   1258     reg1: Register,
   1259     reg2: Register,
   1260     reg3: Register,
   1261 ) !void {
   1262     _ = try self.addInst(.{
   1263         .tag = tag[1],
   1264         .ops = .rrr,
   1265         .data = .{ .rrr = .{
   1266             .fixes = tag[0],
   1267             .r1 = reg1,
   1268             .r2 = reg2,
   1269             .r3 = reg3,
   1270         } },
   1271     });
   1272 }
   1273 
   1274 fn asmRegisterRegisterRegisterImmediate(
   1275     self: *Self,
   1276     tag: Mir.Inst.FixedTag,
   1277     reg1: Register,
   1278     reg2: Register,
   1279     reg3: Register,
   1280     imm: Immediate,
   1281 ) !void {
   1282     _ = try self.addInst(.{
   1283         .tag = tag[1],
   1284         .ops = .rrri,
   1285         .data = .{ .rrri = .{
   1286             .fixes = tag[0],
   1287             .r1 = reg1,
   1288             .r2 = reg2,
   1289             .r3 = reg3,
   1290             .i = @intCast(u8, imm.unsigned),
   1291         } },
   1292     });
   1293 }
   1294 
   1295 fn asmRegisterRegisterImmediate(
   1296     self: *Self,
   1297     tag: Mir.Inst.FixedTag,
   1298     reg1: Register,
   1299     reg2: Register,
   1300     imm: Immediate,
   1301 ) !void {
   1302     _ = try self.addInst(.{
   1303         .tag = tag[1],
   1304         .ops = switch (imm) {
   1305             .signed => .rri_s,
   1306             .unsigned => .rri_u,
   1307         },
   1308         .data = .{ .rri = .{
   1309             .fixes = tag[0],
   1310             .r1 = reg1,
   1311             .r2 = reg2,
   1312             .i = switch (imm) {
   1313                 .signed => |s| @bitCast(u32, s),
   1314                 .unsigned => |u| @intCast(u32, u),
   1315             },
   1316         } },
   1317     });
   1318 }
   1319 
   1320 fn asmRegisterRegisterMemory(
   1321     self: *Self,
   1322     tag: Mir.Inst.FixedTag,
   1323     reg1: Register,
   1324     reg2: Register,
   1325     m: Memory,
   1326 ) !void {
   1327     _ = try self.addInst(.{
   1328         .tag = tag[1],
   1329         .ops = switch (m) {
   1330             .sib => .rrm_sib,
   1331             .rip => .rrm_rip,
   1332             else => unreachable,
   1333         },
   1334         .data = .{ .rrx = .{
   1335             .fixes = tag[0],
   1336             .r1 = reg1,
   1337             .r2 = reg2,
   1338             .payload = switch (m) {
   1339                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1340                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1341                 else => unreachable,
   1342             },
   1343         } },
   1344     });
   1345 }
   1346 
   1347 fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void {
   1348     _ = try self.addInst(.{
   1349         .tag = tag[1],
   1350         .ops = switch (m) {
   1351             .sib => .m_sib,
   1352             .rip => .m_rip,
   1353             else => unreachable,
   1354         },
   1355         .data = .{ .x = .{
   1356             .fixes = tag[0],
   1357             .payload = switch (m) {
   1358                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1359                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1360                 else => unreachable,
   1361             },
   1362         } },
   1363     });
   1364 }
   1365 
   1366 fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void {
   1367     _ = try self.addInst(.{
   1368         .tag = tag[1],
   1369         .ops = switch (m) {
   1370             .sib => .rm_sib,
   1371             .rip => .rm_rip,
   1372             else => unreachable,
   1373         },
   1374         .data = .{ .rx = .{
   1375             .fixes = tag[0],
   1376             .r1 = reg,
   1377             .payload = switch (m) {
   1378                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1379                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1380                 else => unreachable,
   1381             },
   1382         } },
   1383     });
   1384 }
   1385 
   1386 fn asmRegisterMemoryImmediate(
   1387     self: *Self,
   1388     tag: Mir.Inst.FixedTag,
   1389     reg: Register,
   1390     m: Memory,
   1391     imm: Immediate,
   1392 ) !void {
   1393     _ = try self.addInst(.{
   1394         .tag = tag[1],
   1395         .ops = switch (m) {
   1396             .sib => .rmi_sib,
   1397             .rip => .rmi_rip,
   1398             else => unreachable,
   1399         },
   1400         .data = .{ .rix = .{
   1401             .fixes = tag[0],
   1402             .r1 = reg,
   1403             .i = @intCast(u8, imm.unsigned),
   1404             .payload = switch (m) {
   1405                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1406                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1407                 else => unreachable,
   1408             },
   1409         } },
   1410     });
   1411 }
   1412 
   1413 fn asmRegisterRegisterMemoryImmediate(
   1414     self: *Self,
   1415     tag: Mir.Inst.FixedTag,
   1416     reg1: Register,
   1417     reg2: Register,
   1418     m: Memory,
   1419     imm: Immediate,
   1420 ) !void {
   1421     _ = try self.addInst(.{
   1422         .tag = tag[1],
   1423         .ops = switch (m) {
   1424             .sib => .rrmi_sib,
   1425             .rip => .rrmi_rip,
   1426             else => unreachable,
   1427         },
   1428         .data = .{ .rrix = .{
   1429             .fixes = tag[0],
   1430             .r1 = reg1,
   1431             .r2 = reg2,
   1432             .i = @intCast(u8, imm.unsigned),
   1433             .payload = switch (m) {
   1434                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1435                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1436                 else => unreachable,
   1437             },
   1438         } },
   1439     });
   1440 }
   1441 
   1442 fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void {
   1443     _ = try self.addInst(.{
   1444         .tag = tag[1],
   1445         .ops = switch (m) {
   1446             .sib => .mr_sib,
   1447             .rip => .mr_rip,
   1448             else => unreachable,
   1449         },
   1450         .data = .{ .rx = .{
   1451             .fixes = tag[0],
   1452             .r1 = reg,
   1453             .payload = switch (m) {
   1454                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1455                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1456                 else => unreachable,
   1457             },
   1458         } },
   1459     });
   1460 }
   1461 
   1462 fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void {
   1463     const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
   1464         .signed => |s| @bitCast(u32, s),
   1465         .unsigned => |u| @intCast(u32, u),
   1466     } });
   1467     assert(payload + 1 == switch (m) {
   1468         .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1469         .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1470         else => unreachable,
   1471     });
   1472     _ = try self.addInst(.{
   1473         .tag = tag[1],
   1474         .ops = switch (m) {
   1475             .sib => switch (imm) {
   1476                 .signed => .mi_sib_s,
   1477                 .unsigned => .mi_sib_u,
   1478             },
   1479             .rip => switch (imm) {
   1480                 .signed => .mi_rip_s,
   1481                 .unsigned => .mi_rip_u,
   1482             },
   1483             else => unreachable,
   1484         },
   1485         .data = .{ .x = .{
   1486             .fixes = tag[0],
   1487             .payload = payload,
   1488         } },
   1489     });
   1490 }
   1491 
   1492 fn asmMemoryRegisterRegister(
   1493     self: *Self,
   1494     tag: Mir.Inst.FixedTag,
   1495     m: Memory,
   1496     reg1: Register,
   1497     reg2: Register,
   1498 ) !void {
   1499     _ = try self.addInst(.{
   1500         .tag = tag[1],
   1501         .ops = switch (m) {
   1502             .sib => .mrr_sib,
   1503             .rip => .mrr_rip,
   1504             else => unreachable,
   1505         },
   1506         .data = .{ .rrx = .{
   1507             .fixes = tag[0],
   1508             .r1 = reg1,
   1509             .r2 = reg2,
   1510             .payload = switch (m) {
   1511                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1512                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1513                 else => unreachable,
   1514             },
   1515         } },
   1516     });
   1517 }
   1518 
   1519 fn asmMemoryRegisterImmediate(
   1520     self: *Self,
   1521     tag: Mir.Inst.FixedTag,
   1522     m: Memory,
   1523     reg: Register,
   1524     imm: Immediate,
   1525 ) !void {
   1526     _ = try self.addInst(.{
   1527         .tag = tag[1],
   1528         .ops = switch (m) {
   1529             .sib => .mri_sib,
   1530             .rip => .mri_rip,
   1531             else => unreachable,
   1532         },
   1533         .data = .{ .rix = .{
   1534             .fixes = tag[0],
   1535             .r1 = reg,
   1536             .i = @intCast(u8, imm.unsigned),
   1537             .payload = switch (m) {
   1538                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1539                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1540                 else => unreachable,
   1541             },
   1542         } },
   1543     });
   1544 }
   1545 
/// Emits the MIR for the whole function: prologue, body, epilogue, and a final
/// debug line marker.
///
/// For every calling convention except `.Naked`, the prologue and epilogue are
/// emitted with placeholder instructions first. After the body has been
/// generated, `computeFrameLayout` is called and the placeholders that turn out
/// to be needed are overwritten in place. For `.Naked` functions only the debug
/// pseudo instructions and the body are emitted.
fn gen(self: *Self) InnerError!void {
    const cc = self.fn_type.fnCallingConvention();
    if (cc != .Naked) {
        try self.asmRegister(.{ ._, .push }, .rbp);
        // Reserved slots; they are only filled in below once the frame layout is known.
        const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
        try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
        const backpatch_frame_align = try self.asmPlaceholder();
        const backpatch_frame_align_extra = try self.asmPlaceholder();
        const backpatch_stack_alloc = try self.asmPlaceholder();
        const backpatch_stack_alloc_extra = try self.asmPlaceholder();

        switch (self.ret_mcv.long) {
            .none, .unreach => {},
            .indirect => {
                // The address where to store the return value for the caller is in a
                // register which the callee is free to clobber. Therefore, we purposely
                // spill it to stack immediately.
                const frame_index =
                    try self.allocFrameIndex(FrameAlloc.initType(Type.usize, self.target.*));
                try self.genSetMem(
                    .{ .frame = frame_index },
                    0,
                    Type.usize,
                    self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off),
                );
                self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } };
                // NOTE(review): `self.ret_mcv.long` was reassigned on the previous line, so
                // this logs the new frame location rather than the value that was spilled.
                tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index });
            },
            else => unreachable,
        }

        try self.asmPseudo(.pseudo_dbg_prologue_end_none);

        try self.genBody(self.air.getMainBody());

        // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
        // Example:
        // pub fn main() void {
        //     maybeErr() catch return;
        //     unreachable;
        // }
        // Eliding the reloc will cause a miscompilation in this case.
        // Point every recorded exit jump at the instruction about to be emitted,
        // which is the first instruction of the epilogue.
        for (self.exitlude_jump_relocs.items) |jmp_reloc| {
            self.mir_instructions.items(.data)[jmp_reloc].inst.inst =
                @intCast(u32, self.mir_instructions.len);
        }

        try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
        const backpatch_stack_dealloc = try self.asmPlaceholder();
        const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
        try self.asmRegister(.{ ._, .pop }, .rbp);
        try self.asmOpOnly(.{ ._, .ret });

        // The body is fully generated, so the frame requirements are now final.
        const frame_layout = try self.computeFrameLayout();
        // An all-ones mask would clear no bits of rsp, so no `and` is needed.
        const need_frame_align = frame_layout.stack_mask != math.maxInt(u32);
        const need_stack_adjust = frame_layout.stack_adjust > 0;
        const need_save_reg = frame_layout.save_reg_list.count() > 0;
        if (need_frame_align) {
            // Mask with the low 12 bits cleared.
            const page_align = @as(u32, math.maxInt(u32)) << 12;
            // The plain `and` clears at most the low 12 bits of rsp.
            self.mir_instructions.set(backpatch_frame_align, .{
                .tag = .@"and",
                .ops = .ri_s,
                .data = .{ .ri = .{
                    .r1 = .rsp,
                    .i = @max(frame_layout.stack_mask, page_align),
                } },
            });
            // When the requested mask clears more than the low 12 bits, the remaining
            // bits are handed to a separate pseudo instruction (presumably so the
            // stack is probed while being aligned; see Lower for the expansion).
            if (frame_layout.stack_mask < page_align) {
                self.mir_instructions.set(backpatch_frame_align_extra, .{
                    .tag = .pseudo,
                    .ops = .pseudo_probe_align_ri_s,
                    .data = .{ .ri = .{
                        .r1 = .rsp,
                        .i = ~frame_layout.stack_mask & page_align,
                    } },
                });
            }
        }
        if (need_stack_adjust) {
            const page_size: u32 = 1 << 12;
            // Three strategies, chosen by the size of the adjustment.
            if (frame_layout.stack_adjust <= page_size) {
                // Small adjustment: a single `sub rsp, imm`.
                self.mir_instructions.set(backpatch_stack_alloc, .{
                    .tag = .sub,
                    .ops = .ri_s,
                    .data = .{ .ri = .{
                        .r1 = .rsp,
                        .i = frame_layout.stack_adjust,
                    } },
                });
            } else if (frame_layout.stack_adjust <
                page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
            {
                // Medium adjustment: one pseudo instruction, expanded during lowering.
                self.mir_instructions.set(backpatch_stack_alloc, .{
                    .tag = .pseudo,
                    .ops = .pseudo_probe_adjust_unrolled_ri_s,
                    .data = .{ .ri = .{
                        .r1 = .rsp,
                        .i = frame_layout.stack_adjust,
                    } },
                });
            } else {
                // Large adjustment: a setup pseudo plus a loop pseudo, both using rax
                // as the second register; this consumes the extra placeholder too.
                self.mir_instructions.set(backpatch_stack_alloc, .{
                    .tag = .pseudo,
                    .ops = .pseudo_probe_adjust_setup_rri_s,
                    .data = .{ .rri = .{
                        .r1 = .rsp,
                        .r2 = .rax,
                        .i = frame_layout.stack_adjust,
                    } },
                });
                self.mir_instructions.set(backpatch_stack_alloc_extra, .{
                    .tag = .pseudo,
                    .ops = .pseudo_probe_adjust_loop_rr,
                    .data = .{ .rr = .{
                        .r1 = .rsp,
                        .r2 = .rax,
                    } },
                });
            }
        }
        // rsp was modified in the prologue, so the epilogue restores it from rbp.
        if (need_frame_align or need_stack_adjust) {
            self.mir_instructions.set(backpatch_stack_dealloc, .{
                .tag = .mov,
                .ops = .rr,
                .data = .{ .rr = .{
                    .r1 = .rsp,
                    .r2 = .rbp,
                } },
            });
        }
        // The same register list is pushed in the prologue and popped in the epilogue.
        if (need_save_reg) {
            self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
                .tag = .pseudo,
                .ops = .pseudo_push_reg_list,
                .data = .{ .reg_list = frame_layout.save_reg_list },
            });
            self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{
                .tag = .pseudo,
                .ops = .pseudo_pop_reg_list,
                .data = .{ .reg_list = frame_layout.save_reg_list },
            });
        }
    } else {
        // Naked functions get no prologue or epilogue instructions, only debug markers.
        try self.asmPseudo(.pseudo_dbg_prologue_end_none);
        try self.genBody(self.air.getMainBody());
        try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
    }

    // Drop them off at the rbrace.
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = .pseudo_dbg_line_line_column,
        .data = .{ .line_column = .{
            .line = self.end_di_line,
            .column = self.end_di_column,
        } },
    });
}
   1704 
/// Lowers every AIR instruction in `body` by dispatching on its tag to the
/// matching `air*` handler.
///
/// Instructions that liveness reports as unused are skipped unless
/// `Air.mustLower` says otherwise. After each handled instruction this asserts
/// that no register locks are left behind and, when runtime safety is enabled,
/// that the handler performed its bookkeeping and that the register manager's
/// view of allocated registers matches the tracked instruction values.
fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
    const air_tags = self.air.instructions.items(.tag);

    for (body) |inst| {
        if (builtin.mode == .Debug) {
            // Recorded before the skip check below, so skipped instructions are
            // mapped as well.
            const mir_inst = @intCast(Mir.Inst.Index, self.mir_instructions.len);
            try self.mir_to_air_map.put(self.gpa, mir_inst, inst);
        }

        if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) continue;
        wip_mir_log.debug("{}", .{self.fmtAir(inst)});
        verbose_tracking_log.debug("{}", .{self.fmtTracking()});

        const old_air_bookkeeping = self.air_bookkeeping;
        // Reserve room for this instruction's tracking entry up front;
        // `finishAirResult` inserts it with an assume-capacity call.
        try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1);
        switch (air_tags[inst]) {
            // zig fmt: off
            .not,
            => |tag| try self.airUnOp(inst, tag),

            .add,
            .addwrap,
            .sub,
            .subwrap,
            .bool_and,
            .bool_or,
            .bit_and,
            .bit_or,
            .xor,
            .min,
            .max,
            => |tag| try self.airBinOp(inst, tag),

            .ptr_add, .ptr_sub => |tag| try self.airPtrArithmetic(inst, tag),

            .shr, .shr_exact => try self.airShlShrBinOp(inst),
            .shl, .shl_exact => try self.airShlShrBinOp(inst),

            .mul             => try self.airMulDivBinOp(inst),
            .mulwrap         => try self.airMulDivBinOp(inst),
            .rem             => try self.airMulDivBinOp(inst),
            .mod             => try self.airMulDivBinOp(inst),

            .add_sat         => try self.airAddSat(inst),
            .sub_sat         => try self.airSubSat(inst),
            .mul_sat         => try self.airMulSat(inst),
            .shl_sat         => try self.airShlSat(inst),
            .slice           => try self.airSlice(inst),

            .sin,
            .cos,
            .tan,
            .exp,
            .exp2,
            .log,
            .log2,
            .log10,
            .round,
            => try self.airUnaryMath(inst),

            .floor => try self.airRound(inst, 0b1_0_01),
            .ceil => try self.airRound(inst, 0b1_0_10),
            .trunc_float => try self.airRound(inst, 0b1_0_11),
            .sqrt => try self.airSqrt(inst),
            .neg, .fabs => try self.airFloatSign(inst),

            .add_with_overflow => try self.airAddSubWithOverflow(inst),
            .sub_with_overflow => try self.airAddSubWithOverflow(inst),
            .mul_with_overflow => try self.airMulWithOverflow(inst),
            .shl_with_overflow => try self.airShlWithOverflow(inst),

            .div_float, .div_trunc, .div_floor, .div_exact => try self.airMulDivBinOp(inst),

            .cmp_lt  => try self.airCmp(inst, .lt),
            .cmp_lte => try self.airCmp(inst, .lte),
            .cmp_eq  => try self.airCmp(inst, .eq),
            .cmp_gte => try self.airCmp(inst, .gte),
            .cmp_gt  => try self.airCmp(inst, .gt),
            .cmp_neq => try self.airCmp(inst, .neq),

            .cmp_vector => try self.airCmpVector(inst),
            .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst),

            .alloc           => try self.airAlloc(inst),
            .ret_ptr         => try self.airRetPtr(inst),
            .arg             => try self.airArg(inst),
            .assembly        => try self.airAsm(inst),
            .bitcast         => try self.airBitCast(inst),
            .block           => try self.airBlock(inst),
            .br              => try self.airBr(inst),
            .trap            => try self.airTrap(),
            .breakpoint      => try self.airBreakpoint(),
            .ret_addr        => try self.airRetAddr(inst),
            .frame_addr      => try self.airFrameAddress(inst),
            .fence           => try self.airFence(inst),
            .cond_br         => try self.airCondBr(inst),
            .dbg_stmt        => try self.airDbgStmt(inst),
            .fptrunc         => try self.airFptrunc(inst),
            .fpext           => try self.airFpext(inst),
            .intcast         => try self.airIntCast(inst),
            .trunc           => try self.airTrunc(inst),
            .bool_to_int     => try self.airBoolToInt(inst),
            .is_non_null     => try self.airIsNonNull(inst),
            .is_non_null_ptr => try self.airIsNonNullPtr(inst),
            .is_null         => try self.airIsNull(inst),
            .is_null_ptr     => try self.airIsNullPtr(inst),
            .is_non_err      => try self.airIsNonErr(inst),
            .is_non_err_ptr  => try self.airIsNonErrPtr(inst),
            .is_err          => try self.airIsErr(inst),
            .is_err_ptr      => try self.airIsErrPtr(inst),
            .load            => try self.airLoad(inst),
            .loop            => try self.airLoop(inst),
            .ptrtoint        => try self.airPtrToInt(inst),
            .ret             => try self.airRet(inst),
            .ret_load        => try self.airRetLoad(inst),
            .store           => try self.airStore(inst, false),
            .store_safe      => try self.airStore(inst, true),
            .struct_field_ptr=> try self.airStructFieldPtr(inst),
            .struct_field_val=> try self.airStructFieldVal(inst),
            .array_to_slice  => try self.airArrayToSlice(inst),
            .int_to_float    => try self.airIntToFloat(inst),
            .float_to_int    => try self.airFloatToInt(inst),
            .cmpxchg_strong  => try self.airCmpxchg(inst),
            .cmpxchg_weak    => try self.airCmpxchg(inst),
            .atomic_rmw      => try self.airAtomicRmw(inst),
            .atomic_load     => try self.airAtomicLoad(inst),
            .memcpy          => try self.airMemcpy(inst),
            .memset          => try self.airMemset(inst, false),
            .memset_safe     => try self.airMemset(inst, true),
            .set_union_tag   => try self.airSetUnionTag(inst),
            .get_union_tag   => try self.airGetUnionTag(inst),
            .clz             => try self.airClz(inst),
            .ctz             => try self.airCtz(inst),
            .popcount        => try self.airPopcount(inst),
            .byte_swap       => try self.airByteSwap(inst),
            .bit_reverse     => try self.airBitReverse(inst),
            .tag_name        => try self.airTagName(inst),
            .error_name      => try self.airErrorName(inst),
            .splat           => try self.airSplat(inst),
            .select          => try self.airSelect(inst),
            .shuffle         => try self.airShuffle(inst),
            .reduce          => try self.airReduce(inst),
            .aggregate_init  => try self.airAggregateInit(inst),
            .union_init      => try self.airUnionInit(inst),
            .prefetch        => try self.airPrefetch(inst),
            .mul_add         => try self.airMulAdd(inst),
            .addrspace_cast  => return self.fail("TODO implement addrspace_cast", .{}),

            .@"try"          => try self.airTry(inst),
            .try_ptr         => try self.airTryPtr(inst),

            .dbg_var_ptr,
            .dbg_var_val,
            => try self.airDbgVar(inst),

            .dbg_inline_begin,
            .dbg_inline_end,
            => try self.airDbgInline(inst),

            .dbg_block_begin,
            .dbg_block_end,
            => try self.airDbgBlock(inst),

            .call              => try self.airCall(inst, .auto),
            .call_always_tail  => try self.airCall(inst, .always_tail),
            .call_never_tail   => try self.airCall(inst, .never_tail),
            .call_never_inline => try self.airCall(inst, .never_inline),

            .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
            .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
            .atomic_store_release   => try self.airAtomicStore(inst, .Release),
            .atomic_store_seq_cst   => try self.airAtomicStore(inst, .SeqCst),

            .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0),
            .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1),
            .struct_field_ptr_index_2 => try self.airStructFieldPtrIndex(inst, 2),
            .struct_field_ptr_index_3 => try self.airStructFieldPtrIndex(inst, 3),

            .field_parent_ptr => try self.airFieldParentPtr(inst),

            .switch_br       => try self.airSwitchBr(inst),
            .slice_ptr       => try self.airSlicePtr(inst),
            .slice_len       => try self.airSliceLen(inst),

            .ptr_slice_len_ptr => try self.airPtrSliceLenPtr(inst),
            .ptr_slice_ptr_ptr => try self.airPtrSlicePtrPtr(inst),

            .array_elem_val      => try self.airArrayElemVal(inst),
            .slice_elem_val      => try self.airSliceElemVal(inst),
            .slice_elem_ptr      => try self.airSliceElemPtr(inst),
            .ptr_elem_val        => try self.airPtrElemVal(inst),
            .ptr_elem_ptr        => try self.airPtrElemPtr(inst),

            .constant => unreachable, // excluded from function bodies
            .const_ty => unreachable, // excluded from function bodies
            .unreach  => if (self.wantSafety()) try self.airTrap() else self.finishAirBookkeeping(),

            .optional_payload           => try self.airOptionalPayload(inst),
            .optional_payload_ptr       => try self.airOptionalPayloadPtr(inst),
            .optional_payload_ptr_set   => try self.airOptionalPayloadPtrSet(inst),
            .unwrap_errunion_err        => try self.airUnwrapErrUnionErr(inst),
            .unwrap_errunion_payload    => try self.airUnwrapErrUnionPayload(inst),
            .unwrap_errunion_err_ptr    => try self.airUnwrapErrUnionErrPtr(inst),
            .unwrap_errunion_payload_ptr=> try self.airUnwrapErrUnionPayloadPtr(inst),
            .errunion_payload_ptr_set   => try self.airErrUnionPayloadPtrSet(inst),
            .err_return_trace           => try self.airErrReturnTrace(inst),
            .set_err_return_trace       => try self.airSetErrReturnTrace(inst),
            .save_err_return_trace_index=> try self.airSaveErrReturnTraceIndex(inst),

            .wrap_optional         => try self.airWrapOptional(inst),
            .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
            .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),

            .add_optimized,
            .addwrap_optimized,
            .sub_optimized,
            .subwrap_optimized,
            .mul_optimized,
            .mulwrap_optimized,
            .div_float_optimized,
            .div_trunc_optimized,
            .div_floor_optimized,
            .div_exact_optimized,
            .rem_optimized,
            .mod_optimized,
            .neg_optimized,
            .cmp_lt_optimized,
            .cmp_lte_optimized,
            .cmp_eq_optimized,
            .cmp_gte_optimized,
            .cmp_gt_optimized,
            .cmp_neq_optimized,
            .cmp_vector_optimized,
            .reduce_optimized,
            .float_to_int_optimized,
            => return self.fail("TODO implement optimized float mode", .{}),

            .is_named_enum_value => return self.fail("TODO implement is_named_enum_value", .{}),
            .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}),
            .vector_store_elem => return self.fail("TODO implement vector_store_elem", .{}),

            .c_va_arg => return self.fail("TODO implement c_va_arg", .{}),
            .c_va_copy => return self.fail("TODO implement c_va_copy", .{}),
            .c_va_end => return self.fail("TODO implement c_va_end", .{}),
            .c_va_start => return self.fail("TODO implement c_va_start", .{}),

            .wasm_memory_size => unreachable,
            .wasm_memory_grow => unreachable,

            .work_item_id => unreachable,
            .work_group_size => unreachable,
            .work_group_id => unreachable,
            // zig fmt: on
        }

        // Every handler must release the register locks it took.
        assert(!self.register_manager.lockedRegsExist());

        if (std.debug.runtime_safety) {
            // Each handler is expected to bump the counter at least once, which
            // `finishAirBookkeeping` does.
            if (self.air_bookkeeping < old_air_bookkeeping + 1) {
                std.debug.panic("in codegen.zig, handling of AIR instruction %{d} ('{}') did not do proper bookkeeping. Look for a missing call to finishAir.", .{ inst, air_tags[inst] });
            }

            { // check consistency of tracked registers
                // Walk the registers whose "free" bit is unset, i.e. the allocated ones,
                // and verify the instruction recorded for each one really lives there.
                var it = self.register_manager.free_registers.iterator(.{ .kind = .unset });
                while (it.next()) |index| {
                    const tracked_inst = self.register_manager.registers[index];
                    const tracking = self.getResolvedInstValue(tracked_inst);
                    assert(RegisterManager.indexOfRegIntoTracked(tracking.getReg().?).? == index);
                }
            }
        }
    }
    verbose_tracking_log.debug("{}", .{self.fmtTracking()});
}
   1979 
/// Generates the code for a lazily emitted symbol.
///
/// Only enum types are handled: the emitted routine compares the enum value
/// against every field in turn and, on a match, stores a pointer/length pair
/// through the first parameter register before returning. Any other type fails
/// with a "TODO implement" error.
fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void {
    switch (lazy_sym.ty.zigTypeTag()) {
        .Enum => {
            const enum_ty = lazy_sym.ty;
            wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(self.bin_file.options.module.?)});

            // The first two C ABI integer parameter registers carry the inputs;
            // lock them so temporaries allocated below cannot take them.
            const param_regs = abi.getCAbiIntParamRegs(self.target.*);
            const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*);
            defer for (param_locks) |lock| self.register_manager.unlockReg(lock);

            // param 0: address the result is written through; param 1: the enum value.
            const ret_reg = param_regs[0];
            const enum_mcv = MCValue{ .register = param_regs[1] };

            // One "jump to exit" relocation per enum field.
            var exitlude_jump_relocs = try self.gpa.alloc(u32, enum_ty.enumFieldCount());
            defer self.gpa.free(exitlude_jump_relocs);

            // data_reg receives the address of the enum's lazy `.const_data` symbol.
            const data_reg = try self.register_manager.allocReg(null, gp);
            const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
            defer self.register_manager.unlockReg(data_lock);
            try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty });

            var data_off: i32 = 0;
            for (
                exitlude_jump_relocs,
                enum_ty.enumFields().keys(),
                0..,
            ) |*exitlude_jump_reloc, tag_name, index| {
                var tag_pl = Value.Payload.U32{
                    .base = .{ .tag = .enum_field_index },
                    .data = @intCast(u32, index),
                };
                const tag_val = Value.initPayload(&tag_pl.base);
                const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val });
                // Compare against this field's value and skip the stores on mismatch.
                try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
                const skip_reloc = try self.asmJccReloc(undefined, .ne);

                // Store `data_reg + data_off` at offset 0 and the name length at offset 8.
                try self.genSetMem(
                    .{ .reg = ret_reg },
                    0,
                    Type.usize,
                    .{ .register_offset = .{ .reg = data_reg, .off = data_off } },
                );
                try self.genSetMem(.{ .reg = ret_reg }, 8, Type.usize, .{ .immediate = tag_name.len });

                exitlude_jump_reloc.* = try self.asmJmpReloc(undefined);
                try self.performReloc(skip_reloc);

                // Advance past this name plus one extra byte (presumably a
                // terminator in the const data; confirm against its emitter).
                data_off += @intCast(i32, tag_name.len + 1);
            }

            // Reached only when no field matched.
            try self.airTrap();

            // All successful matches jump here and return.
            for (exitlude_jump_relocs) |reloc| try self.performReloc(reloc);
            try self.asmOpOnly(.{ ._, .ret });
        },
        else => return self.fail(
            "TODO implement {s} for {}",
            .{ @tagName(lazy_sym.kind), lazy_sym.ty.fmt(self.bin_file.options.module.?) },
        ),
    }
}
   2041 
   2042 fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void {
   2043     const reg = value.getReg() orelse return;
   2044     if (self.register_manager.isRegFree(reg)) {
   2045         self.register_manager.getRegAssumeFree(reg, inst);
   2046     }
   2047 }
   2048 
   2049 fn freeValue(self: *Self, value: MCValue) void {
   2050     switch (value) {
   2051         .register => |reg| {
   2052             self.register_manager.freeReg(reg);
   2053         },
   2054         .register_offset => |reg_off| {
   2055             self.register_manager.freeReg(reg_off.reg);
   2056         },
   2057         .register_overflow => |reg_ov| {
   2058             self.register_manager.freeReg(reg_ov.reg);
   2059             self.eflags_inst = null;
   2060         },
   2061         .eflags => {
   2062             self.eflags_inst = null;
   2063         },
   2064         else => {}, // TODO process stack allocation death
   2065     }
   2066 }
   2067 
   2068 fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) void {
   2069     if (bt.feed()) if (Air.refToIndex(operand)) |inst| self.processDeath(inst);
   2070 }
   2071 
   2072 /// Asserts there is already capacity to insert into top branch inst_table.
   2073 fn processDeath(self: *Self, inst: Air.Inst.Index) void {
   2074     switch (self.air.instructions.items(.tag)[inst]) {
   2075         .constant, .const_ty => unreachable,
   2076         else => self.inst_tracking.getPtr(inst).?.die(self, inst),
   2077     }
   2078 }
   2079 
/// Records that the current AIR instruction has been fully handled.
/// Called directly when there are no operands and the instruction is always
/// unreferenced; `finishAirResult` calls it for every other instruction.
fn finishAirBookkeeping(self: *Self) void {
    // The counter is only touched under this guard; `genBody` reads it under the
    // same guard to detect handlers that forgot to finish their instruction.
    if (std.debug.runtime_safety) {
        self.air_bookkeeping += 1;
    }
}
   2086 
   2087 fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void {
   2088     if (self.liveness.isUnused(inst)) switch (result) {
   2089         .none, .dead, .unreach => {},
   2090         else => unreachable, // Why didn't the result die?
   2091     } else {
   2092         tracking_log.debug("%{d} => {} (birth)", .{ inst, result });
   2093         self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result));
   2094         // In some cases, an operand may be reused as the result.
   2095         // If that operand died and was a register, it was freed by
   2096         // processDeath, so we have to "re-allocate" the register.
   2097         self.getValue(result, inst);
   2098     }
   2099     self.finishAirBookkeeping();
   2100 }
   2101 
   2102 fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void {
   2103     var tomb_bits = self.liveness.getTombBits(inst);
   2104     for (operands) |op| {
   2105         const dies = @truncate(u1, tomb_bits) != 0;
   2106         tomb_bits >>= 1;
   2107         if (!dies) continue;
   2108         const op_int = @enumToInt(op);
   2109         if (op_int < Air.Inst.Ref.typed_value_map.len) continue;
   2110         const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len);
   2111         self.processDeath(op_index);
   2112     }
   2113     self.finishAirResult(inst, result);
   2114 }
   2115 
/// Result of `computeFrameLayout`, consumed by `gen` to backpatch the
/// prologue and epilogue placeholders.
const FrameLayout = struct {
    /// Mask that `gen` ANDs into rsp in the prologue; all-ones means no
    /// alignment instruction is emitted.
    stack_mask: u32,
    /// Amount that `gen` subtracts from rsp in the prologue; zero means no
    /// adjustment is emitted.
    stack_adjust: u32,
    /// Registers pushed in the prologue and popped in the epilogue.
    save_reg_list: Mir.RegisterList,
};
   2121 
   2122 fn setFrameLoc(
   2123     self: *Self,
   2124     frame_index: FrameIndex,
   2125     base: Register,
   2126     offset: *i32,
   2127     comptime aligned: bool,
   2128 ) void {
   2129     const frame_i = @enumToInt(frame_index);
   2130     if (aligned) {
   2131         const alignment = @as(i32, 1) << self.frame_allocs.items(.abi_align)[frame_i];
   2132         offset.* = mem.alignForwardGeneric(i32, offset.*, alignment);
   2133     }
   2134     self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* });
   2135     offset.* += self.frame_allocs.items(.abi_size)[frame_i];
   2136 }
   2137 
/// Assigns a location (`frame_locs`) to every frame allocation and returns the
/// stack mask, stack adjustment and callee-preserved register list that the
/// prologue and epilogue need.
///
/// The named frames `base_ptr`, `ret_addr` and `args_frame` are addressed
/// relative to rbp; `call_frame`, `stack_frame` and all remaining allocations
/// are addressed relative to rsp.
fn computeFrameLayout(self: *Self) !FrameLayout {
    const frame_allocs_len = self.frame_allocs.len;
    try self.frame_locs.resize(self.gpa, frame_allocs_len);
    // Scratch list of every frame index that is not one of the named frames.
    const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count);
    defer self.gpa.free(stack_frame_order);

    const frame_size = self.frame_allocs.items(.abi_size);
    const frame_align = self.frame_allocs.items(.abi_align);
    const frame_offset = self.frame_locs.items(.disp);

    for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index|
        frame_order.* = @intToEnum(FrameIndex, frame_index);
    {
        // Sort by descending alignment so the most strictly aligned allocations
        // are placed first.
        const SortContext = struct {
            frame_align: @TypeOf(frame_align),
            pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool {
                return context.frame_align[@enumToInt(lhs)] > context.frame_align[@enumToInt(rhs)];
            }
        };
        const sort_context = SortContext{ .frame_align = frame_align };
        std.sort.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan);
    }

    // Alignments are stored as log2 values (they are used as shift amounts below).
    const call_frame_align = frame_align[@enumToInt(FrameIndex.call_frame)];
    const stack_frame_align = frame_align[@enumToInt(FrameIndex.stack_frame)];
    const args_frame_align = frame_align[@enumToInt(FrameIndex.args_frame)];
    const needed_align = @max(call_frame_align, stack_frame_align);
    // rsp only has to be realigned when the frame needs more alignment than the
    // args frame already has.
    const need_align_stack = needed_align > args_frame_align;

    // Create list of registers to save in the prologue.
    // TODO handle register classes
    var save_reg_list = Mir.RegisterList{};
    const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*);
    for (callee_preserved_regs) |reg| {
        if (self.register_manager.isRegAllocated(reg)) {
            save_reg_list.push(callee_preserved_regs, reg);
        }
    }

    // rbp-relative frames start after 8 bytes per saved register and are packed
    // back to back without alignment padding.
    var rbp_offset = @intCast(i32, save_reg_list.count() * 8);
    self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
    self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
    self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
    // When rsp is not realigned, the total size is aligned with the args frame
    // displacement added in, and that displacement is subtracted again afterwards.
    const stack_frame_align_offset =
        if (need_align_stack) 0 else frame_offset[@enumToInt(FrameIndex.args_frame)];

    // rsp-relative frames: call frame first, then the stack frame, then every
    // other allocation in the sorted order, each aligned to its own requirement.
    var rsp_offset: i32 = 0;
    self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
    self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true);
    for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true);
    rsp_offset += stack_frame_align_offset;
    rsp_offset = mem.alignForwardGeneric(i32, rsp_offset, @as(i32, 1) << needed_align);
    rsp_offset -= stack_frame_align_offset;
    // Overwrite the call frame's recorded size with the distance from the stack
    // frame's displacement to the end of the rsp-relative area.
    frame_size[@enumToInt(FrameIndex.call_frame)] =
        @intCast(u31, rsp_offset - frame_offset[@enumToInt(FrameIndex.stack_frame)]);

    return .{
        // All-ones (no low bits cleared) when no realignment is required.
        .stack_mask = @as(u32, math.maxInt(u32)) << (if (need_align_stack) needed_align else 0),
        .stack_adjust = @intCast(u32, rsp_offset - frame_offset[@enumToInt(FrameIndex.call_frame)]),
        .save_reg_list = save_reg_list,
    };
}
   2200 
        /// Returns the alignment, in bytes, guaranteed for the address described by `frame_addr`.
        ///
        /// This is the alignment of the frame allocation itself, limited by the largest power of
        /// two that divides `frame_addr.off`. A zero offset keeps the allocation's alignment.
   2201 fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) u32 {
   2202     const alloc_align = @as(u32, 1) << self.frame_allocs.get(@enumToInt(frame_addr.index)).abi_align;
            // The lowest set bit of the offset is the largest power of two dividing it. Two's
            // complement negation isolates that bit and also works for negative offsets.
            const off_bits = @bitCast(u32, frame_addr.off);
            const off_align = off_bits & (@as(u32, 0) -% off_bits);
            // Masking the offset with `alloc_align - 1` instead would report 0 for a zero offset
            // and non-powers-of-two such as 24 for an offset of 24 into a 32-byte-aligned slot.
   2203     return if (off_align == 0) alloc_align else @min(alloc_align, off_align);
   2204 }
   2205 
        /// Returns a frame index suitable for `alloc`, reusing a free slot when possible.
        ///
        /// The alignment recorded for the `stack_frame` index is always raised to at least
        /// `alloc.abi_align`. A free index is reused only if its recorded size equals
        /// `alloc.abi_size` exactly; its alignment is then raised to cover the new request and
        /// it is removed from `free_frame_indices`. Otherwise `alloc` is appended to
        /// `frame_allocs`, and any error from that append is propagated.
   2206 fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex {
   2207     const frame_allocs_slice = self.frame_allocs.slice();
   2208     const frame_size = frame_allocs_slice.items(.abi_size);
   2209     const frame_align = frame_allocs_slice.items(.abi_align);
   2210 
            // The overall stack frame must be at least as aligned as any allocation placed in it.
   2211     const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)];
   2212     stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align);
   2213 
            // Look for a previously freed slot of exactly the requested size.
   2214     for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| {
   2215         const abi_size = frame_size[@enumToInt(frame_index)];
   2216         if (abi_size != alloc.abi_size) continue;
                // Alignment only ever grows, so the slot stays valid for this request.
   2217         const abi_align = &frame_align[@enumToInt(frame_index)];
   2218         abi_align.* = @max(abi_align.*, alloc.abi_align);
   2219 
   2220         _ = self.free_frame_indices.swapRemoveAt(free_i);
   2221         return frame_index;
   2222     }
            // No reusable slot: the new index is the position the appended entry will occupy.
   2223     const frame_index = @intToEnum(FrameIndex, self.frame_allocs.len);
   2224     try self.frame_allocs.append(self.gpa, alloc);
   2225     return frame_index;
   2226 }
   2227 
   2228 /// Reserves stack-frame storage for the pointee of the pointer-typed instruction `inst`.
        /// The slot is sized from the pointee type and aligned to the pointer's alignment (at
        /// least 1). Fails if the pointee's ABI size does not fit in a `u32`.
   2229 fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex {
   2230     const ptr_ty = self.air.typeOfIndex(inst);
   2231     const pointee_ty = ptr_ty.childType();
   2232     const pointee_size = math.cast(u32, pointee_ty.abiSize(self.target.*)) orelse {
   2233         const mod = self.bin_file.options.module.?;
   2234         return self.fail("type '{}' too big to fit into stack frame", .{pointee_ty.fmt(mod)});
   2235     };
   2236     const pointee_align = @max(ptr_ty.ptrAlignment(self.target.*), 1);
   2237     const frame_alloc = FrameAlloc.init(.{ .size = pointee_size, .alignment = pointee_align });
   2238     return self.allocFrameIndex(frame_alloc);
   2239 }
   2240 
        /// Allocates a home for the result of `inst`, using the instruction's own type.
        /// A register is only considered when `reg_ok` is true.
   2241 fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
            const inst_ty = self.air.typeOfIndex(inst);
   2242     return self.allocRegOrMemAdvanced(inst_ty, inst, reg_ok);
   2243 }
   2244 
        /// Allocates a home for a temporary of type `elem_ty` that is not owned by any AIR
        /// instruction. A register is only considered when `reg_ok` is true.
   2245 fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue {
            const no_owner: ?Air.Inst.Index = null;
   2246     return self.allocRegOrMemAdvanced(elem_ty, no_owner, reg_ok);
   2247 }
   2248 
        /// Allocates either a register or a stack-frame slot for a value of type `ty`.
        ///
        /// `inst` is the owner passed to the register manager (null for untracked temporaries).
        /// When `reg_ok` is true and the type's ABI size is within the per-type limit computed
        /// below, a register of the matching class is tried first; if none is available, or the
        /// type is not eligible, a frame index is allocated and a `.load_frame` value returned.
        /// Fails if the ABI size of `ty` does not fit in a `u32`.
   2249 fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
   2250     const abi_size = math.cast(u32, ty.abiSize(self.target.*)) orelse {
   2251         const mod = self.bin_file.options.module.?;
   2252         return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)});
   2253     };
   2254 
            // `break :need_mem` leaves this block early, which forces the memory path below.
   2255     if (reg_ok) need_mem: {
                // The switch yields the largest ABI size, in bytes, that may live in a register.
   2256         if (abi_size <= @as(u32, switch (ty.zigTypeTag()) {
   2257             .Float => switch (ty.floatBits(self.target.*)) {
   2258                 16, 32, 64, 128 => 16,
   2259                 80 => break :need_mem,
   2260                 else => unreachable,
   2261             },
   2262             .Vector => switch (ty.childType().zigTypeTag()) {
   2263                 .Float => switch (ty.childType().floatBits(self.target.*)) {
                            // With AVX the limit doubles from 16 to 32 bytes.
   2264                     16, 32, 64 => if (self.hasFeature(.avx)) 32 else 16,
   2265                     80, 128 => break :need_mem,
   2266                     else => unreachable,
   2267                 },
                        // Vectors of non-float elements always go to memory here.
   2268                 else => break :need_mem,
   2269             },
   2270             else => 8,
   2271         })) {
                    // A failed `tryAllocReg` is not an error; fall through to the frame slot.
   2272             if (self.register_manager.tryAllocReg(inst, regClassForType(ty))) |reg| {
   2273                 return MCValue{ .register = registerAlias(reg, abi_size) };
   2274             }
   2275         }
   2276     }
   2277 
   2278     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, self.target.*));
   2279     return .{ .load_frame = .{ .index = frame_index } };
   2280 }
   2281 
        /// Selects the register class for values of type `ty`: `sse` for floats and vectors,
        /// `gp` for every other type.
   2282 fn regClassForType(ty: Type) RegisterManager.RegisterBitSet {
   2283     const type_tag = ty.zigTypeTag();
   2284     if (type_tag == .Float or type_tag == .Vector) return sse;
   2285     return gp;
   2287 }
   2288 
        /// Snapshot of register-allocator state, filled in by `initRetroactiveState` and
        /// `saveRetroactiveState` and consumed by `restoreState`.
   2289 const State = struct {
            /// Owning instruction per tracked register. Only entries for registers that were not
            /// free at save time are written; the rest are left undefined.
   2290     registers: RegisterManager.TrackedRegisters,
            /// Copy of the tracking entry of each occupied register's owner at save time.
   2291     reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
            /// The register manager's free-register set at save time.
   2292     free_registers: RegisterManager.RegisterBitSet,
            /// Number of entries in `inst_tracking` when the snapshot was initialized.
   2293     inst_tracking_len: u32,
            /// Value of `scope_generation` when the snapshot was initialized.
   2294     scope_generation: u32,
   2295 };
   2296 
        /// Starts a snapshot by recording only the current `inst_tracking` length and the
        /// current `scope_generation`. The register-related fields are left undefined until
        /// `saveRetroactiveState` fills them in.
   2297 fn initRetroactiveState(self: *Self) State {
   2298     var state: State = undefined;
   2299     state.inst_tracking_len = @intCast(u32, self.inst_tracking.count());
   2300     state.scope_generation = self.scope_generation;
   2301     return state;
   2302 }
   2303 
        /// Completes a snapshot started by `initRetroactiveState` with the current register state.
        ///
        /// The eflags are spilled first if an instruction occupies them. Then, for every register
        /// that is not free, the owning instruction and a copy of its tracking entry are stored
        /// in `state`, followed by the free-register set itself.
   2304 fn saveRetroactiveState(self: *Self, state: *State) !void {
   2305     try self.spillEflagsIfOccupied();
   2306     const free_registers = self.register_manager.free_registers;
            // Iterating the unset bits of the free set visits exactly the occupied registers.
   2307     var it = free_registers.iterator(.{ .kind = .unset });
   2308     while (it.next()) |index| {
   2309         const tracked_inst = self.register_manager.registers[index];
   2310         state.registers[index] = tracked_inst;
                // The `.?` asserts that every occupied register's owner has a tracking entry.
   2311         state.reg_tracking[index] = self.inst_tracking.get(tracked_inst).?;
   2312     }
   2313     state.free_registers = free_registers;
   2314 }
   2315 
        /// Takes a complete snapshot of the current state in one step: initializes it with
        /// `initRetroactiveState` and immediately fills it with `saveRetroactiveState`.
   2316 fn saveState(self: *Self) !State {
   2317     var snapshot = self.initRetroactiveState();
   2318     try self.saveRetroactiveState(&snapshot);
   2319     return snapshot;
   2320 }
   2321 
        /// Brings the register state back to the snapshot `state`.
        ///
        /// `deaths` are instructions whose deaths are processed before registers are reconciled.
        /// The comptime `opts` select what is done:
        /// - `emit_instructions`: call `spill`/`materialize` on the affected tracking entries.
        /// - `update_tracking`: update the register manager and tracking entries to match.
        /// - `resurrect`: call `resurrect` on every tracking entry that existed at save time.
        /// - `close_scope`: kill and drop every tracking entry created after the snapshot.
   2322 fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, comptime opts: struct {
   2323     emit_instructions: bool,
   2324     update_tracking: bool,
   2325     resurrect: bool,
   2326     close_scope: bool,
   2327 }) !void {
   2328     if (opts.close_scope) {
                // Entries past `inst_tracking_len` were added after the snapshot was initialized.
   2329         for (
   2330             self.inst_tracking.keys()[state.inst_tracking_len..],
   2331             self.inst_tracking.values()[state.inst_tracking_len..],
   2332         ) |inst, *tracking| tracking.die(self, inst);
   2333         self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
   2334     }
   2335 
   2336     if (opts.resurrect) for (
   2337         self.inst_tracking.keys()[0..state.inst_tracking_len],
   2338         self.inst_tracking.values()[0..state.inst_tracking_len],
   2339     ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
   2340     for (deaths) |death| self.processDeath(death);
   2341 
            // Reconcile each tracked register: compare its current owner with the saved owner.
   2342     for (0..state.registers.len) |index| {
   2343         const current_maybe_inst = if (self.register_manager.free_registers.isSet(index))
   2344             null
   2345         else
   2346             self.register_manager.registers[index];
   2347         const target_maybe_inst = if (state.free_registers.isSet(index))
   2348             null
   2349         else
   2350             state.registers[index];
                // Safety check: the saved owner must be an entry that predates the snapshot.
   2351         if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst|
   2352             assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len);
   2353         if (opts.emit_instructions) {
                    // Spill the current occupant before materializing the saved one.
   2354             if (current_maybe_inst) |current_inst| {
   2355                 try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst);
   2356             }
   2357             if (target_maybe_inst) |target_inst| {
   2358                 try self.inst_tracking.getPtr(target_inst).?.materialize(
   2359                     self,
   2360                     target_inst,
   2361                     state.reg_tracking[index],
   2362                 );
   2363             }
   2364         }
   2365         if (opts.update_tracking) {
   2366             if (current_maybe_inst) |current_inst| {
   2367                 self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst);
   2368             }
   2369             {
                        // Free the register unconditionally, then hand it to the saved owner
                        // (or to nobody when the saved state had it free).
   2370                 const reg = RegisterManager.regAtTrackedIndex(
   2371                     @intCast(RegisterManager.RegisterBitSet.ShiftInt, index),
   2372                 );
   2373                 self.register_manager.freeReg(reg);
   2374                 self.register_manager.getRegAssumeFree(reg, target_maybe_inst);
   2375             }
   2376             if (target_maybe_inst) |target_inst| {
   2377                 self.inst_tracking.getPtr(target_inst).?.trackMaterialize(
   2378                     target_inst,
   2379                     state.reg_tracking[index],
   2380                 );
   2381             }
   2382         }
   2383     }
            // Any instruction currently held in eflags is spilled; `saveRetroactiveState` spills
            // eflags before recording, so a snapshot never has an eflags owner to restore.
   2384     if (opts.emit_instructions) if (self.eflags_inst) |inst|
   2385         try self.inst_tracking.getPtr(inst).?.spill(self, inst);
   2386     if (opts.update_tracking) if (self.eflags_inst) |inst| {
   2387         self.eflags_inst = null;
   2388         self.inst_tracking.getPtr(inst).?.trackSpill(self, inst);
   2389     };
   2390 
            // With safety enabled, verify the register manager now matches the snapshot.
   2391     if (opts.update_tracking and std.debug.runtime_safety) {
   2392         assert(self.eflags_inst == null);
   2393         assert(self.register_manager.free_registers.eql(state.free_registers));
   2394         var used_reg_it = state.free_registers.iterator(.{ .kind = .unset });
   2395         while (used_reg_it.next()) |index|
   2396             assert(self.register_manager.registers[index] == state.registers[index]);
   2397     }
   2398 }
   2399 
        /// Spills `inst` and records the spill in its tracking entry.
        /// Asserts that `inst` is tracked and currently lives in the register identified by `reg`.
   2400 pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void {
   2401     const tracking = self.inst_tracking.getPtr(inst).?;
            const tracked_reg = tracking.getReg().?;
   2402     assert(tracked_reg.id() == reg.id());
   2403     try tracking.spill(self, inst);
   2404     tracking.trackSpill(self, inst);
   2405 }
   2406 
        /// If an instruction currently owns the eflags, clears that ownership, spills the
        /// instruction, and records the spill in its tracking entry. Does nothing otherwise.
   2407 pub fn spillEflagsIfOccupied(self: *Self) !void {
   2408     const flags_inst = self.eflags_inst orelse return;
   2409     self.eflags_inst = null;
   2410     const tracking = self.inst_tracking.getPtr(flags_inst).?;
   2411     assert(tracking.getCondition() != null);
   2412     try tracking.spill(self, flags_inst);
   2413     tracking.trackSpill(self, flags_inst);
   2415 }
   2416 
        /// Requests each register in `registers` from the register manager with no owner.
        /// NOTE(review): presumably `getReg` spills whatever currently occupies the register;
        /// confirm against the register manager.
   2417 pub fn spillRegisters(self: *Self, registers: []const Register) !void {
   2418     for (registers) |reg| try self.register_manager.getReg(reg, null);
   2421 }
   2422 
   2423 /// Loads `mcv`, interpreted as type `ty`, into a scratch register and returns that register.
   2424 /// The register is requested without an owner and is not tracked, so it is not considered
   2425 /// allocated afterwards; a later call to `copyToTmpRegister` may hand out the same one.
        /// Obtaining the register can spill other instructions to the stack.
   2426 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
            const reg_class = regClassForType(ty);
   2427     const tmp_reg = try self.register_manager.allocReg(null, reg_class);
   2428     try self.genSetReg(tmp_reg, ty, mcv);
   2429     return tmp_reg;
   2430 }
   2431 
   2432 /// Claims a fresh register on behalf of `reg_owner` and loads `mcv` (of type `ty`) into it.
   2433 /// `reg_owner` is the instruction recorded against the register in the register table.
   2434 /// Claiming the register can spill other instructions to the stack.
   2435 /// WARNING: the MCValue an instruction finally reports must match the register claimed here!
   2436 fn copyToRegisterWithInstTracking(
   2437     self: *Self,
   2438     reg_owner: Air.Inst.Index,
   2439     ty: Type,
   2440     mcv: MCValue,
   2441 ) !MCValue {
            const reg_class = regClassForType(ty);
   2442     const owned_reg: Register = try self.register_manager.allocReg(reg_owner, reg_class);
   2443     try self.genSetReg(owned_reg, ty, mcv);
   2444     return .{ .register = owned_reg };
   2445 }
   2446 
        /// Lowers an `alloc` instruction: reserves a frame slot for the pointee and yields the
        /// address of that slot as the instruction's result.
   2447 fn airAlloc(self: *Self, inst: Air.Inst.Index) !void {
            const frame_index = try self.allocMemPtr(inst);
   2448     const result: MCValue = .{ .lea_frame = .{ .index = frame_index } };
   2449     return self.finishAir(inst, result, .{ .none, .none, .none });
   2450 }
   2451 
        /// Lowers a `ret_ptr` instruction by producing the address where the return value is
        /// to be written, based on the long form of `ret_mcv`.
   2452 fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
   2453     const result: MCValue = switch (self.ret_mcv.long) {
   2454         else => unreachable,
                // No return location recorded: allocate a local frame slot and use its address.
   2455         .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
                // The return location is loaded from the frame: copy it into a register owned by
                // `inst` and apply the offset recorded in the short (indirect) form.
   2456         .load_frame => .{ .register_offset = .{
   2457             .reg = (try self.copyToRegisterWithInstTracking(
   2458                 inst,
   2459                 self.air.typeOfIndex(inst),
   2460                 self.ret_mcv.long,
   2461             )).register,
   2462             .off = self.ret_mcv.short.indirect.off,
   2463         } },
   2464     };
   2465     return self.finishAir(inst, result, .{ .none, .none, .none });
   2466 }
   2467 
        /// Lowers a floating-point truncation (`fptrunc`).
        ///
        /// The destination is the operand's register when it can be reused, otherwise a fresh
        /// register owned by `inst` holding a copy of the operand. Two conversions are
        /// implemented: f32 to f16 when the `f16c` feature is available, and f64 to f32 (with an
        /// AVX and a non-AVX encoding). Every other combination fails with a TODO error.
   2468 fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
   2469     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2470     const dst_ty = self.air.typeOfIndex(inst);
   2471     const dst_bits = dst_ty.floatBits(self.target.*);
   2472     const src_ty = self.air.typeOf(ty_op.operand);
   2473     const src_bits = src_ty.floatBits(self.target.*);
   2474 
   2475     const src_mcv = try self.resolveInst(ty_op.operand);
   2476     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   2477         src_mcv
   2478     else
   2479         try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
            // Lock the destination so temporaries allocated below cannot take it.
   2480     const dst_reg = dst_mcv.getReg().?.to128();
   2481     const dst_lock = self.register_manager.lockReg(dst_reg);
   2482     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   2483 
   2484     if (dst_bits == 16 and self.hasFeature(.f16c)) {
   2485         switch (src_bits) {
   2486             32 => {
                        // The source must be in a register for this instruction form.
   2487                 const mat_src_reg = if (src_mcv.isRegister())
   2488                     src_mcv.getReg().?
   2489                 else
   2490                     try self.copyToTmpRegister(src_ty, src_mcv);
                        // NOTE(review): imm8 0b100 presumably selects the MXCSR rounding mode;
                        // confirm against the VCVTPS2PH documentation.
   2491                 try self.asmRegisterRegisterImmediate(
   2492                     .{ .v_, .cvtps2ph },
   2493                     dst_reg,
   2494                     mat_src_reg.to128(),
   2495                     Immediate.u(0b1_00),
   2496                 );
   2497             },
   2498             else => return self.fail("TODO implement airFptrunc from {} to {}", .{
   2499                 src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2500             }),
   2501         }
   2502     } else if (src_bits == 64 and dst_bits == 32) {
                // Four variants: {AVX three-operand, non-AVX two-operand} x {memory, register}
                // source. A source that is neither memory nor a register is first copied into a
                // temporary register.
   2503         if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   2504             .{ .v_, .cvtsd2ss },
   2505             dst_reg,
   2506             dst_reg,
   2507             src_mcv.mem(.qword),
   2508         ) else try self.asmRegisterRegisterRegister(
   2509             .{ .v_, .cvtsd2ss },
   2510             dst_reg,
   2511             dst_reg,
   2512             (if (src_mcv.isRegister())
   2513                 src_mcv.getReg().?
   2514             else
   2515                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2516         ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
   2517             .{ ._, .cvtsd2ss },
   2518             dst_reg,
   2519             src_mcv.mem(.qword),
   2520         ) else try self.asmRegisterRegister(
   2521             .{ ._, .cvtsd2ss },
   2522             dst_reg,
   2523             (if (src_mcv.isRegister())
   2524                 src_mcv.getReg().?
   2525             else
   2526                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2527         );
   2528     } else return self.fail("TODO implement airFptrunc from {} to {}", .{
   2529         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2530     });
   2531     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2532 }
   2533 
        /// Lowers a floating-point extension (`fpext`).
        ///
        /// The destination is the operand's register when it can be reused, otherwise a fresh
        /// register owned by `inst` holding a copy of the operand. Implemented conversions are
        /// f16 to f32 or f64 when the `f16c` feature is available, and f32 to f64 (with an AVX
        /// and a non-AVX encoding). Every other combination fails with a TODO error.
   2534 fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
   2535     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2536     const dst_ty = self.air.typeOfIndex(inst);
   2537     const dst_bits = dst_ty.floatBits(self.target.*);
   2538     const src_ty = self.air.typeOf(ty_op.operand);
   2539     const src_bits = src_ty.floatBits(self.target.*);
   2540 
   2541     const src_mcv = try self.resolveInst(ty_op.operand);
   2542     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   2543         src_mcv
   2544     else
   2545         try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
            // Lock the destination so temporaries allocated below cannot take it.
   2546     const dst_reg = dst_mcv.getReg().?.to128();
   2547     const dst_lock = self.register_manager.lockReg(dst_reg);
   2548     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   2549 
   2550     if (src_bits == 16 and self.hasFeature(.f16c)) {
   2551         const mat_src_reg = if (src_mcv.isRegister())
   2552             src_mcv.getReg().?
   2553         else
   2554             try self.copyToTmpRegister(src_ty, src_mcv);
                // First widen f16 to f32; an f64 destination needs a second conversion step.
                // Note the first conversion is emitted before the destination width is checked.
   2555         try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
   2556         switch (dst_bits) {
   2557             32 => {},
   2558             64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg),
   2559             else => return self.fail("TODO implement airFpext from {} to {}", .{
   2560                 src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2561             }),
   2562         }
   2563     } else if (src_bits == 32 and dst_bits == 64) {
                // Four variants: {AVX three-operand, non-AVX two-operand} x {memory, register}
                // source. A source that is neither memory nor a register is first copied into a
                // temporary register.
   2564         if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   2565             .{ .v_, .cvtss2sd },
   2566             dst_reg,
   2567             dst_reg,
   2568             src_mcv.mem(.dword),
   2569         ) else try self.asmRegisterRegisterRegister(
   2570             .{ .v_, .cvtss2sd },
   2571             dst_reg,
   2572             dst_reg,
   2573             (if (src_mcv.isRegister())
   2574                 src_mcv.getReg().?
   2575             else
   2576                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2577         ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
   2578             .{ ._, .cvtss2sd },
   2579             dst_reg,
   2580             src_mcv.mem(.dword),
   2581         ) else try self.asmRegisterRegister(
   2582             .{ ._, .cvtss2sd },
   2583             dst_reg,
   2584             (if (src_mcv.isRegister())
   2585                 src_mcv.getReg().?
   2586             else
   2587                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2588         );
   2589     } else return self.fail("TODO implement airFpext from {} to {}", .{
   2590         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2591     });
   2592     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2593 }
   2594 
        /// Lowers an integer cast (`intcast`) between integer types of possibly different
        /// widths and signedness.
        ///
        /// The operand's location is reused when the destination is no larger than the source;
        /// otherwise a register or frame slot is allocated. Register destinations are filled
        /// with a sign- or zero-extending move; memory destinations are copied and then
        /// normalized with a shift-left/shift-right pair when the type has padding bits.
   2595 fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
   2596     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2597 
   2598     const src_ty = self.air.typeOf(ty_op.operand);
   2599     const src_int_info = src_ty.intInfo(self.target.*);
   2600     const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
   2601     const src_mcv = try self.resolveInst(ty_op.operand);
            // Keep the source register from being handed out while the destination is allocated.
   2602     const src_lock = switch (src_mcv) {
   2603         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   2604         else => null,
   2605     };
   2606     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   2607 
   2608     const dst_ty = self.air.typeOfIndex(inst);
   2609     const dst_int_info = dst_ty.intInfo(self.target.*);
   2610     const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   2611     const dst_mcv = if (dst_abi_size <= src_abi_size and
   2612         self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   2613         src_mcv
   2614     else
   2615         try self.allocRegOrMem(inst, true);
   2616 
            // The narrower of the two types determines how many bits are meaningful.
   2617     const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
            // Sign extension is used only when both source and destination are signed.
   2618     const signedness: std.builtin.Signedness = if (dst_int_info.signedness == .signed and
   2619         src_int_info.signedness == .signed) .signed else .unsigned;
   2620     switch (dst_mcv) {
   2621         .register => |dst_reg| {
   2622             const min_abi_size = @min(dst_abi_size, src_abi_size);
                    // Sources of 4 bytes or more use the `_d` form of movsx when signed and a
                    // plain mov when unsigned; smaller sources use movsx/movzx.
   2623             const tag: Mir.Inst.FixedTag = switch (signedness) {
   2624                 .signed => if (min_abi_size >= 4) .{ ._d, .movsx } else .{ ._, .movsx },
   2625                 .unsigned => if (min_abi_size >= 4) .{ ._, .mov } else .{ ._, .movzx },
   2626             };
                    // Sign extension always targets the full 64-bit register; mov/movzx target
                    // the 32-bit alias unless more than 4 bytes are being moved.
                    // NOTE(review): this presumably relies on 32-bit writes clearing the upper
                    // half of the 64-bit register on x86-64 - confirm.
   2627             const dst_alias = switch (tag[1]) {
   2628                 .movsx => dst_reg.to64(),
   2629                 .mov, .movzx => if (min_abi_size > 4) dst_reg.to64() else dst_reg.to32(),
   2630                 else => unreachable,
   2631             };
   2632             switch (src_mcv) {
   2633                 .register => |src_reg| {
   2634                     try self.asmRegisterRegister(
   2635                         tag,
   2636                         dst_alias,
   2637                         registerAlias(src_reg, min_abi_size),
   2638                     );
   2639                 },
   2640                 .memory, .indirect, .load_frame => try self.asmRegisterMemory(
   2641                     tag,
   2642                     dst_alias,
   2643                     src_mcv.mem(Memory.PtrSize.fromSize(min_abi_size)),
   2644                 ),
   2645                 else => return self.fail("TODO airIntCast from {s} to {s}", .{
   2646                     @tagName(src_mcv),
   2647                     @tagName(dst_mcv),
   2648                 }),
   2649             }
                    // Clear or sign-fill bits beyond the narrower type's width, if it has any.
   2650             if (self.regExtraBits(min_ty) > 0) try self.truncateRegister(min_ty, dst_reg);
   2651         },
   2652         else => {
   2653             try self.genCopy(min_ty, dst_mcv, src_mcv);
                    // `extra` is the number of padding bits above the destination's bit width.
                    // Shifting left then right by that amount re-extends the value in place.
   2654             const extra = dst_abi_size * 8 - dst_int_info.bits;
   2655             if (extra > 0) {
   2656                 try self.genShiftBinOpMir(
   2657                     switch (signedness) {
   2658                         .signed => .{ ._l, .sa },
   2659                         .unsigned => .{ ._l, .sh },
   2660                     },
   2661                     dst_ty,
   2662                     dst_mcv,
   2663                     .{ .immediate = extra },
   2664                 );
   2665                 try self.genShiftBinOpMir(
   2666                     switch (signedness) {
   2667                         .signed => .{ ._r, .sa },
   2668                         .unsigned => .{ ._r, .sh },
   2669                     },
   2670                     dst_ty,
   2671                     dst_mcv,
   2672                     .{ .immediate = extra },
   2673                 );
   2674             }
   2675         },
   2676     }
   2677     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2678 }
   2679 
        /// Lowers an integer truncation (`trunc`).
        ///
        /// Only destination types with an ABI size of at most 8 bytes are supported; larger
        /// ones fail with a TODO error. The result always lives in a register: either the
        /// operand's register when it can be reused, or a fresh register owned by `inst`.
   2680 fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
   2681     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2682 
   2683     const dst_ty = self.air.typeOfIndex(inst);
   2684     const dst_abi_size = dst_ty.abiSize(self.target.*);
   2685     if (dst_abi_size > 8) {
   2686         return self.fail("TODO implement trunc for abi sizes larger than 8", .{});
   2687     }
   2688 
   2689     const src_mcv = try self.resolveInst(ty_op.operand);
            // Keep the source register from being handed out while the destination is allocated.
   2690     const src_lock = switch (src_mcv) {
   2691         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   2692         else => null,
   2693     };
   2694     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   2695 
   2696     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   2697         src_mcv
   2698     else
   2699         try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   2700 
   2701     // When truncating a `u16` to `u5`, for example, the top 3 bits of the 8-bit result
   2702     // have to be removed. This only happens if the dst is not a power-of-two size.
   2703     if (self.regExtraBits(dst_ty) > 0) try self.truncateRegister(dst_ty, dst_mcv.register.to64());
   2704 
   2705     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2706 }
   2707 
        /// Lowers `bool_to_int`. No conversion code is emitted here: the operand's location is
        /// reused as the result when possible, otherwise the operand is copied into a fresh
        /// register owned by `inst`.
   2708 fn airBoolToInt(self: *Self, inst: Air.Inst.Index) !void {
   2709     const un_op = self.air.instructions.items(.data)[inst].un_op;
   2710     const dst_ty = self.air.typeOfIndex(inst);
   2711     const src_mcv = try self.resolveInst(un_op);
   2712 
   2713     const result = result: {
   2714         if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv;
   2715         break :result try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   2716     };
   2718     return self.finishAir(inst, result, .{ un_op, .none, .none });
   2719 }
   2720 
        /// Lowers a `slice` instruction by building the slice value in a new stack-frame slot:
        /// the pointer operand is stored at offset 0 and the length operand immediately after
        /// it, at an offset equal to the pointer type's ABI size.
   2721 fn airSlice(self: *Self, inst: Air.Inst.Index) !void {
   2722     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   2723     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   2724 
   2725     const slice_ty = self.air.typeOfIndex(inst);
   2726     const ptr = try self.resolveInst(bin_op.lhs);
   2727     const ptr_ty = self.air.typeOf(bin_op.lhs);
   2728     const len = try self.resolveInst(bin_op.rhs);
   2729     const len_ty = self.air.typeOf(bin_op.rhs);
   2730 
   2731     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*));
   2732     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
   2733     try self.genSetMem(
   2734         .{ .frame = frame_index },
   2735         @intCast(i32, ptr_ty.abiSize(self.target.*)),
   2736         len_ty,
   2737         len,
   2738     );
   2739 
   2740     const result = MCValue{ .load_frame = .{ .index = frame_index } };
   2741     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   2742 }
   2743 
        /// Lowers a unary AIR instruction whose operand is stored in `ty_op` form by delegating
        /// code generation for `tag` to `genUnOp`.
   2744 fn airUnOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
   2745     const operand = self.air.instructions.items(.data)[inst].ty_op.operand;
   2746     const result = try self.genUnOp(inst, tag, operand);
   2747     return self.finishAir(inst, result, .{ operand, .none, .none });
   2748 }
   2749 
        /// Lowers a binary AIR instruction whose operands are stored in `bin_op` form by
        /// delegating code generation for `tag` to `genBinOp`.
   2750 fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
   2751     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
            const lhs = bin_op.lhs;
            const rhs = bin_op.rhs;
   2752     const result = try self.genBinOp(inst, tag, lhs, rhs);
   2753     return self.finishAir(inst, result, .{ lhs, rhs, .none });
   2754 }
   2755 
        /// Lowers a pointer-arithmetic AIR instruction. The two operands are read from the
        /// `Air.Bin` extra payload, and code generation for `tag` is delegated to `genBinOp`.
   2756 fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
   2757     const payload_index = self.air.instructions.items(.data)[inst].ty_pl.payload;
   2758     const bin_op = self.air.extraData(Air.Bin, payload_index).data;
            const lhs = bin_op.lhs;
            const rhs = bin_op.rhs;
   2759     const result = try self.genBinOp(inst, tag, lhs, rhs);
   2760     return self.finishAir(inst, result, .{ lhs, rhs, .none });
   2761 }
   2762 
        /// Returns a bound on the number of bits of `dst_air`'s integer type that can actually
        /// be in use, looking through the instruction that produced it.
        ///
        /// - For a constant, the bound comes from the two's-complement bit count of the value.
        /// - For an `intcast`, the bound comes from the source type's width, adjusted for the
        ///   signedness change and capped at the destination width.
        /// - Otherwise the full bit width of the type is returned.
   2763 fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 {
   2764     const air_tag = self.air.instructions.items(.tag);
   2765     const air_data = self.air.instructions.items(.data);
   2766 
   2767     const dst_ty = self.air.typeOf(dst_air);
   2768     const dst_info = dst_ty.intInfo(self.target.*);
   2769     if (Air.refToIndex(dst_air)) |inst| {
   2770         switch (air_tag[inst]) {
   2771             .constant => {
   2772                 const src_val = self.air.values[air_data[inst].ty_pl.payload];
   2773                 var space: Value.BigIntSpace = undefined;
   2774                 const src_int = src_val.toBigInt(&space, self.target.*);
                        // A positive value stored in a signed type needs one more bit (the sign).
   2775                 return @intCast(u16, src_int.bitCountTwosComp()) +
   2776                     @boolToInt(src_int.positive and dst_info.signedness == .signed);
   2777             },
   2778             .intcast => {
   2779                 const src_ty = self.air.typeOf(air_data[inst].ty_op.operand);
   2780                 const src_info = src_ty.intInfo(self.target.*);
                        // Signed -> unsigned drops the sign bit; unsigned -> signed gains one.
   2781                 return @min(switch (src_info.signedness) {
   2782                     .signed => switch (dst_info.signedness) {
   2783                         .signed => src_info.bits,
   2784                         .unsigned => src_info.bits - 1,
   2785                     },
   2786                     .unsigned => switch (dst_info.signedness) {
   2787                         .signed => src_info.bits + 1,
   2788                         .unsigned => src_info.bits,
   2789                     },
   2790                 }, dst_info.bits);
   2791             },
   2792             else => {},
   2793         }
   2794     }
   2795     return dst_info.bits;
   2796 }
   2797 
        /// Lowers multiplication, division, remainder and modulo instructions.
        ///
        /// Float operations are forwarded to `genBinOp`. For integers, an operand type is
        /// synthesized with the destination's signedness, the eflags and the `rax`/`rdx`
        /// registers are spilled, and the work is delegated to `genMulDivBinOp`.
   2798 fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
   2799     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2800     const result = result: {
   2801         const tag = self.air.instructions.items(.tag)[inst];
   2802         const dst_ty = self.air.typeOfIndex(inst);
   2803         if (dst_ty.zigTypeTag() == .Float)
   2804             break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
   2805 
   2806         const dst_info = dst_ty.intInfo(self.target.*);
                // Build the integer type the operands are treated as. For multiplication its
                // width is the largest of both operands' active bits and half the destination
                // width; for division and remainder it is the full destination width.
   2807         var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
   2808             .signed => .int_signed,
   2809             .unsigned => .int_unsigned,
   2810         } }, .data = switch (tag) {
   2811             else => unreachable,
   2812             .mul, .mulwrap => math.max3(
   2813                 self.activeIntBits(bin_op.lhs),
   2814                 self.activeIntBits(bin_op.rhs),
   2815                 dst_info.bits / 2,
   2816             ),
   2817             .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits,
   2818         } };
   2819         const src_ty = Type.initPayload(&src_pl.base);
   2820 
                // Spills happen before the operands are resolved, so the resolved locations
                // already reflect any operand that was moved out of `rax`/`rdx`.
   2821         try self.spillEflagsIfOccupied();
   2822         try self.spillRegisters(&.{ .rax, .rdx });
   2823         const lhs = try self.resolveInst(bin_op.lhs);
   2824         const rhs = try self.resolveInst(bin_op.rhs);
   2825         break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs, rhs);
   2826     };
   2827     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   2828 }
   2829 
        /// Lowers a saturating integer addition.
        ///
        /// The sum is computed in a destination register (the lhs register when reusable,
        /// otherwise a fresh copy). A second register is preloaded with the saturation limit,
        /// and a conditional move replaces the sum with that limit when the addition sets the
        /// overflow flag (signed) or the carry flag (unsigned).
   2830 fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
   2831     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2832     const ty = self.air.typeOf(bin_op.lhs);
   2833 
   2834     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   2835     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
   2836         lhs_mcv
   2837     else
   2838         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
   2839     const dst_reg = dst_mcv.register;
   2840     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   2841     defer self.register_manager.unlockReg(dst_lock);
   2842 
   2843     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   2844     const rhs_lock = switch (rhs_mcv) {
   2845         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   2846         else => null,
   2847     };
   2848     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   2849 
            // Scratch register that will hold the value to saturate to.
   2850     const limit_reg = try self.register_manager.allocReg(null, gp);
   2851     const limit_mcv = MCValue{ .register = limit_reg };
   2852     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   2853     defer self.register_manager.unlockReg(limit_lock);
   2854 
   2855     const reg_bits = self.regBitSize(ty);
   2856     const cc: Condition = if (ty.isSignedInt()) cc: {
                // limit = (lhs >> (bits - 1)) ^ (2^(bits - 1) - 1): the arithmetic shift yields
                // all ones for a negative lhs and zero otherwise, so the xor produces the
                // minimum value for a negative lhs and the maximum value for a non-negative one.
   2857         try self.genSetReg(limit_reg, ty, dst_mcv);
   2858         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   2859         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   2860             .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1,
   2861         });
   2862         break :cc .o;
   2863     } else cc: {
                // Unsigned: the limit is all ones across `reg_bits` bits.
   2864         try self.genSetReg(limit_reg, ty, .{
   2865             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
   2866         });
   2867         break :cc .c;
   2868     };
            // The limit is prepared first, so the conditional move below sees the flags of
            // this add.
   2869     try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
   2870 
            // The conditional move uses register aliases of at least 2 bytes.
            // NOTE(review): presumably because cmov has no 8-bit form - confirm.
   2871     const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
   2872     try self.asmCmovccRegisterRegister(
   2873         registerAlias(dst_reg, cmov_abi_size),
   2874         registerAlias(limit_reg, cmov_abi_size),
   2875         cc,
   2876     );
   2877 
   2878     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   2879 }
   2880 
   2881 fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
   2882     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2883     const ty = self.air.typeOf(bin_op.lhs);
   2884 
   2885     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   2886     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
   2887         lhs_mcv
   2888     else
   2889         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
   2890     const dst_reg = dst_mcv.register;
   2891     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   2892     defer self.register_manager.unlockReg(dst_lock);
   2893 
   2894     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   2895     const rhs_lock = switch (rhs_mcv) {
   2896         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   2897         else => null,
   2898     };
   2899     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   2900 
   2901     const limit_reg = try self.register_manager.allocReg(null, gp);
   2902     const limit_mcv = MCValue{ .register = limit_reg };
   2903     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   2904     defer self.register_manager.unlockReg(limit_lock);
   2905 
   2906     const reg_bits = self.regBitSize(ty);
   2907     const cc: Condition = if (ty.isSignedInt()) cc: {
   2908         try self.genSetReg(limit_reg, ty, dst_mcv);
   2909         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   2910         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   2911             .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1,
   2912         });
   2913         break :cc .o;
   2914     } else cc: {
   2915         try self.genSetReg(limit_reg, ty, .{ .immediate = 0 });
   2916         break :cc .c;
   2917     };
   2918     try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
   2919 
   2920     const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
   2921     try self.asmCmovccRegisterRegister(
   2922         registerAlias(dst_reg, cmov_abi_size),
   2923         registerAlias(limit_reg, cmov_abi_size),
   2924         cc,
   2925     );
   2926 
   2927     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   2928 }
   2929 
   2930 fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
   2931     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2932     const ty = self.air.typeOf(bin_op.lhs);
   2933 
   2934     try self.spillRegisters(&.{ .rax, .rdx });
   2935     const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
   2936     defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
   2937 
   2938     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   2939     const lhs_lock = switch (lhs_mcv) {
   2940         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   2941         else => null,
   2942     };
   2943     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   2944 
   2945     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   2946     const rhs_lock = switch (rhs_mcv) {
   2947         .register => |reg| self.register_manager.lockReg(reg),
   2948         else => null,
   2949     };
   2950     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   2951 
   2952     const limit_reg = try self.register_manager.allocReg(null, gp);
   2953     const limit_mcv = MCValue{ .register = limit_reg };
   2954     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   2955     defer self.register_manager.unlockReg(limit_lock);
   2956 
   2957     const reg_bits = self.regBitSize(ty);
   2958     const cc: Condition = if (ty.isSignedInt()) cc: {
   2959         try self.genSetReg(limit_reg, ty, lhs_mcv);
   2960         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv);
   2961         try self.genShiftBinOpMir(.{ ._, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   2962         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   2963             .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1,
   2964         });
   2965         break :cc .o;
   2966     } else cc: {
   2967         try self.genSetReg(limit_reg, ty, .{
   2968             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
   2969         });
   2970         break :cc .c;
   2971     };
   2972 
   2973     const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
   2974     const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
   2975     try self.asmCmovccRegisterRegister(
   2976         registerAlias(dst_mcv.register, cmov_abi_size),
   2977         registerAlias(limit_reg, cmov_abi_size),
   2978         cc,
   2979     );
   2980 
   2981     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   2982 }
   2983 
   2984 fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   2985     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   2986     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   2987     const result: MCValue = result: {
   2988         const tag = self.air.instructions.items(.tag)[inst];
   2989         const ty = self.air.typeOf(bin_op.lhs);
   2990         switch (ty.zigTypeTag()) {
   2991             .Vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}),
   2992             .Int => {
   2993                 try self.spillEflagsIfOccupied();
   2994 
   2995                 const partial_mcv = try self.genBinOp(null, switch (tag) {
   2996                     .add_with_overflow => .add,
   2997                     .sub_with_overflow => .sub,
   2998                     else => unreachable,
   2999                 }, bin_op.lhs, bin_op.rhs);
   3000                 const int_info = ty.intInfo(self.target.*);
   3001                 const cc: Condition = switch (int_info.signedness) {
   3002                     .unsigned => .c,
   3003                     .signed => .o,
   3004                 };
   3005 
   3006                 const tuple_ty = self.air.typeOfIndex(inst);
   3007                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
   3008                     switch (partial_mcv) {
   3009                         .register => |reg| {
   3010                             self.eflags_inst = inst;
   3011                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3012                         },
   3013                         else => {},
   3014                     }
   3015 
   3016                     const frame_index =
   3017                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
   3018                     try self.genSetMem(
   3019                         .{ .frame = frame_index },
   3020                         @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
   3021                         Type.u1,
   3022                         .{ .eflags = cc },
   3023                     );
   3024                     try self.genSetMem(
   3025                         .{ .frame = frame_index },
   3026                         @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
   3027                         ty,
   3028                         partial_mcv,
   3029                     );
   3030                     break :result .{ .load_frame = .{ .index = frame_index } };
   3031                 }
   3032 
   3033                 const frame_index =
   3034                     try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
   3035                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3036                 break :result .{ .load_frame = .{ .index = frame_index } };
   3037             },
   3038             else => unreachable,
   3039         }
   3040     };
   3041     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3042 }
   3043 
   3044 fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3045     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   3046     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3047     const result: MCValue = result: {
   3048         const lhs_ty = self.air.typeOf(bin_op.lhs);
   3049         const rhs_ty = self.air.typeOf(bin_op.rhs);
   3050         switch (lhs_ty.zigTypeTag()) {
   3051             .Vector => return self.fail("TODO implement shl with overflow for Vector type", .{}),
   3052             .Int => {
   3053                 try self.spillEflagsIfOccupied();
   3054 
   3055                 try self.register_manager.getReg(.rcx, null);
   3056                 const lhs = try self.resolveInst(bin_op.lhs);
   3057                 const rhs = try self.resolveInst(bin_op.rhs);
   3058 
   3059                 const int_info = lhs_ty.intInfo(self.target.*);
   3060 
   3061                 const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty);
   3062                 const partial_lock = switch (partial_mcv) {
   3063                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3064                     else => null,
   3065                 };
   3066                 defer if (partial_lock) |lock| self.register_manager.unlockReg(lock);
   3067 
   3068                 const tmp_mcv = try self.genShiftBinOp(.shr, null, partial_mcv, rhs, lhs_ty, rhs_ty);
   3069                 const tmp_lock = switch (tmp_mcv) {
   3070                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3071                     else => null,
   3072                 };
   3073                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   3074 
   3075                 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs);
   3076                 const cc = Condition.ne;
   3077 
   3078                 const tuple_ty = self.air.typeOfIndex(inst);
   3079                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
   3080                     switch (partial_mcv) {
   3081                         .register => |reg| {
   3082                             self.eflags_inst = inst;
   3083                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3084                         },
   3085                         else => {},
   3086                     }
   3087 
   3088                     const frame_index =
   3089                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
   3090                     try self.genSetMem(
   3091                         .{ .frame = frame_index },
   3092                         @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
   3093                         tuple_ty.structFieldType(1),
   3094                         .{ .eflags = cc },
   3095                     );
   3096                     try self.genSetMem(
   3097                         .{ .frame = frame_index },
   3098                         @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
   3099                         tuple_ty.structFieldType(0),
   3100                         partial_mcv,
   3101                     );
   3102                     break :result .{ .load_frame = .{ .index = frame_index } };
   3103                 }
   3104 
   3105                 const frame_index =
   3106                     try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
   3107                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3108                 break :result .{ .load_frame = .{ .index = frame_index } };
   3109             },
   3110             else => unreachable,
   3111         }
   3112     };
   3113     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3114 }
   3115 
   3116 fn genSetFrameTruncatedOverflowCompare(
   3117     self: *Self,
   3118     tuple_ty: Type,
   3119     frame_index: FrameIndex,
   3120     src_mcv: MCValue,
   3121     overflow_cc: ?Condition,
   3122 ) !void {
   3123     const src_lock = switch (src_mcv) {
   3124         .register => |reg| self.register_manager.lockReg(reg),
   3125         else => null,
   3126     };
   3127     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   3128 
   3129     const ty = tuple_ty.structFieldType(0);
   3130     const int_info = ty.intInfo(self.target.*);
   3131 
   3132     var hi_limb_pl = Type.Payload.Bits{
   3133         .base = .{ .tag = switch (int_info.signedness) {
   3134             .signed => .int_signed,
   3135             .unsigned => .int_unsigned,
   3136         } },
   3137         .data = (int_info.bits - 1) % 64 + 1,
   3138     };
   3139     const hi_limb_ty = Type.initPayload(&hi_limb_pl.base);
   3140 
   3141     var rest_pl = Type.Payload.Bits{
   3142         .base = .{ .tag = .int_unsigned },
   3143         .data = int_info.bits - hi_limb_pl.data,
   3144     };
   3145     const rest_ty = Type.initPayload(&rest_pl.base);
   3146 
   3147     const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp);
   3148     const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs);
   3149     defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
   3150 
   3151     const overflow_reg = temp_regs[0];
   3152     if (overflow_cc) |cc| try self.asmSetccRegister(overflow_reg.to8(), cc);
   3153 
   3154     const scratch_reg = temp_regs[1];
   3155     const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
   3156     const hi_limb_mcv = if (hi_limb_off > 0)
   3157         src_mcv.address().offset(int_info.bits / 64 * 8).deref()
   3158     else
   3159         src_mcv;
   3160     try self.genSetReg(scratch_reg, hi_limb_ty, hi_limb_mcv);
   3161     try self.truncateRegister(hi_limb_ty, scratch_reg);
   3162     try self.genBinOpMir(.{ ._, .cmp }, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);
   3163 
   3164     const eq_reg = temp_regs[2];
   3165     if (overflow_cc) |_| {
   3166         try self.asmSetccRegister(eq_reg.to8(), .ne);
   3167         try self.genBinOpMir(
   3168             .{ ._, .@"or" },
   3169             Type.u8,
   3170             .{ .register = overflow_reg },
   3171             .{ .register = eq_reg },
   3172         );
   3173     }
   3174 
   3175     const payload_off = @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*));
   3176     if (hi_limb_off > 0) try self.genSetMem(.{ .frame = frame_index }, payload_off, rest_ty, src_mcv);
   3177     try self.genSetMem(
   3178         .{ .frame = frame_index },
   3179         payload_off + hi_limb_off,
   3180         hi_limb_ty,
   3181         .{ .register = scratch_reg },
   3182     );
   3183     try self.genSetMem(
   3184         .{ .frame = frame_index },
   3185         @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
   3186         tuple_ty.structFieldType(1),
   3187         if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
   3188     );
   3189 }
   3190 
   3191 fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3192     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   3193     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3194     const dst_ty = self.air.typeOf(bin_op.lhs);
   3195     const result: MCValue = switch (dst_ty.zigTypeTag()) {
   3196         .Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}),
   3197         .Int => result: {
   3198             try self.spillEflagsIfOccupied();
   3199             try self.spillRegisters(&.{ .rax, .rdx });
   3200 
   3201             const dst_info = dst_ty.intInfo(self.target.*);
   3202             const cc: Condition = switch (dst_info.signedness) {
   3203                 .unsigned => .c,
   3204                 .signed => .o,
   3205             };
   3206 
   3207             const lhs_active_bits = self.activeIntBits(bin_op.lhs);
   3208             const rhs_active_bits = self.activeIntBits(bin_op.rhs);
   3209             var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
   3210                 .signed => .int_signed,
   3211                 .unsigned => .int_unsigned,
   3212             } }, .data = math.max3(lhs_active_bits, rhs_active_bits, dst_info.bits / 2) };
   3213             const src_ty = Type.initPayload(&src_pl.base);
   3214 
   3215             const lhs = try self.resolveInst(bin_op.lhs);
   3216             const rhs = try self.resolveInst(bin_op.rhs);
   3217 
   3218             const tuple_ty = self.air.typeOfIndex(inst);
   3219             const extra_bits = if (dst_info.bits <= 64)
   3220                 self.regExtraBits(dst_ty)
   3221             else
   3222                 dst_info.bits % 64;
   3223             const partial_mcv = if (dst_info.signedness == .signed and extra_bits > 0) dst: {
   3224                 const rhs_lock: ?RegisterLock = switch (rhs) {
   3225                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3226                     else => null,
   3227                 };
   3228                 defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3229 
   3230                 const dst_reg: Register = blk: {
   3231                     if (lhs.isRegister()) break :blk lhs.register;
   3232                     break :blk try self.copyToTmpRegister(dst_ty, lhs);
   3233                 };
   3234                 const dst_mcv = MCValue{ .register = dst_reg };
   3235                 const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   3236                 defer self.register_manager.unlockReg(dst_reg_lock);
   3237 
   3238                 const rhs_mcv: MCValue = blk: {
   3239                     if (rhs.isRegister() or rhs.isMemory()) break :blk rhs;
   3240                     break :blk MCValue{ .register = try self.copyToTmpRegister(dst_ty, rhs) };
   3241                 };
   3242                 const rhs_mcv_lock: ?RegisterLock = switch (rhs_mcv) {
   3243                     .register => |reg| self.register_manager.lockReg(reg),
   3244                     else => null,
   3245                 };
   3246                 defer if (rhs_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   3247 
   3248                 try self.genIntMulComplexOpMir(Type.isize, dst_mcv, rhs_mcv);
   3249                 break :dst dst_mcv;
   3250             } else try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs);
   3251 
   3252             switch (partial_mcv) {
   3253                 .register => |reg| if (extra_bits == 0) {
   3254                     self.eflags_inst = inst;
   3255                     break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3256                 } else {
   3257                     const frame_index =
   3258                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
   3259                     try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3260                     break :result .{ .load_frame = .{ .index = frame_index } };
   3261                 },
   3262                 else => {
   3263                     // For now, this is the only supported multiply that doesn't fit in a register,
   3264                     // so cc being set is impossible.
   3265 
   3266                     assert(dst_info.bits <= 128 and src_pl.data == 64);
   3267 
   3268                     const frame_index =
   3269                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
   3270                     if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
   3271                         try self.genSetMem(
   3272                             .{ .frame = frame_index },
   3273                             @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
   3274                             tuple_ty.structFieldType(0),
   3275                             partial_mcv,
   3276                         );
   3277                         try self.genSetMem(
   3278                             .{ .frame = frame_index },
   3279                             @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
   3280                             tuple_ty.structFieldType(1),
   3281                             .{ .immediate = 0 },
   3282                         );
   3283                     } else try self.genSetFrameTruncatedOverflowCompare(
   3284                         tuple_ty,
   3285                         frame_index,
   3286                         partial_mcv,
   3287                         null,
   3288                     );
   3289                     break :result .{ .load_frame = .{ .index = frame_index } };
   3290                 },
   3291             }
   3292         },
   3293         else => unreachable,
   3294     };
   3295     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3296 }
   3297 
   3298 /// Generates signed or unsigned integer multiplication/division.
   3299 /// Clobbers .rax and .rdx registers.
   3300 /// Quotient is saved in .rax and remainder in .rdx.
   3301 fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
   3302     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   3303     if (abi_size > 8) {
   3304         return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
   3305     }
   3306 
   3307     try self.genSetReg(.rax, ty, lhs);
   3308     switch (tag[1]) {
   3309         else => unreachable,
   3310         .mul => {},
   3311         .div => switch (tag[0]) {
   3312             ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx),
   3313             .i_ => switch (self.regBitSize(ty)) {
   3314                 8 => try self.asmOpOnly(.{ ._, .cbw }),
   3315                 16 => try self.asmOpOnly(.{ ._, .cwd }),
   3316                 32 => try self.asmOpOnly(.{ ._, .cdq }),
   3317                 64 => try self.asmOpOnly(.{ ._, .cqo }),
   3318                 else => unreachable,
   3319             },
   3320             else => unreachable,
   3321         },
   3322     }
   3323 
   3324     const mat_rhs: MCValue = switch (rhs) {
   3325         .register, .indirect, .load_frame => rhs,
   3326         else => .{ .register = try self.copyToTmpRegister(ty, rhs) },
   3327     };
   3328     switch (mat_rhs) {
   3329         .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)),
   3330         .memory, .indirect, .load_frame => try self.asmMemory(
   3331             tag,
   3332             mat_rhs.mem(Memory.PtrSize.fromSize(abi_size)),
   3333         ),
   3334         else => unreachable,
   3335     }
   3336 }
   3337 
   3338 /// Always returns a register.
   3339 /// Clobbers .rax and .rdx registers.
   3340 fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue {
   3341     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   3342     const int_info = ty.intInfo(self.target.*);
   3343     const dividend: Register = switch (lhs) {
   3344         .register => |reg| reg,
   3345         else => try self.copyToTmpRegister(ty, lhs),
   3346     };
   3347     const dividend_lock = self.register_manager.lockReg(dividend);
   3348     defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock);
   3349 
   3350     const divisor: Register = switch (rhs) {
   3351         .register => |reg| reg,
   3352         else => try self.copyToTmpRegister(ty, rhs),
   3353     };
   3354     const divisor_lock = self.register_manager.lockReg(divisor);
   3355     defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock);
   3356 
   3357     try self.genIntMulDivOpMir(
   3358         switch (int_info.signedness) {
   3359             .signed => .{ .i_, .div },
   3360             .unsigned => .{ ._, .div },
   3361         },
   3362         ty,
   3363         .{ .register = dividend },
   3364         .{ .register = divisor },
   3365     );
   3366 
   3367     try self.asmRegisterRegister(
   3368         .{ ._, .xor },
   3369         registerAlias(divisor, abi_size),
   3370         registerAlias(dividend, abi_size),
   3371     );
   3372     try self.asmRegisterImmediate(
   3373         .{ ._r, .sa },
   3374         registerAlias(divisor, abi_size),
   3375         Immediate.u(int_info.bits - 1),
   3376     );
   3377     try self.asmRegisterRegister(
   3378         .{ ._, .@"test" },
   3379         registerAlias(.rdx, abi_size),
   3380         registerAlias(.rdx, abi_size),
   3381     );
   3382     try self.asmCmovccRegisterRegister(
   3383         registerAlias(divisor, abi_size),
   3384         registerAlias(.rdx, abi_size),
   3385         .z,
   3386     );
   3387     try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax });
   3388     return MCValue{ .register = divisor };
   3389 }
   3390 
   3391 fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
   3392     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3393 
   3394     try self.spillRegisters(&.{.rcx});
   3395 
   3396     const tag = self.air.instructions.items(.tag)[inst];
   3397     try self.register_manager.getReg(.rcx, null);
   3398     const lhs = try self.resolveInst(bin_op.lhs);
   3399     const rhs = try self.resolveInst(bin_op.rhs);
   3400     const lhs_ty = self.air.typeOf(bin_op.lhs);
   3401     const rhs_ty = self.air.typeOf(bin_op.rhs);
   3402 
   3403     const result = try self.genShiftBinOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty);
   3404 
   3405     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3406 }
   3407 
   3408 fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
   3409     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3410     _ = bin_op;
   3411     return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
   3412     //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3413 }
   3414 
   3415 fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void {
   3416     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3417     const result: MCValue = result: {
   3418         const pl_ty = self.air.typeOfIndex(inst);
   3419         const opt_mcv = try self.resolveInst(ty_op.operand);
   3420 
   3421         if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) {
   3422             switch (opt_mcv) {
   3423                 .register => |reg| try self.truncateRegister(pl_ty, reg),
   3424                 .register_overflow => |ro| try self.truncateRegister(pl_ty, ro.reg),
   3425                 else => {},
   3426             }
   3427             break :result opt_mcv;
   3428         }
   3429 
   3430         const pl_mcv = try self.allocRegOrMem(inst, true);
   3431         try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) {
   3432             else => opt_mcv,
   3433             .register_overflow => |ro| .{ .register = ro.reg },
   3434         });
   3435         break :result pl_mcv;
   3436     };
   3437     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3438 }
   3439 
   3440 fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
   3441     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3442 
   3443     const dst_ty = self.air.typeOfIndex(inst);
   3444     const opt_mcv = try self.resolveInst(ty_op.operand);
   3445 
   3446     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
   3447         opt_mcv
   3448     else
   3449         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
   3450     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3451 }
   3452 
   3453 fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
   3454     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3455     const result = result: {
   3456         const dst_ty = self.air.typeOfIndex(inst);
   3457         const src_ty = self.air.typeOf(ty_op.operand);
   3458         const opt_ty = src_ty.childType();
   3459         const src_mcv = try self.resolveInst(ty_op.operand);
   3460 
   3461         if (opt_ty.optionalReprIsPayload()) {
   3462             break :result if (self.liveness.isUnused(inst))
   3463                 .unreach
   3464             else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3465                 src_mcv
   3466             else
   3467                 try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   3468         }
   3469 
   3470         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3471             src_mcv
   3472         else
   3473             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   3474 
   3475         const pl_ty = dst_ty.childType();
   3476         const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*));
   3477         try self.genSetMem(.{ .reg = dst_mcv.register }, pl_abi_size, Type.bool, .{ .immediate = 1 });
   3478         break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
   3479     };
   3480     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3481 }
   3482 
   3483 fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
   3484     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3485     const err_union_ty = self.air.typeOf(ty_op.operand);
   3486     const err_ty = err_union_ty.errorUnionSet();
   3487     const payload_ty = err_union_ty.errorUnionPayload();
   3488     const operand = try self.resolveInst(ty_op.operand);
   3489 
   3490     const result: MCValue = result: {
   3491         if (err_ty.errorSetIsEmpty()) {
   3492             break :result MCValue{ .immediate = 0 };
   3493         }
   3494 
   3495         if (!payload_ty.hasRuntimeBitsIgnoreComptime()) {
   3496             break :result operand;
   3497         }
   3498 
   3499         const err_off = errUnionErrorOffset(payload_ty, self.target.*);
   3500         switch (operand) {
   3501             .register => |reg| {
   3502                 // TODO reuse operand
   3503                 const eu_lock = self.register_manager.lockReg(reg);
   3504                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   3505 
   3506                 const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
   3507                 if (err_off > 0) {
   3508                     const shift = @intCast(u6, err_off * 8);
   3509                     try self.genShiftBinOpMir(
   3510                         .{ ._r, .sh },
   3511                         err_union_ty,
   3512                         result,
   3513                         .{ .immediate = shift },
   3514                     );
   3515                 } else {
   3516                     try self.truncateRegister(Type.anyerror, result.register);
   3517                 }
   3518                 break :result result;
   3519             },
   3520             .load_frame => |frame_addr| break :result .{ .load_frame = .{
   3521                 .index = frame_addr.index,
   3522                 .off = frame_addr.off + @intCast(i32, err_off),
   3523             } },
   3524             else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}),
   3525         }
   3526     };
   3527     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3528 }
   3529 
   3530 fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
   3531     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3532     const err_union_ty = self.air.typeOf(ty_op.operand);
   3533     const operand = try self.resolveInst(ty_op.operand);
   3534     const result = try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, operand);
   3535     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3536 }
   3537 
   3538 fn genUnwrapErrorUnionPayloadMir(
   3539     self: *Self,
   3540     maybe_inst: ?Air.Inst.Index,
   3541     err_union_ty: Type,
   3542     err_union: MCValue,
   3543 ) !MCValue {
   3544     const payload_ty = err_union_ty.errorUnionPayload();
   3545 
   3546     const result: MCValue = result: {
   3547         if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
   3548 
   3549         const payload_off = errUnionPayloadOffset(payload_ty, self.target.*);
   3550         switch (err_union) {
   3551             .load_frame => |frame_addr| break :result .{ .load_frame = .{
   3552                 .index = frame_addr.index,
   3553                 .off = frame_addr.off + @intCast(i32, payload_off),
   3554             } },
   3555             .register => |reg| {
   3556                 // TODO reuse operand
   3557                 const eu_lock = self.register_manager.lockReg(reg);
   3558                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   3559 
   3560                 const result_mcv: MCValue = if (maybe_inst) |inst|
   3561                     try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)
   3562                 else
   3563                     .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
   3564                 if (payload_off > 0) {
   3565                     const shift = @intCast(u6, payload_off * 8);
   3566                     try self.genShiftBinOpMir(
   3567                         .{ ._r, .sh },
   3568                         err_union_ty,
   3569                         result_mcv,
   3570                         .{ .immediate = shift },
   3571                     );
   3572                 } else {
   3573                     try self.truncateRegister(payload_ty, result_mcv.register);
   3574                 }
   3575                 break :result result_mcv;
   3576             },
   3577             else => return self.fail("TODO implement genUnwrapErrorUnionPayloadMir for {}", .{err_union}),
   3578         }
   3579     };
   3580 
   3581     return result;
   3582 }
   3583 
   3584 // *(E!T) -> E
   3585 fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   3586     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3587 
   3588     const src_ty = self.air.typeOf(ty_op.operand);
   3589     const src_mcv = try self.resolveInst(ty_op.operand);
   3590     const src_reg = switch (src_mcv) {
   3591         .register => |reg| reg,
   3592         else => try self.copyToTmpRegister(src_ty, src_mcv),
   3593     };
   3594     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3595     defer self.register_manager.unlockReg(src_lock);
   3596 
   3597     const dst_reg = try self.register_manager.allocReg(inst, gp);
   3598     const dst_mcv = MCValue{ .register = dst_reg };
   3599     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   3600     defer self.register_manager.unlockReg(dst_lock);
   3601 
   3602     const eu_ty = src_ty.childType();
   3603     const pl_ty = eu_ty.errorUnionPayload();
   3604     const err_ty = eu_ty.errorUnionSet();
   3605     const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*));
   3606     const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*));
   3607     try self.asmRegisterMemory(
   3608         .{ ._, .mov },
   3609         registerAlias(dst_reg, err_abi_size),
   3610         Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{
   3611             .base = .{ .reg = src_reg },
   3612             .disp = err_off,
   3613         }),
   3614     );
   3615 
   3616     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3617 }
   3618 
   3619 // *(E!T) -> *T
   3620 fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
   3621     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3622 
   3623     const src_ty = self.air.typeOf(ty_op.operand);
   3624     const src_mcv = try self.resolveInst(ty_op.operand);
   3625     const src_reg = switch (src_mcv) {
   3626         .register => |reg| reg,
   3627         else => try self.copyToTmpRegister(src_ty, src_mcv),
   3628     };
   3629     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3630     defer self.register_manager.unlockReg(src_lock);
   3631 
   3632     const dst_ty = self.air.typeOfIndex(inst);
   3633     const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3634         src_reg
   3635     else
   3636         try self.register_manager.allocReg(inst, gp);
   3637     const dst_mcv = MCValue{ .register = dst_reg };
   3638     const dst_lock = self.register_manager.lockReg(dst_reg);
   3639     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3640 
   3641     const eu_ty = src_ty.childType();
   3642     const pl_ty = eu_ty.errorUnionPayload();
   3643     const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*));
   3644     const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   3645     try self.asmRegisterMemory(
   3646         .{ ._, .lea },
   3647         registerAlias(dst_reg, dst_abi_size),
   3648         Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
   3649     );
   3650 
   3651     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3652 }
   3653 
   3654 fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
   3655     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3656     const result: MCValue = result: {
   3657         const src_ty = self.air.typeOf(ty_op.operand);
   3658         const src_mcv = try self.resolveInst(ty_op.operand);
   3659         const src_reg = switch (src_mcv) {
   3660             .register => |reg| reg,
   3661             else => try self.copyToTmpRegister(src_ty, src_mcv),
   3662         };
   3663         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3664         defer self.register_manager.unlockReg(src_lock);
   3665 
   3666         const eu_ty = src_ty.childType();
   3667         const pl_ty = eu_ty.errorUnionPayload();
   3668         const err_ty = eu_ty.errorUnionSet();
   3669         const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*));
   3670         const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*));
   3671         try self.asmMemoryImmediate(
   3672             .{ ._, .mov },
   3673             Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{
   3674                 .base = .{ .reg = src_reg },
   3675                 .disp = err_off,
   3676             }),
   3677             Immediate.u(0),
   3678         );
   3679 
   3680         if (self.liveness.isUnused(inst)) break :result .unreach;
   3681 
   3682         const dst_ty = self.air.typeOfIndex(inst);
   3683         const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3684             src_reg
   3685         else
   3686             try self.register_manager.allocReg(inst, gp);
   3687         const dst_lock = self.register_manager.lockReg(dst_reg);
   3688         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3689 
   3690         const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*));
   3691         const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   3692         try self.asmRegisterMemory(
   3693             .{ ._, .lea },
   3694             registerAlias(dst_reg, dst_abi_size),
   3695             Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
   3696         );
   3697         break :result .{ .register = dst_reg };
   3698     };
   3699     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3700 }
   3701 
   3702 fn airErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
   3703     _ = inst;
   3704     return self.fail("TODO implement airErrReturnTrace for {}", .{self.target.cpu.arch});
   3705     //return self.finishAir(inst, result, .{ .none, .none, .none });
   3706 }
   3707 
   3708 fn airSetErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
   3709     _ = inst;
   3710     return self.fail("TODO implement airSetErrReturnTrace for {}", .{self.target.cpu.arch});
   3711 }
   3712 
   3713 fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void {
   3714     _ = inst;
   3715     return self.fail("TODO implement airSaveErrReturnTraceIndex for {}", .{self.target.cpu.arch});
   3716 }
   3717 
   3718 fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void {
   3719     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3720     const result: MCValue = result: {
   3721         const pl_ty = self.air.typeOf(ty_op.operand);
   3722         if (!pl_ty.hasRuntimeBits()) break :result .{ .immediate = 1 };
   3723 
   3724         const opt_ty = self.air.typeOfIndex(inst);
   3725         const pl_mcv = try self.resolveInst(ty_op.operand);
   3726         const same_repr = opt_ty.optionalReprIsPayload();
   3727         if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv;
   3728 
   3729         const pl_lock: ?RegisterLock = switch (pl_mcv) {
   3730             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3731             else => null,
   3732         };
   3733         defer if (pl_lock) |lock| self.register_manager.unlockReg(lock);
   3734 
   3735         const opt_mcv = try self.allocRegOrMem(inst, true);
   3736         try self.genCopy(pl_ty, opt_mcv, pl_mcv);
   3737 
   3738         if (!same_repr) {
   3739             const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*));
   3740             switch (opt_mcv) {
   3741                 else => unreachable,
   3742 
   3743                 .register => |opt_reg| try self.asmRegisterImmediate(
   3744                     .{ ._s, .bt },
   3745                     opt_reg,
   3746                     Immediate.u(@intCast(u6, pl_abi_size * 8)),
   3747                 ),
   3748 
   3749                 .load_frame => |frame_addr| try self.asmMemoryImmediate(
   3750                     .{ ._, .mov },
   3751                     Memory.sib(.byte, .{
   3752                         .base = .{ .frame = frame_addr.index },
   3753                         .disp = frame_addr.off + pl_abi_size,
   3754                     }),
   3755                     Immediate.u(1),
   3756                 ),
   3757             }
   3758         }
   3759         break :result opt_mcv;
   3760     };
   3761     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3762 }
   3763 
   3764 /// T to E!T
   3765 fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
   3766     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3767 
   3768     const eu_ty = self.air.getRefType(ty_op.ty);
   3769     const pl_ty = eu_ty.errorUnionPayload();
   3770     const err_ty = eu_ty.errorUnionSet();
   3771     const operand = try self.resolveInst(ty_op.operand);
   3772 
   3773     const result: MCValue = result: {
   3774         if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result .{ .immediate = 0 };
   3775 
   3776         const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*));
   3777         const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*));
   3778         const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*));
   3779         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand);
   3780         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 });
   3781         break :result .{ .load_frame = .{ .index = frame_index } };
   3782     };
   3783     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3784 }
   3785 
   3786 /// E to E!T
   3787 fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
   3788     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3789 
   3790     const eu_ty = self.air.getRefType(ty_op.ty);
   3791     const pl_ty = eu_ty.errorUnionPayload();
   3792     const err_ty = eu_ty.errorUnionSet();
   3793 
   3794     const result: MCValue = result: {
   3795         if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result try self.resolveInst(ty_op.operand);
   3796 
   3797         const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*));
   3798         const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*));
   3799         const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*));
   3800         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef);
   3801         const operand = try self.resolveInst(ty_op.operand);
   3802         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand);
   3803         break :result .{ .load_frame = .{ .index = frame_index } };
   3804     };
   3805     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3806 }
   3807 
   3808 fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
   3809     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3810     const result = result: {
   3811         const src_mcv = try self.resolveInst(ty_op.operand);
   3812         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
   3813 
   3814         const dst_mcv = try self.allocRegOrMem(inst, true);
   3815         const dst_ty = self.air.typeOfIndex(inst);
   3816         try self.genCopy(dst_ty, dst_mcv, src_mcv);
   3817         break :result dst_mcv;
   3818     };
   3819     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3820 }
   3821 
   3822 fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
   3823     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3824 
   3825     const operand = try self.resolveInst(ty_op.operand);
   3826     const dst_mcv: MCValue = blk: {
   3827         switch (operand) {
   3828             .load_frame => |frame_addr| break :blk .{ .load_frame = .{
   3829                 .index = frame_addr.index,
   3830                 .off = frame_addr.off + 8,
   3831             } },
   3832             else => return self.fail("TODO implement slice_len for {}", .{operand}),
   3833         }
   3834     };
   3835 
   3836     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3837 }
   3838 
   3839 fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void {
   3840     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3841 
   3842     const src_ty = self.air.typeOf(ty_op.operand);
   3843     const src_mcv = try self.resolveInst(ty_op.operand);
   3844     const src_reg = switch (src_mcv) {
   3845         .register => |reg| reg,
   3846         else => try self.copyToTmpRegister(src_ty, src_mcv),
   3847     };
   3848     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3849     defer self.register_manager.unlockReg(src_lock);
   3850 
   3851     const dst_ty = self.air.typeOfIndex(inst);
   3852     const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3853         src_reg
   3854     else
   3855         try self.register_manager.allocReg(inst, gp);
   3856     const dst_mcv = MCValue{ .register = dst_reg };
   3857     const dst_lock = self.register_manager.lockReg(dst_reg);
   3858     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3859 
   3860     const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   3861     try self.asmRegisterMemory(
   3862         .{ ._, .lea },
   3863         registerAlias(dst_reg, dst_abi_size),
   3864         Memory.sib(.qword, .{
   3865             .base = .{ .reg = src_reg },
   3866             .disp = @divExact(self.target.cpu.arch.ptrBitWidth(), 8),
   3867         }),
   3868     );
   3869 
   3870     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3871 }
   3872 
   3873 fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
   3874     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3875 
   3876     const dst_ty = self.air.typeOfIndex(inst);
   3877     const opt_mcv = try self.resolveInst(ty_op.operand);
   3878 
   3879     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
   3880         opt_mcv
   3881     else
   3882         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
   3883     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3884 }
   3885 
   3886 fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register {
   3887     const reg: Register = blk: {
   3888         switch (index) {
   3889             .immediate => |imm| {
   3890                 // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
   3891                 // and set the register directly to the scaled offset as an immediate.
   3892                 const reg = try self.register_manager.allocReg(null, gp);
   3893                 try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size });
   3894                 break :blk reg;
   3895             },
   3896             else => {
   3897                 const reg = try self.copyToTmpRegister(index_ty, index);
   3898                 try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
   3899                 break :blk reg;
   3900             },
   3901         }
   3902     };
   3903     return reg;
   3904 }
   3905 
   3906 fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
   3907     const slice_ty = self.air.typeOf(lhs);
   3908     const slice_mcv = try self.resolveInst(lhs);
   3909     const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) {
   3910         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3911         else => null,
   3912     };
   3913     defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   3914 
   3915     const elem_ty = slice_ty.childType();
   3916     const elem_size = elem_ty.abiSize(self.target.*);
   3917     var buf: Type.SlicePtrFieldTypeBuffer = undefined;
   3918     const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
   3919 
   3920     const index_ty = self.air.typeOf(rhs);
   3921     const index_mcv = try self.resolveInst(rhs);
   3922     const index_mcv_lock: ?RegisterLock = switch (index_mcv) {
   3923         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3924         else => null,
   3925     };
   3926     defer if (index_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   3927 
   3928     const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
   3929     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   3930     defer self.register_manager.unlockReg(offset_reg_lock);
   3931 
   3932     const addr_reg = try self.register_manager.allocReg(null, gp);
   3933     try self.genSetReg(addr_reg, Type.usize, slice_mcv);
   3934     // TODO we could allocate register here, but need to expect addr register and potentially
   3935     // offset register.
   3936     try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{
   3937         .register = offset_reg,
   3938     });
   3939     return MCValue{ .register = addr_reg.to64() };
   3940 }
   3941 
   3942 fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
   3943     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3944     const slice_ty = self.air.typeOf(bin_op.lhs);
   3945 
   3946     var buf: Type.SlicePtrFieldTypeBuffer = undefined;
   3947     const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
   3948     const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
   3949     const dst_mcv = try self.allocRegOrMem(inst, false);
   3950     try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
   3951 
   3952     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3953 }
   3954 
   3955 fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
   3956     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   3957     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3958     const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs);
   3959     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
   3960 }
   3961 
   3962 fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
   3963     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3964 
   3965     const array_ty = self.air.typeOf(bin_op.lhs);
   3966     const array = try self.resolveInst(bin_op.lhs);
   3967     const array_lock: ?RegisterLock = switch (array) {
   3968         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3969         else => null,
   3970     };
   3971     defer if (array_lock) |lock| self.register_manager.unlockReg(lock);
   3972 
   3973     const elem_ty = array_ty.childType();
   3974     const elem_abi_size = elem_ty.abiSize(self.target.*);
   3975 
   3976     const index_ty = self.air.typeOf(bin_op.rhs);
   3977     const index = try self.resolveInst(bin_op.rhs);
   3978     const index_lock: ?RegisterLock = switch (index) {
   3979         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3980         else => null,
   3981     };
   3982     defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   3983 
   3984     const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size);
   3985     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   3986     defer self.register_manager.unlockReg(offset_reg_lock);
   3987 
   3988     const addr_reg = try self.register_manager.allocReg(null, gp);
   3989     switch (array) {
   3990         .register => {
   3991             const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, self.target.*));
   3992             try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array);
   3993             try self.asmRegisterMemory(
   3994                 .{ ._, .lea },
   3995                 addr_reg,
   3996                 Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }),
   3997             );
   3998         },
   3999         .load_frame => |frame_addr| try self.asmRegisterMemory(
   4000             .{ ._, .lea },
   4001             addr_reg,
   4002             Memory.sib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off }),
   4003         ),
   4004         .memory,
   4005         .load_direct,
   4006         .load_got,
   4007         .load_tlv,
   4008         => try self.genSetReg(addr_reg, Type.usize, array.address()),
   4009         .lea_direct, .lea_tlv => unreachable,
   4010         else => return self.fail("TODO implement array_elem_val when array is {}", .{array}),
   4011     }
   4012 
   4013     // TODO we could allocate register here, but need to expect addr register and potentially
   4014     // offset register.
   4015     const dst_mcv = try self.allocRegOrMem(inst, false);
   4016     try self.genBinOpMir(
   4017         .{ ._, .add },
   4018         Type.usize,
   4019         .{ .register = addr_reg },
   4020         .{ .register = offset_reg },
   4021     );
   4022     try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
   4023 
   4024     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   4025 }
   4026 
   4027 fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void {
   4028     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   4029     const ptr_ty = self.air.typeOf(bin_op.lhs);
   4030 
   4031     // this is identical to the `airPtrElemPtr` codegen expect here an
   4032     // additional `mov` is needed at the end to get the actual value
   4033 
   4034     const elem_ty = ptr_ty.elemType2();
   4035     const elem_abi_size = @intCast(u32, elem_ty.abiSize(self.target.*));
   4036     const index_ty = self.air.typeOf(bin_op.rhs);
   4037     const index_mcv = try self.resolveInst(bin_op.rhs);
   4038     const index_lock = switch (index_mcv) {
   4039         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4040         else => null,
   4041     };
   4042     defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   4043 
   4044     const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
   4045     const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   4046     defer self.register_manager.unlockReg(offset_lock);
   4047 
   4048     const ptr_mcv = try self.resolveInst(bin_op.lhs);
   4049     const elem_ptr_reg = if (ptr_mcv.isRegister() and self.liveness.operandDies(inst, 0))
   4050         ptr_mcv.register
   4051     else
   4052         try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   4053     const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg);
   4054     defer self.register_manager.unlockReg(elem_ptr_lock);
   4055     try self.asmRegisterRegister(
   4056         .{ ._, .add },
   4057         elem_ptr_reg,
   4058         offset_reg,
   4059     );
   4060 
   4061     const dst_mcv = try self.allocRegOrMem(inst, true);
   4062     const dst_lock = switch (dst_mcv) {
   4063         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4064         else => null,
   4065     };
   4066     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4067     try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg });
   4068 
   4069     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   4070 }
   4071 
   4072 fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void {
   4073     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   4074     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
   4075 
   4076     const ptr_ty = self.air.typeOf(extra.lhs);
   4077     const ptr = try self.resolveInst(extra.lhs);
   4078     const ptr_lock: ?RegisterLock = switch (ptr) {
   4079         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4080         else => null,
   4081     };
   4082     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   4083 
   4084     const elem_ty = ptr_ty.elemType2();
   4085     const elem_abi_size = elem_ty.abiSize(self.target.*);
   4086     const index_ty = self.air.typeOf(extra.rhs);
   4087     const index = try self.resolveInst(extra.rhs);
   4088     const index_lock: ?RegisterLock = switch (index) {
   4089         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4090         else => null,
   4091     };
   4092     defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   4093 
   4094     const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size);
   4095     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   4096     defer self.register_manager.unlockReg(offset_reg_lock);
   4097 
   4098     const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr);
   4099     try self.genBinOpMir(.{ ._, .add }, ptr_ty, dst_mcv, .{ .register = offset_reg });
   4100 
   4101     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
   4102 }
   4103 
   4104 fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
   4105     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   4106     const ptr_union_ty = self.air.typeOf(bin_op.lhs);
   4107     const union_ty = ptr_union_ty.childType();
   4108     const tag_ty = self.air.typeOf(bin_op.rhs);
   4109     const layout = union_ty.unionGetLayout(self.target.*);
   4110 
   4111     if (layout.tag_size == 0) {
   4112         return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   4113     }
   4114 
   4115     const ptr = try self.resolveInst(bin_op.lhs);
   4116     const ptr_lock: ?RegisterLock = switch (ptr) {
   4117         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4118         else => null,
   4119     };
   4120     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   4121 
   4122     const tag = try self.resolveInst(bin_op.rhs);
   4123     const tag_lock: ?RegisterLock = switch (tag) {
   4124         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4125         else => null,
   4126     };
   4127     defer if (tag_lock) |lock| self.register_manager.unlockReg(lock);
   4128 
   4129     const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: {
   4130         // TODO reusing the operand
   4131         const reg = try self.copyToTmpRegister(ptr_union_ty, ptr);
   4132         try self.genBinOpMir(
   4133             .{ ._, .add },
   4134             ptr_union_ty,
   4135             .{ .register = reg },
   4136             .{ .immediate = layout.payload_size },
   4137         );
   4138         break :blk MCValue{ .register = reg };
   4139     } else ptr;
   4140 
   4141     var ptr_tag_pl = ptr_union_ty.ptrInfo();
   4142     ptr_tag_pl.data.pointee_type = tag_ty;
   4143     const ptr_tag_ty = Type.initPayload(&ptr_tag_pl.base);
   4144     try self.store(ptr_tag_ty, adjusted_ptr, tag);
   4145 
   4146     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   4147 }
   4148 
   4149 fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
   4150     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4151 
   4152     const tag_ty = self.air.typeOfIndex(inst);
   4153     const union_ty = self.air.typeOf(ty_op.operand);
   4154     const layout = union_ty.unionGetLayout(self.target.*);
   4155 
   4156     if (layout.tag_size == 0) {
   4157         return self.finishAir(inst, .none, .{ ty_op.operand, .none, .none });
   4158     }
   4159 
   4160     // TODO reusing the operand
   4161     const operand = try self.resolveInst(ty_op.operand);
   4162     const operand_lock: ?RegisterLock = switch (operand) {
   4163         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4164         else => null,
   4165     };
   4166     defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
   4167 
   4168     const tag_abi_size = tag_ty.abiSize(self.target.*);
   4169     const dst_mcv: MCValue = blk: {
   4170         switch (operand) {
   4171             .load_frame => |frame_addr| {
   4172                 if (tag_abi_size <= 8) {
   4173                     const off: i32 = if (layout.tag_align < layout.payload_align)
   4174                         @intCast(i32, layout.payload_size)
   4175                     else
   4176                         0;
   4177                     break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{
   4178                         .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
   4179                     });
   4180                 }
   4181 
   4182                 return self.fail("TODO implement get_union_tag for ABI larger than 8 bytes and operand {}", .{operand});
   4183             },
   4184             .register => {
   4185                 const shift: u6 = if (layout.tag_align < layout.payload_align)
   4186                     @intCast(u6, layout.payload_size * 8)
   4187                 else
   4188                     0;
   4189                 const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand);
   4190                 try self.genShiftBinOpMir(.{ ._r, .sh }, Type.usize, result, .{ .immediate = shift });
   4191                 break :blk MCValue{
   4192                     .register = registerAlias(result.register, @intCast(u32, layout.tag_size)),
   4193                 };
   4194             },
   4195             else => return self.fail("TODO implement get_union_tag for {}", .{operand}),
   4196         }
   4197     };
   4198 
   4199     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4200 }
   4201 
   4202 fn airClz(self: *Self, inst: Air.Inst.Index) !void {
   4203     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4204     const result = result: {
   4205         const dst_ty = self.air.typeOfIndex(inst);
   4206         const src_ty = self.air.typeOf(ty_op.operand);
   4207 
   4208         const src_mcv = try self.resolveInst(ty_op.operand);
   4209         const mat_src_mcv = switch (src_mcv) {
   4210             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   4211             else => src_mcv,
   4212         };
   4213         const mat_src_lock = switch (mat_src_mcv) {
   4214             .register => |reg| self.register_manager.lockReg(reg),
   4215             else => null,
   4216         };
   4217         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   4218 
   4219         const dst_reg = try self.register_manager.allocReg(inst, gp);
   4220         const dst_mcv = MCValue{ .register = dst_reg };
   4221         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4222         defer self.register_manager.unlockReg(dst_lock);
   4223 
   4224         const src_bits = src_ty.bitSize(self.target.*);
   4225         if (self.hasFeature(.lzcnt)) {
   4226             if (src_bits <= 64) {
   4227                 try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
   4228 
   4229                 const extra_bits = self.regExtraBits(src_ty);
   4230                 if (extra_bits > 0) {
   4231                     try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits });
   4232                 }
   4233             } else if (src_bits <= 128) {
   4234                 const tmp_reg = try self.register_manager.allocReg(null, gp);
   4235                 const tmp_mcv = MCValue{ .register = tmp_reg };
   4236                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   4237                 defer self.register_manager.unlockReg(tmp_lock);
   4238 
   4239                 try self.genBinOpMir(.{ ._, .lzcnt }, Type.u64, dst_mcv, mat_src_mcv);
   4240                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
   4241                 try self.genBinOpMir(
   4242                     .{ ._, .lzcnt },
   4243                     Type.u64,
   4244                     tmp_mcv,
   4245                     mat_src_mcv.address().offset(8).deref(),
   4246                 );
   4247                 try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
   4248 
   4249                 if (src_bits < 128) {
   4250                     try self.genBinOpMir(
   4251                         .{ ._, .sub },
   4252                         dst_ty,
   4253                         dst_mcv,
   4254                         .{ .immediate = 128 - src_bits },
   4255                     );
   4256                 }
   4257             } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4258             break :result dst_mcv;
   4259         }
   4260 
   4261         if (src_bits > 64)
   4262             return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4263         if (math.isPowerOfTwo(src_bits)) {
   4264             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
   4265                 .immediate = src_bits ^ (src_bits - 1),
   4266             });
   4267             try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
   4268 
   4269             const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
   4270             try self.asmCmovccRegisterRegister(
   4271                 registerAlias(dst_reg, cmov_abi_size),
   4272                 registerAlias(imm_reg, cmov_abi_size),
   4273                 .z,
   4274             );
   4275 
   4276             try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 });
   4277         } else {
   4278             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
   4279                 .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)),
   4280             });
   4281             try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
   4282 
   4283             const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
   4284             try self.asmCmovccRegisterRegister(
   4285                 registerAlias(imm_reg, cmov_abi_size),
   4286                 registerAlias(dst_reg, cmov_abi_size),
   4287                 .nz,
   4288             );
   4289 
   4290             try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 });
   4291             try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg });
   4292         }
   4293         break :result dst_mcv;
   4294     };
   4295     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4296 }
   4297 
   4298 fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
   4299     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4300     const result = result: {
   4301         const dst_ty = self.air.typeOfIndex(inst);
   4302         const src_ty = self.air.typeOf(ty_op.operand);
   4303         const src_bits = src_ty.bitSize(self.target.*);
   4304 
   4305         const src_mcv = try self.resolveInst(ty_op.operand);
   4306         const mat_src_mcv = switch (src_mcv) {
   4307             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   4308             else => src_mcv,
   4309         };
   4310         const mat_src_lock = switch (mat_src_mcv) {
   4311             .register => |reg| self.register_manager.lockReg(reg),
   4312             else => null,
   4313         };
   4314         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   4315 
   4316         const dst_reg = try self.register_manager.allocReg(inst, gp);
   4317         const dst_mcv = MCValue{ .register = dst_reg };
   4318         const dst_lock = self.register_manager.lockReg(dst_reg);
   4319         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4320 
   4321         if (self.hasFeature(.bmi)) {
   4322             if (src_bits <= 64) {
   4323                 const extra_bits = self.regExtraBits(src_ty);
   4324                 const masked_mcv = if (extra_bits > 0) masked: {
   4325                     const tmp_mcv = tmp: {
   4326                         if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
   4327                             break :tmp src_mcv;
   4328                         try self.genSetReg(dst_reg, src_ty, src_mcv);
   4329                         break :tmp dst_mcv;
   4330                     };
   4331                     try self.genBinOpMir(
   4332                         .{ ._, .@"or" },
   4333                         src_ty,
   4334                         tmp_mcv,
   4335                         .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) <<
   4336                             @intCast(u6, src_bits) },
   4337                     );
   4338                     break :masked tmp_mcv;
   4339                 } else mat_src_mcv;
   4340                 try self.genBinOpMir(.{ ._, .tzcnt }, src_ty, dst_mcv, masked_mcv);
   4341             } else if (src_bits <= 128) {
   4342                 const tmp_reg = try self.register_manager.allocReg(null, gp);
   4343                 const tmp_mcv = MCValue{ .register = tmp_reg };
   4344                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   4345                 defer self.register_manager.unlockReg(tmp_lock);
   4346 
   4347                 const masked_mcv = if (src_bits < 128) masked: {
   4348                     try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref());
   4349                     try self.genBinOpMir(
   4350                         .{ ._, .@"or" },
   4351                         Type.u64,
   4352                         dst_mcv,
   4353                         .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) },
   4354                     );
   4355                     break :masked dst_mcv;
   4356                 } else mat_src_mcv.address().offset(8).deref();
   4357                 try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv);
   4358                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
   4359                 try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, mat_src_mcv);
   4360                 try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
   4361             } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4362             break :result dst_mcv;
   4363         }
   4364 
   4365         if (src_bits > 64)
   4366             return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4367 
   4368         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
   4369         try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
   4370 
   4371         const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
   4372         try self.asmCmovccRegisterRegister(
   4373             registerAlias(dst_reg, cmov_abi_size),
   4374             registerAlias(width_reg, cmov_abi_size),
   4375             .z,
   4376         );
   4377         break :result dst_mcv;
   4378     };
   4379     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4380 }
   4381 
   4382 fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
   4383     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4384     const result: MCValue = result: {
   4385         const src_ty = self.air.typeOf(ty_op.operand);
   4386         const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
   4387         const src_mcv = try self.resolveInst(ty_op.operand);
   4388 
   4389         if (self.hasFeature(.popcnt)) {
   4390             const mat_src_mcv = switch (src_mcv) {
   4391                 .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   4392                 else => src_mcv,
   4393             };
   4394             const mat_src_lock = switch (mat_src_mcv) {
   4395                 .register => |reg| self.register_manager.lockReg(reg),
   4396                 else => null,
   4397             };
   4398             defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   4399 
   4400             const dst_mcv: MCValue =
   4401                 if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4402                 src_mcv
   4403             else
   4404                 .{ .register = try self.register_manager.allocReg(inst, gp) };
   4405 
   4406             const popcnt_ty = if (src_abi_size > 1) src_ty else Type.u16;
   4407             try self.genBinOpMir(.{ ._, .popcnt }, popcnt_ty, dst_mcv, mat_src_mcv);
   4408             break :result dst_mcv;
   4409         }
   4410 
   4411         const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - src_abi_size * 8);
   4412         const imm_0_1 = Immediate.u(mask / 0b1_1);
   4413         const imm_00_11 = Immediate.u(mask / 0b01_01);
   4414         const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
   4415         const imm_0000_0001 = Immediate.u(mask / 0b1111_1111);
   4416 
   4417         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4418             src_mcv
   4419         else
   4420             try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv);
   4421         const dst_reg = dst_mcv.register;
   4422         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4423         defer self.register_manager.unlockReg(dst_lock);
   4424 
   4425         const tmp_reg = try self.register_manager.allocReg(null, gp);
   4426         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   4427         defer self.register_manager.unlockReg(tmp_lock);
   4428 
   4429         {
   4430             const dst = registerAlias(dst_reg, src_abi_size);
   4431             const tmp = registerAlias(tmp_reg, src_abi_size);
   4432             const imm = if (src_abi_size > 4)
   4433                 try self.register_manager.allocReg(null, gp)
   4434             else
   4435                 undefined;
   4436 
   4437             // dst = operand
   4438             try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   4439             // tmp = operand
   4440             try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1));
   4441             // tmp = operand >> 1
   4442             if (src_abi_size > 4) {
   4443                 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
   4444                 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4445             } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
   4446             // tmp = (operand >> 1) & 0x55...55
   4447             try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp);
   4448             // dst = temp1 = operand - ((operand >> 1) & 0x55...55)
   4449             try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   4450             // tmp = temp1
   4451             try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2));
   4452             // dst = temp1 >> 2
   4453             if (src_abi_size > 4) {
   4454                 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
   4455                 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4456                 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4457             } else {
   4458                 try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
   4459                 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
   4460             }
   4461             // tmp = temp1 & 0x33...33
   4462             // dst = (temp1 >> 2) & 0x33...33
   4463             try self.asmRegisterRegister(.{ ._, .add }, tmp, dst);
   4464             // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33)
   4465             try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
   4466             // dst = temp2
   4467             try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4));
   4468             // tmp = temp2 >> 4
   4469             try self.asmRegisterRegister(.{ ._, .add }, dst, tmp);
   4470             // dst = temp2 + (temp2 >> 4)
   4471             if (src_abi_size > 4) {
   4472                 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
   4473                 try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001);
   4474                 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4475                 try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp);
   4476             } else {
   4477                 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
   4478                 if (src_abi_size > 1) {
   4479                     try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001);
   4480                 }
   4481             }
   4482             // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
   4483             // dst = temp3 * 0x01...01
   4484             if (src_abi_size > 1) {
   4485                 try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8));
   4486             }
   4487             // dst = (temp3 * 0x01...01) >> (bits - 8)
   4488         }
   4489         break :result dst_mcv;
   4490     };
   4491     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4492 }
   4493 
   4494 fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, mem_ok: bool) !MCValue {
   4495     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4496 
   4497     const src_bits = self.regBitSize(src_ty);
   4498     const src_lock = switch (src_mcv) {
   4499         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4500         else => null,
   4501     };
   4502     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   4503 
   4504     switch (src_bits) {
   4505         else => unreachable,
   4506         8 => return if ((mem_ok or src_mcv.isRegister()) and
   4507             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4508             src_mcv
   4509         else
   4510             try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv),
   4511         16 => if ((mem_ok or src_mcv.isRegister()) and
   4512             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4513         {
   4514             try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 });
   4515             return src_mcv;
   4516         },
   4517         32, 64 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
   4518             try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv);
   4519             return src_mcv;
   4520         },
   4521     }
   4522 
   4523     if (src_mcv.isRegister()) {
   4524         const dst_mcv: MCValue = if (mem_ok)
   4525             try self.allocRegOrMem(inst, true)
   4526         else
   4527             .{ .register = try self.register_manager.allocReg(inst, gp) };
   4528         if (dst_mcv.isRegister()) {
   4529             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
   4530             defer self.register_manager.unlockReg(dst_lock);
   4531 
   4532             try self.genSetReg(dst_mcv.register, src_ty, src_mcv);
   4533             switch (src_bits) {
   4534                 else => unreachable,
   4535                 16 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
   4536                 32, 64 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
   4537             }
   4538         } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
   4539         return dst_mcv;
   4540     }
   4541 
   4542     const dst_reg = try self.register_manager.allocReg(inst, gp);
   4543     const dst_mcv = MCValue{ .register = dst_reg };
   4544     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4545     defer self.register_manager.unlockReg(dst_lock);
   4546 
   4547     try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
   4548     return dst_mcv;
   4549 }
   4550 
   4551 fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
   4552     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4553 
   4554     const src_ty = self.air.typeOf(ty_op.operand);
   4555     const src_mcv = try self.resolveInst(ty_op.operand);
   4556 
   4557     const dst_mcv = try self.byteSwap(inst, src_ty, src_mcv, true);
   4558     switch (self.regExtraBits(src_ty)) {
   4559         0 => {},
   4560         else => |extra| try self.genBinOpMir(
   4561             if (src_ty.isSignedInt()) .{ ._r, .sa } else .{ ._r, .sh },
   4562             src_ty,
   4563             dst_mcv,
   4564             .{ .immediate = extra },
   4565         ),
   4566     }
   4567 
   4568     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4569 }
   4570 
   4571 fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
   4572     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4573 
   4574     const src_ty = self.air.typeOf(ty_op.operand);
   4575     const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
   4576     const src_mcv = try self.resolveInst(ty_op.operand);
   4577 
   4578     const dst_mcv = try self.byteSwap(inst, src_ty, src_mcv, false);
   4579     const dst_reg = dst_mcv.register;
   4580     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4581     defer self.register_manager.unlockReg(dst_lock);
   4582 
   4583     const tmp_reg = try self.register_manager.allocReg(null, gp);
   4584     const tmp_lock = self.register_manager.lockReg(tmp_reg);
   4585     defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   4586 
   4587     {
   4588         const dst = registerAlias(dst_reg, src_abi_size);
   4589         const tmp = registerAlias(tmp_reg, src_abi_size);
   4590         const imm = if (src_abi_size > 4)
   4591             try self.register_manager.allocReg(null, gp)
   4592         else
   4593             undefined;
   4594 
   4595         const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - src_abi_size * 8);
   4596         const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
   4597         const imm_00_11 = Immediate.u(mask / 0b01_01);
   4598         const imm_0_1 = Immediate.u(mask / 0b1_1);
   4599 
   4600         // dst = temp1 = bswap(operand)
   4601         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   4602         // tmp = temp1
   4603         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4));
   4604         // dst = temp1 >> 4
   4605         if (src_abi_size > 4) {
   4606             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
   4607             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4608             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4609         } else {
   4610             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111);
   4611             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
   4612         }
   4613         // tmp = temp1 & 0x0F...0F
   4614         // dst = (temp1 >> 4) & 0x0F...0F
   4615         try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4));
   4616         // tmp = (temp1 & 0x0F...0F) << 4
   4617         try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp);
   4618         // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4)
   4619         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   4620         // tmp = temp2
   4621         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2));
   4622         // dst = temp2 >> 2
   4623         if (src_abi_size > 4) {
   4624             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
   4625             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4626             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4627         } else {
   4628             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
   4629             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
   4630         }
   4631         // tmp = temp2 & 0x33...33
   4632         // dst = (temp2 >> 2) & 0x33...33
   4633         try self.asmRegisterMemory(
   4634             .{ ._, .lea },
   4635             if (src_abi_size > 4) tmp.to64() else tmp.to32(),
   4636             Memory.sib(.qword, .{
   4637                 .base = .{ .reg = dst.to64() },
   4638                 .scale_index = .{ .index = tmp.to64(), .scale = 1 << 2 },
   4639             }),
   4640         );
   4641         // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2)
   4642         try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
   4643         // dst = temp3
   4644         try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1));
   4645         // tmp = temp3 >> 1
   4646         if (src_abi_size > 4) {
   4647             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
   4648             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4649             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4650         } else {
   4651             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1);
   4652             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
   4653         }
   4654         // dst = temp3 & 0x55...55
   4655         // tmp = (temp3 >> 1) & 0x55...55
   4656         try self.asmRegisterMemory(
   4657             .{ ._, .lea },
   4658             if (src_abi_size > 4) dst.to64() else dst.to32(),
   4659             Memory.sib(.qword, .{
   4660                 .base = .{ .reg = tmp.to64() },
   4661                 .scale_index = .{ .index = dst.to64(), .scale = 1 << 1 },
   4662             }),
   4663         );
   4664         // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1)
   4665     }
   4666 
   4667     switch (self.regExtraBits(src_ty)) {
   4668         0 => {},
   4669         else => |extra| try self.genBinOpMir(
   4670             if (src_ty.isSignedInt()) .{ ._r, .sa } else .{ ._r, .sh },
   4671             src_ty,
   4672             dst_mcv,
   4673             .{ .immediate = extra },
   4674         ),
   4675     }
   4676 
   4677     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4678 }
   4679 
   4680 fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
   4681     const un_op = self.air.instructions.items(.data)[inst].un_op;
   4682     const ty = self.air.typeOf(un_op);
   4683     const ty_bits = ty.floatBits(self.target.*);
   4684 
   4685     var arena = std.heap.ArenaAllocator.init(self.gpa);
   4686     defer arena.deinit();
   4687 
   4688     const ExpectedContents = union {
   4689         f16: Value.Payload.Float_16,
   4690         f32: Value.Payload.Float_32,
   4691         f64: Value.Payload.Float_64,
   4692         f80: Value.Payload.Float_80,
   4693         f128: Value.Payload.Float_128,
   4694     };
   4695     var stack align(@alignOf(ExpectedContents)) =
   4696         std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
   4697 
   4698     var vec_pl = Type.Payload.Array{
   4699         .base = .{ .tag = .vector },
   4700         .data = .{
   4701             .len = @divExact(128, ty_bits),
   4702             .elem_type = ty,
   4703         },
   4704     };
   4705     const vec_ty = Type.initPayload(&vec_pl.base);
   4706 
   4707     var sign_pl = Value.Payload.SubValue{
   4708         .base = .{ .tag = .repeated },
   4709         .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
   4710     };
   4711     const sign_val = Value.initPayload(&sign_pl.base);
   4712 
   4713     const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
   4714 
   4715     const src_mcv = try self.resolveInst(un_op);
   4716     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   4717         src_mcv
   4718     else
   4719         try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   4720     const dst_lock = self.register_manager.lockReg(dst_mcv.register);
   4721     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4722 
   4723     const tag = self.air.instructions.items(.tag)[inst];
   4724     try self.genBinOpMir(switch (ty_bits) {
   4725         // No point using an extra prefix byte for *pd which performs the same operation.
   4726         16, 32, 64, 128 => switch (tag) {
   4727             .neg => .{ ._ps, .xor },
   4728             .fabs => .{ ._ps, .andn },
   4729             else => unreachable,
   4730         },
   4731         80 => return self.fail("TODO implement airFloatSign for {}", .{
   4732             ty.fmt(self.bin_file.options.module.?),
   4733         }),
   4734         else => unreachable,
   4735     }, vec_ty, dst_mcv, sign_mcv);
   4736     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
   4737 }
   4738 
   4739 fn airRound(self: *Self, inst: Air.Inst.Index, mode: u4) !void {
   4740     const un_op = self.air.instructions.items(.data)[inst].un_op;
   4741     const ty = self.air.typeOf(un_op);
   4742 
   4743     const src_mcv = try self.resolveInst(un_op);
   4744     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   4745         src_mcv
   4746     else
   4747         try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   4748     const dst_reg = dst_mcv.getReg().?;
   4749     const dst_lock = self.register_manager.lockReg(dst_reg);
   4750     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4751     try self.genRound(ty, dst_reg, src_mcv, mode);
   4752     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
   4753 }
   4754 
   4755 fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4) !void {
            // Emits a round instruction that rounds `src_mcv` (of type `ty`) into `dst_reg`,
            // passing `mode` as the instruction's immediate operand.
            // Fails with a TODO error when the target lacks the sse4_1 feature or when no
            // instruction is selected for `ty` below.
   4756     if (!self.hasFeature(.sse4_1))
   4757         return self.fail("TODO implement genRound without sse4_1 feature", .{});
   4758 
            // Pick the MIR tag from the (element) float width and the vector length; targets
            // with the avx feature get the `v`-prefixed tags. A `null` selection means the
            // type is not implemented and is reported through the TODO failure at the end.
   4759     const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) {
   4760         .Float => switch (ty.floatBits(self.target.*)) {
   4761             32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
   4762             64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
   4763             16, 80, 128 => null,
   4764             else => unreachable,
   4765         },
   4766         .Vector => switch (ty.childType().zigTypeTag()) {
   4767             .Float => switch (ty.childType().floatBits(self.target.*)) {
   4768                 32 => switch (ty.vectorLen()) {
   4769                     1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
   4770                     2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
                            // More than four f32 elements are only handled with avx.
   4771                     5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
   4772                     else => null,
   4773                 },
   4774                 64 => switch (ty.vectorLen()) {
   4775                     1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
   4776                     2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
                            // More than two f64 elements are only handled with avx.
   4777                     3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
   4778                     else => null,
   4779                 },
   4780                 16, 80, 128 => null,
   4781                 else => unreachable,
   4782             },
   4783             else => null,
   4784         },
   4785         else => unreachable,
   4786     })) |tag| tag else return self.fail("TODO implement genRound for {}", .{
   4787         ty.fmt(self.bin_file.options.module.?),
   4788     });
   4789 
            // Operands are sized by the ABI size of `ty`.
   4790     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   4791     const dst_alias = registerAlias(dst_reg, abi_size);
            // The `v_ss`/`v_sd` forms take one more register operand than the others;
            // `dst_alias` is passed for it. In every form a memory source is used directly,
            // a register source is used as-is, and any other source is first copied into a
            // temporary register.
   4792     switch (mir_tag[0]) {
   4793         .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   4794             mir_tag,
   4795             dst_alias,
   4796             dst_alias,
   4797             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   4798             Immediate.u(mode),
   4799         ) else try self.asmRegisterRegisterRegisterImmediate(
   4800             mir_tag,
   4801             dst_alias,
   4802             dst_alias,
   4803             registerAlias(if (src_mcv.isRegister())
   4804                 src_mcv.getReg().?
   4805             else
   4806                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   4807             Immediate.u(mode),
   4808         ),
   4809         else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   4810             mir_tag,
   4811             dst_alias,
   4812             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   4813             Immediate.u(mode),
   4814         ) else try self.asmRegisterRegisterImmediate(
   4815             mir_tag,
   4816             dst_alias,
   4817             registerAlias(if (src_mcv.isRegister())
   4818                 src_mcv.getReg().?
   4819             else
   4820                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   4821             Immediate.u(mode),
   4822         ),
   4823     }
   4824 }
   4825 
   4826 fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a square-root instruction for scalar floats and float vectors.
            // The result is produced in a register: the operand's register when
            // `reuseOperand` allows taking it over, otherwise a tracked register copy.
            // Types without a selected instruction fail with a TODO error.
   4827     const un_op = self.air.instructions.items(.data)[inst].un_op;
   4828     const ty = self.air.typeOf(un_op);
   4829     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   4830 
   4831     const src_mcv = try self.resolveInst(un_op);
   4832     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   4833         src_mcv
   4834     else
   4835         try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   4836     const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
            // The destination stays locked for the rest of the function.
   4837     const dst_lock = self.register_manager.lockReg(dst_reg);
   4838     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4839 
   4840     const result: MCValue = result: {
                // Select a MIR tag for f32/f64 (scalar or vector). The f16 cases emit their
                // whole instruction sequence right here and leave through `break :result`.
                // A `null` selection falls through to the TODO failure below.
   4841         const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) {
   4842             .Float => switch (ty.floatBits(self.target.*)) {
   4843                 16 => if (self.hasFeature(.f16c)) {
                            // With f16c: convert the half to single precision (cvtph2ps),
                            // take the scalar single-precision square root, then convert
                            // back (cvtps2ph).
                            // NOTE(review): the 0b1_00 immediate presumably selects the
                            // rounding control of the conversion - confirm against the
                            // instruction reference.
   4844                     const mat_src_reg = if (src_mcv.isRegister())
   4845                         src_mcv.getReg().?
   4846                     else
   4847                         try self.copyToTmpRegister(ty, src_mcv);
   4848                     try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
   4849                     try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
   4850                     try self.asmRegisterRegisterImmediate(
   4851                         .{ .v_, .cvtps2ph },
   4852                         dst_reg,
   4853                         dst_reg,
   4854                         Immediate.u(0b1_00),
   4855                     );
   4856                     break :result dst_mcv;
   4857                 } else null,
   4858                 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
   4859                 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
   4860                 80, 128 => null,
   4861                 else => unreachable,
   4862             },
   4863             .Vector => switch (ty.childType().zigTypeTag()) {
   4864                 .Float => switch (ty.childType().floatBits(self.target.*)) {
   4865                     16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
                                // One-element f16 vector: same sequence as the scalar f16
                                // case above.
   4866                         1 => {
   4867                             try self.asmRegisterRegister(
   4868                                 .{ .v_, .cvtph2ps },
   4869                                 dst_reg,
   4870                                 (if (src_mcv.isRegister())
   4871                                     src_mcv.getReg().?
   4872                                 else
   4873                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
   4874                             );
   4875                             try self.asmRegisterRegisterRegister(
   4876                                 .{ .v_ss, .sqrt },
   4877                                 dst_reg,
   4878                                 dst_reg,
   4879                                 dst_reg,
   4880                             );
   4881                             try self.asmRegisterRegisterImmediate(
   4882                                 .{ .v_, .cvtps2ph },
   4883                                 dst_reg,
   4884                                 dst_reg,
   4885                                 Immediate.u(0b1_00),
   4886                             );
   4887                             break :result dst_mcv;
   4888                         },
                                // 2 to 8 f16 elements: the converted values go into
                                // `wide_reg`, the destination register aliased at twice the
                                // vector's ABI size; the packed square root runs there and
                                // the result is converted back into `dst_reg`.
   4889                         2...8 => {
   4890                             const wide_reg = registerAlias(dst_reg, abi_size * 2);
   4891                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   4892                                 .{ .v_, .cvtph2ps },
   4893                                 wide_reg,
   4894                                 src_mcv.mem(Memory.PtrSize.fromSize(
   4895                                     @intCast(u32, @divExact(wide_reg.bitSize(), 16)),
   4896                                 )),
   4897                             ) else try self.asmRegisterRegister(
   4898                                 .{ .v_, .cvtph2ps },
   4899                                 wide_reg,
   4900                                 (if (src_mcv.isRegister())
   4901                                     src_mcv.getReg().?
   4902                                 else
   4903                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
   4904                             );
   4905                             try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
   4906                             try self.asmRegisterRegisterImmediate(
   4907                                 .{ .v_, .cvtps2ph },
   4908                                 dst_reg,
   4909                                 wide_reg,
   4910                                 Immediate.u(0b1_00),
   4911                             );
   4912                             break :result dst_mcv;
   4913                         },
   4914                         else => null,
   4915                     } else null,
   4916                     32 => switch (ty.vectorLen()) {
   4917                         1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
   4918                         2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
                                // More than four f32 elements are only handled with avx.
   4919                         5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
   4920                         else => null,
   4921                     },
   4922                     64 => switch (ty.vectorLen()) {
   4923                         1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
   4924                         2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
                                // More than two f64 elements are only handled with avx.
   4925                         3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
   4926                         else => null,
   4927                     },
   4928                     80, 128 => null,
   4929                     else => unreachable,
   4930                 },
   4931                 else => unreachable,
   4932             },
   4933             else => unreachable,
   4934         })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{
   4935             ty.fmt(self.bin_file.options.module.?),
   4936         });
                // The `v_ss`/`v_sd` forms take one more register operand than the others;
                // `dst_reg` is passed for it. A memory source is used directly, a register
                // source as-is, and anything else is copied into a temporary register first.
   4937         switch (mir_tag[0]) {
   4938             .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   4939                 mir_tag,
   4940                 dst_reg,
   4941                 dst_reg,
   4942                 src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   4943             ) else try self.asmRegisterRegisterRegister(
   4944                 mir_tag,
   4945                 dst_reg,
   4946                 dst_reg,
   4947                 registerAlias(if (src_mcv.isRegister())
   4948                     src_mcv.getReg().?
   4949                 else
   4950                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
   4951             ),
   4952             else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
   4953                 mir_tag,
   4954                 dst_reg,
   4955                 src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   4956             ) else try self.asmRegisterRegister(
   4957                 mir_tag,
   4958                 dst_reg,
   4959                 registerAlias(if (src_mcv.isRegister())
   4960                     src_mcv.getReg().?
   4961                 else
   4962                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
   4963             ),
   4964         }
   4965         break :result dst_mcv;
   4966     };
   4967     return self.finishAir(inst, result, .{ un_op, .none, .none });
   4968 }
   4969 
   4970 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
            // Placeholder: no unary math instruction is lowered here yet, so every call
            // reports a TODO failure that names the AIR tag of `inst`.
   4971     _ = self.air.instructions.items(.data)[inst].un_op;
   4972     const air_tag = self.air.instructions.items(.tag)[inst];
   4973     return self.fail("TODO implement airUnaryMath for {}", .{air_tag});
            // An implementation is expected to end, like the other air* handlers, with
            // `self.finishAir(inst, result, .{ un_op, .none, .none })`.
   4977 }
   4978 
   4979 fn reuseOperand(
   4980     self: *Self,
   4981     inst: Air.Inst.Index,
   4982     operand: Air.Inst.Ref,
   4983     op_index: Liveness.OperandInt,
   4984     mcv: MCValue,
   4985 ) bool {
            // Convenience form of `reuseOperandAdvanced` in which the instruction that
            // takes over the operand's storage is `inst` itself.
            const tracked_inst = inst;
   4986     return self.reuseOperandAdvanced(inst, operand, op_index, mcv, tracked_inst);
   4987 }
   4988 
   4989 fn reuseOperandAdvanced(
   4990     self: *Self,
   4991     inst: Air.Inst.Index,
   4992     operand: Air.Inst.Ref,
   4993     op_index: Liveness.OperandInt,
   4994     mcv: MCValue,
   4995     tracked_inst: Air.Inst.Index,
   4996 ) bool {
            // Tries to hand the storage of operand number `op_index` of `inst` (currently
            // `mcv`) over to `tracked_inst`. Returns true when the storage was taken over.
            // Returns false, changing nothing, when the operand does not die at `inst`,
            // when `mcv` is neither a register nor a frame load, or when it is a frame
            // load whose frame index is a named one.
   4997     if (!self.liveness.operandDies(inst, op_index)) return false;
   4999 
   5000     switch (mcv) {
                // A register that is not free gets re-associated with the new instruction
                // if it has a slot in the tracked-register table.
   5001         .register => |reused_reg| if (!self.register_manager.isRegFree(reused_reg)) {
   5005             if (RegisterManager.indexOfRegIntoTracked(reused_reg)) |slot|
   5006                 self.register_manager.registers[slot] = tracked_inst;
   5009         },
   5010         .load_frame => |addr| if (addr.index.isNamed()) return false,
   5011         else => return false,
   5012     }
   5013 
   5014     // Clear the death so the operand-death processing does not deallocate the storage.
   5015     self.liveness.clearOperandDeath(inst, op_index);
   5016     const operand_inst = Air.refToIndex(operand).?;
   5017     self.getResolvedInstValue(operand_inst).reuse(self, tracked_inst, operand_inst);
   5019     return true;
   5020 }
   5021 
   5022 fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
            // Loads a bit-packed value through the pointer `ptr_mcv` into `dst_mcv`.
            // `ptr_ty` supplies the pointee type and the bit offset of the value.
            // Values whose ABI size exceeds 8 bytes are rejected with a TODO error.
   5023     const ptr_info = ptr_ty.ptrInfo().data;
   5024 
   5025     const val_ty = ptr_info.pointee_type;
   5026     const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*));
            // Split the bit offset into a whole-limb byte displacement and a bit offset
            // inside that limb; a limb is as wide as the value, capped at 8 bytes.
   5027     const limb_abi_size: u32 = @min(val_abi_size, 8);
   5028     const limb_abi_bits = limb_abi_size * 8;
   5029     const val_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size);
   5030     const val_bit_off = ptr_info.bit_offset % limb_abi_bits;
   5031     const val_extra_bits = self.regExtraBits(val_ty);
   5032 
   5033     if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{
   5034         val_ty.fmt(self.bin_file.options.module.?),
   5035     });
   5036 
            // The pointer is materialized in a temporary register, locked until return.
   5037     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5038     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   5039     defer self.register_manager.unlockReg(ptr_lock);
   5040 
            // Work in the destination register when there is one, else in a scratch register.
   5041     const dst_reg = switch (dst_mcv) {
   5042         .register => |reg| reg,
   5043         else => try self.register_manager.allocReg(null, gp),
   5044     };
   5045     const dst_lock = self.register_manager.lockReg(dst_reg);
   5046     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5047 
            // A load of the value's own size is used when the bit offset is smaller than
            // the value's extra register bits; otherwise twice that size is needed.
   5048     const load_abi_size =
   5049         if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
   5050     if (load_abi_size <= 8) {
                // Everything fits one register: load once and shift the value down.
   5051         const load_reg = registerAlias(dst_reg, load_abi_size);
   5052         try self.asmRegisterMemory(
   5053             .{ ._, .mov },
   5054             load_reg,
   5055             Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{
   5056                 .base = .{ .reg = ptr_reg },
   5057                 .disp = val_byte_off,
   5058             }),
   5059         );
   5060         try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off));
   5061     } else {
                // The value straddles two limbs: load the first limb into the destination
                // and the following limb into a temporary, then shift right across the
                // register pair by `val_bit_off`.
   5062         const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size);
   5063         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5064         defer self.register_manager.unlockReg(tmp_lock);
   5065 
   5066         const dst_alias = registerAlias(dst_reg, val_abi_size);
   5067         try self.asmRegisterMemory(
   5068             .{ ._, .mov },
   5069             dst_alias,
   5070             Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{
   5071                 .base = .{ .reg = ptr_reg },
   5072                 .disp = val_byte_off,
   5073             }),
   5074         );
   5075         try self.asmRegisterMemory(
   5076             .{ ._, .mov },
   5077             tmp_reg,
   5078             Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{
   5079                 .base = .{ .reg = ptr_reg },
                        // The following limb starts a whole limb, not one byte, after the
                        // first one.
   5080                 .disp = val_byte_off + @intCast(i32, limb_abi_size),
   5081             }),
   5082         );
   5083         try self.asmRegisterRegisterImmediate(
   5084             .{ ._rd, .sh },
   5085             dst_alias,
   5086             tmp_reg,
   5087             Immediate.u(val_bit_off),
   5088         );
   5089     }
   5090 
            // Drop the bits above the value, then move the result to its final location.
   5091     if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
   5092     try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg });
   5093 }
   5094 
   5095 fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
            // Copies the value that the pointer `ptr_mcv` (of type `ptr_ty`) refers to into
            // `dst_mcv`. How the pointee is addressed depends on where the pointer lives.
   5096     const pointee_ty = ptr_ty.childType();
   5097     switch (ptr_mcv) {
   5098         .none,
   5099         .unreach,
   5100         .dead,
   5101         .undef,
   5102         .eflags,
   5103         .register_overflow,
   5104         .reserved_frame,
   5105         => unreachable, // not a valid pointer
                // These pointer values are dereferenced directly via `deref()`.
   5106         .immediate,
   5107         .register,
   5108         .register_offset,
   5109         .lea_direct,
   5110         .lea_got,
   5111         .lea_tlv,
   5112         .lea_frame,
   5113         => {
                    const pointee_mcv = ptr_mcv.deref();
                    try self.genCopy(pointee_ty, dst_mcv, pointee_mcv);
                },
                // Otherwise the pointer is first copied into a temporary register, which
                // stays locked while the pointee is copied through it.
   5114         .memory,
   5115         .indirect,
   5116         .load_direct,
   5117         .load_got,
   5118         .load_tlv,
   5119         .load_frame,
   5120         => {
   5121             const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5122             const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   5123             defer self.register_manager.unlockReg(ptr_lock);
   5124 
   5125             const pointee_mcv = MCValue{ .indirect = .{ .reg = ptr_reg } };
                    try self.genCopy(pointee_ty, dst_mcv, pointee_mcv);
   5126         },
   5127     }
   5128 }
   5129 
   5130 fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a load through a pointer operand. Result types without runtime bits
            // produce `.none`. Pointers with a non-zero `host_size` go through
            // `packedLoad`, all others through `load`.
   5131     const ptr_ref = self.air.instructions.items(.data)[inst].ty_op.operand;
   5132     const val_ty = self.air.typeOfIndex(inst);
   5133     const result: MCValue = result: {
   5134         if (!val_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
   5135 
                // rdi, rsi and rcx are spilled and kept locked for the duration of the load.
                // NOTE(review): presumably because the copy may need them - confirm.
   5136         try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
   5137         const reserved_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
   5138         defer for (reserved_locks) |held| self.register_manager.unlockReg(held);
   5139 
   5140         const ptr_ty = self.air.typeOf(ptr_ref);
   5141         const val_size = val_ty.abiSize(self.target.*);
   5142 
   5143         const val_rc = regClassForType(val_ty);
   5144         const ptr_rc = regClassForType(ptr_ty);
   5145 
   5146         const ptr_mcv = try self.resolveInst(ptr_ref);
                // When the value is at most 8 bytes and its register class covers the
                // pointer's, the MCValue holding the pointer may be reused for the value.
   5147         const value_mcv = value: {
   5148             if (val_size <= 8 and val_rc.supersetOf(ptr_rc) and
   5149                 self.reuseOperand(inst, ptr_ref, 0, ptr_mcv)) break :value ptr_mcv;
   5152             break :value try self.allocRegOrMem(inst, true);
                };
   5153 
   5154         const is_packed = ptr_ty.ptrInfo().data.host_size > 0;
   5155         if (is_packed)
   5156             try self.packedLoad(value_mcv, ptr_ty, ptr_mcv)
   5157         else
   5158             try self.load(value_mcv, ptr_ty, ptr_mcv);
   5159         break :result value_mcv;
   5160     };
   5161     return self.finishAir(inst, result, .{ ptr_ref, .none, .none });
   5162 }
   5163 
   5164 fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
            // Stores `src_mcv` into a bit-packed location addressed by the pointer
            // `ptr_mcv`. `ptr_ty` supplies the host size, the bit offset and the stored
            // type. The destination is updated limb by limb: the target bits are cleared
            // with an AND, then the positioned source bits are merged in with an OR.
            // Sources wider than 64 bits are rejected with a TODO error.
   5165     const ptr_info = ptr_ty.ptrInfo().data;
   5166     const src_ty = ptr_ty.childType();
   5167 
            // A limb is as wide as the host integer, capped at 8 bytes.
   5168     const limb_abi_size: u16 = @min(ptr_info.host_size, 8);
   5169     const limb_abi_bits = limb_abi_size * 8;
   5170 
   5171     const src_bit_size = src_ty.bitSize(self.target.*);
   5172     const src_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size);
   5173     const src_bit_off = ptr_info.bit_offset % limb_abi_bits;
   5174 
            // The pointer is materialized in a temporary register, locked until return.
   5175     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5176     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   5177     defer self.register_manager.unlockReg(ptr_lock);
   5178 
            // Visit every limb that contains at least one bit of the stored value.
   5179     var limb_i: u16 = 0;
   5180     while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
                // Bit range of this limb that belongs to the value.
   5181         const part_bit_off = if (limb_i == 0) src_bit_off else 0;
   5182         const part_bit_size =
   5183             @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
   5184         const limb_mem = Memory.sib(Memory.PtrSize.fromSize(limb_abi_size), .{
   5185             .base = .{ .reg = ptr_reg },
                    // Displacements are in bytes, so limbs are `limb_abi_size` apart.
   5186             .disp = src_byte_off + limb_i * limb_abi_size,
   5187         });
   5188 
                // `part_mask` selects the value's bits in this limb; `part_mask_not`
                // selects the remaining bits of the limb, which must be preserved.
   5189         const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - part_bit_size)) <<
   5190             @intCast(u6, part_bit_off);
   5191         const part_mask_not = part_mask ^
   5192             (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_abi_bits));
                // Clear the target bits in memory. The mask is used as an unsigned immediate
                // for limbs of at most 4 bytes, as a signed 32-bit immediate when it fits
                // one, and through a scratch register otherwise.
   5193         if (limb_abi_size <= 4) {
   5194             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not));
   5195         } else if (math.cast(i32, @bitCast(i64, part_mask_not))) |small| {
   5196             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small));
   5197         } else {
   5198             const part_mask_reg = try self.register_manager.allocReg(null, gp);
   5199             try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not));
   5200             try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
   5201         }
   5202 
   5203         if (src_bit_size <= 64) {
   5204             const tmp_reg = try self.register_manager.allocReg(null, gp);
   5205             const tmp_mcv = MCValue{ .register = tmp_reg };
   5206             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5207             defer self.register_manager.unlockReg(tmp_lock);
   5208 
                    // Position the source bits for this limb: shifted left by the bit
                    // offset for the first limb, shifted right by the bits already stored
                    // for the second. A source of at most 64 bits spans at most two limbs.
   5209             try self.genSetReg(tmp_reg, src_ty, src_mcv);
   5210             switch (limb_i) {
   5211                 0 => try self.genShiftBinOpMir(
   5212                     .{ ._l, .sh },
   5213                     src_ty,
   5214                     tmp_mcv,
   5215                     .{ .immediate = src_bit_off },
   5216                 ),
   5217                 1 => try self.genShiftBinOpMir(
   5218                     .{ ._r, .sh },
   5219                     src_ty,
   5220                     tmp_mcv,
   5221                     .{ .immediate = limb_abi_bits - src_bit_off },
   5222                 ),
   5223                 else => unreachable,
   5224             }
                    // Keep only this limb's bits and merge them into memory.
   5225             try self.genBinOpMir(.{ ._, .@"and" }, src_ty, tmp_mcv, .{ .immediate = part_mask });
   5226             try self.asmMemoryRegister(
   5227                 .{ ._, .@"or" },
   5228                 limb_mem,
   5229                 registerAlias(tmp_reg, limb_abi_size),
   5230             );
   5231         } else return self.fail("TODO: implement packed store of {}", .{
   5232             src_ty.fmt(self.bin_file.options.module.?),
   5233         });
   5234     }
   5235 }
   5236 
   5237 fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
            // Copies `src_mcv` to the location that the pointer `ptr_mcv` (of type
            // `ptr_ty`) refers to. How the pointee is addressed depends on where the
            // pointer lives.
   5238     const pointee_ty = ptr_ty.childType();
   5239     switch (ptr_mcv) {
   5240         .none,
   5241         .unreach,
   5242         .dead,
   5243         .undef,
   5244         .eflags,
   5245         .register_overflow,
   5246         .reserved_frame,
   5247         => unreachable, // not a valid pointer
                // These pointer values are dereferenced directly via `deref()`.
   5248         .immediate,
   5249         .register,
   5250         .register_offset,
   5251         .lea_direct,
   5252         .lea_got,
   5253         .lea_tlv,
   5254         .lea_frame,
   5255         => {
                    const pointee_mcv = ptr_mcv.deref();
                    try self.genCopy(pointee_ty, pointee_mcv, src_mcv);
                },
                // Otherwise the pointer is first copied into a temporary register, which
                // stays locked while the value is copied through it.
   5256         .memory,
   5257         .indirect,
   5258         .load_direct,
   5259         .load_got,
   5260         .load_tlv,
   5261         .load_frame,
   5262         => {
   5263             const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5264             const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   5265             defer self.register_manager.unlockReg(ptr_lock);
   5266 
   5267             const pointee_mcv = MCValue{ .indirect = .{ .reg = ptr_reg } };
                    try self.genCopy(pointee_ty, pointee_mcv, src_mcv);
   5268         },
   5269     }
   5270 }
   5271 
   5272 fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
            // Lowers a store of the rhs operand through the pointer given by the lhs
            // operand. Pointers with a non-zero `host_size` go through `packedStore`, all
            // others through `store`. `safety` has no effect yet (see the TODOs).
   5273     if (safety) {
   5274         // TODO: when the stored value is undef, fill the destination with 0xaa bytes
   5275     } else {
   5276         // TODO: when the stored value is undef, skip lowering this instruction
   5277     }
   5278     const operands = self.air.instructions.items(.data)[inst].bin_op;
   5279     const dst_ptr_mcv = try self.resolveInst(operands.lhs);
   5280     const dst_ptr_ty = self.air.typeOf(operands.lhs);
   5281     const val_mcv = try self.resolveInst(operands.rhs);
   5282     const is_packed = dst_ptr_ty.ptrInfo().data.host_size > 0;
   5283     if (is_packed)
   5284         try self.packedStore(dst_ptr_ty, dst_ptr_mcv, val_mcv)
   5285     else
   5286         try self.store(dst_ptr_ty, dst_ptr_mcv, val_mcv);
   5287     return self.finishAir(inst, .none, .{ operands.lhs, operands.rhs, .none });
   5288 }
   5289 
   5290 fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a field-pointer instruction whose container operand and field index
            // are stored in the instruction's `Air.StructField` extra data.
   5291     const payload_index = self.air.instructions.items(.data)[inst].ty_pl.payload;
   5292     const struct_field = self.air.extraData(Air.StructField, payload_index).data;
   5293     const field_ptr_mcv = try self.fieldPtr(inst, struct_field.struct_operand, struct_field.field_index);
   5294     return self.finishAir(inst, field_ptr_mcv, .{ struct_field.struct_operand, .none, .none });
   5295 }
   5296 
   5297 fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void {
            // Lowers a field-pointer instruction whose field index is supplied by the
            // caller as `index` and whose operand is the container pointer.
   5298     const container_ptr = self.air.instructions.items(.data)[inst].ty_op.operand;
   5299     const field_ptr_mcv = try self.fieldPtr(inst, container_ptr, index);
   5300     return self.finishAir(inst, field_ptr_mcv, .{ container_ptr, .none, .none });
   5301 }
   5302 
   5303 fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue {
            // Computes the MCValue of a pointer to field `index` of the container that
            // `operand` points to: the container pointer offset by the field's byte offset.
   5304     const field_ptr_ty = self.air.typeOfIndex(inst);
   5305     const container_ptr_ty = self.air.typeOf(operand);
   5306     const agg_ty = container_ptr_ty.childType();
            // A packed container contributes a byte offset only when it is a struct and
            // the field pointer has `host_size == 0`; otherwise the offset is 0.
   5307     const byte_off = @intCast(i32, switch (agg_ty.containerLayout()) {
   5308         .Auto, .Extern => agg_ty.structFieldOffset(index, self.target.*),
   5309         .Packed => if (agg_ty.zigTypeTag() == .Struct and
   5310             field_ptr_ty.ptrInfo().data.host_size == 0)
   5311             agg_ty.packedStructFieldByteOffset(index, self.target.*)
   5312         else
   5313             0,
   5314     });
   5315 
   5316     const base_mcv = try self.resolveInst(operand);
            // Immediates and frame addresses are offset in place; register-based values
            // only when the operand can be reused. Anything else is first copied into a
            // tracked register.
   5317     const usable_in_place = switch (base_mcv) {
   5318         .immediate, .lea_frame => true,
   5319         .register, .register_offset => self.reuseOperand(inst, operand, 0, base_mcv),
   5320         else => false,
   5321     };
            const result_mcv = if (usable_in_place)
                base_mcv
            else
                try self.copyToRegisterWithInstTracking(inst, field_ptr_ty, base_mcv);
   5322     return result_mcv.offset(byte_off);
   5323 }
   5324 
   5325 fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
   5326     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   5327     const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
   5328     const result: MCValue = result: {
   5329         const operand = extra.struct_operand;
   5330         const index = extra.field_index;
   5331 
   5332         const container_ty = self.air.typeOf(operand);
   5333         const container_rc = regClassForType(container_ty);
   5334         const field_ty = container_ty.structFieldType(index);
   5335         if (!field_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
   5336         const field_rc = regClassForType(field_ty);
   5337 
   5338         const src_mcv = try self.resolveInst(operand);
   5339         const field_off = switch (container_ty.containerLayout()) {
   5340             .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*) * 8),
   5341             .Packed => if (container_ty.castTag(.@"struct")) |struct_obj|
   5342                 struct_obj.data.packedFieldBitOffset(self.target.*, index)
   5343             else
   5344                 0,
   5345         };
   5346 
   5347         switch (src_mcv) {
   5348             .load_frame => |frame_addr| {
   5349                 if (field_off % 8 == 0) {
   5350                     const off_mcv =
   5351                         src_mcv.address().offset(@intCast(i32, @divExact(field_off, 8))).deref();
   5352                     if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv;
   5353 
   5354                     const dst_mcv = try self.allocRegOrMem(inst, true);
   5355                     try self.genCopy(field_ty, dst_mcv, off_mcv);
   5356                     break :result dst_mcv;
   5357                 }
   5358 
   5359                 const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*));
   5360                 const limb_abi_size: u32 = @min(field_abi_size, 8);
   5361                 const limb_abi_bits = limb_abi_size * 8;
   5362                 const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size);
   5363                 const field_bit_off = field_off % limb_abi_bits;
   5364 
   5365                 if (field_abi_size > 8) {
   5366                     return self.fail("TODO implement struct_field_val with large packed field", .{});
   5367                 }
   5368 
   5369                 const dst_reg = try self.register_manager.allocReg(inst, gp);
   5370                 const field_extra_bits = self.regExtraBits(field_ty);
   5371                 const load_abi_size =
   5372                     if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2;
   5373                 if (load_abi_size <= 8) {
   5374                     const load_reg = registerAlias(dst_reg, load_abi_size);
   5375                     try self.asmRegisterMemory(
   5376                         .{ ._, .mov },
   5377                         load_reg,
   5378                         Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{
   5379                             .base = .{ .frame = frame_addr.index },
   5380                             .disp = frame_addr.off + field_byte_off,
   5381                         }),
   5382                     );
   5383                     try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off));
   5384                 } else {
   5385                     const tmp_reg = registerAlias(
   5386                         try self.register_manager.allocReg(null, gp),
   5387                         field_abi_size,
   5388                     );
   5389                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5390                     defer self.register_manager.unlockReg(tmp_lock);
   5391 
   5392                     const dst_alias = registerAlias(dst_reg, field_abi_size);
   5393                     try self.asmRegisterMemory(
   5394                         .{ ._, .mov },
   5395                         dst_alias,
   5396                         Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{
   5397                             .base = .{ .frame = frame_addr.index },
   5398                             .disp = frame_addr.off + field_byte_off,
   5399                         }),
   5400                     );
   5401                     try self.asmRegisterMemory(
   5402                         .{ ._, .mov },
   5403                         tmp_reg,
   5404                         Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{
   5405                             .base = .{ .frame = frame_addr.index },
   5406                             .disp = frame_addr.off + field_byte_off + @intCast(i32, limb_abi_size),
   5407                         }),
   5408                     );
   5409                     try self.asmRegisterRegisterImmediate(
   5410                         .{ ._rd, .sh },
   5411                         dst_alias,
   5412                         tmp_reg,
   5413                         Immediate.u(field_bit_off),
   5414                     );
   5415                 }
   5416 
   5417                 if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg);
   5418 
   5419                 const dst_mcv = MCValue{ .register = dst_reg };
   5420                 break :result if (field_rc.supersetOf(gp))
   5421                     dst_mcv
   5422                 else
   5423                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   5424             },
   5425             .register => |reg| {
   5426                 const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
   5427                 defer self.register_manager.unlockReg(reg_lock);
   5428 
   5429                 const dst_reg = if (src_mcv.isRegister() and field_rc.supersetOf(container_rc) and
   5430                     self.reuseOperand(inst, operand, 0, src_mcv))
   5431                     src_mcv.getReg().?
   5432                 else
   5433                     try self.copyToTmpRegister(Type.usize, .{ .register = reg.to64() });
   5434                 const dst_mcv = MCValue{ .register = dst_reg };
   5435                 const dst_lock = self.register_manager.lockReg(dst_reg);
   5436                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5437 
   5438                 // Shift by struct_field_offset.
   5439                 try self.genShiftBinOpMir(
   5440                     .{ ._r, .sh },
   5441                     Type.usize,
   5442                     dst_mcv,
   5443                     .{ .immediate = field_off },
   5444                 );
   5445 
   5446                 // Mask to field_bit_size bits
   5447                 const field_bit_size = field_ty.bitSize(self.target.*);
   5448                 const mask = ~@as(u64, 0) >> @intCast(u6, 64 - field_bit_size);
   5449 
   5450                 const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask });
   5451                 try self.genBinOpMir(.{ ._, .@"and" }, Type.usize, dst_mcv, .{ .register = tmp_reg });
   5452 
   5453                 const signedness =
   5454                     if (field_ty.isAbiInt()) field_ty.intInfo(self.target.*).signedness else .unsigned;
   5455                 const field_byte_size = @intCast(u32, field_ty.abiSize(self.target.*));
   5456                 if (signedness == .signed and field_byte_size < 8) {
   5457                     try self.asmRegisterRegister(
   5458                         if (field_byte_size >= 4) .{ ._d, .movsx } else .{ ._, .movsx },
   5459                         dst_mcv.register,
   5460                         registerAlias(dst_mcv.register, field_byte_size),
   5461                     );
   5462                 }
   5463 
   5464                 break :result if (field_rc.supersetOf(gp))
   5465                     dst_mcv
   5466                 else
   5467                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   5468             },
   5469             .register_overflow => |ro| {
   5470                 switch (index) {
   5471                     // Get wrapped value for overflow operation.
   5472                     0 => break :result if (self.liveness.operandDies(inst, 0))
   5473                         .{ .register = ro.reg }
   5474                     else
   5475                         try self.copyToRegisterWithInstTracking(
   5476                             inst,
   5477                             Type.usize,
   5478                             .{ .register = ro.reg },
   5479                         ),
   5480                     // Get overflow bit.
   5481                     1 => if (self.liveness.operandDies(inst, 0)) {
   5482                         self.eflags_inst = inst;
   5483                         break :result .{ .eflags = ro.eflags };
   5484                     } else {
   5485                         const dst_reg = try self.register_manager.allocReg(inst, gp);
   5486                         try self.asmSetccRegister(dst_reg.to8(), ro.eflags);
   5487                         break :result .{ .register = dst_reg.to8() };
   5488                     },
   5489                     else => unreachable,
   5490                 }
   5491             },
   5492             else => return self.fail("TODO implement codegen struct_field_val for {}", .{src_mcv}),
   5493         }
   5494     };
   5495     return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
   5496 }
   5497 
   5498 fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void {
   5499     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   5500     const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
   5501 
   5502     const inst_ty = self.air.typeOfIndex(inst);
   5503     const parent_ty = inst_ty.childType();
   5504     const field_offset = @intCast(i32, parent_ty.structFieldOffset(extra.field_index, self.target.*));
   5505 
   5506     const src_mcv = try self.resolveInst(extra.field_ptr);
   5507     const dst_mcv = if (src_mcv.isRegisterOffset() and
   5508         self.reuseOperand(inst, extra.field_ptr, 0, src_mcv))
   5509         src_mcv
   5510     else
   5511         try self.copyToRegisterWithInstTracking(inst, inst_ty, src_mcv);
   5512     const result = dst_mcv.offset(-field_offset);
   5513     return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none });
   5514 }
   5515 
   5516 fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
   5517     const src_ty = self.air.typeOf(src_air);
   5518     const src_mcv = try self.resolveInst(src_air);
   5519     if (src_ty.zigTypeTag() == .Vector) {
   5520         return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   5521     }
   5522 
   5523     switch (src_mcv) {
   5524         .eflags => |cc| switch (tag) {
   5525             .not => return .{ .eflags = cc.negate() },
   5526             else => {},
   5527         },
   5528         else => {},
   5529     }
   5530 
   5531     const src_lock = switch (src_mcv) {
   5532         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5533         else => null,
   5534     };
   5535     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   5536 
   5537     const dst_mcv: MCValue = dst: {
   5538         if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv;
   5539 
   5540         const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true);
   5541         try self.genCopy(src_ty, dst_mcv, src_mcv);
   5542         break :dst dst_mcv;
   5543     };
   5544     const dst_lock = switch (dst_mcv) {
   5545         .register => |reg| self.register_manager.lockReg(reg),
   5546         else => null,
   5547     };
   5548     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5549 
   5550     switch (tag) {
   5551         .not => {
   5552             const limb_abi_size = @intCast(u16, @min(src_ty.abiSize(self.target.*), 8));
   5553             const int_info = if (src_ty.tag() == .bool)
   5554                 std.builtin.Type.Int{ .signedness = .unsigned, .bits = 1 }
   5555             else
   5556                 src_ty.intInfo(self.target.*);
   5557             var byte_off: i32 = 0;
   5558             while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) {
   5559                 var limb_pl = Type.Payload.Bits{
   5560                     .base = .{ .tag = switch (int_info.signedness) {
   5561                         .signed => .int_signed,
   5562                         .unsigned => .int_unsigned,
   5563                     } },
   5564                     .data = @intCast(u16, @min(int_info.bits - byte_off * 8, limb_abi_size * 8)),
   5565                 };
   5566                 const limb_ty = Type.initPayload(&limb_pl.base);
   5567                 const limb_mcv = switch (byte_off) {
   5568                     0 => dst_mcv,
   5569                     else => dst_mcv.address().offset(byte_off).deref(),
   5570                 };
   5571 
   5572                 if (limb_pl.base.tag == .int_unsigned and self.regExtraBits(limb_ty) > 0) {
   5573                     const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_pl.data);
   5574                     try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask });
   5575                 } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv);
   5576             }
   5577         },
   5578         .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv),
   5579         else => unreachable,
   5580     }
   5581     return dst_mcv;
   5582 }
   5583 
   5584 fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void {
   5585     const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   5586     if (abi_size > 8) return self.fail("TODO implement {} for {}", .{
   5587         mir_tag,
   5588         dst_ty.fmt(self.bin_file.options.module.?),
   5589     });
   5590     switch (dst_mcv) {
   5591         .none,
   5592         .unreach,
   5593         .dead,
   5594         .undef,
   5595         .immediate,
   5596         .register_offset,
   5597         .eflags,
   5598         .register_overflow,
   5599         .lea_direct,
   5600         .lea_got,
   5601         .lea_tlv,
   5602         .lea_frame,
   5603         .reserved_frame,
   5604         => unreachable, // unmodifiable destination
   5605         .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)),
   5606         .memory, .load_got, .load_direct, .load_tlv => {
   5607             const addr_reg = try self.register_manager.allocReg(null, gp);
   5608             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   5609             defer self.register_manager.unlockReg(addr_reg_lock);
   5610 
   5611             try self.genSetReg(addr_reg, Type.usize, dst_mcv.address());
   5612             try self.asmMemory(
   5613                 mir_tag,
   5614                 Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }),
   5615             );
   5616         },
   5617         .indirect, .load_frame => try self.asmMemory(
   5618             mir_tag,
   5619             dst_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   5620         ),
   5621     }
   5622 }
   5623 
   5624 /// Clobbers .rcx for non-immediate shift value.
   5625 fn genShiftBinOpMir(
   5626     self: *Self,
   5627     tag: Mir.Inst.FixedTag,
   5628     ty: Type,
   5629     lhs_mcv: MCValue,
   5630     shift_mcv: MCValue,
   5631 ) !void {
   5632     const rhs_mcv: MCValue = rhs: {
   5633         switch (shift_mcv) {
   5634             .immediate => |imm| switch (imm) {
   5635                 0 => return,
   5636                 else => break :rhs shift_mcv,
   5637             },
   5638             .register => |shift_reg| if (shift_reg == .rcx) break :rhs shift_mcv,
   5639             else => {},
   5640         }
   5641         self.register_manager.getRegAssumeFree(.rcx, null);
   5642         try self.genSetReg(.cl, Type.u8, shift_mcv);
   5643         break :rhs .{ .register = .rcx };
   5644     };
   5645 
   5646     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   5647     if (abi_size <= 8) {
   5648         switch (lhs_mcv) {
   5649             .register => |lhs_reg| switch (rhs_mcv) {
   5650                 .immediate => |rhs_imm| try self.asmRegisterImmediate(
   5651                     tag,
   5652                     registerAlias(lhs_reg, abi_size),
   5653                     Immediate.u(rhs_imm),
   5654                 ),
   5655                 .register => |rhs_reg| try self.asmRegisterRegister(
   5656                     tag,
   5657                     registerAlias(lhs_reg, abi_size),
   5658                     registerAlias(rhs_reg, 1),
   5659                 ),
   5660                 else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5661                     @tagName(lhs_mcv),
   5662                     @tagName(rhs_mcv),
   5663                 }),
   5664             },
   5665             .memory, .indirect, .load_frame => {
   5666                 const lhs_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (lhs_mcv) {
   5667                     .memory => |addr| .{
   5668                         .base = .{ .reg = .ds },
   5669                         .disp = math.cast(i32, @bitCast(i64, addr)) orelse
   5670                             return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5671                             @tagName(lhs_mcv),
   5672                             @tagName(rhs_mcv),
   5673                         }),
   5674                     },
   5675                     .indirect => |reg_off| .{
   5676                         .base = .{ .reg = reg_off.reg },
   5677                         .disp = reg_off.off,
   5678                     },
   5679                     .load_frame => |frame_addr| .{
   5680                         .base = .{ .frame = frame_addr.index },
   5681                         .disp = frame_addr.off,
   5682                     },
   5683                     else => unreachable,
   5684                 });
   5685                 switch (rhs_mcv) {
   5686                     .immediate => |rhs_imm| try self.asmMemoryImmediate(
   5687                         tag,
   5688                         lhs_mem,
   5689                         Immediate.u(rhs_imm),
   5690                     ),
   5691                     .register => |rhs_reg| try self.asmMemoryRegister(
   5692                         tag,
   5693                         lhs_mem,
   5694                         registerAlias(rhs_reg, 1),
   5695                     ),
   5696                     else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5697                         @tagName(lhs_mcv),
   5698                         @tagName(rhs_mcv),
   5699                     }),
   5700                 }
   5701             },
   5702             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5703                 @tagName(lhs_mcv),
   5704                 @tagName(rhs_mcv),
   5705             }),
   5706         }
   5707     } else if (abi_size <= 16) {
   5708         const tmp_reg = try self.register_manager.allocReg(null, gp);
   5709         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5710         defer self.register_manager.unlockReg(tmp_lock);
   5711 
   5712         const info: struct { offsets: [2]i32, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) {
   5713             ._l => .{ .offsets = .{ 0, 8 }, .double_tag = .{ ._ld, .sh } },
   5714             ._r => .{ .offsets = .{ 8, 0 }, .double_tag = .{ ._rd, .sh } },
   5715             else => unreachable,
   5716         };
   5717         switch (lhs_mcv) {
   5718             .load_frame => |dst_frame_addr| switch (rhs_mcv) {
   5719                 .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) {
   5720                     try self.asmRegisterMemory(
   5721                         .{ ._, .mov },
   5722                         tmp_reg,
   5723                         Memory.sib(.qword, .{
   5724                             .base = .{ .frame = dst_frame_addr.index },
   5725                             .disp = dst_frame_addr.off + info.offsets[0],
   5726                         }),
   5727                     );
   5728                     try self.asmMemoryRegisterImmediate(
   5729                         info.double_tag,
   5730                         Memory.sib(.qword, .{
   5731                             .base = .{ .frame = dst_frame_addr.index },
   5732                             .disp = dst_frame_addr.off + info.offsets[1],
   5733                         }),
   5734                         tmp_reg,
   5735                         Immediate.u(rhs_imm),
   5736                     );
   5737                     try self.asmMemoryImmediate(
   5738                         tag,
   5739                         Memory.sib(.qword, .{
   5740                             .base = .{ .frame = dst_frame_addr.index },
   5741                             .disp = dst_frame_addr.off + info.offsets[0],
   5742                         }),
   5743                         Immediate.u(rhs_imm),
   5744                     );
   5745                 } else {
   5746                     assert(rhs_imm < 128);
   5747                     try self.asmRegisterMemory(
   5748                         .{ ._, .mov },
   5749                         tmp_reg,
   5750                         Memory.sib(.qword, .{
   5751                             .base = .{ .frame = dst_frame_addr.index },
   5752                             .disp = dst_frame_addr.off + info.offsets[0],
   5753                         }),
   5754                     );
   5755                     if (rhs_imm > 64) {
   5756                         try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64));
   5757                     }
   5758                     try self.asmMemoryRegister(
   5759                         .{ ._, .mov },
   5760                         Memory.sib(.qword, .{
   5761                             .base = .{ .frame = dst_frame_addr.index },
   5762                             .disp = dst_frame_addr.off + info.offsets[1],
   5763                         }),
   5764                         tmp_reg,
   5765                     );
   5766                     if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate(
   5767                         tag,
   5768                         Memory.sib(.qword, .{
   5769                             .base = .{ .frame = dst_frame_addr.index },
   5770                             .disp = dst_frame_addr.off + info.offsets[0],
   5771                         }),
   5772                         Immediate.u(63),
   5773                     ) else {
   5774                         try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32());
   5775                         try self.asmMemoryRegister(
   5776                             .{ ._, .mov },
   5777                             Memory.sib(.qword, .{
   5778                                 .base = .{ .frame = dst_frame_addr.index },
   5779                                 .disp = dst_frame_addr.off + info.offsets[0],
   5780                             }),
   5781                             tmp_reg,
   5782                         );
   5783                     }
   5784                 },
   5785                 else => {
   5786                     const first_reg = try self.register_manager.allocReg(null, gp);
   5787                     const first_lock = self.register_manager.lockRegAssumeUnused(first_reg);
   5788                     defer self.register_manager.unlockReg(first_lock);
   5789 
   5790                     const second_reg = try self.register_manager.allocReg(null, gp);
   5791                     const second_lock = self.register_manager.lockRegAssumeUnused(second_reg);
   5792                     defer self.register_manager.unlockReg(second_lock);
   5793 
   5794                     try self.genSetReg(.cl, Type.u8, rhs_mcv);
   5795                     try self.asmRegisterMemory(
   5796                         .{ ._, .mov },
   5797                         first_reg,
   5798                         Memory.sib(.qword, .{
   5799                             .base = .{ .frame = dst_frame_addr.index },
   5800                             .disp = dst_frame_addr.off + info.offsets[0],
   5801                         }),
   5802                     );
   5803                     try self.asmRegisterMemory(
   5804                         .{ ._, .mov },
   5805                         second_reg,
   5806                         Memory.sib(.qword, .{
   5807                             .base = .{ .frame = dst_frame_addr.index },
   5808                             .disp = dst_frame_addr.off + info.offsets[1],
   5809                         }),
   5810                     );
   5811                     if (tag[0] == ._r and tag[1] == .sa) {
   5812                         try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg);
   5813                         try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63));
   5814                     } else try self.asmRegisterRegister(
   5815                         .{ ._, .xor },
   5816                         tmp_reg.to32(),
   5817                         tmp_reg.to32(),
   5818                     );
   5819                     try self.asmRegisterRegisterRegister(info.double_tag, second_reg, first_reg, .cl);
   5820                     try self.asmRegisterRegister(tag, first_reg, .cl);
   5821                     try self.asmRegisterImmediate(.{ ._, .cmp }, .cl, Immediate.u(64));
   5822                     try self.asmCmovccRegisterRegister(second_reg, first_reg, .ae);
   5823                     try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae);
   5824                     try self.asmMemoryRegister(
   5825                         .{ ._, .mov },
   5826                         Memory.sib(.qword, .{
   5827                             .base = .{ .frame = dst_frame_addr.index },
   5828                             .disp = dst_frame_addr.off + info.offsets[1],
   5829                         }),
   5830                         second_reg,
   5831                     );
   5832                     try self.asmMemoryRegister(
   5833                         .{ ._, .mov },
   5834                         Memory.sib(.qword, .{
   5835                             .base = .{ .frame = dst_frame_addr.index },
   5836                             .disp = dst_frame_addr.off + info.offsets[0],
   5837                         }),
   5838                         first_reg,
   5839                     );
   5840                 },
   5841             },
   5842             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5843                 @tagName(lhs_mcv),
   5844                 @tagName(rhs_mcv),
   5845             }),
   5846         }
   5847     } else return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5848         @tagName(lhs_mcv),
   5849         @tagName(rhs_mcv),
   5850     });
   5851 }
   5852 
   5853 /// Result is always a register.
   5854 /// Clobbers .rcx for non-immediate rhs, therefore care is needed to spill .rcx upfront.
   5855 /// Asserts .rcx is free.
   5856 fn genShiftBinOp(
   5857     self: *Self,
   5858     air_tag: Air.Inst.Tag,
   5859     maybe_inst: ?Air.Inst.Index,
   5860     lhs_mcv: MCValue,
   5861     rhs_mcv: MCValue,
   5862     lhs_ty: Type,
   5863     rhs_ty: Type,
   5864 ) !MCValue {
   5865     if (lhs_ty.zigTypeTag() == .Vector) {
   5866         return self.fail("TODO implement genShiftBinOp for {}", .{lhs_ty.fmtDebug()});
   5867     }
   5868 
   5869     assert(rhs_ty.abiSize(self.target.*) == 1);
   5870 
   5871     const lhs_abi_size = lhs_ty.abiSize(self.target.*);
   5872     if (lhs_abi_size > 16) {
   5873         return self.fail("TODO implement genShiftBinOp for {}", .{lhs_ty.fmtDebug()});
   5874     }
   5875 
   5876     try self.register_manager.getReg(.rcx, null);
   5877     const rcx_lock = self.register_manager.lockRegAssumeUnused(.rcx);
   5878     defer self.register_manager.unlockReg(rcx_lock);
   5879 
   5880     const lhs_lock = switch (lhs_mcv) {
   5881         .register => |reg| self.register_manager.lockReg(reg),
   5882         else => null,
   5883     };
   5884     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   5885 
   5886     const rhs_lock = switch (rhs_mcv) {
   5887         .register => |reg| self.register_manager.lockReg(reg),
   5888         else => null,
   5889     };
   5890     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   5891 
   5892     const dst_mcv: MCValue = dst: {
   5893         if (maybe_inst) |inst| {
   5894             const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   5895             if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv;
   5896         }
   5897         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
   5898         try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
   5899         break :dst dst_mcv;
   5900     };
   5901 
   5902     const signedness = lhs_ty.intInfo(self.target.*).signedness;
   5903     try self.genShiftBinOpMir(switch (air_tag) {
   5904         .shl, .shl_exact => switch (signedness) {
   5905             .signed => .{ ._l, .sa },
   5906             .unsigned => .{ ._l, .sh },
   5907         },
   5908         .shr, .shr_exact => switch (signedness) {
   5909             .signed => .{ ._r, .sa },
   5910             .unsigned => .{ ._r, .sh },
   5911         },
   5912         else => unreachable,
   5913     }, lhs_ty, dst_mcv, rhs_mcv);
   5914     return dst_mcv;
   5915 }
   5916 
   5917 /// Result is always a register.
   5918 /// Clobbers .rax and .rdx therefore care is needed to spill .rax and .rdx upfront.
   5919 /// Asserts .rax and .rdx are free.
   5920 fn genMulDivBinOp(
   5921     self: *Self,
   5922     tag: Air.Inst.Tag,
   5923     maybe_inst: ?Air.Inst.Index,
   5924     dst_ty: Type,
   5925     src_ty: Type,
   5926     lhs: MCValue,
   5927     rhs: MCValue,
   5928 ) !MCValue {
   5929     if (dst_ty.zigTypeTag() == .Vector or dst_ty.zigTypeTag() == .Float) {
   5930         return self.fail("TODO implement genMulDivBinOp for {}", .{dst_ty.fmtDebug()});
   5931     }
   5932     const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   5933     const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
   5934     if (switch (tag) {
   5935         else => unreachable,
   5936         .mul, .mulwrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2,
   5937         .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size,
   5938     } or src_abi_size > 8) return self.fail("TODO implement genMulDivBinOp from {} to {}", .{
   5939         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   5940     });
   5941     const ty = if (dst_abi_size <= 8) dst_ty else src_ty;
   5942     const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size;
   5943 
   5944     assert(self.register_manager.isRegFree(.rax));
   5945     assert(self.register_manager.isRegFree(.rdx));
   5946 
   5947     const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
   5948     defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
   5949 
   5950     const signedness = ty.intInfo(self.target.*).signedness;
   5951     switch (tag) {
   5952         .mul,
   5953         .mulwrap,
   5954         .rem,
   5955         .div_trunc,
   5956         .div_exact,
   5957         => {
   5958             const track_inst_rax = switch (tag) {
   5959                 .mul, .mulwrap => if (dst_abi_size <= 8) maybe_inst else null,
   5960                 .div_exact, .div_trunc => maybe_inst,
   5961                 else => null,
   5962             };
   5963             const track_inst_rdx = switch (tag) {
   5964                 .rem => maybe_inst,
   5965                 else => null,
   5966             };
   5967             try self.register_manager.getReg(.rax, track_inst_rax);
   5968             try self.register_manager.getReg(.rdx, track_inst_rdx);
   5969 
   5970             try self.genIntMulDivOpMir(switch (signedness) {
   5971                 .signed => switch (tag) {
   5972                     .mul, .mulwrap => .{ .i_, .mul },
   5973                     .div_trunc, .div_exact, .rem => .{ .i_, .div },
   5974                     else => unreachable,
   5975                 },
   5976                 .unsigned => switch (tag) {
   5977                     .mul, .mulwrap => .{ ._, .mul },
   5978                     .div_trunc, .div_exact, .rem => .{ ._, .div },
   5979                     else => unreachable,
   5980                 },
   5981             }, ty, lhs, rhs);
   5982 
   5983             if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
   5984                 .mul, .mulwrap, .div_trunc, .div_exact => .rax,
   5985                 .rem => .rdx,
   5986                 else => unreachable,
   5987             }, dst_abi_size) };
   5988 
   5989             const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
   5990             try self.asmMemoryRegister(
   5991                 .{ ._, .mov },
   5992                 Memory.sib(.qword, .{
   5993                     .base = .{ .frame = dst_mcv.load_frame.index },
   5994                     .disp = dst_mcv.load_frame.off,
   5995                 }),
   5996                 .rax,
   5997             );
   5998             try self.asmMemoryRegister(
   5999                 .{ ._, .mov },
   6000                 Memory.sib(.qword, .{
   6001                     .base = .{ .frame = dst_mcv.load_frame.index },
   6002                     .disp = dst_mcv.load_frame.off + 8,
   6003                 }),
   6004                 .rdx,
   6005             );
   6006             return dst_mcv;
   6007         },
   6008 
   6009         .mod => {
   6010             try self.register_manager.getReg(.rax, null);
   6011             try self.register_manager.getReg(.rdx, if (signedness == .unsigned) maybe_inst else null);
   6012 
   6013             switch (signedness) {
   6014                 .signed => {
   6015                     const lhs_lock = switch (lhs) {
   6016                         .register => |reg| self.register_manager.lockReg(reg),
   6017                         else => null,
   6018                     };
   6019                     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6020                     const rhs_lock = switch (rhs) {
   6021                         .register => |reg| self.register_manager.lockReg(reg),
   6022                         else => null,
   6023                     };
   6024                     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6025 
   6026                     // hack around hazard between rhs and div_floor by copying rhs to another register
   6027                     const rhs_copy = try self.copyToTmpRegister(ty, rhs);
   6028                     const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy);
   6029                     defer self.register_manager.unlockReg(rhs_copy_lock);
   6030 
   6031                     const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs);
   6032                     try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy });
   6033                     const div_floor_lock = self.register_manager.lockReg(div_floor.register);
   6034                     defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock);
   6035 
   6036                     const result: MCValue = if (maybe_inst) |inst|
   6037                         try self.copyToRegisterWithInstTracking(inst, ty, lhs)
   6038                     else
   6039                         .{ .register = try self.copyToTmpRegister(ty, lhs) };
   6040                     try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor);
   6041 
   6042                     return result;
   6043                 },
   6044                 .unsigned => {
   6045                     try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, rhs);
   6046                     return .{ .register = registerAlias(.rdx, abi_size) };
   6047                 },
   6048             }
   6049         },
   6050 
   6051         .div_floor => {
   6052             try self.register_manager.getReg(.rax, if (signedness == .unsigned) maybe_inst else null);
   6053             try self.register_manager.getReg(.rdx, null);
   6054 
   6055             const lhs_lock: ?RegisterLock = switch (lhs) {
   6056                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   6057                 else => null,
   6058             };
   6059             defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6060 
   6061             const actual_rhs: MCValue = blk: {
   6062                 switch (signedness) {
   6063                     .signed => {
   6064                         const rhs_lock: ?RegisterLock = switch (rhs) {
   6065                             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   6066                             else => null,
   6067                         };
   6068                         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6069 
   6070                         if (maybe_inst) |inst| {
   6071                             break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs);
   6072                         }
   6073                         break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs) };
   6074                     },
   6075                     .unsigned => break :blk rhs,
   6076                 }
   6077             };
   6078             const rhs_lock: ?RegisterLock = switch (actual_rhs) {
   6079                 .register => |reg| self.register_manager.lockReg(reg),
   6080                 else => null,
   6081             };
   6082             defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6083 
   6084             switch (signedness) {
   6085                 .signed => return try self.genInlineIntDivFloor(ty, lhs, actual_rhs),
   6086                 .unsigned => {
   6087                     try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, actual_rhs);
   6088                     return .{ .register = registerAlias(.rax, abi_size) };
   6089                 },
   6090             }
   6091         },
   6092 
   6093         else => unreachable,
   6094     }
   6095 }
   6096 
   6097 fn genBinOp(
   6098     self: *Self,
   6099     maybe_inst: ?Air.Inst.Index,
   6100     air_tag: Air.Inst.Tag,
   6101     lhs_air: Air.Inst.Ref,
   6102     rhs_air: Air.Inst.Ref,
   6103 ) !MCValue {
   6104     const lhs_mcv = try self.resolveInst(lhs_air);
   6105     const rhs_mcv = try self.resolveInst(rhs_air);
   6106     const lhs_ty = self.air.typeOf(lhs_air);
   6107     const rhs_ty = self.air.typeOf(rhs_air);
   6108     const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
   6109 
   6110     switch (lhs_mcv) {
   6111         .immediate => |imm| switch (imm) {
   6112             0 => switch (air_tag) {
   6113                 .sub, .subwrap => return self.genUnOp(maybe_inst, .neg, rhs_air),
   6114                 else => {},
   6115             },
   6116             else => {},
   6117         },
   6118         else => {},
   6119     }
   6120 
   6121     const is_commutative = switch (air_tag) {
   6122         .add,
   6123         .addwrap,
   6124         .mul,
   6125         .bool_or,
   6126         .bit_or,
   6127         .bool_and,
   6128         .bit_and,
   6129         .xor,
   6130         .min,
   6131         .max,
   6132         => true,
   6133 
   6134         else => false,
   6135     };
   6136     const vec_op = switch (lhs_ty.zigTypeTag()) {
   6137         else => false,
   6138         .Float, .Vector => true,
   6139     };
   6140 
   6141     const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
   6142         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   6143         else => null,
   6144     };
   6145     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6146 
   6147     const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
   6148         .register => |reg| self.register_manager.lockReg(reg),
   6149         else => null,
   6150     };
   6151     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6152 
   6153     var flipped = false;
   6154     var copied_to_dst = true;
   6155     const dst_mcv: MCValue = dst: {
   6156         if (maybe_inst) |inst| {
   6157             if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) {
   6158                 break :dst lhs_mcv;
   6159             }
   6160             if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and
   6161                 self.reuseOperand(inst, rhs_air, 1, rhs_mcv))
   6162             {
   6163                 flipped = true;
   6164                 break :dst rhs_mcv;
   6165             }
   6166         }
   6167         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
   6168         if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
   6169             copied_to_dst = false
   6170         else
   6171             try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
   6172         break :dst dst_mcv;
   6173     };
   6174     const dst_lock: ?RegisterLock = switch (dst_mcv) {
   6175         .register => |reg| self.register_manager.lockReg(reg),
   6176         else => null,
   6177     };
   6178     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   6179 
   6180     const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   6181     if (!vec_op) {
   6182         switch (air_tag) {
   6183             .add,
   6184             .addwrap,
   6185             => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv),
   6186 
   6187             .sub,
   6188             .subwrap,
   6189             => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv),
   6190 
   6191             .ptr_add,
   6192             .ptr_sub,
   6193             => {
   6194                 const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
   6195                 const tmp_mcv = MCValue{ .register = tmp_reg };
   6196                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6197                 defer self.register_manager.unlockReg(tmp_lock);
   6198 
   6199                 const elem_size = lhs_ty.elemType2().abiSize(self.target.*);
   6200                 try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
   6201                 try self.genBinOpMir(
   6202                     switch (air_tag) {
   6203                         .ptr_add => .{ ._, .add },
   6204                         .ptr_sub => .{ ._, .sub },
   6205                         else => unreachable,
   6206                     },
   6207                     lhs_ty,
   6208                     dst_mcv,
   6209                     tmp_mcv,
   6210                 );
   6211             },
   6212 
   6213             .bool_or,
   6214             .bit_or,
   6215             => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv),
   6216 
   6217             .bool_and,
   6218             .bit_and,
   6219             => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv),
   6220 
   6221             .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv),
   6222 
   6223             .min,
   6224             .max,
   6225             => {
   6226                 const mat_src_mcv: MCValue = if (switch (src_mcv) {
   6227                     .immediate,
   6228                     .eflags,
   6229                     .register_offset,
   6230                     .load_direct,
   6231                     .lea_direct,
   6232                     .load_got,
   6233                     .lea_got,
   6234                     .load_tlv,
   6235                     .lea_tlv,
   6236                     .lea_frame,
   6237                     => true,
   6238                     .memory => |addr| math.cast(i32, @bitCast(i64, addr)) == null,
   6239                     else => false,
   6240                 }) .{ .register = try self.copyToTmpRegister(rhs_ty, src_mcv) } else src_mcv;
   6241                 const mat_mcv_lock = switch (mat_src_mcv) {
   6242                     .register => |reg| self.register_manager.lockReg(reg),
   6243                     else => null,
   6244                 };
   6245                 defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   6246 
   6247                 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv);
   6248 
   6249                 const int_info = lhs_ty.intInfo(self.target.*);
   6250                 const cc: Condition = switch (int_info.signedness) {
   6251                     .unsigned => switch (air_tag) {
   6252                         .min => .a,
   6253                         .max => .b,
   6254                         else => unreachable,
   6255                     },
   6256                     .signed => switch (air_tag) {
   6257                         .min => .g,
   6258                         .max => .l,
   6259                         else => unreachable,
   6260                     },
   6261                 };
   6262 
   6263                 const cmov_abi_size = @max(@intCast(u32, lhs_ty.abiSize(self.target.*)), 2);
   6264                 const tmp_reg = switch (dst_mcv) {
   6265                     .register => |reg| reg,
   6266                     else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
   6267                 };
   6268                 const tmp_lock = self.register_manager.lockReg(tmp_reg);
   6269                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   6270                 switch (mat_src_mcv) {
   6271                     .none,
   6272                     .unreach,
   6273                     .dead,
   6274                     .undef,
   6275                     .immediate,
   6276                     .eflags,
   6277                     .register_offset,
   6278                     .register_overflow,
   6279                     .load_direct,
   6280                     .lea_direct,
   6281                     .load_got,
   6282                     .lea_got,
   6283                     .load_tlv,
   6284                     .lea_tlv,
   6285                     .lea_frame,
   6286                     .reserved_frame,
   6287                     => unreachable,
   6288                     .register => |src_reg| try self.asmCmovccRegisterRegister(
   6289                         registerAlias(tmp_reg, cmov_abi_size),
   6290                         registerAlias(src_reg, cmov_abi_size),
   6291                         cc,
   6292                     ),
   6293                     .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
   6294                         registerAlias(tmp_reg, cmov_abi_size),
   6295                         Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), switch (mat_src_mcv) {
   6296                             .memory => |addr| .{
   6297                                 .base = .{ .reg = .ds },
   6298                                 .disp = @intCast(i32, @bitCast(i64, addr)),
   6299                             },
   6300                             .indirect => |reg_off| .{
   6301                                 .base = .{ .reg = reg_off.reg },
   6302                                 .disp = reg_off.off,
   6303                             },
   6304                             .load_frame => |frame_addr| .{
   6305                                 .base = .{ .frame = frame_addr.index },
   6306                                 .disp = frame_addr.off,
   6307                             },
   6308                             else => unreachable,
   6309                         }),
   6310                         cc,
   6311                     ),
   6312                 }
   6313                 try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg });
   6314             },
   6315 
   6316             else => return self.fail("TODO implement genBinOp for {s} {}", .{
   6317                 @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   6318             }),
   6319         }
   6320         return dst_mcv;
   6321     }
   6322 
   6323     const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
   6324     const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
   6325         else => unreachable,
   6326         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   6327             16 => if (self.hasFeature(.f16c)) {
   6328                 const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6329                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6330                 defer self.register_manager.unlockReg(tmp_lock);
   6331 
   6332                 if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   6333                     .{ .vp_w, .insr },
   6334                     dst_reg,
   6335                     dst_reg,
   6336                     src_mcv.mem(.word),
   6337                     Immediate.u(1),
   6338                 ) else try self.asmRegisterRegisterRegister(
   6339                     .{ .vp_, .unpcklwd },
   6340                     dst_reg,
   6341                     dst_reg,
   6342                     (if (src_mcv.isRegister())
   6343                         src_mcv.getReg().?
   6344                     else
   6345                         try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6346                 );
   6347                 try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
   6348                 try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
   6349                 try self.asmRegisterRegisterRegister(
   6350                     switch (air_tag) {
   6351                         .add => .{ .v_ss, .add },
   6352                         .sub => .{ .v_ss, .sub },
   6353                         .mul => .{ .v_ss, .mul },
   6354                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
   6355                         .max => .{ .v_ss, .max },
   6356                         .min => .{ .v_ss, .max },
   6357                         else => unreachable,
   6358                     },
   6359                     dst_reg,
   6360                     dst_reg,
   6361                     tmp_reg,
   6362                 );
   6363                 try self.asmRegisterRegisterImmediate(
   6364                     .{ .v_, .cvtps2ph },
   6365                     dst_reg,
   6366                     dst_reg,
   6367                     Immediate.u(0b1_00),
   6368                 );
   6369                 return dst_mcv;
   6370             } else null,
   6371             32 => switch (air_tag) {
   6372                 .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
   6373                 .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
   6374                 .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
   6375                 .div_float,
   6376                 .div_trunc,
   6377                 .div_floor,
   6378                 .div_exact,
   6379                 => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
   6380                 .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
   6381                 .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
   6382                 else => unreachable,
   6383             },
   6384             64 => switch (air_tag) {
   6385                 .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
   6386                 .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
   6387                 .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
   6388                 .div_float,
   6389                 .div_trunc,
   6390                 .div_floor,
   6391                 .div_exact,
   6392                 => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
   6393                 .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
   6394                 .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
   6395                 else => unreachable,
   6396             },
   6397             80, 128 => null,
   6398             else => unreachable,
   6399         },
   6400         .Vector => switch (lhs_ty.childType().zigTypeTag()) {
   6401             else => null,
   6402             .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
   6403                 16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) {
   6404                     1 => {
   6405                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6406                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6407                         defer self.register_manager.unlockReg(tmp_lock);
   6408 
   6409                         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   6410                             .{ .vp_w, .insr },
   6411                             dst_reg,
   6412                             dst_reg,
   6413                             src_mcv.mem(.word),
   6414                             Immediate.u(1),
   6415                         ) else try self.asmRegisterRegisterRegister(
   6416                             .{ .vp_, .unpcklwd },
   6417                             dst_reg,
   6418                             dst_reg,
   6419                             (if (src_mcv.isRegister())
   6420                                 src_mcv.getReg().?
   6421                             else
   6422                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6423                         );
   6424                         try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
   6425                         try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
   6426                         try self.asmRegisterRegisterRegister(
   6427                             switch (air_tag) {
   6428                                 .add => .{ .v_ss, .add },
   6429                                 .sub => .{ .v_ss, .sub },
   6430                                 .mul => .{ .v_ss, .mul },
   6431                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
   6432                                 .max => .{ .v_ss, .max },
   6433                                 .min => .{ .v_ss, .max },
   6434                                 else => unreachable,
   6435                             },
   6436                             dst_reg,
   6437                             dst_reg,
   6438                             tmp_reg,
   6439                         );
   6440                         try self.asmRegisterRegisterImmediate(
   6441                             .{ .v_, .cvtps2ph },
   6442                             dst_reg,
   6443                             dst_reg,
   6444                             Immediate.u(0b1_00),
   6445                         );
   6446                         return dst_mcv;
   6447                     },
   6448                     2 => {
   6449                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6450                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6451                         defer self.register_manager.unlockReg(tmp_lock);
   6452 
   6453                         if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   6454                             .{ .vp_d, .insr },
   6455                             dst_reg,
   6456                             src_mcv.mem(.dword),
   6457                             Immediate.u(1),
   6458                         ) else try self.asmRegisterRegisterRegister(
   6459                             .{ .v_ps, .unpckl },
   6460                             dst_reg,
   6461                             dst_reg,
   6462                             (if (src_mcv.isRegister())
   6463                                 src_mcv.getReg().?
   6464                             else
   6465                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6466                         );
   6467                         try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
   6468                         try self.asmRegisterRegisterRegister(
   6469                             .{ .v_ps, .movhl },
   6470                             tmp_reg,
   6471                             dst_reg,
   6472                             dst_reg,
   6473                         );
   6474                         try self.asmRegisterRegisterRegister(
   6475                             switch (air_tag) {
   6476                                 .add => .{ .v_ps, .add },
   6477                                 .sub => .{ .v_ps, .sub },
   6478                                 .mul => .{ .v_ps, .mul },
   6479                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   6480                                 .max => .{ .v_ps, .max },
   6481                                 .min => .{ .v_ps, .max },
   6482                                 else => unreachable,
   6483                             },
   6484                             dst_reg,
   6485                             dst_reg,
   6486                             tmp_reg,
   6487                         );
   6488                         try self.asmRegisterRegisterImmediate(
   6489                             .{ .v_, .cvtps2ph },
   6490                             dst_reg,
   6491                             dst_reg,
   6492                             Immediate.u(0b1_00),
   6493                         );
   6494                         return dst_mcv;
   6495                     },
   6496                     3...4 => {
   6497                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6498                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6499                         defer self.register_manager.unlockReg(tmp_lock);
   6500 
   6501                         try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
   6502                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   6503                             .{ .v_, .cvtph2ps },
   6504                             tmp_reg,
   6505                             src_mcv.mem(.qword),
   6506                         ) else try self.asmRegisterRegister(
   6507                             .{ .v_, .cvtph2ps },
   6508                             tmp_reg,
   6509                             (if (src_mcv.isRegister())
   6510                                 src_mcv.getReg().?
   6511                             else
   6512                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6513                         );
   6514                         try self.asmRegisterRegisterRegister(
   6515                             switch (air_tag) {
   6516                                 .add => .{ .v_ps, .add },
   6517                                 .sub => .{ .v_ps, .sub },
   6518                                 .mul => .{ .v_ps, .mul },
   6519                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   6520                                 .max => .{ .v_ps, .max },
   6521                                 .min => .{ .v_ps, .max },
   6522                                 else => unreachable,
   6523                             },
   6524                             dst_reg,
   6525                             dst_reg,
   6526                             tmp_reg,
   6527                         );
   6528                         try self.asmRegisterRegisterImmediate(
   6529                             .{ .v_, .cvtps2ph },
   6530                             dst_reg,
   6531                             dst_reg,
   6532                             Immediate.u(0b1_00),
   6533                         );
   6534                         return dst_mcv;
   6535                     },
   6536                     5...8 => {
   6537                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256();
   6538                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6539                         defer self.register_manager.unlockReg(tmp_lock);
   6540 
   6541                         try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg);
   6542                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   6543                             .{ .v_, .cvtph2ps },
   6544                             tmp_reg,
   6545                             src_mcv.mem(.xword),
   6546                         ) else try self.asmRegisterRegister(
   6547                             .{ .v_, .cvtph2ps },
   6548                             tmp_reg,
   6549                             (if (src_mcv.isRegister())
   6550                                 src_mcv.getReg().?
   6551                             else
   6552                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6553                         );
   6554                         try self.asmRegisterRegisterRegister(
   6555                             switch (air_tag) {
   6556                                 .add => .{ .v_ps, .add },
   6557                                 .sub => .{ .v_ps, .sub },
   6558                                 .mul => .{ .v_ps, .mul },
   6559                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   6560                                 .max => .{ .v_ps, .max },
   6561                                 .min => .{ .v_ps, .max },
   6562                                 else => unreachable,
   6563                             },
   6564                             dst_reg.to256(),
   6565                             dst_reg.to256(),
   6566                             tmp_reg,
   6567                         );
   6568                         try self.asmRegisterRegisterImmediate(
   6569                             .{ .v_, .cvtps2ph },
   6570                             dst_reg,
   6571                             dst_reg.to256(),
   6572                             Immediate.u(0b1_00),
   6573                         );
   6574                         return dst_mcv;
   6575                     },
   6576                     else => null,
   6577                 } else null,
   6578                 32 => switch (lhs_ty.vectorLen()) {
   6579                     1 => switch (air_tag) {
   6580                         .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
   6581                         .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
   6582                         .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
   6583                         .div_float,
   6584                         .div_trunc,
   6585                         .div_floor,
   6586                         .div_exact,
   6587                         => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
   6588                         .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
   6589                         .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
   6590                         else => unreachable,
   6591                     },
   6592                     2...4 => switch (air_tag) {
   6593                         .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
   6594                         .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
   6595                         .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
   6596                         .div_float,
   6597                         .div_trunc,
   6598                         .div_floor,
   6599                         .div_exact,
   6600                         => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
   6601                         .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
   6602                         .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
   6603                         else => unreachable,
   6604                     },
   6605                     5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
   6606                         .add => .{ .v_ps, .add },
   6607                         .sub => .{ .v_ps, .sub },
   6608                         .mul => .{ .v_ps, .mul },
   6609                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   6610                         .max => .{ .v_ps, .max },
   6611                         .min => .{ .v_ps, .min },
   6612                         else => unreachable,
   6613                     } else null,
   6614                     else => null,
   6615                 },
   6616                 64 => switch (lhs_ty.vectorLen()) {
   6617                     1 => switch (air_tag) {
   6618                         .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
   6619                         .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
   6620                         .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
   6621                         .div_float,
   6622                         .div_trunc,
   6623                         .div_floor,
   6624                         .div_exact,
   6625                         => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
   6626                         .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
   6627                         .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
   6628                         else => unreachable,
   6629                     },
   6630                     2 => switch (air_tag) {
   6631                         .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
   6632                         .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
   6633                         .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
   6634                         .div_float,
   6635                         .div_trunc,
   6636                         .div_floor,
   6637                         .div_exact,
   6638                         => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
   6639                         .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
   6640                         .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
   6641                         else => unreachable,
   6642                     },
   6643                     3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
   6644                         .add => .{ .v_pd, .add },
   6645                         .sub => .{ .v_pd, .sub },
   6646                         .mul => .{ .v_pd, .mul },
   6647                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
   6648                         .max => .{ .v_pd, .max },
   6649                         .min => .{ .v_pd, .min },
   6650                         else => unreachable,
   6651                     } else null,
   6652                     else => null,
   6653                 },
   6654                 80, 128 => null,
   6655                 else => unreachable,
   6656             },
   6657         },
   6658     })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
   6659         @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   6660     });
   6661     if (self.hasFeature(.avx)) {
   6662         const src1_alias =
   6663             if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
   6664         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   6665             mir_tag,
   6666             dst_reg,
   6667             src1_alias,
   6668             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   6669         ) else try self.asmRegisterRegisterRegister(
   6670             mir_tag,
   6671             dst_reg,
   6672             src1_alias,
   6673             registerAlias(if (src_mcv.isRegister())
   6674                 src_mcv.getReg().?
   6675             else
   6676                 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   6677         );
   6678     } else {
   6679         assert(copied_to_dst);
   6680         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   6681             mir_tag,
   6682             dst_reg,
   6683             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   6684         ) else try self.asmRegisterRegister(
   6685             mir_tag,
   6686             dst_reg,
   6687             registerAlias(if (src_mcv.isRegister())
   6688                 src_mcv.getReg().?
   6689             else
   6690                 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   6691         );
   6692     }
   6693     switch (air_tag) {
   6694         .add, .sub, .mul, .div_float, .div_exact => {},
   6695         .div_trunc, .div_floor => try self.genRound(
   6696             lhs_ty,
   6697             dst_reg,
   6698             .{ .register = dst_reg },
   6699             switch (air_tag) {
   6700                 .div_trunc => 0b1_0_11,
   6701                 .div_floor => 0b1_0_01,
   6702                 else => unreachable,
   6703             },
   6704         ),
   6705         .max, .min => {}, // TODO: unordered select
   6706         else => unreachable,
   6707     }
   6708     return dst_mcv;
   6709 }
   6710 
   6711 fn genBinOpMir(
   6712     self: *Self,
   6713     mir_tag: Mir.Inst.FixedTag,
   6714     ty: Type,
   6715     dst_mcv: MCValue,
   6716     src_mcv: MCValue,
   6717 ) !void {
            // Emits the machine instruction(s) for the two-operand operation `mir_tag`,
            // passing `dst_mcv` as the destination operand and `src_mcv` as the source
            // operand. Operand sizes are derived from the ABI size of `ty`.
            //
            // * A register destination is handled with a single instruction; sources
            //   that cannot be encoded directly are first moved into a temporary
            //   register, or have their address loaded into one.
            // * A memory destination is processed in limbs of at most 8 bytes; for
            //   limbs after the first only add/sub/cmp/or/and/xor are implemented.
            // * Any other destination is unmodifiable and hits `unreachable`.
            //
            // Errors from register allocation and instruction emission are propagated;
            // an unsupported multi-limb operation is reported through `self.fail`.
   6718     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   6719     switch (dst_mcv) {
   6720         .none,
   6721         .unreach,
   6722         .dead,
   6723         .undef,
   6724         .immediate,
   6725         .eflags,
   6726         .register_overflow,
   6727         .lea_direct,
   6728         .lea_got,
   6729         .lea_tlv,
   6730         .lea_frame,
   6731         .reserved_frame,
   6732         => unreachable, // unmodifiable destination
   6733         .register, .register_offset => {
                    // Register destination: operate on the `abi_size`-wide alias of the register.
                    // NOTE(review): the assert presumably rejects a `.register_offset` with a
                    // nonzero offset — confirm against `MCValue.isRegister`.
   6734             assert(dst_mcv.isRegister());
   6735             const dst_reg = dst_mcv.getReg().?;
   6736             const dst_alias = registerAlias(dst_reg, abi_size);
   6737             switch (src_mcv) {
   6738                 .none,
   6739                 .unreach,
   6740                 .dead,
   6741                 .undef,
   6742                 .register_overflow,
   6743                 .reserved_frame,
   6744                 => unreachable,
   6745                 .register => |src_reg| try self.asmRegisterRegister(
   6746                     mir_tag,
   6747                     dst_alias,
   6748                     registerAlias(src_reg, abi_size),
   6749                 ),
                        // Immediate source, selected by operand width. For 8/16/32-bit operands
                        // the signed form is used when the value (reinterpreted as i64) fits,
                        // otherwise the unsigned form. A 64-bit operand only takes a value that
                        // fits a signed 32-bit immediate; anything wider is first copied into a
                        // temporary register.
   6750                 .immediate => |imm| switch (self.regBitSize(ty)) {
   6751                     8 => try self.asmRegisterImmediate(
   6752                         mir_tag,
   6753                         dst_alias,
   6754                         if (math.cast(i8, @bitCast(i64, imm))) |small|
   6755                             Immediate.s(small)
   6756                         else
   6757                             Immediate.u(@intCast(u8, imm)),
   6758                     ),
   6759                     16 => try self.asmRegisterImmediate(
   6760                         mir_tag,
   6761                         dst_alias,
   6762                         if (math.cast(i16, @bitCast(i64, imm))) |small|
   6763                             Immediate.s(small)
   6764                         else
   6765                             Immediate.u(@intCast(u16, imm)),
   6766                     ),
   6767                     32 => try self.asmRegisterImmediate(
   6768                         mir_tag,
   6769                         dst_alias,
   6770                         if (math.cast(i32, @bitCast(i64, imm))) |small|
   6771                             Immediate.s(small)
   6772                         else
   6773                             Immediate.u(@intCast(u32, imm)),
   6774                     ),
   6775                     64 => if (math.cast(i32, @bitCast(i64, imm))) |small|
   6776                         try self.asmRegisterImmediate(mir_tag, dst_alias, Immediate.s(small))
   6777                     else
   6778                         try self.asmRegisterRegister(mir_tag, dst_alias, registerAlias(
   6779                             try self.copyToTmpRegister(ty, src_mcv),
   6780                             abi_size,
   6781                         )),
   6782                     else => unreachable,
   6783                 },
   6784                 .eflags,
   6785                 .register_offset,
   6786                 .memory,
   6787                 .indirect,
   6788                 .load_direct,
   6789                 .lea_direct,
   6790                 .load_got,
   6791                 .lea_got,
   6792                 .load_tlv,
   6793                 .lea_tlv,
   6794                 .load_frame,
   6795                 .lea_frame,
   6796                 => {
                            // Fast path: `.memory` (address fits an i32), `.indirect` and
                            // `.load_frame` sources are encoded directly as a memory operand and
                            // the function returns. Every other source breaks out of `blk`.
   6797                     blk: {
   6798                         return self.asmRegisterMemory(
   6799                             mir_tag,
   6800                             registerAlias(dst_reg, abi_size),
   6801                             Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
   6802                                 .memory => |addr| .{
   6803                                     .base = .{ .reg = .ds },
   6804                                     .disp = math.cast(i32, addr) orelse break :blk,
   6805                                 },
   6806                                 .indirect => |reg_off| .{
   6807                                     .base = .{ .reg = reg_off.reg },
   6808                                     .disp = reg_off.off,
   6809                                 },
   6810                                 .load_frame => |frame_addr| .{
   6811                                     .base = .{ .frame = frame_addr.index },
   6812                                     .disp = frame_addr.off,
   6813                                 },
   6814                                 else => break :blk,
   6815                             }),
   6816                         );
   6817                     }
   6818 
                            // Slow path: hold a lock on the destination register (released on
                            // scope exit) while a temporary register is allocated below.
   6819                     const dst_reg_lock = self.register_manager.lockReg(dst_reg);
   6820                     defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
   6821 
   6822                     switch (src_mcv) {
                                // Value-like sources: copy the value into a temporary register and
                                // retry with a register source.
   6823                         .eflags,
   6824                         .register_offset,
   6825                         .lea_direct,
   6826                         .lea_got,
   6827                         .lea_tlv,
   6828                         .lea_frame,
   6829                         => {
   6830                             const reg = try self.copyToTmpRegister(ty, src_mcv);
   6831                             return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
   6832                         },
                                // Memory-like sources: load the source's address into a temporary
                                // register and retry with an `.indirect` source through it.
   6833                         .memory,
   6834                         .load_direct,
   6835                         .load_got,
   6836                         .load_tlv,
   6837                         => {
   6838                             var ptr_pl = Type.Payload.ElemType{
   6839                                 .base = .{ .tag = .single_const_pointer },
   6840                                 .data = ty,
   6841                             };
   6842                             const ptr_ty = Type.initPayload(&ptr_pl.base);
   6843                             const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address());
   6844                             return self.genBinOpMir(mir_tag, ty, dst_mcv, .{
   6845                                 .indirect = .{ .reg = addr_reg },
   6846                             });
   6847                         },
   6848                         else => unreachable,
   6849                     }
   6850                 },
   6851             }
   6852         },
   6853         .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => {
                    // Memory destination: the operation is applied limb by limb, each limb
                    // being at most 8 bytes wide.
                    // `OpInfo` is an address register and its lock, for operands whose
                    // address must be loaded into a register before they can be addressed.
   6854             const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
   6855             const limb_abi_size: u32 = @min(abi_size, 8);
   6856 
   6857             const dst_info: OpInfo = switch (dst_mcv) {
   6858                 else => unreachable,
   6859                 .memory, .load_got, .load_direct, .load_tlv => dst: {
   6860                     const dst_addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
   6861                     const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
   6862                     errdefer self.register_manager.unlockReg(dst_addr_lock);
   6863 
   6864                     try self.genSetReg(dst_addr_reg, Type.usize, dst_mcv.address());
   6865                     break :dst .{
   6866                         .addr_reg = dst_addr_reg,
   6867                         .addr_lock = dst_addr_lock,
   6868                     };
   6869                 },
                        // `.indirect` and `.load_frame` destinations are addressed directly when
                        // `dst_limb_mem` is built below, so they need no address register.
                        // (`.indirect` previously fell into `else => unreachable` even though
                        // both the enclosing prong and `dst_limb_mem` accept it.)
   6870                 .indirect, .load_frame => null,
   6871             };
   6872             defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
        
                    // Hold a lock on the base register of an `.indirect` destination while
                    // address and limb temporaries are allocated below, mirroring the lock
                    // taken on `dst_reg` in the register path.
                    const dst_base_lock: ?RegisterLock = switch (dst_mcv) {
                        .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                        else => null,
                    };
                    defer if (dst_base_lock) |lock| self.register_manager.unlockReg(lock);
   6873 
                    // A `.memory` source needs no address register when the addresses of both
                    // its first and its last limb fit a signed 32-bit displacement; the other
                    // symbol-based sources always get one.
   6874             const src_info: OpInfo = switch (src_mcv) {
   6875                 .none,
   6876                 .unreach,
   6877                 .dead,
   6878                 .undef,
   6879                 .register_overflow,
   6880                 .reserved_frame,
   6881                 => unreachable,
   6882                 .immediate,
   6883                 .register,
   6884                 .register_offset,
   6885                 .eflags,
   6886                 .indirect,
   6887                 .lea_direct,
   6888                 .lea_got,
   6889                 .lea_tlv,
   6890                 .load_frame,
   6891                 .lea_frame,
   6892                 => null,
   6893                 .memory, .load_got, .load_direct, .load_tlv => src: {
   6894                     switch (src_mcv) {
   6895                         .memory => |addr| if (math.cast(i32, @bitCast(i64, addr)) != null and
   6896                             math.cast(i32, @bitCast(i64, addr) + abi_size - limb_abi_size) != null)
   6897                             break :src null,
   6898                         .load_got, .load_direct, .load_tlv => {},
   6899                         else => unreachable,
   6900                     }
   6901 
   6902                     const src_addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
   6903                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
   6904                     errdefer self.register_manager.unlockReg(src_addr_lock);
   6905 
   6906                     try self.genSetReg(src_addr_reg, Type.usize, src_mcv.address());
   6907                     break :src .{
   6908                         .addr_reg = src_addr_reg,
   6909                         .addr_lock = src_addr_lock,
   6910                     };
   6911                 },
   6912             };
   6913             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
   6914 
                    // Types that are not ABI integers are treated as unsigned.
   6915             const ty_signedness =
   6916                 if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned;
                    // NOTE(review): signed maps to `usize` and unsigned to `isize`, which looks
                    // swapped. Within this function `limb_ty` is only passed to `regBitSize` and
                    // `copyToTmpRegister` — confirm neither depends on signedness before
                    // relying on it.
   6917             const limb_ty = if (abi_size <= 8) ty else switch (ty_signedness) {
   6918                 .signed => Type.usize,
   6919                 .unsigned => Type.isize,
   6920             };
   6921             var off: i32 = 0;
   6922             while (off < abi_size) : (off += 8) {
                        // The first limb uses `mir_tag` itself. Later limbs use the
                        // carry/borrow form (add -> adc, sub/cmp -> sbb) or, for the bitwise
                        // operations, the same tag again.
                        // NOTE(review): for `.cmp` the upper limbs become `sbb`, which unlike
                        // `cmp` writes its destination operand — confirm callers only pass a
                        // multi-limb `.cmp` with a destination that may be overwritten.
   6923                 const mir_limb_tag: Mir.Inst.FixedTag = switch (off) {
   6924                     0 => mir_tag,
   6925                     else => switch (mir_tag[1]) {
   6926                         .add => .{ ._, .adc },
   6927                         .sub, .cmp => .{ ._, .sbb },
   6928                         .@"or", .@"and", .xor => mir_tag,
   6929                         else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
   6930                             @tagName(mir_tag[1]),
   6931                         }),
   6932                     },
   6933                 };
                        // Memory operand for the current destination limb, `off` bytes in.
   6934                 const dst_limb_mem = Memory.sib(
   6935                     Memory.PtrSize.fromSize(limb_abi_size),
   6936                     switch (dst_mcv) {
   6937                         .memory,
   6938                         .load_got,
   6939                         .load_direct,
   6940                         .load_tlv,
   6941                         => .{ .base = .{ .reg = dst_info.?.addr_reg }, .disp = off },
   6942                         .indirect => |reg_off| .{
   6943                             .base = .{ .reg = reg_off.reg },
   6944                             .disp = reg_off.off + off,
   6945                         },
   6946                         .load_frame => |frame_addr| .{
   6947                             .base = .{ .frame = frame_addr.index },
   6948                             .disp = frame_addr.off + off,
   6949                         },
   6950                         else => unreachable,
   6951                     },
   6952                 );
   6953                 switch (src_mcv) {
   6954                     .none,
   6955                     .unreach,
   6956                     .dead,
   6957                     .undef,
   6958                     .register_overflow,
   6959                     .reserved_frame,
   6960                     => unreachable,
                            // A register source is only accepted for the first limb.
   6961                     .register => |src_reg| switch (off) {
   6962                         0 => try self.asmMemoryRegister(
   6963                             mir_limb_tag,
   6964                             dst_limb_mem,
   6965                             registerAlias(src_reg, limb_abi_size),
   6966                         ),
   6967                         else => unreachable,
   6968                     },
   6969                     .immediate => |src_imm| {
                                // Limbs after the first are the immediate's sign extension for a
                                // signed type and zero otherwise.
   6970                         const imm = switch (off) {
   6971                             0 => src_imm,
   6972                             else => switch (ty_signedness) {
   6973                                 .signed => @bitCast(u64, @bitCast(i64, src_imm) >> 63),
   6974                                 .unsigned => 0,
   6975                             },
   6976                         };
                                // Same immediate selection as in the register-destination path,
                                // but against the memory limb.
   6977                         switch (self.regBitSize(limb_ty)) {
   6978                             8 => try self.asmMemoryImmediate(
   6979                                 mir_limb_tag,
   6980                                 dst_limb_mem,
   6981                                 if (math.cast(i8, @bitCast(i64, imm))) |small|
   6982                                     Immediate.s(small)
   6983                                 else
   6984                                     Immediate.u(@intCast(u8, imm)),
   6985                             ),
   6986                             16 => try self.asmMemoryImmediate(
   6987                                 mir_limb_tag,
   6988                                 dst_limb_mem,
   6989                                 if (math.cast(i16, @bitCast(i64, imm))) |small|
   6990                                     Immediate.s(small)
   6991                                 else
   6992                                     Immediate.u(@intCast(u16, imm)),
   6993                             ),
   6994                             32 => try self.asmMemoryImmediate(
   6995                                 mir_limb_tag,
   6996                                 dst_limb_mem,
   6997                                 if (math.cast(i32, @bitCast(i64, imm))) |small|
   6998                                     Immediate.s(small)
   6999                                 else
   7000                                     Immediate.u(@intCast(u32, imm)),
   7001                             ),
   7002                             64 => if (math.cast(i32, @bitCast(i64, imm))) |small|
   7003                                 try self.asmMemoryImmediate(
   7004                                     mir_limb_tag,
   7005                                     dst_limb_mem,
   7006                                     Immediate.s(small),
   7007                                 )
   7008                             else
   7009                                 try self.asmMemoryRegister(
   7010                                     mir_limb_tag,
   7011                                     dst_limb_mem,
   7012                                     registerAlias(
   7013                                         try self.copyToTmpRegister(limb_ty, .{ .immediate = imm }),
   7014                                         limb_abi_size,
   7015                                     ),
   7016                                 ),
   7017                             else => unreachable,
   7018                         }
   7019                     },
   7020                     .register_offset,
   7021                     .eflags,
   7022                     .memory,
   7023                     .indirect,
   7024                     .load_direct,
   7025                     .lea_direct,
   7026                     .load_got,
   7027                     .lea_got,
   7028                     .load_tlv,
   7029                     .lea_tlv,
   7030                     .load_frame,
   7031                     .lea_frame,
   7032                     => {
                                // Load the current source limb into a temporary register, then
                                // apply it to the destination limb. With an address register the
                                // limb is read through it at `off`. Otherwise value-like sources
                                // supply only the first limb (later limbs are zero), and
                                // addressable sources are read at their own location plus `off`.
   7033                         const src_limb_reg = try self.copyToTmpRegister(limb_ty, if (src_info) |info| .{
   7034                             .indirect = .{ .reg = info.addr_reg, .off = off },
   7035                         } else switch (src_mcv) {
   7036                             .eflags,
   7037                             .register_offset,
   7038                             .lea_direct,
   7039                             .lea_got,
   7040                             .lea_tlv,
   7041                             .lea_frame,
   7042                             => switch (off) {
   7043                                 0 => src_mcv,
   7044                                 else => .{ .immediate = 0 },
   7045                             },
   7046                             .memory => |addr| .{ .memory = @bitCast(u64, @bitCast(i64, addr) + off) },
   7047                             .indirect => |reg_off| .{ .indirect = .{
   7048                                 .reg = reg_off.reg,
   7049                                 .off = reg_off.off + off,
   7050                             } },
   7051                             .load_frame => |frame_addr| .{ .load_frame = .{
   7052                                 .index = frame_addr.index,
   7053                                 .off = frame_addr.off + off,
   7054                             } },
   7055                             else => unreachable,
   7056                         });
   7057                         try self.asmMemoryRegister(
   7058                             mir_limb_tag,
   7059                             dst_limb_mem,
   7060                             registerAlias(src_limb_reg, limb_abi_size),
   7061                         );
   7062                     },
   7063                 }
   7064             }
   7065         },
   7066     }
   7067 }
   7068 
   7069 /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
   7070 /// Does not support byte-size operands.
        ///
        /// `dst_ty` supplies the operand size (its ABI size). A register destination is
        /// multiplied in place with the `.{ .i_, .mul }` instruction (presumably `imul`);
        /// a memory destination is loaded into a temporary register, multiplied there,
        /// and copied back. Any other destination is unmodifiable and hits `unreachable`.
   7071 fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
   7072     const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   7073     switch (dst_mcv) {
   7074         .none,
   7075         .unreach,
   7076         .dead,
   7077         .undef,
   7078         .immediate,
   7079         .register_offset,
   7080         .eflags,
   7081         .register_overflow,
   7082         .lea_direct,
   7083         .lea_got,
   7084         .lea_tlv,
   7085         .lea_frame,
   7086         .reserved_frame,
   7087         => unreachable, // unmodifiable destination
   7088         .register => |dst_reg| {
   7089             const dst_alias = registerAlias(dst_reg, abi_size);
                    // Hold a lock on the destination register (released on scope exit) while
                    // temporaries may be allocated for the source below.
   7090             const dst_lock = self.register_manager.lockReg(dst_reg);
   7091             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7092 
   7093             switch (src_mcv) {
   7094                 .none,
   7095                 .unreach,
   7096                 .dead,
   7097                 .undef,
   7098                 .register_overflow,
   7099                 .reserved_frame,
   7100                 => unreachable,
   7101                 .register => |src_reg| try self.asmRegisterRegister(
   7102                     .{ .i_, .mul },
   7103                     dst_alias,
   7104                     registerAlias(src_reg, abi_size),
   7105                 ),
   7106                 .immediate => |imm| {
                            // An immediate that fits an i32 uses the three-operand form with the
                            // destination as both result and first factor; anything else is
                            // copied into a temporary register and retried as a register source.
                            // NOTE(review): `imm` is cast without the `@bitCast(i64, …)` applied
                            // to `.memory` addresses below; if the payload is an unsigned 64-bit
                            // value, negative constants always take the register path — confirm
                            // that is intended.
   7107                     if (math.cast(i32, imm)) |small| {
   7108                         try self.asmRegisterRegisterImmediate(
   7109                             .{ .i_, .mul },
   7110                             dst_alias,
   7111                             dst_alias,
   7112                             Immediate.s(small),
   7113                         );
   7114                     } else {
   7115                         const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv);
   7116                         return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
   7117                     }
   7118                 },
                        // Sources that are not encoded as a direct memory operand here are
                        // copied into a temporary register first.
   7119                 .register_offset,
   7120                 .eflags,
   7121                 .load_direct,
   7122                 .lea_direct,
   7123                 .load_got,
   7124                 .lea_got,
   7125                 .load_tlv,
   7126                 .lea_tlv,
   7127                 .lea_frame,
   7128                 => try self.asmRegisterRegister(
   7129                     .{ .i_, .mul },
   7130                     dst_alias,
   7131                     registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size),
   7132                 ),
                        // Directly addressable sources become a memory operand. A `.memory`
                        // address that does not fit a signed 32-bit displacement instead
                        // returns early through the temporary-register form.
   7133                 .memory, .indirect, .load_frame => try self.asmRegisterMemory(
   7134                     .{ .i_, .mul },
   7135                     dst_alias,
   7136                     Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
   7137                         .memory => |addr| .{
   7138                             .base = .{ .reg = .ds },
   7139                             .disp = math.cast(i32, @bitCast(i64, addr)) orelse
   7140                                 return self.asmRegisterRegister(
   7141                                 .{ .i_, .mul },
   7142                                 dst_alias,
   7143                                 registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size),
   7144                             ),
   7145                         },
   7146                         .indirect => |reg_off| .{
   7147                             .base = .{ .reg = reg_off.reg },
   7148                             .disp = reg_off.off,
   7149                         },
   7150                         .load_frame => |frame_addr| .{
   7151                             .base = .{ .frame = frame_addr.index },
   7152                             .disp = frame_addr.off,
   7153                         },
   7154                         else => unreachable,
   7155                     }),
   7156                 ),
   7157             }
   7158         },
   7159         .memory, .indirect, .load_direct, .load_got, .load_tlv, .load_frame => {
                    // Memory destination: load it into a locked temporary register, multiply
                    // there via the register path above, then copy the product back.
   7160             const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
   7161             const tmp_mcv = MCValue{ .register = tmp_reg };
   7162             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7163             defer self.register_manager.unlockReg(tmp_lock);
   7164 
   7165             try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv);
   7166             try self.genCopy(dst_ty, dst_mcv, tmp_mcv);
   7167         },
   7168     }
   7169 }
   7170 
   7171 fn airArg(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an `arg` instruction: binds `inst` to the next entry of `self.args`
            // and emits debug info for the parameter.
            //
            // Entries equal to `.none` are skipped first, because zero-bit arguments have
            // no corresponding arg instruction; `self.arg_index` is then advanced past
            // the entry that was picked.
   7172     var next_arg = self.arg_index;
   7173     while (self.args[next_arg] == .none) : (next_arg += 1) {}
   7174     self.arg_index = next_arg + 1;
   7175 
            // An unused argument gets no location and no debug info.
   7176     if (self.liveness.isUnused(inst)) {
   7177         const unused: MCValue = .unreach;
   7178         return self.finishAir(inst, unused, .{ .none, .none, .none });
   7179     }
   7180 
            // Only register and frame-slot arguments are supported; a register argument
            // is recorded with the register manager as belonging to `inst`.
   7181     const arg_mcv = self.args[next_arg];
   7182     switch (arg_mcv) {
   7183         .load_frame => {},
   7184         .register => |reg| self.register_manager.getRegAssumeFree(reg, inst),
   7185         else => return self.fail("TODO implement arg for {}", .{arg_mcv}),
   7186     }
   7187 
   7188     const param_index = self.air.instructions.items(.data)[inst].arg.src_index;
   7189     const param_name =
   7190         self.owner.mod_fn.getParamName(self.bin_file.options.module.?, param_index);
   7191     try self.genArgDbgInfo(self.air.typeOfIndex(inst), param_name, arg_mcv);
   7192     return self.finishAir(inst, arg_mcv, .{ .none, .none, .none });
   7193 }
   7194 
   7195 fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void {
            // Emits debug info for the function parameter `name` of type `ty` located at
            // `mcv`. Only DWARF output produces anything; the plan9 and "none" outputs
            // return without doing any work.
   7196     const decl_state = switch (self.debug_output) {
   7197         .dwarf => |dw| dw,
   7198         .plan9, .none => return,
   7199     };
   7200 
            // TODO use a frame index: frame-slot parameters currently emit nothing.
            // (An earlier rbp-relative stack location, which still needed
            // -fomit-frame-pointer handling, is not implemented here.)
   7201     const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
   7202         .load_frame => return,
   7203         .register => |reg| .{ .register = reg.dwarfLocOp() },
   7204         else => unreachable, // not a valid function parameter
   7205     };
   7206 
            // TODO: this might need adjusting like the linkers do.
            // Instead of flattening the owner and passing Decl.Index here we may
            // want to special case LazySymbol in DWARF linker too.
   7207     try decl_state.genArgDbgInfo(name, ty, self.owner.getDecl(), loc);
   7208 }
   7220 
   7221 fn genVarDbgInfo(
   7222     self: Self,
   7223     tag: Air.Inst.Tag,
   7224     ty: Type,
   7225     mcv: MCValue,
   7226     name: [:0]const u8,
   7227 ) !void {
            // Emits debug info for the local variable `name` of type `ty` located at `mcv`.
            // `tag` must be `.dbg_var_ptr` or `.dbg_var_val` and decides the `is_ptr` flag
            // passed on to the DWARF writer. Only DWARF output produces anything; the
            // plan9 and "none" outputs return right after the tag check.
   7228     assert(tag == .dbg_var_ptr or tag == .dbg_var_val);
   7229     const is_ptr = tag == .dbg_var_ptr;
   7230 
   7231     const decl_state = switch (self.debug_output) {
   7232         .dwarf => |dw| dw,
   7233         .plan9, .none => return,
   7234     };
   7235 
            // TODO use a frame index: frame-based locations currently emit nothing.
            // (An rbp-relative stack location is not implemented here.)
            // Locations without a mapping are logged and described as `.nop`.
   7236     const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
   7237         .none => .none,
   7238         .undef => .undef,
   7239         .immediate => |imm| .{ .immediate = imm },
   7240         .register => |reg| .{ .register = reg.dwarfLocOp() },
   7241         .memory => |addr| .{ .memory = addr },
   7242         .load_direct => |sym_index| .{ .linker_load = .{ .type = .direct, .sym_index = sym_index } },
   7243         .load_got => |sym_index| .{ .linker_load = .{ .type = .got, .sym_index = sym_index } },
   7244         .load_frame, .lea_frame => return,
   7245         else => unsupported: {
   7246             log.debug("TODO generate debug info for {}", .{mcv});
   7247             break :unsupported .nop;
   7248         },
   7249     };
   7250 
            // TODO: this might need adjusting like the linkers do.
            // Instead of flattening the owner and passing Decl.Index here we may
            // want to special case LazySymbol in DWARF linker too.
   7251     try decl_state.genVarDbgInfo(name, ty, self.owner.getDecl(), is_ptr, loc);
   7252 }
   7264 
   7265 fn airTrap(self: *Self) !void {
            // Lowers a trap to a single operand-less `ud2` instruction, then finishes the
            // instruction through `finishAirBookkeeping`; no result value is recorded here.
   7266     try self.asmOpOnly(.{ ._, .ud2 });
   7267     return self.finishAirBookkeeping();
   7268 }
   7269 
/// Lowers an AIR breakpoint by emitting a single operand-less `int3` instruction.
/// No result value is tracked; only the generic per-instruction bookkeeping runs.
fn airBreakpoint(self: *Self) !void {
    try self.asmOpOnly(.{ ._, .int3 });
    return self.finishAirBookkeeping();
}
   7274 
   7275 fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void {
   7276     const dst_mcv = try self.allocRegOrMem(inst, true);
   7277     try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } });
   7278     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
   7279 }
   7280 
   7281 fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
   7282     const dst_mcv = try self.allocRegOrMem(inst, true);
   7283     try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } });
   7284     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
   7285 }
   7286 
   7287 fn airFence(self: *Self, inst: Air.Inst.Index) !void {
   7288     const order = self.air.instructions.items(.data)[inst].fence;
   7289     switch (order) {
   7290         .Unordered, .Monotonic => unreachable,
   7291         .Acquire, .Release, .AcqRel => {},
   7292         .SeqCst => try self.asmOpOnly(.{ ._, .mfence }),
   7293     }
   7294     return self.finishAirBookkeeping();
   7295 }
   7296 
   7297 fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
   7298     if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{});
   7299     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   7300     const callee = pl_op.operand;
   7301     const extra = self.air.extraData(Air.Call, pl_op.payload);
   7302     const args = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]);
   7303     const ty = self.air.typeOf(callee);
   7304 
   7305     const fn_ty = switch (ty.zigTypeTag()) {
   7306         .Fn => ty,
   7307         .Pointer => ty.childType(),
   7308         else => unreachable,
   7309     };
   7310 
   7311     var info = try self.resolveCallingConventionValues(fn_ty, args[fn_ty.fnParamLen()..], .call_frame);
   7312     defer info.deinit(self);
   7313 
   7314     // We need a properly aligned and sized call frame to be able to call this function.
   7315     {
   7316         const needed_call_frame =
   7317             FrameAlloc.init(.{ .size = info.stack_byte_count, .alignment = info.stack_align });
   7318         const frame_allocs_slice = self.frame_allocs.slice();
   7319         const stack_frame_size =
   7320             &frame_allocs_slice.items(.abi_size)[@enumToInt(FrameIndex.call_frame)];
   7321         stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
   7322         const stack_frame_align =
   7323             &frame_allocs_slice.items(.abi_align)[@enumToInt(FrameIndex.call_frame)];
   7324         stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align);
   7325     }
   7326 
   7327     try self.spillEflagsIfOccupied();
   7328     try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*));
   7329 
   7330     // set stack arguments first because this can clobber registers
   7331     // also clobber spill arguments as we go
   7332     switch (info.return_value.long) {
   7333         .none, .unreach => {},
   7334         .indirect => |reg_off| try self.spillRegisters(&.{reg_off.reg}),
   7335         else => unreachable,
   7336     }
   7337     for (args, info.args) |arg, mc_arg| {
   7338         const arg_ty = self.air.typeOf(arg);
   7339         const arg_mcv = try self.resolveInst(arg);
   7340         switch (mc_arg) {
   7341             .none => {},
   7342             .register => |reg| try self.spillRegisters(&.{reg}),
   7343             .load_frame => try self.genCopy(arg_ty, mc_arg, arg_mcv),
   7344             else => unreachable,
   7345         }
   7346     }
   7347 
   7348     // now we are free to set register arguments
   7349     const ret_lock = switch (info.return_value.long) {
   7350         .none, .unreach => null,
   7351         .indirect => |reg_off| lock: {
   7352             const ret_ty = fn_ty.fnReturnType();
   7353             const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, self.target.*));
   7354             try self.genSetReg(reg_off.reg, Type.usize, .{
   7355                 .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
   7356             });
   7357             info.return_value.short = .{ .load_frame = .{ .index = frame_index } };
   7358             break :lock self.register_manager.lockRegAssumeUnused(reg_off.reg);
   7359         },
   7360         else => unreachable,
   7361     };
   7362     defer if (ret_lock) |lock| self.register_manager.unlockReg(lock);
   7363 
   7364     for (args, info.args) |arg, mc_arg| {
   7365         const arg_ty = self.air.typeOf(arg);
   7366         const arg_mcv = try self.resolveInst(arg);
   7367         switch (mc_arg) {
   7368             .none, .load_frame => {},
   7369             .register => try self.genCopy(arg_ty, mc_arg, arg_mcv),
   7370             else => unreachable,
   7371         }
   7372     }
   7373 
   7374     // Due to incremental compilation, how function calls are generated depends
   7375     // on linking.
   7376     const mod = self.bin_file.options.module.?;
   7377     if (self.air.value(callee)) |func_value| {
   7378         if (func_value.castTag(.function)) |func_payload| {
   7379             const func = func_payload.data;
   7380 
   7381             if (self.bin_file.cast(link.File.Elf)) |elf_file| {
   7382                 const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
   7383                 const atom = elf_file.getAtom(atom_index);
   7384                 _ = try atom.getOrCreateOffsetTableEntry(elf_file);
   7385                 const got_addr = atom.getOffsetTableAddress(elf_file);
   7386                 try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{
   7387                     .base = .{ .reg = .ds },
   7388                     .disp = @intCast(i32, got_addr),
   7389                 }));
   7390             } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
   7391                 const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl);
   7392                 const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
   7393                 try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
   7394                 try self.asmRegister(.{ ._, .call }, .rax);
   7395             } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
   7396                 const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl);
   7397                 const sym_index = macho_file.getAtom(atom).getSymbolIndex().?;
   7398                 try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
   7399                 try self.asmRegister(.{ ._, .call }, .rax);
   7400             } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
   7401                 const decl_block_index = try p9.seeDecl(func.owner_decl);
   7402                 const decl_block = p9.getDeclBlock(decl_block_index);
   7403                 const ptr_bits = self.target.cpu.arch.ptrBitWidth();
   7404                 const ptr_bytes: u64 = @divExact(ptr_bits, 8);
   7405                 const got_addr = p9.bases.data;
   7406                 const got_index = decl_block.got_index.?;
   7407                 const fn_got_addr = got_addr + got_index * ptr_bytes;
   7408                 try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{
   7409                     .base = .{ .reg = .ds },
   7410                     .disp = @intCast(i32, fn_got_addr),
   7411                 }));
   7412             } else unreachable;
   7413         } else if (func_value.castTag(.extern_fn)) |func_payload| {
   7414             const extern_fn = func_payload.data;
   7415             const decl_name = mem.sliceTo(mod.declPtr(extern_fn.owner_decl).name, 0);
   7416             const lib_name = mem.sliceTo(extern_fn.lib_name, 0);
   7417             if (self.bin_file.cast(link.File.Coff)) |coff_file| {
   7418                 const atom_index = try self.owner.getSymbolIndex(self);
   7419                 const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name);
   7420                 _ = try self.addInst(.{
   7421                     .tag = .mov,
   7422                     .ops = .import_reloc,
   7423                     .data = .{ .rx = .{
   7424                         .r1 = .rax,
   7425                         .payload = try self.addExtra(Mir.Reloc{
   7426                             .atom_index = atom_index,
   7427                             .sym_index = sym_index,
   7428                         }),
   7429                     } },
   7430                 });
   7431                 try self.asmRegister(.{ ._, .call }, .rax);
   7432             } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
   7433                 const atom_index = try self.owner.getSymbolIndex(self);
   7434                 const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name);
   7435                 _ = try self.addInst(.{
   7436                     .tag = .call,
   7437                     .ops = .extern_fn_reloc,
   7438                     .data = .{ .reloc = .{
   7439                         .atom_index = atom_index,
   7440                         .sym_index = sym_index,
   7441                     } },
   7442                 });
   7443             } else {
   7444                 return self.fail("TODO implement calling extern functions", .{});
   7445             }
   7446         } else {
   7447             return self.fail("TODO implement calling bitcasted functions", .{});
   7448         }
   7449     } else {
   7450         assert(ty.zigTypeTag() == .Pointer);
   7451         const mcv = try self.resolveInst(callee);
   7452         try self.genSetReg(.rax, Type.usize, mcv);
   7453         try self.asmRegister(.{ ._, .call }, .rax);
   7454     }
   7455 
   7456     var bt = self.liveness.iterateBigTomb(inst);
   7457     self.feed(&bt, callee);
   7458     for (args) |arg| self.feed(&bt, arg);
   7459 
   7460     const result = if (self.liveness.isUnused(inst)) .unreach else info.return_value.short;
   7461     return self.finishAirResult(inst, result);
   7462 }
   7463 
   7464 fn airRet(self: *Self, inst: Air.Inst.Index) !void {
   7465     const un_op = self.air.instructions.items(.data)[inst].un_op;
   7466     const operand = try self.resolveInst(un_op);
   7467     const ret_ty = self.fn_type.fnReturnType();
   7468     switch (self.ret_mcv.short) {
   7469         .none => {},
   7470         .register => try self.genCopy(ret_ty, self.ret_mcv.short, operand),
   7471         .indirect => |reg_off| {
   7472             try self.register_manager.getReg(reg_off.reg, null);
   7473             const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg);
   7474             defer self.register_manager.unlockReg(lock);
   7475 
   7476             try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long);
   7477             try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, operand);
   7478         },
   7479         else => unreachable,
   7480     }
   7481     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
   7482     // which is available if the jump is 127 bytes or less forward.
   7483     const jmp_reloc = try self.asmJmpReloc(undefined);
   7484     try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
   7485     return self.finishAir(inst, .unreach, .{ un_op, .none, .none });
   7486 }
   7487 
   7488 fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
   7489     const un_op = self.air.instructions.items(.data)[inst].un_op;
   7490     const ptr = try self.resolveInst(un_op);
   7491     const ptr_ty = self.air.typeOf(un_op);
   7492     switch (self.ret_mcv.short) {
   7493         .none => {},
   7494         .register => try self.load(self.ret_mcv.short, ptr_ty, ptr),
   7495         .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr),
   7496         else => unreachable,
   7497     }
   7498     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
   7499     // which is available if the jump is 127 bytes or less forward.
   7500     const jmp_reloc = try self.asmJmpReloc(undefined);
   7501     try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
   7502     return self.finishAir(inst, .unreach, .{ un_op, .none, .none });
   7503 }
   7504 
   7505 fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
   7506     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   7507     const ty = self.air.typeOf(bin_op.lhs);
   7508 
   7509     try self.spillEflagsIfOccupied();
   7510     self.eflags_inst = inst;
   7511 
   7512     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   7513     const lhs_lock = switch (lhs_mcv) {
   7514         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   7515         else => null,
   7516     };
   7517     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   7518 
   7519     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   7520     const rhs_lock = switch (rhs_mcv) {
   7521         .register => |reg| self.register_manager.lockReg(reg),
   7522         else => null,
   7523     };
   7524     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   7525 
   7526     const result = MCValue{
   7527         .eflags = switch (ty.zigTypeTag()) {
   7528             else => result: {
   7529                 var flipped = false;
   7530                 const dst_mcv: MCValue = if (lhs_mcv.isRegister() or lhs_mcv.isMemory())
   7531                     lhs_mcv
   7532                 else if (rhs_mcv.isRegister() or rhs_mcv.isMemory()) dst: {
   7533                     flipped = true;
   7534                     break :dst rhs_mcv;
   7535                 } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) };
   7536                 const dst_lock = switch (dst_mcv) {
   7537                     .register => |reg| self.register_manager.lockReg(reg),
   7538                     else => null,
   7539                 };
   7540                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7541                 const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   7542 
   7543                 try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv);
   7544                 break :result Condition.fromCompareOperator(
   7545                     if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned,
   7546                     if (flipped) op.reverse() else op,
   7547                 );
   7548             },
   7549             .Float => result: {
   7550                 const flipped = switch (op) {
   7551                     .lt, .lte => true,
   7552                     .eq, .gte, .gt, .neq => false,
   7553                 };
   7554 
   7555                 const dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
   7556                 const dst_reg = if (dst_mcv.isRegister())
   7557                     dst_mcv.getReg().?
   7558                 else
   7559                     try self.copyToTmpRegister(ty, dst_mcv);
   7560                 const dst_lock = self.register_manager.lockReg(dst_reg);
   7561                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7562                 const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   7563 
   7564                 switch (ty.floatBits(self.target.*)) {
   7565                     16 => if (self.hasFeature(.f16c)) {
   7566                         const tmp1_reg = (try self.register_manager.allocReg(null, sse)).to128();
   7567                         const tmp1_mcv = MCValue{ .register = tmp1_reg };
   7568                         const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg);
   7569                         defer self.register_manager.unlockReg(tmp1_lock);
   7570 
   7571                         const tmp2_reg = (try self.register_manager.allocReg(null, sse)).to128();
   7572                         const tmp2_mcv = MCValue{ .register = tmp2_reg };
   7573                         const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
   7574                         defer self.register_manager.unlockReg(tmp2_lock);
   7575 
   7576                         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   7577                             .{ .vp_w, .insr },
   7578                             tmp1_reg,
   7579                             dst_reg.to128(),
   7580                             src_mcv.mem(.word),
   7581                             Immediate.u(1),
   7582                         ) else try self.asmRegisterRegisterRegister(
   7583                             .{ .vp_, .unpcklwd },
   7584                             tmp1_reg,
   7585                             dst_reg.to128(),
   7586                             (if (src_mcv.isRegister())
   7587                                 src_mcv.getReg().?
   7588                             else
   7589                                 try self.copyToTmpRegister(ty, src_mcv)).to128(),
   7590                         );
   7591                         try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg);
   7592                         try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
   7593                         try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
   7594                     } else return self.fail("TODO implement airCmp for {}", .{
   7595                         ty.fmt(self.bin_file.options.module.?),
   7596                     }),
   7597                     32 => try self.genBinOpMir(
   7598                         .{ ._ss, .ucomi },
   7599                         ty,
   7600                         .{ .register = dst_reg },
   7601                         src_mcv,
   7602                     ),
   7603                     64 => try self.genBinOpMir(
   7604                         .{ ._sd, .ucomi },
   7605                         ty,
   7606                         .{ .register = dst_reg },
   7607                         src_mcv,
   7608                     ),
   7609                     else => return self.fail("TODO implement airCmp for {}", .{
   7610                         ty.fmt(self.bin_file.options.module.?),
   7611                     }),
   7612                 }
   7613 
   7614                 break :result switch (if (flipped) op.reverse() else op) {
   7615                     .lt, .lte => unreachable, // required to have been canonicalized to gt(e)
   7616                     .gt => .a,
   7617                     .gte => .ae,
   7618                     .eq => .z_and_np,
   7619                     .neq => .nz_or_p,
   7620                 };
   7621             },
   7622         },
   7623     };
   7624     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   7625 }
   7626 
/// Vector comparisons are not implemented for this backend yet: always fails
/// with a TODO error that names the target CPU architecture.
fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void {
    _ = inst;
    return self.fail("TODO implement airCmpVector for {}", .{self.target.cpu.arch});
}
   7631 
   7632 fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
   7633     const mod = self.bin_file.options.module.?;
   7634     const un_op = self.air.instructions.items(.data)[inst].un_op;
   7635 
   7636     const addr_reg = try self.register_manager.allocReg(null, gp);
   7637     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   7638     defer self.register_manager.unlockReg(addr_lock);
   7639     try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod));
   7640 
   7641     try self.spillEflagsIfOccupied();
   7642     self.eflags_inst = inst;
   7643 
   7644     const op_ty = self.air.typeOf(un_op);
   7645     const op_abi_size = @intCast(u32, op_ty.abiSize(self.target.*));
   7646     const op_mcv = try self.resolveInst(un_op);
   7647     const dst_reg = switch (op_mcv) {
   7648         .register => |reg| reg,
   7649         else => try self.copyToTmpRegister(op_ty, op_mcv),
   7650     };
   7651     try self.asmRegisterMemory(
   7652         .{ ._, .cmp },
   7653         registerAlias(dst_reg, op_abi_size),
   7654         Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }),
   7655     );
   7656     const result = MCValue{ .eflags = .b };
   7657     return self.finishAir(inst, result, .{ un_op, .none, .none });
   7658 }
   7659 
   7660 fn airTry(self: *Self, inst: Air.Inst.Index) !void {
   7661     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   7662     const extra = self.air.extraData(Air.Try, pl_op.payload);
   7663     const body = self.air.extra[extra.end..][0..extra.data.body_len];
   7664     const err_union_ty = self.air.typeOf(pl_op.operand);
   7665     const result = try self.genTry(inst, pl_op.operand, body, err_union_ty, false);
   7666     return self.finishAir(inst, result, .{ .none, .none, .none });
   7667 }
   7668 
   7669 fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void {
   7670     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   7671     const extra = self.air.extraData(Air.TryPtr, ty_pl.payload);
   7672     const body = self.air.extra[extra.end..][0..extra.data.body_len];
   7673     const err_union_ty = self.air.typeOf(extra.data.ptr).childType();
   7674     const result = try self.genTry(inst, extra.data.ptr, body, err_union_ty, true);
   7675     return self.finishAir(inst, result, .{ .none, .none, .none });
   7676 }
   7677 
   7678 fn genTry(
   7679     self: *Self,
   7680     inst: Air.Inst.Index,
   7681     err_union: Air.Inst.Ref,
   7682     body: []const Air.Inst.Index,
   7683     err_union_ty: Type,
   7684     operand_is_ptr: bool,
   7685 ) !MCValue {
   7686     if (operand_is_ptr) {
   7687         return self.fail("TODO genTry for pointers", .{});
   7688     }
   7689     const liveness_cond_br = self.liveness.getCondBr(inst);
   7690 
   7691     const err_union_mcv = try self.resolveInst(err_union);
   7692     const is_err_mcv = try self.isErr(null, err_union_ty, err_union_mcv);
   7693 
   7694     const reloc = try self.genCondBrMir(Type.anyerror, is_err_mcv);
   7695 
   7696     if (self.liveness.operandDies(inst, 0)) {
   7697         if (Air.refToIndex(err_union)) |err_union_inst| self.processDeath(err_union_inst);
   7698     }
   7699 
   7700     self.scope_generation += 1;
   7701     const state = try self.saveState();
   7702 
   7703     for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand);
   7704     try self.genBody(body);
   7705     try self.restoreState(state, &.{}, .{
   7706         .emit_instructions = false,
   7707         .update_tracking = true,
   7708         .resurrect = true,
   7709         .close_scope = true,
   7710     });
   7711 
   7712     try self.performReloc(reloc);
   7713 
   7714     for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand);
   7715 
   7716     const result = if (self.liveness.isUnused(inst))
   7717         .unreach
   7718     else
   7719         try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, err_union_mcv);
   7720     return result;
   7721 }
   7722 
   7723 fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void {
   7724     const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt;
   7725     _ = try self.addInst(.{
   7726         .tag = .pseudo,
   7727         .ops = .pseudo_dbg_line_line_column,
   7728         .data = .{ .line_column = .{
   7729             .line = dbg_stmt.line,
   7730             .column = dbg_stmt.column,
   7731         } },
   7732     });
   7733     return self.finishAirBookkeeping();
   7734 }
   7735 
   7736 fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void {
   7737     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   7738     const function = self.air.values[ty_pl.payload].castTag(.function).?.data;
   7739     // TODO emit debug info for function change
   7740     _ = function;
   7741     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   7742 }
   7743 
/// Placeholder for lexical-block debug info: nothing is emitted yet, the
/// instruction is simply finished with an `.unreach` result and no operands.
fn airDbgBlock(self: *Self, inst: Air.Inst.Index) !void {
    // TODO emit debug info lexical block
    return self.finishAir(inst, .unreach, .{ .none, .none, .none });
}
   7748 
   7749 fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void {
   7750     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   7751     const operand = pl_op.operand;
   7752     const ty = self.air.typeOf(operand);
   7753     const mcv = try self.resolveInst(operand);
   7754 
   7755     const name = self.air.nullTerminatedString(pl_op.payload);
   7756 
   7757     const tag = self.air.instructions.items(.tag)[inst];
   7758     try self.genVarDbgInfo(tag, ty, mcv, name);
   7759 
   7760     return self.finishAir(inst, .unreach, .{ operand, .none, .none });
   7761 }
   7762 
   7763 fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 {
   7764     const abi_size = ty.abiSize(self.target.*);
   7765     switch (mcv) {
   7766         .eflags => |cc| {
   7767             // Here we map the opposites since the jump is to the false branch.
   7768             return self.asmJccReloc(undefined, cc.negate());
   7769         },
   7770         .register => |reg| {
   7771             try self.spillEflagsIfOccupied();
   7772             try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1));
   7773             return self.asmJccReloc(undefined, .e);
   7774         },
   7775         .immediate,
   7776         .load_frame,
   7777         => {
   7778             try self.spillEflagsIfOccupied();
   7779             if (abi_size <= 8) {
   7780                 const reg = try self.copyToTmpRegister(ty, mcv);
   7781                 return self.genCondBrMir(ty, .{ .register = reg });
   7782             }
   7783             return self.fail("TODO implement condbr when condition is {} with abi larger than 8 bytes", .{mcv});
   7784         },
   7785         else => return self.fail("TODO implement condbr when condition is {s}", .{@tagName(mcv)}),
   7786     }
   7787     return 0; // TODO
   7788 }
   7789 
   7790 fn airCondBr(self: *Self, inst: Air.Inst.Index) !void {
   7791     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   7792     const cond = try self.resolveInst(pl_op.operand);
   7793     const cond_ty = self.air.typeOf(pl_op.operand);
   7794     const extra = self.air.extraData(Air.CondBr, pl_op.payload);
   7795     const then_body = self.air.extra[extra.end..][0..extra.data.then_body_len];
   7796     const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len];
   7797     const liveness_cond_br = self.liveness.getCondBr(inst);
   7798 
   7799     const reloc = try self.genCondBrMir(cond_ty, cond);
   7800 
   7801     // If the condition dies here in this condbr instruction, process
   7802     // that death now instead of later as this has an effect on
   7803     // whether it needs to be spilled in the branches
   7804     if (self.liveness.operandDies(inst, 0)) {
   7805         if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst);
   7806     }
   7807 
   7808     self.scope_generation += 1;
   7809     const state = try self.saveState();
   7810 
   7811     for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand);
   7812     try self.genBody(then_body);
   7813     try self.restoreState(state, &.{}, .{
   7814         .emit_instructions = false,
   7815         .update_tracking = true,
   7816         .resurrect = true,
   7817         .close_scope = true,
   7818     });
   7819 
   7820     try self.performReloc(reloc);
   7821 
   7822     for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand);
   7823     try self.genBody(else_body);
   7824     try self.restoreState(state, &.{}, .{
   7825         .emit_instructions = false,
   7826         .update_tracking = true,
   7827         .resurrect = true,
   7828         .close_scope = true,
   7829     });
   7830 
   7831     // We already took care of pl_op.operand earlier, so we're going
   7832     // to pass .none here
   7833     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   7834 }
   7835 
   7836 fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue {
   7837     switch (opt_mcv) {
   7838         .register_overflow => |ro| return .{ .eflags = ro.eflags.negate() },
   7839         else => {},
   7840     }
   7841 
   7842     try self.spillEflagsIfOccupied();
   7843     self.eflags_inst = inst;
   7844 
   7845     var pl_buf: Type.Payload.ElemType = undefined;
   7846     const pl_ty = opt_ty.optionalChild(&pl_buf);
   7847 
   7848     var ptr_buf: Type.SlicePtrFieldTypeBuffer = undefined;
   7849     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload())
   7850         .{ .off = 0, .ty = if (pl_ty.isSlice()) pl_ty.slicePtrFieldType(&ptr_buf) else pl_ty }
   7851     else
   7852         .{ .off = @intCast(i32, pl_ty.abiSize(self.target.*)), .ty = Type.bool };
   7853 
   7854     switch (opt_mcv) {
   7855         .none,
   7856         .unreach,
   7857         .dead,
   7858         .undef,
   7859         .immediate,
   7860         .eflags,
   7861         .register_offset,
   7862         .register_overflow,
   7863         .lea_direct,
   7864         .lea_got,
   7865         .lea_tlv,
   7866         .lea_frame,
   7867         .reserved_frame,
   7868         => unreachable,
   7869 
   7870         .register => |opt_reg| {
   7871             if (some_info.off == 0) {
   7872                 const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
   7873                 const alias_reg = registerAlias(opt_reg, some_abi_size);
   7874                 assert(some_abi_size * 8 == alias_reg.bitSize());
   7875                 try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg);
   7876                 return .{ .eflags = .z };
   7877             }
   7878             assert(some_info.ty.tag() == .bool);
   7879             const opt_abi_size = @intCast(u32, opt_ty.abiSize(self.target.*));
   7880             try self.asmRegisterImmediate(
   7881                 .{ ._, .bt },
   7882                 registerAlias(opt_reg, opt_abi_size),
   7883                 Immediate.u(@intCast(u6, some_info.off * 8)),
   7884             );
   7885             return .{ .eflags = .nc };
   7886         },
   7887 
   7888         .memory,
   7889         .load_got,
   7890         .load_direct,
   7891         .load_tlv,
   7892         => {
   7893             const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
   7894             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   7895             defer self.register_manager.unlockReg(addr_reg_lock);
   7896 
   7897             try self.genSetReg(addr_reg, Type.usize, opt_mcv.address());
   7898             const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
   7899             try self.asmMemoryImmediate(
   7900                 .{ ._, .cmp },
   7901                 Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{
   7902                     .base = .{ .reg = addr_reg },
   7903                     .disp = some_info.off,
   7904                 }),
   7905                 Immediate.u(0),
   7906             );
   7907             return .{ .eflags = .e };
   7908         },
   7909 
   7910         .indirect, .load_frame => {
   7911             const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
   7912             try self.asmMemoryImmediate(
   7913                 .{ ._, .cmp },
   7914                 Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) {
   7915                     .indirect => |reg_off| .{
   7916                         .base = .{ .reg = reg_off.reg },
   7917                         .disp = reg_off.off + some_info.off,
   7918                     },
   7919                     .load_frame => |frame_addr| .{
   7920                         .base = .{ .frame = frame_addr.index },
   7921                         .disp = frame_addr.off + some_info.off,
   7922                     },
   7923                     else => unreachable,
   7924                 }),
   7925                 Immediate.u(0),
   7926             );
   7927             return .{ .eflags = .e };
   7928         },
   7929     }
   7930 }
   7931 
   7932 fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
   7933     try self.spillEflagsIfOccupied();
   7934     self.eflags_inst = inst;
   7935 
   7936     const opt_ty = ptr_ty.childType();
   7937     var pl_buf: Type.Payload.ElemType = undefined;
   7938     const pl_ty = opt_ty.optionalChild(&pl_buf);
   7939 
   7940     var ptr_buf: Type.SlicePtrFieldTypeBuffer = undefined;
   7941     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload())
   7942         .{ .off = 0, .ty = if (pl_ty.isSlice()) pl_ty.slicePtrFieldType(&ptr_buf) else pl_ty }
   7943     else
   7944         .{ .off = @intCast(i32, pl_ty.abiSize(self.target.*)), .ty = Type.bool };
   7945 
   7946     const ptr_reg = switch (ptr_mcv) {
   7947         .register => |reg| reg,
   7948         else => try self.copyToTmpRegister(ptr_ty, ptr_mcv),
   7949     };
   7950     const ptr_lock = self.register_manager.lockReg(ptr_reg);
   7951     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   7952 
   7953     const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
   7954     try self.asmMemoryImmediate(
   7955         .{ ._, .cmp },
   7956         Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{
   7957             .base = .{ .reg = ptr_reg },
   7958             .disp = some_info.off,
   7959         }),
   7960         Immediate.u(0),
   7961     );
   7962     return .{ .eflags = .e };
   7963 }
   7964 
   7965 fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !MCValue {
   7966     const err_type = ty.errorUnionSet();
   7967 
   7968     if (err_type.errorSetIsEmpty()) {
   7969         return MCValue{ .immediate = 0 }; // always false
   7970     }
   7971 
   7972     try self.spillEflagsIfOccupied();
   7973     if (maybe_inst) |inst| {
   7974         self.eflags_inst = inst;
   7975     }
   7976 
   7977     const err_off = errUnionErrorOffset(ty.errorUnionPayload(), self.target.*);
   7978     switch (operand) {
   7979         .register => |reg| {
   7980             const eu_lock = self.register_manager.lockReg(reg);
   7981             defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   7982 
   7983             const tmp_reg = try self.copyToTmpRegister(ty, operand);
   7984             if (err_off > 0) {
   7985                 const shift = @intCast(u6, err_off * 8);
   7986                 try self.genShiftBinOpMir(
   7987                     .{ ._r, .sh },
   7988                     ty,
   7989                     .{ .register = tmp_reg },
   7990                     .{ .immediate = shift },
   7991                 );
   7992             } else {
   7993                 try self.truncateRegister(Type.anyerror, tmp_reg);
   7994             }
   7995             try self.genBinOpMir(
   7996                 .{ ._, .cmp },
   7997                 Type.anyerror,
   7998                 .{ .register = tmp_reg },
   7999                 .{ .immediate = 0 },
   8000             );
   8001         },
   8002         .load_frame => |frame_addr| try self.genBinOpMir(
   8003             .{ ._, .cmp },
   8004             Type.anyerror,
   8005             .{ .load_frame = .{
   8006                 .index = frame_addr.index,
   8007                 .off = frame_addr.off + @intCast(i32, err_off),
   8008             } },
   8009             .{ .immediate = 0 },
   8010         ),
   8011         else => return self.fail("TODO implement isErr for {}", .{operand}),
   8012     }
   8013 
   8014     return MCValue{ .eflags = .a };
   8015 }
   8016 
   8017 fn isNonErr(self: *Self, inst: Air.Inst.Index, ty: Type, operand: MCValue) !MCValue {
   8018     const is_err_res = try self.isErr(inst, ty, operand);
   8019     switch (is_err_res) {
   8020         .eflags => |cc| {
   8021             assert(cc == .a);
   8022             return MCValue{ .eflags = cc.negate() };
   8023         },
   8024         .immediate => |imm| {
   8025             assert(imm == 0);
   8026             return MCValue{ .immediate = 1 };
   8027         },
   8028         else => unreachable,
   8029     }
   8030 }
   8031 
   8032 fn airIsNull(self: *Self, inst: Air.Inst.Index) !void {
   8033     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8034     const operand = try self.resolveInst(un_op);
   8035     const ty = self.air.typeOf(un_op);
   8036     const result = try self.isNull(inst, ty, operand);
   8037     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8038 }
   8039 
   8040 fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void {
   8041     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8042     const operand = try self.resolveInst(un_op);
   8043     const ty = self.air.typeOf(un_op);
   8044     const result = try self.isNullPtr(inst, ty, operand);
   8045     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8046 }
   8047 
   8048 fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void {
   8049     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8050     const operand = try self.resolveInst(un_op);
   8051     const ty = self.air.typeOf(un_op);
   8052     const result = switch (try self.isNull(inst, ty, operand)) {
   8053         .eflags => |cc| .{ .eflags = cc.negate() },
   8054         else => unreachable,
   8055     };
   8056     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8057 }
   8058 
   8059 fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void {
   8060     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8061     const operand = try self.resolveInst(un_op);
   8062     const ty = self.air.typeOf(un_op);
   8063     const result = switch (try self.isNullPtr(inst, ty, operand)) {
   8064         .eflags => |cc| .{ .eflags = cc.negate() },
   8065         else => unreachable,
   8066     };
   8067     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8068 }
   8069 
   8070 fn airIsErr(self: *Self, inst: Air.Inst.Index) !void {
   8071     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8072     const operand = try self.resolveInst(un_op);
   8073     const ty = self.air.typeOf(un_op);
   8074     const result = try self.isErr(inst, ty, operand);
   8075     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8076 }
   8077 
   8078 fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   8079     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8080 
   8081     const operand_ptr = try self.resolveInst(un_op);
   8082     const operand_ptr_lock: ?RegisterLock = switch (operand_ptr) {
   8083         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   8084         else => null,
   8085     };
   8086     defer if (operand_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   8087 
   8088     const operand: MCValue = blk: {
   8089         if (self.reuseOperand(inst, un_op, 0, operand_ptr)) {
   8090             // The MCValue that holds the pointer can be re-used as the value.
   8091             break :blk operand_ptr;
   8092         } else {
   8093             break :blk try self.allocRegOrMem(inst, true);
   8094         }
   8095     };
   8096     const ptr_ty = self.air.typeOf(un_op);
   8097     try self.load(operand, ptr_ty, operand_ptr);
   8098 
   8099     const result = try self.isErr(inst, ptr_ty.childType(), operand);
   8100 
   8101     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8102 }
   8103 
   8104 fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void {
   8105     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8106     const operand = try self.resolveInst(un_op);
   8107     const ty = self.air.typeOf(un_op);
   8108     const result = try self.isNonErr(inst, ty, operand);
   8109     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8110 }
   8111 
   8112 fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   8113     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8114 
   8115     const operand_ptr = try self.resolveInst(un_op);
   8116     const operand_ptr_lock: ?RegisterLock = switch (operand_ptr) {
   8117         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   8118         else => null,
   8119     };
   8120     defer if (operand_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   8121 
   8122     const operand: MCValue = blk: {
   8123         if (self.reuseOperand(inst, un_op, 0, operand_ptr)) {
   8124             // The MCValue that holds the pointer can be re-used as the value.
   8125             break :blk operand_ptr;
   8126         } else {
   8127             break :blk try self.allocRegOrMem(inst, true);
   8128         }
   8129     };
   8130     const ptr_ty = self.air.typeOf(un_op);
   8131     try self.load(operand, ptr_ty, operand_ptr);
   8132 
   8133     const result = try self.isNonErr(inst, ptr_ty.childType(), operand);
   8134 
   8135     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8136 }
   8137 
   8138 fn airLoop(self: *Self, inst: Air.Inst.Index) !void {
   8139     // A loop is a setup to be able to jump back to the beginning.
   8140     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   8141     const loop = self.air.extraData(Air.Block, ty_pl.payload);
   8142     const body = self.air.extra[loop.end..][0..loop.data.body_len];
   8143     const jmp_target = @intCast(u32, self.mir_instructions.len);
   8144 
   8145     self.scope_generation += 1;
   8146     const state = try self.saveState();
   8147 
   8148     try self.genBody(body);
   8149     try self.restoreState(state, &.{}, .{
   8150         .emit_instructions = true,
   8151         .update_tracking = false,
   8152         .resurrect = false,
   8153         .close_scope = true,
   8154     });
   8155     _ = try self.asmJmpReloc(jmp_target);
   8156 
   8157     return self.finishAirBookkeeping();
   8158 }
   8159 
   8160 fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
   8161     // A block is a setup to be able to jump to the end.
   8162     const inst_tracking_i = self.inst_tracking.count();
   8163     self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(.unreach));
   8164 
   8165     self.scope_generation += 1;
   8166     try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() });
   8167     const liveness = self.liveness.getBlock(inst);
   8168 
   8169     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   8170     const extra = self.air.extraData(Air.Block, ty_pl.payload);
   8171     const body = self.air.extra[extra.end..][0..extra.data.body_len];
   8172     try self.genBody(body);
   8173 
   8174     var block_data = self.blocks.fetchRemove(inst).?;
   8175     defer block_data.value.deinit(self.gpa);
   8176     if (block_data.value.relocs.items.len > 0) {
   8177         try self.restoreState(block_data.value.state, liveness.deaths, .{
   8178             .emit_instructions = false,
   8179             .update_tracking = true,
   8180             .resurrect = true,
   8181             .close_scope = true,
   8182         });
   8183         for (block_data.value.relocs.items) |reloc| try self.performReloc(reloc);
   8184     }
   8185 
   8186     if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i);
   8187     const tracking = &self.inst_tracking.values()[inst_tracking_i];
   8188     if (self.liveness.isUnused(inst)) tracking.die(self, inst);
   8189     self.getValue(tracking.short, inst);
   8190     self.finishAirBookkeeping();
   8191 }
   8192 
   8193 fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void {
   8194     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   8195     const condition = try self.resolveInst(pl_op.operand);
   8196     const condition_ty = self.air.typeOf(pl_op.operand);
   8197     const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload);
   8198     var extra_index: usize = switch_br.end;
   8199     var case_i: u32 = 0;
   8200     const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.data.cases_len + 1);
   8201     defer self.gpa.free(liveness.deaths);
   8202 
   8203     // If the condition dies here in this switch instruction, process
   8204     // that death now instead of later as this has an effect on
   8205     // whether it needs to be spilled in the branches
   8206     if (self.liveness.operandDies(inst, 0)) {
   8207         if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst);
   8208     }
   8209 
   8210     self.scope_generation += 1;
   8211     const state = try self.saveState();
   8212 
   8213     while (case_i < switch_br.data.cases_len) : (case_i += 1) {
   8214         const case = self.air.extraData(Air.SwitchBr.Case, extra_index);
   8215         const items = @ptrCast(
   8216             []const Air.Inst.Ref,
   8217             self.air.extra[case.end..][0..case.data.items_len],
   8218         );
   8219         const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len];
   8220         extra_index = case.end + items.len + case_body.len;
   8221 
   8222         var relocs = try self.gpa.alloc(u32, items.len);
   8223         defer self.gpa.free(relocs);
   8224 
   8225         try self.spillEflagsIfOccupied();
   8226         for (items, relocs, 0..) |item, *reloc, i| {
   8227             const item_mcv = try self.resolveInst(item);
   8228             try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv);
   8229             reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne);
   8230         }
   8231 
   8232         for (liveness.deaths[case_i]) |operand| self.processDeath(operand);
   8233 
   8234         for (relocs[0 .. relocs.len - 1]) |reloc| try self.performReloc(reloc);
   8235         try self.genBody(case_body);
   8236         try self.restoreState(state, &.{}, .{
   8237             .emit_instructions = false,
   8238             .update_tracking = true,
   8239             .resurrect = true,
   8240             .close_scope = true,
   8241         });
   8242 
   8243         try self.performReloc(relocs[relocs.len - 1]);
   8244     }
   8245 
   8246     if (switch_br.data.else_body_len > 0) {
   8247         const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len];
   8248 
   8249         const else_deaths = liveness.deaths.len - 1;
   8250         for (liveness.deaths[else_deaths]) |operand| self.processDeath(operand);
   8251 
   8252         try self.genBody(else_body);
   8253         try self.restoreState(state, &.{}, .{
   8254             .emit_instructions = false,
   8255             .update_tracking = true,
   8256             .resurrect = true,
   8257             .close_scope = true,
   8258         });
   8259     }
   8260 
   8261     // We already took care of pl_op.operand earlier, so we're going to pass .none here
   8262     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   8263 }
   8264 
   8265 fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void {
   8266     const next_inst = @intCast(u32, self.mir_instructions.len);
   8267     switch (self.mir_instructions.items(.tag)[reloc]) {
   8268         .j, .jmp => {},
   8269         .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) {
   8270             .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {},
   8271             else => unreachable,
   8272         },
   8273         else => unreachable,
   8274     }
   8275     self.mir_instructions.items(.data)[reloc].inst.inst = next_inst;
   8276 }
   8277 
   8278 fn airBr(self: *Self, inst: Air.Inst.Index) !void {
   8279     const br = self.air.instructions.items(.data)[inst].br;
   8280     const src_mcv = try self.resolveInst(br.operand);
   8281 
   8282     const block_ty = self.air.typeOfIndex(br.block_inst);
   8283     const block_unused =
   8284         !block_ty.hasRuntimeBitsIgnoreComptime() or self.liveness.isUnused(br.block_inst);
   8285     const block_tracking = self.inst_tracking.getPtr(br.block_inst).?;
   8286     const block_data = self.blocks.getPtr(br.block_inst).?;
   8287     const first_br = block_data.relocs.items.len == 0;
   8288     const block_result = result: {
   8289         if (block_unused) break :result .none;
   8290 
   8291         if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) {
   8292             if (first_br) break :result src_mcv;
   8293 
   8294             if (block_tracking.getReg()) |block_reg|
   8295                 try self.register_manager.getReg(block_reg, br.block_inst);
   8296             // .long = .none to avoid merging operand and block result stack frames.
   8297             var current_tracking = InstTracking{ .long = .none, .short = src_mcv };
   8298             try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*);
   8299             if (src_mcv.getReg()) |src_reg| self.register_manager.freeReg(src_reg);
   8300             break :result block_tracking.short;
   8301         }
   8302 
   8303         const dst_mcv = if (first_br) try self.allocRegOrMem(br.block_inst, true) else dst: {
   8304             self.getValue(block_tracking.short, br.block_inst);
   8305             break :dst block_tracking.short;
   8306         };
   8307         try self.genCopy(block_ty, dst_mcv, src_mcv);
   8308         break :result dst_mcv;
   8309     };
   8310 
   8311     // Process operand death so that it is properly accounted for in the State below.
   8312     if (self.liveness.operandDies(inst, 0)) {
   8313         if (Air.refToIndex(br.operand)) |op_inst| self.processDeath(op_inst);
   8314     }
   8315 
   8316     if (first_br) {
   8317         block_tracking.* = InstTracking.init(block_result);
   8318         try self.saveRetroactiveState(&block_data.state);
   8319     } else try self.restoreState(block_data.state, &.{}, .{
   8320         .emit_instructions = true,
   8321         .update_tracking = false,
   8322         .resurrect = false,
   8323         .close_scope = false,
   8324     });
   8325 
   8326     // Stop tracking block result without forgetting tracking info
   8327     self.freeValue(block_tracking.short);
   8328 
   8329     // Emit a jump with a relocation. It will be patched up after the block ends.
   8330     // Leave the jump offset undefined
   8331     const jmp_reloc = try self.asmJmpReloc(undefined);
   8332     try block_data.relocs.append(self.gpa, jmp_reloc);
   8333 
   8334     self.finishAirBookkeeping();
   8335 }
   8336 
   8337 fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
   8338     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   8339     const extra = self.air.extraData(Air.Asm, ty_pl.payload);
   8340     const clobbers_len = @truncate(u31, extra.data.flags);
   8341     var extra_i: usize = extra.end;
   8342     const outputs = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra_i..][0..extra.data.outputs_len]);
   8343     extra_i += outputs.len;
   8344     const inputs = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra_i..][0..extra.data.inputs_len]);
   8345     extra_i += inputs.len;
   8346 
   8347     var result: MCValue = .none;
   8348     var args = std.StringArrayHashMap(MCValue).init(self.gpa);
   8349     try args.ensureTotalCapacity(outputs.len + inputs.len + clobbers_len);
   8350     defer {
   8351         for (args.values()) |arg| switch (arg) {
   8352             .register => |reg| self.register_manager.unlockReg(.{ .register = reg }),
   8353             else => {},
   8354         };
   8355         args.deinit();
   8356     }
   8357 
   8358     if (outputs.len > 1) {
   8359         return self.fail("TODO implement codegen for asm with more than 1 output", .{});
   8360     }
   8361 
   8362     for (outputs) |output| {
   8363         if (output != .none) {
   8364             return self.fail("TODO implement codegen for non-expr asm", .{});
   8365         }
   8366         const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
   8367         const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
   8368         const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
   8369         // This equation accounts for the fact that even if we have exactly 4 bytes
   8370         // for the string, we still use the next u32 for the null terminator.
   8371         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
   8372 
   8373         const mcv: MCValue = if (mem.eql(u8, constraint, "=r"))
   8374             .{ .register = self.register_manager.tryAllocReg(inst, gp) orelse
   8375                 return self.fail("ran out of registers lowering inline asm", .{}) }
   8376         else if (mem.startsWith(u8, constraint, "={") and mem.endsWith(u8, constraint, "}"))
   8377             .{ .register = parseRegName(constraint["={".len .. constraint.len - "}".len]) orelse
   8378                 return self.fail("unrecognized register constraint: '{s}'", .{constraint}) }
   8379         else
   8380             return self.fail("unrecognized constraint: '{s}'", .{constraint});
   8381         args.putAssumeCapacity(name, mcv);
   8382         switch (mcv) {
   8383             .register => |reg| _ = if (RegisterManager.indexOfRegIntoTracked(reg)) |_|
   8384                 self.register_manager.lockRegAssumeUnused(reg),
   8385             else => {},
   8386         }
   8387         if (output == .none) result = mcv;
   8388     }
   8389 
   8390     for (inputs) |input| {
   8391         const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
   8392         const constraint = std.mem.sliceTo(input_bytes, 0);
   8393         const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
   8394         // This equation accounts for the fact that even if we have exactly 4 bytes
   8395         // for the string, we still use the next u32 for the null terminator.
   8396         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
   8397 
   8398         if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
   8399             return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
   8400         }
   8401         const reg_name = constraint[1 .. constraint.len - 1];
   8402         const reg = parseRegName(reg_name) orelse
   8403             return self.fail("unrecognized register: '{s}'", .{reg_name});
   8404 
   8405         const arg_mcv = try self.resolveInst(input);
   8406         try self.register_manager.getReg(reg, null);
   8407         try self.genSetReg(reg, self.air.typeOf(input), arg_mcv);
   8408     }
   8409 
   8410     {
   8411         var clobber_i: u32 = 0;
   8412         while (clobber_i < clobbers_len) : (clobber_i += 1) {
   8413             const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
   8414             // This equation accounts for the fact that even if we have exactly 4 bytes
   8415             // for the string, we still use the next u32 for the null terminator.
   8416             extra_i += clobber.len / 4 + 1;
   8417 
   8418             // TODO honor these
   8419         }
   8420     }
   8421 
   8422     const asm_source = mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len];
   8423     var line_it = mem.tokenize(u8, asm_source, "\n\r;");
   8424     while (line_it.next()) |line| {
   8425         var mnem_it = mem.tokenize(u8, line, " \t");
   8426         const mnem_str = mnem_it.next() orelse continue;
   8427         if (mem.startsWith(u8, mnem_str, "#")) continue;
   8428 
   8429         const mnem_size: ?Memory.PtrSize = if (mem.endsWith(u8, mnem_str, "b"))
   8430             .byte
   8431         else if (mem.endsWith(u8, mnem_str, "w"))
   8432             .word
   8433         else if (mem.endsWith(u8, mnem_str, "l"))
   8434             .dword
   8435         else if (mem.endsWith(u8, mnem_str, "q"))
   8436             .qword
   8437         else
   8438             null;
   8439         const mnem_tag = Mir.Inst.FixedTag{ ._, mnem: {
   8440             if (mnem_size) |_| {
   8441                 if (std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. mnem_str.len - 1])) |mnem| {
   8442                     break :mnem mnem;
   8443                 }
   8444             }
   8445             break :mnem std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse
   8446                 return self.fail("Invalid mnemonic: '{s}'", .{mnem_str});
   8447         } };
   8448 
   8449         var op_it = mem.tokenize(u8, mnem_it.rest(), ",");
   8450         var ops = [1]encoder.Instruction.Operand{.none} ** 4;
   8451         for (&ops) |*op| {
   8452             const op_str = mem.trim(u8, op_it.next() orelse break, " \t");
   8453             if (mem.startsWith(u8, op_str, "#")) break;
   8454             if (mem.startsWith(u8, op_str, "%%")) {
   8455                 const colon = mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':');
   8456                 const reg = parseRegName(op_str["%%".len .. colon orelse op_str.len]) orelse
   8457                     return self.fail("Invalid register: '{s}'", .{op_str});
   8458                 if (colon) |colon_pos| {
   8459                     const disp = std.fmt.parseInt(i32, op_str[colon_pos + 1 ..], 0) catch
   8460                         return self.fail("Invalid displacement: '{s}'", .{op_str});
   8461                     op.* = .{ .mem = Memory.sib(
   8462                         mnem_size orelse return self.fail("Unknown size: '{s}'", .{op_str}),
   8463                         .{ .base = .{ .reg = reg }, .disp = disp },
   8464                     ) };
   8465                 } else {
   8466                     if (mnem_size) |size| if (reg.bitSize() != size.bitSize())
   8467                         return self.fail("Invalid register size: '{s}'", .{op_str});
   8468                     op.* = .{ .reg = reg };
   8469                 }
   8470             } else if (mem.startsWith(u8, op_str, "%[") and mem.endsWith(u8, op_str, "]")) {
   8471                 switch (args.get(op_str["%[".len .. op_str.len - "]".len]) orelse
   8472                     return self.fail("No matching constraint: '{s}'", .{op_str})) {
   8473                     .register => |reg| op.* = .{ .reg = reg },
   8474                     else => return self.fail("Invalid constraint: '{s}'", .{op_str}),
   8475                 }
   8476             } else if (mem.startsWith(u8, op_str, "$")) {
   8477                 if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
   8478                     if (mnem_size) |size| {
   8479                         const max = @as(u64, math.maxInt(u64)) >>
   8480                             @intCast(u6, 64 - (size.bitSize() - 1));
   8481                         if ((if (s < 0) ~s else s) > max)
   8482                             return self.fail("Invalid immediate size: '{s}'", .{op_str});
   8483                     }
   8484                     op.* = .{ .imm = Immediate.s(s) };
   8485                 } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
   8486                     if (mnem_size) |size| {
   8487                         const max = @as(u64, math.maxInt(u64)) >>
   8488                             @intCast(u6, 64 - size.bitSize());
   8489                         if (u > max)
   8490                             return self.fail("Invalid immediate size: '{s}'", .{op_str});
   8491                     }
   8492                     op.* = .{ .imm = Immediate.u(u) };
   8493                 } else |_| return self.fail("Invalid immediate: '{s}'", .{op_str});
   8494             } else return self.fail("Invalid operand: '{s}'", .{op_str});
   8495         } else if (op_it.next()) |op_str| return self.fail("Extra operand: '{s}'", .{op_str});
   8496 
   8497         (switch (ops[0]) {
   8498             .none => self.asmOpOnly(mnem_tag),
   8499             .reg => |reg0| switch (ops[1]) {
   8500                 .none => self.asmRegister(mnem_tag, reg0),
   8501                 .reg => |reg1| switch (ops[2]) {
   8502                     .none => self.asmRegisterRegister(mnem_tag, reg1, reg0),
   8503                     .reg => |reg2| switch (ops[3]) {
   8504                         .none => self.asmRegisterRegisterRegister(mnem_tag, reg2, reg1, reg0),
   8505                         else => error.InvalidInstruction,
   8506                     },
   8507                     .mem => |mem2| switch (ops[3]) {
   8508                         .none => self.asmMemoryRegisterRegister(mnem_tag, mem2, reg1, reg0),
   8509                         else => error.InvalidInstruction,
   8510                     },
   8511                     else => error.InvalidInstruction,
   8512                 },
   8513                 .mem => |mem1| switch (ops[2]) {
   8514                     .none => self.asmMemoryRegister(mnem_tag, mem1, reg0),
   8515                     else => error.InvalidInstruction,
   8516                 },
   8517                 else => error.InvalidInstruction,
   8518             },
   8519             .mem => |mem0| switch (ops[1]) {
   8520                 .none => self.asmMemory(mnem_tag, mem0),
   8521                 .reg => |reg1| switch (ops[2]) {
   8522                     .none => self.asmRegisterMemory(mnem_tag, reg1, mem0),
   8523                     else => error.InvalidInstruction,
   8524                 },
   8525                 else => error.InvalidInstruction,
   8526             },
   8527             .imm => |imm0| switch (ops[1]) {
   8528                 .none => self.asmImmediate(mnem_tag, imm0),
   8529                 .reg => |reg1| switch (ops[2]) {
   8530                     .none => self.asmRegisterImmediate(mnem_tag, reg1, imm0),
   8531                     .reg => |reg2| switch (ops[3]) {
   8532                         .none => self.asmRegisterRegisterImmediate(mnem_tag, reg2, reg1, imm0),
   8533                         else => error.InvalidInstruction,
   8534                     },
   8535                     .mem => |mem2| switch (ops[3]) {
   8536                         .none => self.asmMemoryRegisterImmediate(mnem_tag, mem2, reg1, imm0),
   8537                         else => error.InvalidInstruction,
   8538                     },
   8539                     else => error.InvalidInstruction,
   8540                 },
   8541                 .mem => |mem1| switch (ops[2]) {
   8542                     .none => self.asmMemoryImmediate(mnem_tag, mem1, imm0),
   8543                     else => error.InvalidInstruction,
   8544                 },
   8545                 else => error.InvalidInstruction,
   8546             },
   8547         }) catch |err| switch (err) {
   8548             error.InvalidInstruction => return self.fail(
   8549                 "Invalid instruction: '{s} {s} {s} {s} {s}'",
   8550                 .{
   8551                     @tagName(mnem_tag[1]),
   8552                     @tagName(ops[0]),
   8553                     @tagName(ops[1]),
   8554                     @tagName(ops[2]),
   8555                     @tagName(ops[3]),
   8556                 },
   8557             ),
   8558             else => |e| return e,
   8559         };
   8560     }
   8561 
   8562     simple: {
   8563         var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
   8564         var buf_index: usize = 0;
   8565         for (outputs) |output| {
   8566             if (output == .none) continue;
   8567 
   8568             if (buf_index >= buf.len) break :simple;
   8569             buf[buf_index] = output;
   8570             buf_index += 1;
   8571         }
   8572         if (buf_index + inputs.len > buf.len) break :simple;
   8573         @memcpy(buf[buf_index..][0..inputs.len], inputs);
   8574         return self.finishAir(inst, result, buf);
   8575     }
   8576     var bt = self.liveness.iterateBigTomb(inst);
   8577     for (outputs) |output| if (output != .none) self.feed(&bt, output);
   8578     for (inputs) |input| self.feed(&bt, input);
   8579     return self.finishAirResult(inst, result);
   8580 }
   8581 
   /// Describes how a value is moved between a register and memory.
   /// Produced by `moveStrategy`; `genSetReg` uses it for loads and `genSetMem` for stores.
   8582 const MoveStrategy = union(enum) {
            /// One instruction tag serves for both the load and the store.
   8583     move: Mir.Inst.FixedTag,
            /// Loads emit `insert` and stores emit `extract`, each with `imm` as an extra operand.
   8584     insert_extract: InsertExtract,
            /// Like `insert_extract`, except that `genSetReg` emits the load with the destination
            /// register passed as both the first and the second operand.
   8585     vex_insert_extract: InsertExtract,
   8586 
            /// Instruction pair plus the immediate that accompanies either instruction.
   8587     const InsertExtract = struct {
                /// Tag used when loading from memory into a register.
   8588         insert: Mir.Inst.FixedTag,
                /// Tag used when storing from a register to memory.
   8589         extract: Mir.Inst.FixedTag,
                /// Immediate operand passed along with either tag.
   8590         imm: Immediate,
   8591     };
   8592 };
   /// Selects how a value of type `ty` is moved between a register and memory.
   ///
   /// `aligned` tells whether the memory operand is known to satisfy the alignment of `ty`;
   /// it only chooses between the `mova` and `movu` tags in the cases that use them.
   /// Types that are neither floats nor vectors use a plain `mov`.
   /// Cases that are guarded solely by `hasFeature(.avx)` have no fallback without AVX.
   /// Any type/feature combination not listed here fails with a "TODO moveStrategy" error.
   8593 fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
   8594     switch (ty.zigTypeTag()) {
   8595         else => return .{ .move = .{ ._, .mov } },
   8596         .Float => switch (ty.floatBits(self.target.*)) {
   8597             16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   8598                 .insert = .{ .vp_w, .insr },
   8599                 .extract = .{ .vp_w, .extr },
   8600                 .imm = Immediate.u(0),
   8601             } } else .{ .insert_extract = .{
   8602                 .insert = .{ .p_w, .insr },
   8603                 .extract = .{ .p_w, .extr },
   8604                 .imm = Immediate.u(0),
   8605             } },
   8606             32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
   8607             64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
   8608             128 => return .{ .move = if (self.hasFeature(.avx))
   8609                 if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8610             else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8611             else => {},
   8612         },
   8613         .Vector => switch (ty.childType().zigTypeTag()) {
   8614             .Int => switch (ty.childType().intInfo(self.target.*).bits) {
   8615                 8 => switch (ty.vectorLen()) {
                            // The byte insert/extract instructions (pinsrb/pextrb) were introduced
                            // with SSE4.1, so that is the feature to test when AVX is unavailable.
                            // With neither feature this case falls through to the TODO failure.
   8616                     1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
   8617                         .insert = .{ .vp_b, .insr },
   8618                         .extract = .{ .vp_b, .extr },
   8619                         .imm = Immediate.u(0),
   8620                     } } else if (self.hasFeature(.sse4_1)) return .{ .insert_extract = .{
   8621                         .insert = .{ .p_b, .insr },
   8622                         .extract = .{ .p_b, .extr },
   8623                         .imm = Immediate.u(0),
   8624                     } },
   8625                     2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   8626                         .insert = .{ .vp_w, .insr },
   8627                         .extract = .{ .vp_w, .extr },
   8628                         .imm = Immediate.u(0),
   8629                     } } else .{ .insert_extract = .{
   8630                         .insert = .{ .p_w, .insr },
   8631                         .extract = .{ .p_w, .extr },
   8632                         .imm = Immediate.u(0),
   8633                     } },
   8634                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   8635                         .{ .v_ss, .mov }
   8636                     else
   8637                         .{ ._ss, .mov } },
   8638                     5...8 => return .{ .move = if (self.hasFeature(.avx))
   8639                         .{ .v_sd, .mov }
   8640                     else
   8641                         .{ ._sd, .mov } },
   8642                     else => {},
   8643                 },
   8644                 16 => switch (ty.vectorLen()) {
   8645                     1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   8646                         .insert = .{ .vp_w, .insr },
   8647                         .extract = .{ .vp_w, .extr },
   8648                         .imm = Immediate.u(0),
   8649                     } } else .{ .insert_extract = .{
   8650                         .insert = .{ .p_w, .insr },
   8651                         .extract = .{ .p_w, .extr },
   8652                         .imm = Immediate.u(0),
   8653                     } },
   8654                     2 => return .{ .move = if (self.hasFeature(.avx))
   8655                         .{ .v_ss, .mov }
   8656                     else
   8657                         .{ ._ss, .mov } },
   8658                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   8659                         .{ .v_sd, .mov }
   8660                     else
   8661                         .{ ._sd, .mov } },
                            // Same selection as the 5...8 case for 16-bit floats below: the tag is
                            // chosen by alignment. Pairing `.mov` with the `_ps` fixes would spell
                            // "movps"/"vmovps", which is not an instruction.
   8662                     5...8 => return .{ .move = if (self.hasFeature(.avx))
   8663                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8664                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8666                     else => {},
   8667                 },
   8668                 32 => switch (ty.vectorLen()) {
   8669                     1 => return .{ .move = if (self.hasFeature(.avx))
   8670                         .{ .v_ss, .mov }
   8671                     else
   8672                         .{ ._ss, .mov } },
   8673                     2 => return .{ .move = if (self.hasFeature(.avx))
   8674                         .{ .v_sd, .mov }
   8675                     else
   8676                         .{ ._sd, .mov } },
   8677                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   8678                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8679                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8680                     5...8 => if (self.hasFeature(.avx))
   8681                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   8682                     else => {},
   8683                 },
   8684                 64 => switch (ty.vectorLen()) {
   8685                     1 => return .{ .move = if (self.hasFeature(.avx))
   8686                         .{ .v_sd, .mov }
   8687                     else
   8688                         .{ ._sd, .mov } },
   8689                     2 => return .{ .move = if (self.hasFeature(.avx))
   8690                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8691                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8692                     3...4 => if (self.hasFeature(.avx))
   8693                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   8694                     else => {},
   8695                 },
   8696                 else => {},
   8697             },
   8698             .Float => switch (ty.childType().floatBits(self.target.*)) {
   8699                 16 => switch (ty.vectorLen()) {
                            // A single-element f16 vector has no strategy yet and reaches the TODO failure.
   8700                     1 => {},
   8701                     2 => return .{ .move = if (self.hasFeature(.avx))
   8702                         .{ .v_ss, .mov }
   8703                     else
   8704                         .{ ._ss, .mov } },
   8705                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   8706                         .{ .v_sd, .mov }
   8707                     else
   8708                         .{ ._sd, .mov } },
   8709                     5...8 => return .{ .move = if (self.hasFeature(.avx))
   8710                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8711                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8712                     9...16 => if (self.hasFeature(.avx))
   8713                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   8714                     else => {},
   8715                 },
   8716                 32 => switch (ty.vectorLen()) {
   8717                     1 => return .{ .move = if (self.hasFeature(.avx))
   8718                         .{ .v_ss, .mov }
   8719                     else
   8720                         .{ ._ss, .mov } },
   8721                     2 => return .{ .move = if (self.hasFeature(.avx))
   8722                         .{ .v_sd, .mov }
   8723                     else
   8724                         .{ ._sd, .mov } },
   8725                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   8726                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8727                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8728                     5...8 => if (self.hasFeature(.avx))
   8729                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   8730                     else => {},
   8731                 },
   8732                 64 => switch (ty.vectorLen()) {
   8733                     1 => return .{ .move = if (self.hasFeature(.avx))
   8734                         .{ .v_sd, .mov }
   8735                     else
   8736                         .{ ._sd, .mov } },
   8737                     2 => return .{ .move = if (self.hasFeature(.avx))
   8738                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8739                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8740                     3...4 => if (self.hasFeature(.avx))
   8741                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   8742                     else => {},
   8743                 },
   8744                 128 => switch (ty.vectorLen()) {
   8745                     1 => return .{ .move = if (self.hasFeature(.avx))
   8746                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   8747                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   8748                     2 => if (self.hasFeature(.avx))
   8749                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   8750                     else => {},
   8751                 },
   8752                 else => {},
   8753             },
   8754             else => {},
   8755         },
   8756     }
            // Every case that did not return above is unsupported for now.
   8757     return self.fail("TODO moveStrategy for {}", .{ty.fmt(self.bin_file.options.module.?)});
   8758 }
   8759 
   /// Copies the value of type `ty` described by `src_mcv` into the location described by `dst_mcv`.
   /// If the source lives in a register, that register is locked until this function returns.
   /// `dst_mcv` must be a writable location; the tags listed first in the switch are unreachable.
   8760 fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
   8761     const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   8762     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   8763 
   8764     switch (dst_mcv) {
   8765         .none,
   8766         .unreach,
   8767         .dead,
   8768         .undef,
   8769         .immediate,
   8770         .eflags,
   8771         .register_overflow,
   8772         .lea_direct,
   8773         .lea_got,
   8774         .lea_tlv,
   8775         .lea_frame,
   8776         .reserved_frame,
   8777         => unreachable, // unmodifiable destination
   8778         .register => |reg| try self.genSetReg(reg, ty, src_mcv),
                // The destination register receives the source adjusted by `-off`. Sources that are
                // not offset in place are first copied to a temporary register.
   8779         .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) {
   8780             .none,
   8781             .unreach,
   8782             .dead,
   8783             .undef,
   8784             .register_overflow,
   8785             .reserved_frame,
   8786             => unreachable,
   8787             .immediate,
   8788             .register,
   8789             .register_offset,
   8790             .lea_frame,
   8791             => src_mcv.offset(-dst_reg_off.off),
   8792             else => .{ .register_offset = .{
   8793                 .reg = try self.copyToTmpRegister(ty, src_mcv),
   8794                 .off = -dst_reg_off.off,
   8795             } },
   8796         }),
   8797         .indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv),
   8798         .memory, .load_direct, .load_got, .load_tlv => {
   8799             switch (dst_mcv) {
                        // An absolute address that fits in a signed 32-bit displacement is stored to
                        // directly, using `ds` as the base.
   8800                 .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr|
   8801                     return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv),
   8802                 .load_direct, .load_got, .load_tlv => {},
   8803                 else => unreachable,
   8804             }
   8805 
                    // Otherwise put the destination address in a temporary register, keep that
                    // register locked, and store through it.
   8806             const addr_reg = try self.copyToTmpRegister(Type.usize, dst_mcv.address());
   8807             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   8808             defer self.register_manager.unlockReg(addr_lock);
   8809 
   8810             try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv);
   8811         },
   8812         .load_frame => |frame_addr| try self.genSetMem(
   8813             .{ .frame = frame_addr.index },
   8814             frame_addr.off,
   8815             ty,
   8816             src_mcv,
   8817         ),
   8818     }
   8819 }
   8820 
   /// Emits code that sets `dst_reg` to the value of type `ty` described by `src_mcv`.
   /// Fails if the ABI size of `ty`, in bits, exceeds the bit size of `dst_reg`.
   8821 fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void {
   8822     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   8823     if (abi_size * 8 > dst_reg.bitSize())
   8824         return self.fail("genSetReg called with a value larger than dst_reg", .{});
   8825     switch (src_mcv) {
   8826         .none,
   8827         .unreach,
   8828         .dead,
   8829         .register_overflow,
   8830         .reserved_frame,
   8831         => unreachable,
                // With safety wanted, the 64-bit alias is filled with a 0xaa byte pattern;
                // otherwise nothing is emitted.
   8832         .undef => if (self.wantSafety())
   8833             try self.genSetReg(dst_reg.to64(), Type.usize, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
                // The condition is written to the 8-bit alias of the destination.
   8834         .eflags => |cc| try self.asmSetccRegister(dst_reg.to8(), cc),
   8835         .immediate => |imm| {
   8836             if (imm == 0) {
   8837                 // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
   8838                 // register is the fastest way to zero a register.
   8839                 try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
   8840             } else if (abi_size > 4 and math.cast(u32, imm) != null) {
   8841                 // 32-bit moves zero-extend to 64-bit.
   8842                 try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm));
   8843             } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) {
                        // A value of at most 4 bytes that is negative when viewed as i64 is
                        // emitted as a signed 32-bit immediate.
   8844                 try self.asmRegisterImmediate(
   8845                     .{ ._, .mov },
   8846                     registerAlias(dst_reg, abi_size),
   8847                     Immediate.s(@intCast(i32, @bitCast(i64, imm))),
   8848                 );
   8849             } else {
   8850                 try self.asmRegisterImmediate(
   8851                     .{ ._, .mov },
   8852                     registerAlias(dst_reg, abi_size),
   8853                     Immediate.u(imm),
   8854                 );
   8855             }
   8856         },
                // Nothing is emitted when source and destination have the same register id.
                // When both registers are floating point, or neither is, the tag depends on the
                // type; otherwise the copy crosses register classes and the tag depends on `abi_size`.
   8857         .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
   8858             if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point))
   8859                 switch (ty.zigTypeTag()) {
   8860                     else => .{ ._, .mov },
   8861                     .Float, .Vector => .{ ._ps, .mova },
   8862                 }
   8863             else switch (abi_size) {
                        // 2-byte values return early: an insert when the destination is floating
                        // point, an extract otherwise, on 4-byte aliases with immediate 0.
   8864                 2 => return try self.asmRegisterRegisterImmediate(
   8865                     if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr },
   8866                     registerAlias(dst_reg, 4),
   8867                     registerAlias(src_reg, 4),
   8868                     Immediate.u(0),
   8869                 ),
   8870                 4 => .{ ._d, .mov },
   8871                 8 => .{ ._q, .mov },
   8872                 else => return self.fail(
   8873                     "unsupported register copy from {s} to {s}",
   8874                     .{ @tagName(src_reg), @tagName(dst_reg) },
   8875                 ),
   8876             },
   8877             registerAlias(dst_reg, abi_size),
   8878             registerAlias(src_reg, abi_size),
   8879         ),
                // Sources expressed as a base (register or frame index) plus displacement.
   8880         .register_offset,
   8881         .indirect,
   8882         .load_frame,
   8883         .lea_frame,
   8884         => {
   8885             const dst_alias = registerAlias(dst_reg, abi_size);
   8886             const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
   8887                 .register_offset, .indirect => |reg_off| .{
   8888                     .base = .{ .reg = reg_off.reg },
   8889                     .disp = reg_off.off,
   8890                 },
   8891                 .load_frame, .lea_frame => |frame_addr| .{
   8892                     .base = .{ .frame = frame_addr.index },
   8893                     .disp = frame_addr.off,
   8894                 },
   8895                 else => unreachable,
   8896             });
                    // `register_offset` and `lea_frame` produce the address itself with `lea`;
                    // `indirect` and `load_frame` load the value using the strategy for `ty`.
   8897             switch (@as(MoveStrategy, switch (src_mcv) {
   8898                 .register_offset => |reg_off| switch (reg_off.off) {
                            // A zero offset is just a register-to-register copy.
   8899                     0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
   8900                     else => .{ .move = .{ ._, .lea } },
   8901                 },
                        // No alignment is assumed for a load through an arbitrary register.
   8902                 .indirect => try self.moveStrategy(ty, false),
                        // Aligned when the frame address alignment is at least the ABI alignment of `ty`.
   8903                 .load_frame => |frame_addr| try self.moveStrategy(
   8904                     ty,
   8905                     self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*),
   8906                 ),
   8907                 .lea_frame => .{ .move = .{ ._, .lea } },
   8908                 else => unreachable,
   8909             })) {
   8910                 .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
   8911                 .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
   8912                     ie.insert,
   8913                     dst_alias,
   8914                     src_mem,
   8915                     ie.imm,
   8916                 ),
   8917                 .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
   8918                     ie.insert,
   8919                     dst_alias,
   8920                     dst_alias,
   8921                     src_mem,
   8922                     ie.imm,
   8923                 ),
   8924             }
   8925         },
   8926         .memory, .load_direct, .load_got, .load_tlv => {
   8927             switch (src_mcv) {
                        // An absolute address that fits in a signed 32-bit displacement is loaded
                        // directly with `ds` as the base; alignment is derived from the address.
                        // Larger addresses fall through to the temporary-register path below.
   8928                 .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| {
   8929                     const dst_alias = registerAlias(dst_reg, abi_size);
   8930                     const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
   8931                         .base = .{ .reg = .ds },
   8932                         .disp = small_addr,
   8933                     });
   8934                     switch (try self.moveStrategy(ty, mem.isAlignedGeneric(
   8935                         u32,
   8936                         @bitCast(u32, small_addr),
   8937                         ty.abiAlignment(self.target.*),
   8938                     ))) {
   8939                         .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
   8940                         .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
   8941                             ie.insert,
   8942                             dst_alias,
   8943                             src_mem,
   8944                             ie.imm,
   8945                         ),
   8946                         .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
   8947                             ie.insert,
   8948                             dst_alias,
   8949                             dst_alias,
   8950                             src_mem,
   8951                             ie.imm,
   8952                         ),
   8953                     }
   8954                 },
                        // Types other than floats and vectors are loaded by a `mov` carrying a direct
                        // relocation into the 64-bit alias, after which the function returns.
                        // Floats and vectors fall through to the temporary-register path below.
   8955                 .load_direct => |sym_index| switch (ty.zigTypeTag()) {
   8956                     else => {
   8957                         const atom_index = try self.owner.getSymbolIndex(self);
   8958                         _ = try self.addInst(.{
   8959                             .tag = .mov,
   8960                             .ops = .direct_reloc,
   8961                             .data = .{ .rx = .{
   8962                                 .r1 = dst_reg.to64(),
   8963                                 .payload = try self.addExtra(Mir.Reloc{
   8964                                     .atom_index = atom_index,
   8965                                     .sym_index = sym_index,
   8966                                 }),
   8967                             } },
   8968                         });
   8969                         return;
   8970                     },
   8971                     .Float, .Vector => {},
   8972                 },
   8973                 .load_got, .load_tlv => {},
   8974                 else => unreachable,
   8975             }
   8976 
                    // Fallback: put the source address in a temporary register, keep it locked,
                    // and load through it without assuming any alignment.
   8977             const addr_reg = try self.copyToTmpRegister(Type.usize, src_mcv.address());
   8978             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   8979             defer self.register_manager.unlockReg(addr_lock);
   8980 
   8981             const dst_alias = registerAlias(dst_reg, abi_size);
   8982             const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
   8983                 .base = .{ .reg = addr_reg },
   8984             });
   8985             switch (try self.moveStrategy(ty, false)) {
   8986                 .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
   8987                 .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
   8988                     ie.insert,
   8989                     dst_alias,
   8990                     src_mem,
   8991                     ie.imm,
   8992                 ),
   8993                 .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
   8994                     ie.insert,
   8995                     dst_alias,
   8996                     dst_alias,
   8997                     src_mem,
   8998                     ie.imm,
   8999                 ),
   9000             }
   9001         },
                // Symbol addresses: `lea` with a direct relocation, or `mov` with a GOT relocation,
                // always targeting the 64-bit alias of the destination.
   9002         .lea_direct, .lea_got => |sym_index| {
   9003             const atom_index = try self.owner.getSymbolIndex(self);
   9004             _ = try self.addInst(.{
   9005                 .tag = switch (src_mcv) {
   9006                     .lea_direct => .lea,
   9007                     .lea_got => .mov,
   9008                     else => unreachable,
   9009                 },
   9010                 .ops = switch (src_mcv) {
   9011                     .lea_direct => .direct_reloc,
   9012                     .lea_got => .got_reloc,
   9013                     else => unreachable,
   9014                 },
   9015                 .data = .{ .rx = .{
   9016                     .r1 = dst_reg.to64(),
   9017                     .payload = try self.addExtra(Mir.Reloc{
   9018                         .atom_index = atom_index,
   9019                         .sym_index = sym_index,
   9020                     }),
   9021                 } },
   9022             });
   9023         },
                // Only implemented when the output file is MachO: `lea` with a TLV relocation into
                // rdi, a call through the qword at [rdi], then rax is copied into the destination.
                // Any other output format fails with a TODO error.
   9024         .lea_tlv => |sym_index| {
   9025             const atom_index = try self.owner.getSymbolIndex(self);
   9026             if (self.bin_file.cast(link.File.MachO)) |_| {
   9027                 _ = try self.addInst(.{
   9028                     .tag = .lea,
   9029                     .ops = .tlv_reloc,
   9030                     .data = .{ .rx = .{
   9031                         .r1 = .rdi,
   9032                         .payload = try self.addExtra(Mir.Reloc{
   9033                             .atom_index = atom_index,
   9034                             .sym_index = sym_index,
   9035                         }),
   9036                     } },
   9037                 });
   9038                 // TODO: spill registers before calling
   9039                 try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } }));
   9040                 try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax });
   9041             } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{
   9042                 @tagName(self.bin_file.tag),
   9043             });
   9044         },
   9045     }
   9046 }
   9047 
   /// Emits code that stores the value of type `ty` described by `src_mcv` to the memory
   /// location `base` + `disp`.
   9048 fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCValue) InnerError!void {
   9049     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
            // The destination address expressed as a value, for the memset/memcpy paths below.
   9050     const dst_ptr_mcv: MCValue = switch (base) {
   9051         .none => .{ .immediate = @bitCast(u64, @as(i64, disp)) },
   9052         .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } },
   9053         .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } },
   9054     };
   9055     switch (src_mcv) {
   9056         .none, .unreach, .dead, .reserved_frame => unreachable,
                // With safety wanted, the destination is filled with 0xaa bytes; otherwise
                // nothing is emitted.
   9057         .undef => if (self.wantSafety())
   9058             try self.genInlineMemset(dst_ptr_mcv, .{ .immediate = 0xaa }, .{ .immediate = abi_size }),
   9059         .immediate => |imm| switch (abi_size) {
   9060             1, 2, 4 => {
                        // Signed integers are truncated to 32 bits; everything else is cast with
                        // `@intCast` to u32.
   9061                 const immediate = if (ty.isSignedInt())
   9062                     Immediate.s(@truncate(i32, @bitCast(i64, imm)))
   9063                 else
   9064                     Immediate.u(@intCast(u32, imm));
   9065                 try self.asmMemoryImmediate(
   9066                     .{ ._, .mov },
   9067                     Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
   9068                     immediate,
   9069                 );
   9070             },
   9071             3, 5...7 => unreachable,
                    // Remaining sizes: a value that fits in i32 is stored by one `mov` with a signed
                    // immediate; otherwise it is written as consecutive dwords.
   9072             else => if (math.cast(i32, @bitCast(i64, imm))) |small| {
   9073                 try self.asmMemoryImmediate(
   9074                     .{ ._, .mov },
   9075                     Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
   9076                     Immediate.s(small),
   9077                 );
   9078             } else {
   9079                 var offset: i32 = 0;
                        // When the shift amount does not fit in u6, signed values shift by 63
                        // and unsigned values store 0.
   9080                 while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate(
   9081                     .{ ._, .mov },
   9082                     Memory.sib(.dword, .{ .base = base, .disp = disp + offset }),
   9083                     if (ty.isSignedInt())
   9084                         Immediate.s(@truncate(
   9085                             i32,
   9086                             @bitCast(i64, imm) >> (math.cast(u6, offset * 8) orelse 63),
   9087                         ))
   9088                     else
   9089                         Immediate.u(@truncate(
   9090                             u32,
   9091                             if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0,
   9092                         )),
   9093                 );
   9094             },
   9095         },
                // The condition is stored as a single byte.
   9096         .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc),
   9097         .register => |src_reg| {
   9098             const dst_mem = Memory.sib(
   9099                 Memory.PtrSize.fromSize(abi_size),
   9100                 .{ .base = base, .disp = disp },
   9101             );
   9102             const src_alias = registerAlias(src_reg, abi_size);
                    // Alignment of the destination: with no base or a segment register as base it is
                    // derived from `disp`; any other base register is treated as unaligned; a frame
                    // base compares the frame address alignment with the ABI alignment of `ty`.
   9103             switch (try self.moveStrategy(ty, switch (base) {
   9104                 .none => mem.isAlignedGeneric(
   9105                     u32,
   9106                     @bitCast(u32, disp),
   9107                     ty.abiAlignment(self.target.*),
   9108                 ),
   9109                 .reg => |reg| switch (reg) {
   9110                     .es, .cs, .ss, .ds => mem.isAlignedGeneric(
   9111                         u32,
   9112                         @bitCast(u32, disp),
   9113                         ty.abiAlignment(self.target.*),
   9114                     ),
   9115                     else => false,
   9116                 },
   9117                 .frame => |frame_index| self.getFrameAddrAlignment(
   9118                     .{ .index = frame_index, .off = disp },
   9119                 ) >= ty.abiAlignment(self.target.*),
   9120             })) {
   9121                 .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_alias),
                        // Both insert/extract variants store with the `extract` tag.
   9122                 .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate(
   9123                     ie.extract,
   9124                     dst_mem,
   9125                     src_alias,
   9126                     ie.imm,
   9127                 ),
   9128             }
   9129         },
                // Struct field 0 is stored from the register and field 1 from the flags.
   9130         .register_overflow => |ro| {
   9131             try self.genSetMem(
   9132                 base,
   9133                 disp + @intCast(i32, ty.structFieldOffset(0, self.target.*)),
   9134                 ty.structFieldType(0),
   9135                 .{ .register = ro.reg },
   9136             );
   9137             try self.genSetMem(
   9138                 base,
   9139                 disp + @intCast(i32, ty.structFieldOffset(1, self.target.*)),
   9140                 ty.structFieldType(1),
   9141                 .{ .eflags = ro.eflags },
   9142             );
   9143         },
   9144         .register_offset,
   9145         .memory,
   9146         .indirect,
   9147         .load_direct,
   9148         .lea_direct,
   9149         .load_got,
   9150         .lea_got,
   9151         .load_tlv,
   9152         .lea_tlv,
   9153         .load_frame,
   9154         .lea_frame,
   9155         => switch (abi_size) {
   9156             0 => {},
                    // Sizes 1, 2, 4 and 8 go through a temporary register that stays locked while
                    // it is stored.
   9157             1, 2, 4, 8 => {
   9158                 const src_reg = try self.copyToTmpRegister(ty, src_mcv);
   9159                 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   9160                 defer self.register_manager.unlockReg(src_lock);
   9161 
   9162                 try self.genSetMem(base, disp, ty, .{ .register = src_reg });
   9163             },
                    // Every other size is copied from the source address with an inline memcpy.
   9164             else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }),
   9165         },
   9166     }
   9167 }
   9168 
   9169 /// Like `genInlineMemcpy` but copies value from a register to an address via dereferencing
   9170 /// of destination register.
   9171 /// Boils down to MOV r/m64, r64.
   9172 fn genInlineMemcpyRegisterRegister(
   9173     self: *Self,
   9174     ty: Type,
   9175     dst_reg: Register,
   9176     src_reg: Register,
   9177     offset: i32,
   9178 ) InnerError!void {
   9179     assert(dst_reg.bitSize() == 64);
   9180 
   9181     const dst_reg_lock = self.register_manager.lockReg(dst_reg);
   9182     defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
   9183 
   9184     const src_reg_lock = self.register_manager.lockReg(src_reg);
   9185     defer if (src_reg_lock) |lock| self.register_manager.unlockReg(lock);
   9186 
   9187     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
   9188 
   9189     if (!math.isPowerOfTwo(abi_size)) {
   9190         const tmp_reg = try self.copyToTmpRegister(ty, .{ .register = src_reg });
   9191 
   9192         var next_offset = offset;
   9193         var remainder = abi_size;
   9194         while (remainder > 0) {
   9195             const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder));
   9196             try self.asmMemoryRegister(
   9197                 .{ ._, .mov },
   9198                 Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{
   9199                     .base = dst_reg,
   9200                     .disp = -next_offset,
   9201                 }),
   9202                 registerAlias(tmp_reg, nearest_power_of_two),
   9203             );
   9204 
   9205             if (nearest_power_of_two > 1) {
   9206                 try self.genShiftBinOpMir(.{ ._r, .sh }, ty, .{ .register = tmp_reg }, .{
   9207                     .immediate = nearest_power_of_two * 8,
   9208                 });
   9209             }
   9210 
   9211             remainder -= nearest_power_of_two;
   9212             next_offset -= nearest_power_of_two;
   9213         }
   9214     } else {
   9215         try self.asmMemoryRegister(
   9216             switch (src_reg.class()) {
   9217                 .general_purpose, .segment => .{ ._, .mov },
   9218                 .floating_point => .{ ._ss, .mov },
   9219             },
   9220             Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
   9221             registerAlias(src_reg, abi_size),
   9222         );
   9223     }
   9224 }
   9225 
   9226 fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void {
   9227     try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
   9228     try self.genSetReg(.rdi, Type.usize, dst_ptr);
   9229     try self.genSetReg(.rsi, Type.usize, src_ptr);
   9230     try self.genSetReg(.rcx, Type.usize, len);
   9231     try self.asmOpOnly(.{ .@"rep _sb", .mov });
   9232 }
   9233 
   9234 fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void {
   9235     try self.spillRegisters(&.{ .rdi, .al, .rcx });
   9236     try self.genSetReg(.rdi, Type.usize, dst_ptr);
   9237     try self.genSetReg(.al, Type.u8, value);
   9238     try self.genSetReg(.rcx, Type.usize, len);
   9239     try self.asmOpOnly(.{ .@"rep _sb", .sto });
   9240 }
   9241 
   9242 fn genLazySymbolRef(
   9243     self: *Self,
   9244     comptime tag: Mir.Inst.Tag,
   9245     reg: Register,
   9246     lazy_sym: link.File.LazySymbol,
   9247 ) InnerError!void {
   9248     if (self.bin_file.cast(link.File.Elf)) |elf_file| {
   9249         const atom_index = elf_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
   9250             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
   9251         const atom = elf_file.getAtom(atom_index);
   9252         _ = try atom.getOrCreateOffsetTableEntry(elf_file);
   9253         const got_addr = atom.getOffsetTableAddress(elf_file);
   9254         const got_mem =
   9255             Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) });
   9256         switch (tag) {
   9257             .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem),
   9258             .call => try self.asmMemory(.{ ._, .call }, got_mem),
   9259             else => unreachable,
   9260         }
   9261         switch (tag) {
   9262             .lea, .call => {},
   9263             .mov => try self.asmRegisterMemory(
   9264                 .{ ._, tag },
   9265                 reg.to64(),
   9266                 Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }),
   9267             ),
   9268             else => unreachable,
   9269         }
   9270     } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
   9271         const atom_index = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
   9272             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
   9273         const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
   9274         switch (tag) {
   9275             .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }),
   9276             .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }),
   9277             else => unreachable,
   9278         }
   9279         switch (tag) {
   9280             .lea, .mov => {},
   9281             .call => try self.asmRegister(.{ ._, .call }, reg),
   9282             else => unreachable,
   9283         }
   9284     } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
   9285         const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
   9286             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
   9287         const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?;
   9288         switch (tag) {
   9289             .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }),
   9290             .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }),
   9291             else => unreachable,
   9292         }
   9293         switch (tag) {
   9294             .lea, .mov => {},
   9295             .call => try self.asmRegister(.{ ._, .call }, reg),
   9296             else => unreachable,
   9297         }
   9298     } else {
   9299         return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)});
   9300     }
   9301 }
   9302 
   9303 fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void {
   9304     const un_op = self.air.instructions.items(.data)[inst].un_op;
   9305     const result = result: {
   9306         // TODO: handle case where the operand is a slice not a raw pointer
   9307         const src_mcv = try self.resolveInst(un_op);
   9308         if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv;
   9309 
   9310         const dst_mcv = try self.allocRegOrMem(inst, true);
   9311         const dst_ty = self.air.typeOfIndex(inst);
   9312         try self.genCopy(dst_ty, dst_mcv, src_mcv);
   9313         break :result dst_mcv;
   9314     };
   9315     return self.finishAir(inst, result, .{ un_op, .none, .none });
   9316 }
   9317 
   9318 fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
   9319     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   9320     const dst_ty = self.air.typeOfIndex(inst);
   9321     const src_ty = self.air.typeOf(ty_op.operand);
   9322 
   9323     const result = result: {
   9324         const dst_rc = regClassForType(dst_ty);
   9325         const src_rc = regClassForType(src_ty);
   9326         const operand = try self.resolveInst(ty_op.operand);
   9327         if (dst_rc.supersetOf(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand))
   9328             break :result operand;
   9329 
   9330         const operand_lock = switch (operand) {
   9331             .register => |reg| self.register_manager.lockReg(reg),
   9332             .register_overflow => |ro| self.register_manager.lockReg(ro.reg),
   9333             else => null,
   9334         };
   9335         defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
   9336 
   9337         const dest = try self.allocRegOrMem(inst, true);
   9338         try self.genCopy(if (!dest.isMemory() or operand.isMemory()) dst_ty else src_ty, dest, operand);
   9339         break :result dest;
   9340     };
   9341     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   9342 }
   9343 
   9344 fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
   9345     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   9346 
   9347     const slice_ty = self.air.typeOfIndex(inst);
   9348     const ptr_ty = self.air.typeOf(ty_op.operand);
   9349     const ptr = try self.resolveInst(ty_op.operand);
   9350     const array_ty = ptr_ty.childType();
   9351     const array_len = array_ty.arrayLen();
   9352 
   9353     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*));
   9354     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
   9355     try self.genSetMem(
   9356         .{ .frame = frame_index },
   9357         @intCast(i32, ptr_ty.abiSize(self.target.*)),
   9358         Type.usize,
   9359         .{ .immediate = array_len },
   9360     );
   9361 
   9362     const result = MCValue{ .load_frame = .{ .index = frame_index } };
   9363     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   9364 }
   9365 
   9366 fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
   9367     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   9368 
   9369     const src_ty = self.air.typeOf(ty_op.operand);
   9370     const src_bits = @intCast(u32, src_ty.bitSize(self.target.*));
   9371     const src_signedness =
   9372         if (src_ty.isAbiInt()) src_ty.intInfo(self.target.*).signedness else .unsigned;
   9373     const dst_ty = self.air.typeOfIndex(inst);
   9374 
   9375     const src_size = std.math.divCeil(u32, @max(switch (src_signedness) {
   9376         .signed => src_bits,
   9377         .unsigned => src_bits + 1,
   9378     }, 32), 8) catch unreachable;
   9379     if (src_size > 8) return self.fail("TODO implement airIntToFloat from {} to {}", .{
   9380         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   9381     });
   9382 
   9383     const src_mcv = try self.resolveInst(ty_op.operand);
   9384     const src_reg = switch (src_mcv) {
   9385         .register => |reg| reg,
   9386         else => try self.copyToTmpRegister(src_ty, src_mcv),
   9387     };
   9388     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   9389     defer self.register_manager.unlockReg(src_lock);
   9390 
   9391     if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);
   9392 
   9393     const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty));
   9394     const dst_mcv = MCValue{ .register = dst_reg };
   9395     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   9396     defer self.register_manager.unlockReg(dst_lock);
   9397 
   9398     try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) {
   9399         32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
   9400             .{ ._, .cvtsi2ss }
   9401         else
   9402             return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{
   9403                 src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   9404             }),
   9405         64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
   9406             .{ ._, .cvtsi2sd }
   9407         else
   9408             return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{
   9409                 src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   9410             }),
   9411         else => return self.fail("TODO implement airIntToFloat from {} to {}", .{
   9412             src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   9413         }),
   9414     }, dst_reg.to128(), registerAlias(src_reg, src_size));
   9415 
   9416     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   9417 }
   9418 
   9419 fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
   9420     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   9421 
   9422     const src_ty = self.air.typeOf(ty_op.operand);
   9423     const dst_ty = self.air.typeOfIndex(inst);
   9424     const operand = try self.resolveInst(ty_op.operand);
   9425     const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
   9426     const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
   9427 
   9428     switch (src_abi_size) {
   9429         4, 8 => {},
   9430         else => |size| return self.fail("TODO load ST(0) with abiSize={}", .{size}),
   9431     }
   9432     if (dst_abi_size > 8) {
   9433         return self.fail("TODO convert float with abiSize={}", .{dst_abi_size});
   9434     }
   9435 
   9436     // move float src to ST(0)
   9437     const frame_addr: FrameAddr = switch (operand) {
   9438         .load_frame => |frame_addr| frame_addr,
   9439         else => frame_addr: {
   9440             const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*));
   9441             try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand);
   9442             break :frame_addr .{ .index = frame_index };
   9443         },
   9444     };
   9445     try self.asmMemory(
   9446         .{ .f_, .ld },
   9447         Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{
   9448             .base = .{ .frame = frame_addr.index },
   9449             .disp = frame_addr.off,
   9450         }),
   9451     );
   9452 
   9453     // convert
   9454     const stack_dst = try self.allocRegOrMem(inst, false);
   9455     try self.asmMemory(
   9456         .{ .f_p, .istt },
   9457         Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{
   9458             .base = .{ .frame = stack_dst.load_frame.index },
   9459             .disp = stack_dst.load_frame.off,
   9460         }),
   9461     );
   9462 
   9463     return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none });
   9464 }
   9465 
   9466 fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
   9467     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   9468     const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data;
   9469 
   9470     const ptr_ty = self.air.typeOf(extra.ptr);
   9471     const val_ty = self.air.typeOf(extra.expected_value);
   9472     const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*));
   9473 
   9474     try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
   9475     const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
   9476     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
   9477 
   9478     const exp_mcv = try self.resolveInst(extra.expected_value);
   9479     if (val_abi_size > 8) {
   9480         try self.genSetReg(.rax, Type.usize, exp_mcv);
   9481         try self.genSetReg(.rdx, Type.usize, exp_mcv.address().offset(8).deref());
   9482     } else try self.genSetReg(.rax, val_ty, exp_mcv);
   9483 
   9484     const new_mcv = try self.resolveInst(extra.new_value);
   9485     const new_reg = if (val_abi_size > 8) new: {
   9486         try self.genSetReg(.rbx, Type.usize, new_mcv);
   9487         try self.genSetReg(.rcx, Type.usize, new_mcv.address().offset(8).deref());
   9488         break :new null;
   9489     } else try self.copyToTmpRegister(val_ty, new_mcv);
   9490     const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
   9491     defer if (new_lock) |lock| self.register_manager.unlockReg(lock);
   9492 
   9493     const ptr_mcv = try self.resolveInst(extra.ptr);
   9494     const ptr_size = Memory.PtrSize.fromSize(val_abi_size);
   9495     const ptr_mem = switch (ptr_mcv) {
   9496         .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size),
   9497         else => Memory.sib(ptr_size, .{
   9498             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
   9499         }),
   9500     };
   9501     switch (ptr_mem) {
   9502         .sib, .rip => {},
   9503         .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
   9504     }
   9505     const ptr_lock = switch (ptr_mem.base()) {
   9506         .none, .frame => null,
   9507         .reg => |reg| self.register_manager.lockReg(reg),
   9508     };
   9509     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   9510 
   9511     try self.spillEflagsIfOccupied();
   9512     if (val_abi_size <= 8) try self.asmMemoryRegister(
   9513         .{ .@"lock _", .cmpxchg },
   9514         ptr_mem,
   9515         registerAlias(new_reg.?, val_abi_size),
   9516     ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
   9517 
   9518     const result: MCValue = result: {
   9519         if (self.liveness.isUnused(inst)) break :result .unreach;
   9520 
   9521         if (val_abi_size <= 8) {
   9522             self.eflags_inst = inst;
   9523             break :result .{ .register_overflow = .{ .reg = .rax, .eflags = .ne } };
   9524         }
   9525 
   9526         const dst_mcv = try self.allocRegOrMem(inst, false);
   9527         try self.genCopy(Type.usize, dst_mcv, .{ .register = .rax });
   9528         try self.genCopy(Type.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx });
   9529         try self.genCopy(Type.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne });
   9530         break :result dst_mcv;
   9531     };
   9532     return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
   9533 }
   9534 
   9535 fn atomicOp(
   9536     self: *Self,
   9537     ptr_mcv: MCValue,
   9538     val_mcv: MCValue,
   9539     ptr_ty: Type,
   9540     val_ty: Type,
   9541     unused: bool,
   9542     rmw_op: ?std.builtin.AtomicRmwOp,
   9543     order: std.builtin.AtomicOrder,
   9544 ) InnerError!MCValue {
   9545     const ptr_lock = switch (ptr_mcv) {
   9546         .register => |reg| self.register_manager.lockReg(reg),
   9547         else => null,
   9548     };
   9549     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   9550 
   9551     const val_lock = switch (val_mcv) {
   9552         .register => |reg| self.register_manager.lockReg(reg),
   9553         else => null,
   9554     };
   9555     defer if (val_lock) |lock| self.register_manager.unlockReg(lock);
   9556 
   9557     const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*));
   9558     const ptr_size = Memory.PtrSize.fromSize(val_abi_size);
   9559     const ptr_mem = switch (ptr_mcv) {
   9560         .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size),
   9561         else => Memory.sib(ptr_size, .{
   9562             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
   9563         }),
   9564     };
   9565     switch (ptr_mem) {
   9566         .sib, .rip => {},
   9567         .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
   9568     }
   9569     const mem_lock = switch (ptr_mem.base()) {
   9570         .none, .frame => null,
   9571         .reg => |reg| self.register_manager.lockReg(reg),
   9572     };
   9573     defer if (mem_lock) |lock| self.register_manager.unlockReg(lock);
   9574 
   9575     const method: enum { lock, loop, libcall } = if (val_ty.isRuntimeFloat())
   9576         .loop
   9577     else switch (rmw_op orelse .Xchg) {
   9578         .Xchg,
   9579         .Add,
   9580         .Sub,
   9581         => if (val_abi_size <= 8) .lock else if (val_abi_size <= 16) .loop else .libcall,
   9582         .And,
   9583         .Or,
   9584         .Xor,
   9585         => if (val_abi_size <= 8 and unused) .lock else if (val_abi_size <= 16) .loop else .libcall,
   9586         .Nand,
   9587         .Max,
   9588         .Min,
   9589         => if (val_abi_size <= 16) .loop else .libcall,
   9590     };
   9591     switch (method) {
   9592         .lock => {
   9593             const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) {
   9594                 .Xchg => if (unused) .mov else .xchg,
   9595                 .Add => if (unused) .add else .xadd,
   9596                 .Sub => if (unused) .sub else .xadd,
   9597                 .And => .@"and",
   9598                 .Or => .@"or",
   9599                 .Xor => .xor,
   9600                 else => unreachable,
   9601             } else switch (order) {
   9602                 .Unordered, .Monotonic, .Release, .AcqRel => .mov,
   9603                 .Acquire => unreachable,
   9604                 .SeqCst => .xchg,
   9605             };
   9606 
   9607             const dst_reg = try self.register_manager.allocReg(null, gp);
   9608             const dst_mcv = MCValue{ .register = dst_reg };
   9609             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   9610             defer self.register_manager.unlockReg(dst_lock);
   9611 
   9612             try self.genSetReg(dst_reg, val_ty, val_mcv);
   9613             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
   9614                 try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv);
   9615             }
   9616             try self.asmMemoryRegister(
   9617                 switch (tag) {
   9618                     .mov, .xchg => .{ ._, tag },
   9619                     .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag },
   9620                     else => unreachable,
   9621                 },
   9622                 ptr_mem,
   9623                 registerAlias(dst_reg, val_abi_size),
   9624             );
   9625 
   9626             return if (unused) .unreach else dst_mcv;
   9627         },
   9628         .loop => _ = if (val_abi_size <= 8) {
   9629             const tmp_reg = try self.register_manager.allocReg(null, gp);
   9630             const tmp_mcv = MCValue{ .register = tmp_reg };
   9631             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   9632             defer self.register_manager.unlockReg(tmp_lock);
   9633 
   9634             try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem);
   9635             const loop = @intCast(u32, self.mir_instructions.len);
   9636             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
   9637                 try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax });
   9638             }
   9639             if (rmw_op) |op| switch (op) {
   9640                 .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv),
   9641                 .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv),
   9642                 .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv),
   9643                 .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv),
   9644                 .Nand => {
   9645                     try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv);
   9646                     try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv);
   9647                 },
   9648                 .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv),
   9649                 .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv),
   9650                 .Min, .Max => {
   9651                     const cc: Condition = switch (if (val_ty.isAbiInt())
   9652                         val_ty.intInfo(self.target.*).signedness
   9653                     else
   9654                         .unsigned) {
   9655                         .unsigned => switch (op) {
   9656                             .Min => .a,
   9657                             .Max => .b,
   9658                             else => unreachable,
   9659                         },
   9660                         .signed => switch (op) {
   9661                             .Min => .g,
   9662                             .Max => .l,
   9663                             else => unreachable,
   9664                         },
   9665                     };
   9666 
   9667                     try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
   9668                     const cmov_abi_size = @max(val_abi_size, 2);
   9669                     switch (val_mcv) {
   9670                         .register => |val_reg| try self.asmCmovccRegisterRegister(
   9671                             registerAlias(tmp_reg, cmov_abi_size),
   9672                             registerAlias(val_reg, cmov_abi_size),
   9673                             cc,
   9674                         ),
   9675                         .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
   9676                             registerAlias(tmp_reg, cmov_abi_size),
   9677                             val_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
   9678                             cc,
   9679                         ),
   9680                         else => {
   9681                             const val_reg = try self.copyToTmpRegister(val_ty, val_mcv);
   9682                             try self.asmCmovccRegisterRegister(
   9683                                 registerAlias(tmp_reg, cmov_abi_size),
   9684                                 registerAlias(val_reg, cmov_abi_size),
   9685                                 cc,
   9686                             );
   9687                         },
   9688                     }
   9689                 },
   9690             };
   9691             try self.asmMemoryRegister(
   9692                 .{ .@"lock _", .cmpxchg },
   9693                 ptr_mem,
   9694                 registerAlias(tmp_reg, val_abi_size),
   9695             );
   9696             _ = try self.asmJccReloc(loop, .ne);
   9697             return if (unused) .unreach else .{ .register = .rax };
   9698         } else {
   9699             try self.asmRegisterMemory(.{ ._, .mov }, .rax, Memory.sib(.qword, .{
   9700                 .base = ptr_mem.sib.base,
   9701                 .scale_index = ptr_mem.scaleIndex(),
   9702                 .disp = ptr_mem.sib.disp + 0,
   9703             }));
   9704             try self.asmRegisterMemory(.{ ._, .mov }, .rdx, Memory.sib(.qword, .{
   9705                 .base = ptr_mem.sib.base,
   9706                 .scale_index = ptr_mem.scaleIndex(),
   9707                 .disp = ptr_mem.sib.disp + 8,
   9708             }));
   9709             const loop = @intCast(u32, self.mir_instructions.len);
   9710             const val_mem_mcv: MCValue = switch (val_mcv) {
   9711                 .memory, .indirect, .load_frame => val_mcv,
   9712                 else => .{ .indirect = .{
   9713                     .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()),
   9714                 } },
   9715             };
   9716             const val_lo_mem = val_mem_mcv.mem(.qword);
   9717             const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword);
   9718             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
   9719                 try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax);
   9720                 try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx);
   9721             }
   9722             if (rmw_op) |op| switch (op) {
   9723                 .Xchg => {
   9724                     try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem);
   9725                     try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem);
   9726                 },
   9727                 .Add => {
   9728                     try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem);
   9729                     try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem);
   9730                 },
   9731                 .Sub => {
   9732                     try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem);
   9733                     try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem);
   9734                 },
   9735                 .And => {
   9736                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
   9737                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
   9738                 },
   9739                 .Nand => {
   9740                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
   9741                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
   9742                     try self.asmRegister(.{ ._, .not }, .rbx);
   9743                     try self.asmRegister(.{ ._, .not }, .rcx);
   9744                 },
   9745                 .Or => {
   9746                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem);
   9747                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem);
   9748                 },
   9749                 .Xor => {
   9750                     try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem);
   9751                     try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem);
   9752                 },
   9753                 else => return self.fail("TODO implement x86 atomic loop for {} {s}", .{
   9754                     val_ty.fmt(self.bin_file.options.module.?), @tagName(op),
   9755                 }),
   9756             };
   9757             try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
   9758             _ = try self.asmJccReloc(loop, .ne);
   9759 
   9760             if (unused) return .unreach;
   9761             const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
   9762             try self.asmMemoryRegister(
   9763                 .{ ._, .mov },
   9764                 Memory.sib(.qword, .{
   9765                     .base = .{ .frame = dst_mcv.load_frame.index },
   9766                     .disp = dst_mcv.load_frame.off + 0,
   9767                 }),
   9768                 .rax,
   9769             );
   9770             try self.asmMemoryRegister(
   9771                 .{ ._, .mov },
   9772                 Memory.sib(.qword, .{
   9773                     .base = .{ .frame = dst_mcv.load_frame.index },
   9774                     .disp = dst_mcv.load_frame.off + 8,
   9775                 }),
   9776                 .rdx,
   9777             );
   9778             return dst_mcv;
   9779         },
   9780         .libcall => return self.fail("TODO implement x86 atomic libcall", .{}),
   9781     }
   9782 }
   9783 
   9784 fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
   9785     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   9786     const extra = self.air.extraData(Air.AtomicRmw, pl_op.payload).data;
   9787 
   9788     try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
   9789     const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
   9790     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
   9791 
   9792     const unused = self.liveness.isUnused(inst);
   9793 
   9794     const ptr_ty = self.air.typeOf(pl_op.operand);
   9795     const ptr_mcv = try self.resolveInst(pl_op.operand);
   9796 
   9797     const val_ty = self.air.typeOf(extra.operand);
   9798     const val_mcv = try self.resolveInst(extra.operand);
   9799 
   9800     const result =
   9801         try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
   9802     return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none });
   9803 }
   9804 
   9805 fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
   9806     const atomic_load = self.air.instructions.items(.data)[inst].atomic_load;
   9807 
   9808     const ptr_ty = self.air.typeOf(atomic_load.ptr);
   9809     const ptr_mcv = try self.resolveInst(atomic_load.ptr);
   9810     const ptr_lock = switch (ptr_mcv) {
   9811         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9812         else => null,
   9813     };
   9814     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   9815 
   9816     const dst_mcv =
   9817         if (self.reuseOperand(inst, atomic_load.ptr, 0, ptr_mcv))
   9818         ptr_mcv
   9819     else
   9820         try self.allocRegOrMem(inst, true);
   9821 
   9822     try self.load(dst_mcv, ptr_ty, ptr_mcv);
   9823     return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none });
   9824 }
   9825 
   9826 fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
   9827     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   9828 
   9829     const ptr_ty = self.air.typeOf(bin_op.lhs);
   9830     const ptr_mcv = try self.resolveInst(bin_op.lhs);
   9831 
   9832     const val_ty = self.air.typeOf(bin_op.rhs);
   9833     const val_mcv = try self.resolveInst(bin_op.rhs);
   9834 
   9835     const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
   9836     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   9837 }
   9838 
   9839 fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
   9840     if (safety) {
   9841         // TODO if the value is undef, write 0xaa bytes to dest
   9842     } else {
   9843         // TODO if the value is undef, don't lower this instruction
   9844     }
   9845 
   9846     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   9847 
   9848     const dst_ptr = try self.resolveInst(bin_op.lhs);
   9849     const dst_ptr_ty = self.air.typeOf(bin_op.lhs);
   9850     const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) {
   9851         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9852         else => null,
   9853     };
   9854     defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   9855 
   9856     const src_val = try self.resolveInst(bin_op.rhs);
   9857     const elem_ty = self.air.typeOf(bin_op.rhs);
   9858     const src_val_lock: ?RegisterLock = switch (src_val) {
   9859         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9860         else => null,
   9861     };
   9862     defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock);
   9863 
   9864     const elem_abi_size = @intCast(u31, elem_ty.abiSize(self.target.*));
   9865 
   9866     if (elem_abi_size == 1) {
   9867         const ptr: MCValue = switch (dst_ptr_ty.ptrSize()) {
   9868             // TODO: this only handles slices stored in the stack
   9869             .Slice => dst_ptr,
   9870             .One => dst_ptr,
   9871             .C, .Many => unreachable,
   9872         };
   9873         const len: MCValue = switch (dst_ptr_ty.ptrSize()) {
   9874             // TODO: this only handles slices stored in the stack
   9875             .Slice => dst_ptr.address().offset(8).deref(),
   9876             .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() },
   9877             .C, .Many => unreachable,
   9878         };
   9879         const len_lock: ?RegisterLock = switch (len) {
   9880             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9881             else => null,
   9882         };
   9883         defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
   9884 
   9885         try self.genInlineMemset(ptr, src_val, len);
   9886         return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
   9887     }
   9888 
   9889     // Store the first element, and then rely on memcpy copying forwards.
   9890     // Length zero requires a runtime check - so we handle arrays specially
   9891     // here to elide it.
   9892     switch (dst_ptr_ty.ptrSize()) {
   9893         .Slice => {
   9894             var buf: Type.SlicePtrFieldTypeBuffer = undefined;
   9895             const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(&buf);
   9896 
   9897             // TODO: this only handles slices stored in the stack
   9898             const ptr = dst_ptr;
   9899             const len = dst_ptr.address().offset(8).deref();
   9900 
   9901             // Used to store the number of elements for comparison.
   9902             // After comparison, updated to store number of bytes needed to copy.
   9903             const len_reg = try self.register_manager.allocReg(null, gp);
   9904             const len_mcv: MCValue = .{ .register = len_reg };
   9905             const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
   9906             defer self.register_manager.unlockReg(len_lock);
   9907 
   9908             try self.genSetReg(len_reg, Type.usize, len);
   9909 
   9910             const skip_reloc = try self.asmJccReloc(undefined, .z);
   9911             try self.store(slice_ptr_ty, ptr, src_val);
   9912 
   9913             const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
   9914             const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
   9915             const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
   9916             defer self.register_manager.unlockReg(second_elem_ptr_lock);
   9917 
   9918             try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
   9919                 .reg = try self.copyToTmpRegister(Type.usize, ptr),
   9920                 .off = elem_abi_size,
   9921             } });
   9922 
   9923             try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 });
   9924             try self.asmRegisterRegisterImmediate(
   9925                 .{ .i_, .mul },
   9926                 len_reg,
   9927                 len_reg,
   9928                 Immediate.u(elem_abi_size),
   9929             );
   9930             try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv);
   9931 
   9932             try self.performReloc(skip_reloc);
   9933         },
   9934         .One => {
   9935             var elem_ptr_pl = Type.Payload.ElemType{
   9936                 .base = .{ .tag = .single_mut_pointer },
   9937                 .data = elem_ty,
   9938             };
   9939             const elem_ptr_ty = Type.initPayload(&elem_ptr_pl.base);
   9940 
   9941             const len = dst_ptr_ty.childType().arrayLen();
   9942 
   9943             assert(len != 0); // prevented by Sema
   9944             try self.store(elem_ptr_ty, dst_ptr, src_val);
   9945 
   9946             const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
   9947             const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
   9948             const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
   9949             defer self.register_manager.unlockReg(second_elem_ptr_lock);
   9950 
   9951             try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
   9952                 .reg = try self.copyToTmpRegister(Type.usize, dst_ptr),
   9953                 .off = elem_abi_size,
   9954             } });
   9955 
   9956             const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
   9957             try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy);
   9958         },
   9959         .C, .Many => unreachable,
   9960     }
   9961 
   9962     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
   9963 }
   9964 
   9965 fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
   9966     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   9967 
   9968     const dst_ptr = try self.resolveInst(bin_op.lhs);
   9969     const dst_ptr_ty = self.air.typeOf(bin_op.lhs);
   9970     const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) {
   9971         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9972         else => null,
   9973     };
   9974     defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   9975 
   9976     const src_ptr = try self.resolveInst(bin_op.rhs);
   9977     const src_ptr_lock: ?RegisterLock = switch (src_ptr) {
   9978         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9979         else => null,
   9980     };
   9981     defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   9982 
   9983     const len: MCValue = switch (dst_ptr_ty.ptrSize()) {
   9984         .Slice => dst_ptr.address().offset(8).deref(),
   9985         .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() },
   9986         .C, .Many => unreachable,
   9987     };
   9988     const len_lock: ?RegisterLock = switch (len) {
   9989         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   9990         else => null,
   9991     };
   9992     defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
   9993 
   9994     // TODO: dst_ptr and src_ptr could be slices rather than raw pointers
   9995     try self.genInlineMemcpy(dst_ptr, src_ptr, len);
   9996 
   9997     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
   9998 }
   9999 
  10000 fn airTagName(self: *Self, inst: Air.Inst.Index) !void {
  10001     const mod = self.bin_file.options.module.?;
  10002     const un_op = self.air.instructions.items(.data)[inst].un_op;
  10003     const inst_ty = self.air.typeOfIndex(inst);
  10004     const enum_ty = self.air.typeOf(un_op);
  10005 
  10006     // We need a properly aligned and sized call frame to be able to call this function.
  10007     {
  10008         const needed_call_frame = FrameAlloc.init(.{
  10009             .size = inst_ty.abiSize(self.target.*),
  10010             .alignment = inst_ty.abiAlignment(self.target.*),
  10011         });
  10012         const frame_allocs_slice = self.frame_allocs.slice();
  10013         const stack_frame_size =
  10014             &frame_allocs_slice.items(.abi_size)[@enumToInt(FrameIndex.call_frame)];
  10015         stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
  10016         const stack_frame_align =
  10017             &frame_allocs_slice.items(.abi_align)[@enumToInt(FrameIndex.call_frame)];
  10018         stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align);
  10019     }
  10020 
  10021     try self.spillEflagsIfOccupied();
  10022     try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*));
  10023 
  10024     const param_regs = abi.getCAbiIntParamRegs(self.target.*);
  10025 
  10026     const dst_mcv = try self.allocRegOrMem(inst, false);
  10027     try self.genSetReg(param_regs[0], Type.usize, dst_mcv.address());
  10028 
  10029     const operand = try self.resolveInst(un_op);
  10030     try self.genSetReg(param_regs[1], enum_ty, operand);
  10031 
  10032     try self.genLazySymbolRef(
  10033         .call,
  10034         .rax,
  10035         link.File.LazySymbol.initDecl(.code, enum_ty.getOwnerDecl(), mod),
  10036     );
  10037 
  10038     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  10039 }
  10040 
  10041 fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
  10042     const mod = self.bin_file.options.module.?;
  10043     const un_op = self.air.instructions.items(.data)[inst].un_op;
  10044 
  10045     const err_ty = self.air.typeOf(un_op);
  10046     const err_mcv = try self.resolveInst(un_op);
  10047     const err_reg = try self.copyToTmpRegister(err_ty, err_mcv);
  10048     const err_lock = self.register_manager.lockRegAssumeUnused(err_reg);
  10049     defer self.register_manager.unlockReg(err_lock);
  10050 
  10051     const addr_reg = try self.register_manager.allocReg(null, gp);
  10052     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  10053     defer self.register_manager.unlockReg(addr_lock);
  10054     try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod));
  10055 
  10056     const start_reg = try self.register_manager.allocReg(null, gp);
  10057     const start_lock = self.register_manager.lockRegAssumeUnused(start_reg);
  10058     defer self.register_manager.unlockReg(start_lock);
  10059 
  10060     const end_reg = try self.register_manager.allocReg(null, gp);
  10061     const end_lock = self.register_manager.lockRegAssumeUnused(end_reg);
  10062     defer self.register_manager.unlockReg(end_lock);
  10063 
  10064     try self.truncateRegister(err_ty, err_reg.to32());
  10065 
  10066     try self.asmRegisterMemory(
  10067         .{ ._, .mov },
  10068         start_reg.to32(),
  10069         Memory.sib(.dword, .{
  10070             .base = .{ .reg = addr_reg.to64() },
  10071             .scale_index = .{ .scale = 4, .index = err_reg.to64() },
  10072             .disp = 4,
  10073         }),
  10074     );
  10075     try self.asmRegisterMemory(
  10076         .{ ._, .mov },
  10077         end_reg.to32(),
  10078         Memory.sib(.dword, .{
  10079             .base = .{ .reg = addr_reg.to64() },
  10080             .scale_index = .{ .scale = 4, .index = err_reg.to64() },
  10081             .disp = 8,
  10082         }),
  10083     );
  10084     try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32());
  10085     try self.asmRegisterMemory(
  10086         .{ ._, .lea },
  10087         start_reg.to64(),
  10088         Memory.sib(.byte, .{
  10089             .base = .{ .reg = addr_reg.to64() },
  10090             .scale_index = .{ .scale = 1, .index = start_reg.to64() },
  10091             .disp = 0,
  10092         }),
  10093     );
  10094     try self.asmRegisterMemory(
  10095         .{ ._, .lea },
  10096         end_reg.to32(),
  10097         Memory.sib(.byte, .{
  10098             .base = .{ .reg = end_reg.to64() },
  10099             .disp = -1,
  10100         }),
  10101     );
  10102 
  10103     const dst_mcv = try self.allocRegOrMem(inst, false);
  10104     try self.asmMemoryRegister(
  10105         .{ ._, .mov },
  10106         Memory.sib(.qword, .{
  10107             .base = .{ .frame = dst_mcv.load_frame.index },
  10108             .disp = dst_mcv.load_frame.off,
  10109         }),
  10110         start_reg.to64(),
  10111     );
  10112     try self.asmMemoryRegister(
  10113         .{ ._, .mov },
  10114         Memory.sib(.qword, .{
  10115             .base = .{ .frame = dst_mcv.load_frame.index },
  10116             .disp = dst_mcv.load_frame.off + 8,
  10117         }),
  10118         end_reg.to64(),
  10119     );
  10120 
  10121     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  10122 }
  10123 
  10124 fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
  10125     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  10126     const vector_ty = self.air.typeOfIndex(inst);
  10127     const dst_rc = regClassForType(vector_ty);
  10128     const scalar_ty = vector_ty.scalarType();
  10129 
  10130     const src_mcv = try self.resolveInst(ty_op.operand);
  10131     const result: MCValue = result: {
  10132         switch (scalar_ty.zigTypeTag()) {
  10133             else => {},
  10134             .Float => switch (scalar_ty.floatBits(self.target.*)) {
  10135                 32 => switch (vector_ty.vectorLen()) {
  10136                     1 => {
  10137                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  10138                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10139                         try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  10140                         break :result .{ .register = dst_reg };
  10141                     },
  10142                     2...4 => {
  10143                         if (self.hasFeature(.avx)) {
  10144                             const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10145                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
  10146                                 .{ .v_ss, .broadcast },
  10147                                 dst_reg.to128(),
  10148                                 src_mcv.mem(.dword),
  10149                             ) else {
  10150                                 const src_reg = if (src_mcv.isRegister())
  10151                                     src_mcv.getReg().?
  10152                                 else
  10153                                     try self.copyToTmpRegister(scalar_ty, src_mcv);
  10154                                 try self.asmRegisterRegisterRegisterImmediate(
  10155                                     .{ .v_ps, .shuf },
  10156                                     dst_reg.to128(),
  10157                                     src_reg.to128(),
  10158                                     src_reg.to128(),
  10159                                     Immediate.u(0),
  10160                                 );
  10161                             }
  10162                             break :result .{ .register = dst_reg };
  10163                         } else {
  10164                             const dst_mcv = if (src_mcv.isRegister() and
  10165                                 self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10166                                 src_mcv
  10167                             else
  10168                                 try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
  10169                             const dst_reg = dst_mcv.getReg().?;
  10170                             try self.asmRegisterRegisterImmediate(
  10171                                 .{ ._ps, .shuf },
  10172                                 dst_reg.to128(),
  10173                                 dst_reg.to128(),
  10174                                 Immediate.u(0),
  10175                             );
  10176                             break :result dst_mcv;
  10177                         }
  10178                     },
  10179                     5...8 => if (self.hasFeature(.avx)) {
  10180                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10181                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
  10182                             .{ .v_ss, .broadcast },
  10183                             dst_reg.to256(),
  10184                             src_mcv.mem(.dword),
  10185                         ) else {
  10186                             const src_reg = if (src_mcv.isRegister())
  10187                                 src_mcv.getReg().?
  10188                             else
  10189                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  10190                             if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
  10191                                 .{ .v_ss, .broadcast },
  10192                                 dst_reg.to256(),
  10193                                 src_reg.to128(),
  10194                             ) else {
  10195                                 try self.asmRegisterRegisterRegisterImmediate(
  10196                                     .{ .v_ps, .shuf },
  10197                                     dst_reg.to128(),
  10198                                     src_reg.to128(),
  10199                                     src_reg.to128(),
  10200                                     Immediate.u(0),
  10201                                 );
  10202                                 try self.asmRegisterRegisterRegisterImmediate(
  10203                                     .{ .v_f128, .insert },
  10204                                     dst_reg.to256(),
  10205                                     dst_reg.to256(),
  10206                                     dst_reg.to128(),
  10207                                     Immediate.u(1),
  10208                                 );
  10209                             }
  10210                         }
  10211                         break :result .{ .register = dst_reg };
  10212                     },
  10213                     else => {},
  10214                 },
  10215                 64 => switch (vector_ty.vectorLen()) {
  10216                     1 => {
  10217                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  10218                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10219                         try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  10220                         break :result .{ .register = dst_reg };
  10221                     },
  10222                     2 => {
  10223                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10224                         if (self.hasFeature(.sse3)) {
  10225                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
  10226                                 if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
  10227                                 dst_reg.to128(),
  10228                                 src_mcv.mem(.qword),
  10229                             ) else try self.asmRegisterRegister(
  10230                                 if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
  10231                                 dst_reg.to128(),
  10232                                 (if (src_mcv.isRegister())
  10233                                     src_mcv.getReg().?
  10234                                 else
  10235                                     try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
  10236                             );
  10237                             break :result .{ .register = dst_reg };
  10238                         } else try self.asmRegisterRegister(
  10239                             .{ ._ps, .movlh },
  10240                             dst_reg.to128(),
  10241                             (if (src_mcv.isRegister())
  10242                                 src_mcv.getReg().?
  10243                             else
  10244                                 try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
  10245                         );
  10246                     },
  10247                     3...4 => if (self.hasFeature(.avx)) {
  10248                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10249                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
  10250                             .{ .v_sd, .broadcast },
  10251                             dst_reg.to256(),
  10252                             src_mcv.mem(.qword),
  10253                         ) else {
  10254                             const src_reg = if (src_mcv.isRegister())
  10255                                 src_mcv.getReg().?
  10256                             else
  10257                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  10258                             if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
  10259                                 .{ .v_sd, .broadcast },
  10260                                 dst_reg.to256(),
  10261                                 src_reg.to128(),
  10262                             ) else {
  10263                                 try self.asmRegisterRegister(
  10264                                     .{ .v_, .movddup },
  10265                                     dst_reg.to128(),
  10266                                     src_reg.to128(),
  10267                                 );
  10268                                 try self.asmRegisterRegisterRegisterImmediate(
  10269                                     .{ .v_f128, .insert },
  10270                                     dst_reg.to256(),
  10271                                     dst_reg.to256(),
  10272                                     dst_reg.to128(),
  10273                                     Immediate.u(1),
  10274                                 );
  10275                             }
  10276                         }
  10277                         break :result .{ .register = dst_reg };
  10278                     },
  10279                     else => {},
  10280                 },
  10281                 128 => switch (vector_ty.vectorLen()) {
  10282                     1 => {
  10283                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  10284                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10285                         try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  10286                         break :result .{ .register = dst_reg };
  10287                     },
  10288                     2 => if (self.hasFeature(.avx)) {
  10289                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  10290                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
  10291                             .{ .v_f128, .broadcast },
  10292                             dst_reg.to256(),
  10293                             src_mcv.mem(.xword),
  10294                         ) else {
  10295                             const src_reg = if (src_mcv.isRegister())
  10296                                 src_mcv.getReg().?
  10297                             else
  10298                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  10299                             try self.asmRegisterRegisterRegisterImmediate(
  10300                                 .{ .v_f128, .insert },
  10301                                 dst_reg.to256(),
  10302                                 src_reg.to256(),
  10303                                 src_reg.to128(),
  10304                                 Immediate.u(1),
  10305                             );
  10306                         }
  10307                         break :result .{ .register = dst_reg };
  10308                     },
  10309                     else => {},
  10310                 },
  10311                 16, 80 => {},
  10312                 else => unreachable,
  10313             },
  10314         }
  10315         return self.fail("TODO implement airSplat for {}", .{
  10316             vector_ty.fmt(self.bin_file.options.module.?),
  10317         });
  10318     };
  10319     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  10320 }
  10321 
  10322 fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
  10323     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
  10324     const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
  10325     _ = extra;
  10326     return self.fail("TODO implement airSelect for x86_64", .{});
  10327     //return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
  10328 }
  10329 
  10330 fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
  10331     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  10332     _ = ty_op;
  10333     return self.fail("TODO implement airShuffle for x86_64", .{});
  10334     //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  10335 }
  10336 
  10337 fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
  10338     const reduce = self.air.instructions.items(.data)[inst].reduce;
  10339     _ = reduce;
  10340     return self.fail("TODO implement airReduce for x86_64", .{});
  10341     //return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
  10342 }
  10343 
  10344 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
  10345     const result_ty = self.air.typeOfIndex(inst);
  10346     const len = @intCast(usize, result_ty.arrayLen());
  10347     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
  10348     const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]);
  10349     const result: MCValue = result: {
  10350         switch (result_ty.zigTypeTag()) {
  10351             .Struct => {
  10352                 const frame_index =
  10353                     try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*));
  10354                 if (result_ty.containerLayout() == .Packed) {
  10355                     const struct_obj = result_ty.castTag(.@"struct").?.data;
  10356                     try self.genInlineMemset(
  10357                         .{ .lea_frame = .{ .index = frame_index } },
  10358                         .{ .immediate = 0 },
  10359                         .{ .immediate = result_ty.abiSize(self.target.*) },
  10360                     );
  10361                     for (elements, 0..) |elem, elem_i| {
  10362                         if (result_ty.structFieldValueComptime(elem_i) != null) continue;
  10363 
  10364                         const elem_ty = result_ty.structFieldType(elem_i);
  10365                         const elem_bit_size = @intCast(u32, elem_ty.bitSize(self.target.*));
  10366                         if (elem_bit_size > 64) {
  10367                             return self.fail(
  10368                                 "TODO airAggregateInit implement packed structs with large fields",
  10369                                 .{},
  10370                             );
  10371                         }
  10372                         const elem_abi_size = @intCast(u32, elem_ty.abiSize(self.target.*));
  10373                         const elem_abi_bits = elem_abi_size * 8;
  10374                         const elem_off = struct_obj.packedFieldBitOffset(self.target.*, elem_i);
  10375                         const elem_byte_off = @intCast(i32, elem_off / elem_abi_bits * elem_abi_size);
  10376                         const elem_bit_off = elem_off % elem_abi_bits;
  10377                         const elem_mcv = try self.resolveInst(elem);
  10378                         const mat_elem_mcv = switch (elem_mcv) {
  10379                             .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  10380                             else => elem_mcv,
  10381                         };
  10382                         const elem_lock = switch (mat_elem_mcv) {
  10383                             .register => |reg| self.register_manager.lockReg(reg),
  10384                             .immediate => |imm| lock: {
  10385                                 if (imm == 0) continue;
  10386                                 break :lock null;
  10387                             },
  10388                             else => null,
  10389                         };
  10390                         defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
  10391                         const elem_reg = registerAlias(
  10392                             try self.copyToTmpRegister(elem_ty, mat_elem_mcv),
  10393                             elem_abi_size,
  10394                         );
  10395                         const elem_extra_bits = self.regExtraBits(elem_ty);
  10396                         if (elem_bit_off < elem_extra_bits) {
  10397                             try self.truncateRegister(elem_ty, elem_reg);
  10398                         }
  10399                         if (elem_bit_off > 0) try self.genShiftBinOpMir(
  10400                             .{ ._l, .sh },
  10401                             elem_ty,
  10402                             .{ .register = elem_reg },
  10403                             .{ .immediate = elem_bit_off },
  10404                         );
  10405                         try self.genBinOpMir(
  10406                             .{ ._, .@"or" },
  10407                             elem_ty,
  10408                             .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
  10409                             .{ .register = elem_reg },
  10410                         );
  10411                         if (elem_bit_off > elem_extra_bits) {
  10412                             const reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv);
  10413                             if (elem_extra_bits > 0) {
  10414                                 try self.truncateRegister(elem_ty, registerAlias(reg, elem_abi_size));
  10415                             }
  10416                             try self.genShiftBinOpMir(
  10417                                 .{ ._r, .sh },
  10418                                 elem_ty,
  10419                                 .{ .register = reg },
  10420                                 .{ .immediate = elem_abi_bits - elem_bit_off },
  10421                             );
  10422                             try self.genBinOpMir(
  10423                                 .{ ._, .@"or" },
  10424                                 elem_ty,
  10425                                 .{ .load_frame = .{
  10426                                     .index = frame_index,
  10427                                     .off = elem_byte_off + @intCast(i32, elem_abi_size),
  10428                                 } },
  10429                                 .{ .register = reg },
  10430                             );
  10431                         }
  10432                     }
  10433                 } else for (elements, 0..) |elem, elem_i| {
  10434                     if (result_ty.structFieldValueComptime(elem_i) != null) continue;
  10435 
  10436                     const elem_ty = result_ty.structFieldType(elem_i);
  10437                     const elem_off = @intCast(i32, result_ty.structFieldOffset(elem_i, self.target.*));
  10438                     const elem_mcv = try self.resolveInst(elem);
  10439                     const mat_elem_mcv = switch (elem_mcv) {
  10440                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  10441                         else => elem_mcv,
  10442                     };
  10443                     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
  10444                 }
  10445                 break :result .{ .load_frame = .{ .index = frame_index } };
  10446             },
  10447             .Array => {
  10448                 const frame_index =
  10449                     try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*));
  10450                 const elem_ty = result_ty.childType();
  10451                 const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*));
  10452 
  10453                 for (elements, 0..) |elem, elem_i| {
  10454                     const elem_mcv = try self.resolveInst(elem);
  10455                     const mat_elem_mcv = switch (elem_mcv) {
  10456                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  10457                         else => elem_mcv,
  10458                     };
  10459                     const elem_off = @intCast(i32, elem_size * elem_i);
  10460                     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
  10461                 }
  10462                 break :result .{ .load_frame = .{ .index = frame_index } };
  10463             },
  10464             .Vector => return self.fail("TODO implement aggregate_init for vectors", .{}),
  10465             else => unreachable,
  10466         }
  10467     };
  10468 
  10469     if (elements.len <= Liveness.bpi - 1) {
  10470         var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
  10471         @memcpy(buf[0..elements.len], elements);
  10472         return self.finishAir(inst, result, buf);
  10473     }
  10474     var bt = self.liveness.iterateBigTomb(inst);
  10475     for (elements) |elem| self.feed(&bt, elem);
  10476     return self.finishAirResult(inst, result);
  10477 }
  10478 
  10479 fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
  10480     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
  10481     const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data;
  10482     const result: MCValue = result: {
  10483         const union_ty = self.air.typeOfIndex(inst);
  10484         const layout = union_ty.unionGetLayout(self.target.*);
  10485 
  10486         const src_ty = self.air.typeOf(extra.init);
  10487         const src_mcv = try self.resolveInst(extra.init);
  10488         if (layout.tag_size == 0) {
  10489             if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
  10490 
  10491             const dst_mcv = try self.allocRegOrMem(inst, true);
  10492             try self.genCopy(src_ty, dst_mcv, src_mcv);
  10493             break :result dst_mcv;
  10494         }
  10495 
  10496         const dst_mcv = try self.allocRegOrMem(inst, false);
  10497 
  10498         const union_obj = union_ty.cast(Type.Payload.Union).?.data;
  10499         const field_name = union_obj.fields.keys()[extra.field_index];
  10500         const tag_ty = union_ty.unionTagTypeSafety().?;
  10501         const field_index = @intCast(u32, tag_ty.enumFieldIndex(field_name).?);
  10502         var tag_pl = Value.Payload.U32{ .base = .{ .tag = .enum_field_index }, .data = field_index };
  10503         const tag_val = Value.initPayload(&tag_pl.base);
  10504         var tag_int_pl: Value.Payload.U64 = undefined;
  10505         const tag_int_val = tag_val.enumToInt(tag_ty, &tag_int_pl);
  10506         const tag_int = tag_int_val.toUnsignedInt(self.target.*);
  10507         const tag_off = if (layout.tag_align < layout.payload_align)
  10508             @intCast(i32, layout.payload_size)
  10509         else
  10510             0;
  10511         try self.genCopy(tag_ty, dst_mcv.address().offset(tag_off).deref(), .{ .immediate = tag_int });
  10512 
  10513         const pl_off = if (layout.tag_align < layout.payload_align)
  10514             0
  10515         else
  10516             @intCast(i32, layout.tag_size);
  10517         try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv);
  10518 
  10519         break :result dst_mcv;
  10520     };
  10521     return self.finishAir(inst, result, .{ extra.init, .none, .none });
  10522 }
  10523 
  10524 fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
  10525     const prefetch = self.air.instructions.items(.data)[inst].prefetch;
  10526     return self.finishAir(inst, .unreach, .{ prefetch.ptr, .none, .none });
  10527 }
  10528 
/// Lowers `mul_add` to a single x86 FMA instruction.
///
/// Requires the `fma` CPU feature. Without it, and for any type that has no
/// matching `fmadd*` entry below (16/80/128-bit floats, vectors longer than
/// 8 x f32 or 4 x f64), codegen fails with a TODO error.
///
/// Each of the three AIR operands (`extra.lhs`, `extra.rhs`, `pl_op.operand`)
/// is assigned a machine operand position 1..3, and the `fmadd132`,
/// `fmadd213` or `fmadd231` form is chosen to match that assignment. The
/// instruction's result is the MCValue that ends up in machine operand 1.
fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
    const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
    const ty = self.air.typeOfIndex(inst);

    if (!self.hasFeature(.fma)) return self.fail("TODO implement airMulAdd for {}", .{
        ty.fmt(self.bin_file.options.module.?),
    });

    const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand };
    var mcvs: [3]MCValue = undefined;
    var locks = [1]?RegisterManager.RegisterLock{null} ** 3;
    // Release every register lock still held when the function exits.
    defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
    // `order[i]` is the machine operand position (1..3) chosen for `ops[i]`;
    // 0 means "not assigned yet".
    var order = [1]u2{0} ** 3;
    // Bit `n` is set while machine operand position `n + 1` is still free.
    var unused = std.StaticBitSet(3).initFull();
    // First pass: hand out the two special positions.
    //  - position 1 goes to the first register operand that `reuseOperand` accepts;
    //  - position 3 goes to the first operand for which `isMemory()` holds
    //    (position 3 is the only one emitted as a memory operand below).
    for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| {
        const op_index = @intCast(u2, op_i);
        mcv.* = try self.resolveInst(op);
        if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) {
            order[op_index] = 1;
            unused.unset(0);
        } else if (unused.isSet(2) and mcv.isMemory()) {
            order[op_index] = 3;
            unused.unset(2);
        }
        // Lock register operands so the temporaries allocated below cannot take them.
        switch (mcv.*) {
            .register => |reg| lock.* = self.register_manager.lockReg(reg),
            else => {},
        }
    }
    // Second pass: give every still-unassigned operand the lowest free position.
    // An operand already in a register may stay there unless it landed in
    // position 1; everything else is copied into a temporary register, and its
    // lock is replaced by a lock on that temporary.
    for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| {
        if (mop_index.* != 0) continue;
        mop_index.* = 1 + @intCast(u2, unused.toggleFirstSet().?);
        if (mop_index.* > 1 and mcv.isRegister()) continue;
        const reg = try self.copyToTmpRegister(ty, mcv.*);
        mcv.* = .{ .register = reg };
        if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock);
        lock.* = self.register_manager.lockRegAssumeUnused(reg);
    }

    // Pick the FMA form that matches the operand assignment. The three branches
    // are identical except for the mnemonic.
    // NOTE(review): in Intel's naming, fmaddXYZ computes op_X * op_Y + op_Z into
    // operand 1; the `order` patterns below are consistent with `pl_op.operand`
    // being the addend and lhs/rhs the factors — confirm against the AIR
    // `mul_add` definition.
    const mir_tag = if (@as(
        ?Mir.Inst.FixedTag,
        if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 }))
            switch (ty.zigTypeTag()) {
                .Float => switch (ty.floatBits(self.target.*)) {
                    32 => .{ .v_ss, .fmadd132 },
                    64 => .{ .v_sd, .fmadd132 },
                    16, 80, 128 => null,
                    else => unreachable,
                },
                .Vector => switch (ty.childType().zigTypeTag()) {
                    .Float => switch (ty.childType().floatBits(self.target.*)) {
                        32 => switch (ty.vectorLen()) {
                            1 => .{ .v_ss, .fmadd132 },
                            2...8 => .{ .v_ps, .fmadd132 },
                            else => null,
                        },
                        64 => switch (ty.vectorLen()) {
                            1 => .{ .v_sd, .fmadd132 },
                            2...4 => .{ .v_pd, .fmadd132 },
                            else => null,
                        },
                        16, 80, 128 => null,
                        else => unreachable,
                    },
                    else => unreachable,
                },
                else => unreachable,
            }
        else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 }))
            switch (ty.zigTypeTag()) {
                .Float => switch (ty.floatBits(self.target.*)) {
                    32 => .{ .v_ss, .fmadd213 },
                    64 => .{ .v_sd, .fmadd213 },
                    16, 80, 128 => null,
                    else => unreachable,
                },
                .Vector => switch (ty.childType().zigTypeTag()) {
                    .Float => switch (ty.childType().floatBits(self.target.*)) {
                        32 => switch (ty.vectorLen()) {
                            1 => .{ .v_ss, .fmadd213 },
                            2...8 => .{ .v_ps, .fmadd213 },
                            else => null,
                        },
                        64 => switch (ty.vectorLen()) {
                            1 => .{ .v_sd, .fmadd213 },
                            2...4 => .{ .v_pd, .fmadd213 },
                            else => null,
                        },
                        16, 80, 128 => null,
                        else => unreachable,
                    },
                    else => unreachable,
                },
                else => unreachable,
            }
        else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 }))
            switch (ty.zigTypeTag()) {
                .Float => switch (ty.floatBits(self.target.*)) {
                    32 => .{ .v_ss, .fmadd231 },
                    64 => .{ .v_sd, .fmadd231 },
                    16, 80, 128 => null,
                    else => unreachable,
                },
                .Vector => switch (ty.childType().zigTypeTag()) {
                    .Float => switch (ty.childType().floatBits(self.target.*)) {
                        32 => switch (ty.vectorLen()) {
                            1 => .{ .v_ss, .fmadd231 },
                            2...8 => .{ .v_ps, .fmadd231 },
                            else => null,
                        },
                        64 => switch (ty.vectorLen()) {
                            1 => .{ .v_sd, .fmadd231 },
                            2...4 => .{ .v_pd, .fmadd231 },
                            else => null,
                        },
                        16, 80, 128 => null,
                        else => unreachable,
                    },
                    else => unreachable,
                },
                else => unreachable,
            }
        else
            // `order` is always a permutation of {1, 2, 3} at this point.
            unreachable,
    )) |tag| tag else return self.fail("TODO implement airMulAdd for {}", .{
        ty.fmt(self.bin_file.options.module.?),
    });

    // Invert the permutation: `mops[p - 1]` is the MCValue in machine position `p`.
    var mops: [3]MCValue = undefined;
    for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv;

    const abi_size = @intCast(u32, ty.abiSize(self.target.*));
    // Machine operands 1 and 2 are always registers here; only operand 3 may be memory.
    const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
    const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
    if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
        mir_tag,
        mop1_reg,
        mop2_reg,
        registerAlias(mops[2].getReg().?, abi_size),
    ) else try self.asmRegisterRegisterMemory(
        mir_tag,
        mop1_reg,
        mop2_reg,
        mops[2].mem(Memory.PtrSize.fromSize(abi_size)),
    );
    // Machine operand 1 is reported as the result of the instruction.
    return self.finishAir(inst, mops[0], ops);
}
  10677 
  10678 fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue {
  10679     const ty = self.air.typeOf(ref);
  10680 
  10681     // If the type has no codegen bits, no need to store it.
  10682     if (!ty.hasRuntimeBitsIgnoreComptime()) return .none;
  10683 
  10684     if (Air.refToIndex(ref)) |inst| {
  10685         const mcv = switch (self.air.instructions.items(.tag)[inst]) {
  10686             .constant => tracking: {
  10687                 const gop = try self.const_tracking.getOrPut(self.gpa, inst);
  10688                 if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(try self.genTypedValue(.{
  10689                     .ty = ty,
  10690                     .val = self.air.value(ref).?,
  10691                 }));
  10692                 break :tracking gop.value_ptr;
  10693             },
  10694             .const_ty => unreachable,
  10695             else => self.inst_tracking.getPtr(inst).?,
  10696         }.short;
  10697         switch (mcv) {
  10698             .none, .unreach, .dead => unreachable,
  10699             else => return mcv,
  10700         }
  10701     }
  10702 
  10703     return self.genTypedValue(.{ .ty = ty, .val = self.air.value(ref).? });
  10704 }
  10705 
  10706 fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) *InstTracking {
  10707     const tracking = switch (self.air.instructions.items(.tag)[inst]) {
  10708         .constant => &self.const_tracking,
  10709         .const_ty => unreachable,
  10710         else => &self.inst_tracking,
  10711     }.getPtr(inst).?;
  10712     return switch (tracking.short) {
  10713         .none, .unreach, .dead => unreachable,
  10714         else => tracking,
  10715     };
  10716 }
  10717 
  10718 /// If the MCValue is an immediate, and it does not fit within this type,
  10719 /// we put it in a register.
  10720 /// A potential opportunity for future optimization here would be keeping track
  10721 /// of the fact that the instruction is available both as an immediate
  10722 /// and as a register.
  10723 fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCValue {
  10724     const mcv = try self.resolveInst(operand);
  10725     const ti = @typeInfo(T).Int;
  10726     switch (mcv) {
  10727         .immediate => |imm| {
  10728             // This immediate is unsigned.
  10729             const U = std.meta.Int(.unsigned, ti.bits - @boolToInt(ti.signedness == .signed));
  10730             if (imm >= math.maxInt(U)) {
  10731                 return MCValue{ .register = try self.copyToTmpRegister(Type.usize, mcv) };
  10732             }
  10733         },
  10734         else => {},
  10735     }
  10736     return mcv;
  10737 }
  10738 
  10739 fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue {
  10740     return switch (try codegen.genTypedValue(self.bin_file, self.src_loc, arg_tv, self.owner.getDecl())) {
  10741         .mcv => |mcv| switch (mcv) {
  10742             .none => .none,
  10743             .undef => .undef,
  10744             .immediate => |imm| .{ .immediate = imm },
  10745             .memory => |addr| .{ .memory = addr },
  10746             .load_direct => |sym_index| .{ .load_direct = sym_index },
  10747             .load_got => |sym_index| .{ .lea_got = sym_index },
  10748             .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
  10749         },
  10750         .fail => |msg| {
  10751             self.err_msg = msg;
  10752             return error.CodegenFail;
  10753         },
  10754     };
  10755 }
  10756 
  10757 const CallMCValues = struct {
  10758     args: []MCValue,
  10759     return_value: InstTracking,
  10760     stack_byte_count: u31,
  10761     stack_align: u31,
  10762 
  10763     fn deinit(self: *CallMCValues, func: *Self) void {
  10764         func.gpa.free(self.args);
  10765         self.* = undefined;
  10766     }
  10767 };
  10768 
/// Computes where each parameter and the return value of a function of type
/// `fn_ty` live, according to its calling convention.
///
/// `var_args` supplies the extra arguments of a variadic call; their types are
/// appended to the declared parameter types. Stack-passed arguments are placed
/// at increasing offsets inside the frame `stack_frame_base`.
///
/// Supported conventions are `.Naked`, `.C` (Windows and System V
/// classification) and `.Unspecified`; anything else fails with a TODO error,
/// as do types that classify into more than one ABI class.
///
/// Caller must call `CallMCValues.deinit`.
fn resolveCallingConventionValues(
    self: *Self,
    fn_ty: Type,
    var_args: []const Air.Inst.Ref,
    stack_frame_base: FrameIndex,
) !CallMCValues {
    const cc = fn_ty.fnCallingConvention();
    const param_len = fn_ty.fnParamLen();
    // Declared parameter types first, followed by the types of the var args.
    const param_types = try self.gpa.alloc(Type, param_len + var_args.len);
    defer self.gpa.free(param_types);
    fn_ty.fnParamTypes(param_types);
    // TODO: promote var arg types
    for (param_types[param_len..], var_args) |*param_ty, arg| param_ty.* = self.air.typeOf(arg);
    var result: CallMCValues = .{
        .args = try self.gpa.alloc(MCValue, param_types.len),
        // These undefined values must be populated before returning from this function.
        .return_value = undefined,
        .stack_byte_count = 0,
        .stack_align = undefined,
    };
    // On failure the caller never receives `result`, so free the args here.
    errdefer self.gpa.free(result.args);

    const ret_ty = fn_ty.fnReturnType();

    switch (cc) {
        .Naked => {
            // Naked functions take no arguments and have no return location.
            assert(result.args.len == 0);
            result.return_value = InstTracking.init(.unreach);
            result.stack_align = 8;
        },
        .C => {
            // Next unused integer parameter register. On Windows this counter
            // is also used for float/SSE parameters (see below).
            var param_reg_i: usize = 0;
            // Next unused SSE parameter register (non-Windows only).
            var param_sse_reg_i: usize = 0;
            result.stack_align = 16;

            switch (self.target.os.tag) {
                .windows => {
                    // Align the stack to 16bytes before allocating shadow stack space (if any).
                    // Four pointer-sized slots are reserved ahead of any stack arguments.
                    result.stack_byte_count += @intCast(u31, 4 * Type.usize.abiSize(self.target.*));
                },
                else => {},
            }

            // Return values
            if (ret_ty.zigTypeTag() == .NoReturn) {
                result.return_value = InstTracking.init(.unreach);
            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) {
                // TODO: is this even possible for C calling convention?
                result.return_value = InstTracking.init(.none);
            } else {
                // Windows yields exactly one class; System V yields a list
                // terminated by `.none`.
                const classes = switch (self.target.os.tag) {
                    .windows => &[1]abi.Class{abi.classifyWindows(ret_ty, self.target.*)},
                    else => mem.sliceTo(&abi.classifySystemV(ret_ty, self.target.*, .ret), .none),
                };
                if (classes.len > 1) {
                    return self.fail("TODO handle multiple classes per type", .{});
                }
                const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
                result.return_value = switch (classes[0]) {
                    // Integer class: first integer return register, sized to the type.
                    .integer => InstTracking.init(.{ .register = registerAlias(
                        ret_reg,
                        @intCast(u32, ret_ty.abiSize(self.target.*)),
                    ) }),
                    .float, .sse => InstTracking.init(.{ .register = .xmm0 }),
                    // Memory class: returned indirectly. The pointer register
                    // consumes an integer parameter register.
                    .memory => ret: {
                        const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
                        param_reg_i += 1;
                        break :ret .{
                            .short = .{ .indirect = .{ .reg = ret_reg } },
                            .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                        };
                    },
                    else => |class| return self.fail("TODO handle calling convention class {s}", .{
                        @tagName(class),
                    }),
                };
            }

            // Input params
            for (param_types, result.args) |ty, *arg| {
                assert(ty.hasRuntimeBitsIgnoreComptime());

                const classes = switch (self.target.os.tag) {
                    .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)},
                    else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*, .arg), .none),
                };
                if (classes.len > 1) {
                    return self.fail("TODO handle multiple classes per type", .{});
                }
                // Each register case `continue`s on success; when the registers
                // are exhausted control falls out of the switch and the
                // parameter is placed on the stack below.
                switch (classes[0]) {
                    .integer => if (param_reg_i < abi.getCAbiIntParamRegs(self.target.*).len) {
                        arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i] };
                        param_reg_i += 1;
                        continue;
                    },
                    .float, .sse => switch (self.target.os.tag) {
                        // Windows: xmm0..xmm3, indexed by the same counter as
                        // the integer parameter registers.
                        .windows => if (param_reg_i < 4) {
                            arg.* = .{ .register = @intToEnum(
                                Register,
                                @enumToInt(Register.xmm0) + param_reg_i,
                            ) };
                            param_reg_i += 1;
                            continue;
                        },
                        // Elsewhere: xmm0..xmm7 with their own counter.
                        else => if (param_sse_reg_i < 8) {
                            arg.* = .{ .register = @intToEnum(
                                Register,
                                @enumToInt(Register.xmm0) + param_sse_reg_i,
                            ) };
                            param_sse_reg_i += 1;
                            continue;
                        },
                    },
                    .memory => {}, // fallthrough
                    else => |class| return self.fail("TODO handle calling convention class {s}", .{
                        @tagName(class),
                    }),
                }

                // Stack-passed parameter: next suitably aligned offset in the call frame.
                const param_size = @intCast(u31, ty.abiSize(self.target.*));
                const param_align = @intCast(u31, ty.abiAlignment(self.target.*));
                result.stack_byte_count =
                    mem.alignForwardGeneric(u31, result.stack_byte_count, param_align);
                arg.* = .{ .load_frame = .{
                    .index = stack_frame_base,
                    .off = result.stack_byte_count,
                } };
                result.stack_byte_count += param_size;
            }
        },
        .Unspecified => {
            result.stack_align = 16;

            // Return values
            if (ret_ty.zigTypeTag() == .NoReturn) {
                result.return_value = InstTracking.init(.unreach);
            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) {
                result.return_value = InstTracking.init(.none);
            } else {
                const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
                const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*));
                if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) {
                    // Non-float values of at most 8 bytes come back in the return register.
                    const aliased_reg = registerAlias(ret_reg, ret_ty_size);
                    result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
                } else {
                    // Everything else is returned indirectly, with the pointer
                    // taken from the first integer parameter register.
                    const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
                    result.return_value = .{
                        .short = .{ .indirect = .{ .reg = ret_reg } },
                        .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                    };
                }
            }

            // Input params
            // Every parameter with runtime bits is passed on the stack.
            for (param_types, result.args) |ty, *arg| {
                if (!ty.hasRuntimeBitsIgnoreComptime()) {
                    arg.* = .none;
                    continue;
                }
                const param_size = @intCast(u31, ty.abiSize(self.target.*));
                const param_align = @intCast(u31, ty.abiAlignment(self.target.*));
                result.stack_byte_count =
                    mem.alignForwardGeneric(u31, result.stack_byte_count, param_align);
                arg.* = .{ .load_frame = .{
                    .index = stack_frame_base,
                    .off = result.stack_byte_count,
                } };
                result.stack_byte_count += param_size;
            }
        },
        else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}),
    }

    // Round the argument area up to the stack alignment of the convention.
    result.stack_byte_count = mem.alignForwardGeneric(u31, result.stack_byte_count, result.stack_align);
    return result;
}
  10946 
  10947 /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`.
  10948 fn wantSafety(self: *Self) bool {
  10949     return switch (self.bin_file.options.optimize_mode) {
  10950         .Debug => true,
  10951         .ReleaseSafe => true,
  10952         .ReleaseFast => false,
  10953         .ReleaseSmall => false,
  10954     };
  10955 }
  10956 
  10957 fn fail(self: *Self, comptime format: []const u8, args: anytype) InnerError {
  10958     @setCold(true);
  10959     assert(self.err_msg == null);
  10960     self.err_msg = try ErrorMsg.create(self.bin_file.allocator, self.src_loc, format, args);
  10961     return error.CodegenFail;
  10962 }
  10963 
  10964 fn failSymbol(self: *Self, comptime format: []const u8, args: anytype) InnerError {
  10965     @setCold(true);
  10966     assert(self.err_msg == null);
  10967     self.err_msg = try ErrorMsg.create(self.bin_file.allocator, self.src_loc, format, args);
  10968     return error.CodegenFail;
  10969 }
  10970 
  10971 fn parseRegName(name: []const u8) ?Register {
  10972     if (@hasDecl(Register, "parseRegName")) {
  10973         return Register.parseRegName(name);
  10974     }
  10975     return std.meta.stringToEnum(Register, name);
  10976 }
  10977 
  10978 /// Returns register wide enough to hold at least `size_bytes`.
  10979 fn registerAlias(reg: Register, size_bytes: u32) Register {
  10980     return switch (reg.class()) {
  10981         .general_purpose => if (size_bytes == 0)
  10982             unreachable // should be comptime-known
  10983         else if (size_bytes <= 1)
  10984             reg.to8()
  10985         else if (size_bytes <= 2)
  10986             reg.to16()
  10987         else if (size_bytes <= 4)
  10988             reg.to32()
  10989         else if (size_bytes <= 8)
  10990             reg.to64()
  10991         else
  10992             unreachable,
  10993         .floating_point => if (size_bytes <= 16)
  10994             reg.to128()
  10995         else if (size_bytes <= 32)
  10996             reg.to256()
  10997         else
  10998             unreachable,
  10999         .segment => unreachable,
  11000     };
  11001 }
  11002 
  11003 /// Truncates the value in the register in place.
  11004 /// Clobbers any remaining bits.
  11005 fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
  11006     const int_info = if (ty.isAbiInt()) ty.intInfo(self.target.*) else std.builtin.Type.Int{
  11007         .signedness = .unsigned,
  11008         .bits = @intCast(u16, ty.bitSize(self.target.*)),
  11009     };
  11010     const max_reg_bit_width = Register.rax.bitSize();
  11011     switch (int_info.signedness) {
  11012         .signed => {
  11013             const shift = @intCast(u6, max_reg_bit_width - int_info.bits);
  11014             try self.genShiftBinOpMir(
  11015                 .{ ._l, .sa },
  11016                 Type.isize,
  11017                 .{ .register = reg },
  11018                 .{ .immediate = shift },
  11019             );
  11020             try self.genShiftBinOpMir(
  11021                 .{ ._r, .sa },
  11022                 Type.isize,
  11023                 .{ .register = reg },
  11024                 .{ .immediate = shift },
  11025             );
  11026         },
  11027         .unsigned => {
  11028             const shift = @intCast(u6, max_reg_bit_width - int_info.bits);
  11029             const mask = (~@as(u64, 0)) >> shift;
  11030             if (int_info.bits <= 32) {
  11031                 try self.genBinOpMir(
  11032                     .{ ._, .@"and" },
  11033                     Type.u32,
  11034                     .{ .register = reg },
  11035                     .{ .immediate = mask },
  11036                 );
  11037             } else {
  11038                 const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask });
  11039                 try self.genBinOpMir(
  11040                     .{ ._, .@"and" },
  11041                     Type.usize,
  11042                     .{ .register = reg },
  11043                     .{ .register = tmp_reg },
  11044                 );
  11045             }
  11046         },
  11047     }
  11048 }
  11049 
  11050 fn regBitSize(self: *Self, ty: Type) u64 {
  11051     const abi_size = ty.abiSize(self.target.*);
  11052     return switch (ty.zigTypeTag()) {
  11053         else => switch (abi_size) {
  11054             1 => 8,
  11055             2 => 16,
  11056             3...4 => 32,
  11057             5...8 => 64,
  11058             else => unreachable,
  11059         },
  11060         .Float => switch (abi_size) {
  11061             1...16 => 128,
  11062             17...32 => 256,
  11063             else => unreachable,
  11064         },
  11065     };
  11066 }
  11067 
  11068 fn regExtraBits(self: *Self, ty: Type) u64 {
  11069     return self.regBitSize(ty) - ty.bitSize(self.target.*);
  11070 }
  11071 
  11072 fn hasFeature(self: *Self, feature: Target.x86.Feature) bool {
  11073     return Target.x86.featureSetHas(self.target.cpu.features, feature);
  11074 }
  11075 fn hasAnyFeatures(self: *Self, features: anytype) bool {
  11076     return Target.x86.featureSetHasAny(self.target.cpu.features, features);
  11077 }
  11078 fn hasAllFeatures(self: *Self, features: anytype) bool {
  11079     return Target.x86.featureSetHasAll(self.target.cpu.features, features);
  11080 }