zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob fb4c8a57 (506828B) - Raw


      1 const std = @import("std");
      2 const build_options = @import("build_options");
      3 const builtin = @import("builtin");
      4 const assert = std.debug.assert;
      5 const codegen = @import("../../codegen.zig");
      6 const leb128 = std.leb;
      7 const link = @import("../../link.zig");
      8 const log = std.log.scoped(.codegen);
      9 const tracking_log = std.log.scoped(.tracking);
     10 const verbose_tracking_log = std.log.scoped(.verbose_tracking);
     11 const wip_mir_log = std.log.scoped(.wip_mir);
     12 const math = std.math;
     13 const mem = std.mem;
     14 const trace = @import("../../tracy.zig").trace;
     15 
     16 const Air = @import("../../Air.zig");
     17 const Allocator = mem.Allocator;
     18 const CodeGenError = codegen.CodeGenError;
     19 const Compilation = @import("../../Compilation.zig");
     20 const DebugInfoOutput = codegen.DebugInfoOutput;
     21 const DW = std.dwarf;
     22 const ErrorMsg = Module.ErrorMsg;
     23 const Result = codegen.Result;
     24 const Emit = @import("Emit.zig");
     25 const Liveness = @import("../../Liveness.zig");
     26 const Lower = @import("Lower.zig");
     27 const Mir = @import("Mir.zig");
     28 const Module = @import("../../Module.zig");
     29 const InternPool = @import("../../InternPool.zig");
     30 const Target = std.Target;
     31 const Type = @import("../../type.zig").Type;
     32 const TypedValue = @import("../../TypedValue.zig");
     33 const Value = @import("../../value.zig").Value;
     34 
     35 const abi = @import("abi.zig");
     36 const bits = @import("bits.zig");
     37 const encoder = @import("encoder.zig");
     38 const errUnionErrorOffset = codegen.errUnionErrorOffset;
     39 const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
     40 
     41 const Condition = bits.Condition;
     42 const Immediate = bits.Immediate;
     43 const Memory = bits.Memory;
     44 const Register = bits.Register;
     45 const RegisterManager = abi.RegisterManager;
     46 const RegisterLock = RegisterManager.RegisterLock;
     47 const FrameIndex = bits.FrameIndex;
     48 
     49 const gp = abi.RegisterClass.gp;
     50 const sse = abi.RegisterClass.sse;
     51 
     52 const InnerError = CodeGenError || error{OutOfRegisters};
     53 
     54 gpa: Allocator,
     55 air: Air,
     56 liveness: Liveness,
     57 bin_file: *link.File,
     58 debug_output: DebugInfoOutput,
     59 target: *const std.Target,
        /// What this code is being generated for: a function or a lazy symbol (see `Owner`).
     60 owner: Owner,
        /// `generate` initializes this to `null` and returns it as the `.fail` result
        /// when codegen stops with `error.CodegenFail`.
     61 err_msg: ?*ErrorMsg,
        /// Left `undefined` by `generate` until `resolveCallingConventionValues` has run.
     62 args: []MCValue,
        /// Left `undefined` by `generate` until `resolveCallingConventionValues` has run.
     63 ret_mcv: InstTracking,
     64 fn_type: Type,
        /// Starts at 0 in `generate`.
     65 arg_index: u32,
     66 src_loc: Module.SrcLoc,
     67 
     68 eflags_inst: ?Air.Inst.Index = null,
     69 
     70 /// MIR instructions emitted so far.
     71 mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
     72 /// Extra data referenced by the MIR instructions.
     73 mir_extra: std.ArrayListUnmanaged(u32) = .{},
     74 
     75 /// Line and column of the function's closing curly (`generate` fills these from `rbrace_line` / `rbrace_column`).
     76 end_di_line: u32,
     77 end_di_column: u32,
     78 
     79 /// The value is an offset into the `Function` `code` from the beginning.
     80 /// To perform the reloc, write a 32-bit signed little-endian integer
     81 /// which is a relative jump, based on the address following the reloc.
        /// NOTE(review): the element type is `Mir.Inst.Index`, so the entries look like
        /// MIR instruction indices rather than byte offsets; the text above may be stale.
     82 exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
     83 
     84 const_tracking: ConstTrackingMap = .{},
     85 inst_tracking: InstTrackingMap = .{},
     86 
     87 // The key is the block's AIR instruction index.
     88 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{},
     89 
     90 register_manager: RegisterManager = .{},
     91 
     92 /// Generation of the current scope; incremented by 1 for every entered scope.
     93 scope_generation: u32 = 0,
     94 
     95 frame_allocs: std.MultiArrayList(FrameAlloc) = .{},
     96 free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .{},
     97 frame_locs: std.MultiArrayList(Mir.FrameLoc) = .{},
     98 
     99 /// Debug field, used to find bugs in the compiler. It is a `usize` only when runtime safety is enabled, otherwise `void`.
    100 air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init,
    101 
    102 /// For MIR debug info: maps a MIR instruction index to an AIR instruction index. It is a real map only in Debug builds, otherwise `void`.
    103 mir_to_air_map: @TypeOf(mir_to_air_map_init) = mir_to_air_map_init,
    104 
        // Initial value (and therefore type) of `air_bookkeeping`: a zero counter with
        // runtime safety, or a zero-sized `void` value without it.
    105 const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {};
    106 
        // Initial value (and therefore type) of `mir_to_air_map`: an empty hash map in
        // Debug builds, or a zero-sized `void` value in every other build mode.
    107 const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged(Mir.Inst.Index, Air.Inst.Index){} else {};
    108 
        // A frame index plus a signed displacement (defaults to 0).
    109 const FrameAddr = struct { index: FrameIndex, off: i32 = 0 };
        // A register plus a signed displacement (defaults to 0).
    110 const RegisterOffset = struct { reg: Register, off: i32 = 0 };
    111 
    112 const Owner = union(enum) {
            // Identifies what code is being generated for: a function (by its
            // `InternPool` index) or a `link.File.LazySymbol`.
    113     func_index: InternPool.Index,
    114     lazy_sym: link.File.LazySymbol,
    115 
            /// Returns the index of the owning declaration: the function's owner decl,
            /// or the owner decl of the lazy symbol's type.
    116     fn getDecl(owner: Owner, mod: *Module) Module.Decl.Index {
    117         return switch (owner) {
    118             .func_index => |func_index| mod.funcOwnerDeclIndex(func_index),
    119             .lazy_sym => |lazy_sym| lazy_sym.ty.getOwnerDecl(mod),
    120         };
    121     }
    122 
            /// Returns the linker symbol index for this owner.
            ///
            /// Only MachO, COFF and Plan9 output files are handled; any other kind of
            /// `ctx.bin_file` reaches `unreachable`.
            /// For `.func_index`, errors from the linker's atom lookup propagate unchanged
            /// through `try`. For `.lazy_sym`, they are reported through `ctx.fail` as
            /// "<error name> creating lazy symbol".
    123     fn getSymbolIndex(owner: Owner, ctx: *Self) !u32 {
    124         switch (owner) {
    125             .func_index => |func_index| {
    126                 const mod = ctx.bin_file.options.module.?;
    127                 const decl_index = mod.funcOwnerDeclIndex(func_index);
    128                 if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
    129                     const atom = try macho_file.getOrCreateAtomForDecl(decl_index);
                            // `.?` asserts that the atom already has a symbol index.
    130                     return macho_file.getAtom(atom).getSymbolIndex().?;
    131                 } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| {
    132                     const atom = try coff_file.getOrCreateAtomForDecl(decl_index);
    133                     return coff_file.getAtom(atom).getSymbolIndex().?;
    134                 } else if (ctx.bin_file.cast(link.File.Plan9)) |p9_file| {
                            // Plan9 returns the result of `seeDecl` directly instead of going through an atom.
    135                     return p9_file.seeDecl(decl_index);
    136                 } else unreachable;
    137             },
    138             .lazy_sym => |lazy_sym| {
    139                 if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
    140                     const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
    141                         return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    142                     return macho_file.getAtom(atom).getSymbolIndex().?;
    143                 } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| {
    144                     const atom = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
    145                         return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    146                     return coff_file.getAtom(atom).getSymbolIndex().?;
    147                 } else if (ctx.bin_file.cast(link.File.Plan9)) |p9_file| {
                            // For Plan9 the value returned by the lookup is used as the result as-is.
    148                     return p9_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
    149                         return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    150                 } else unreachable;
    151             },
    152         }
    153     }
    154 };
    155 
    156 pub const MCValue = union(enum) {
            // Describes where (or whether) a value is available to the generated code:
            // nowhere, as a constant, in flags, in a register, or in one of several
            // kinds of memory location. The `load_*` / `lea_*` tags come in pairs: the
            // `load_*` form is the value stored at a location, the `lea_*` form is the
            // address of that location (see `address` and `deref`).
    157     /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
    158     /// TODO Look into deleting this tag and using `dead` instead, since every use
    159     /// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
    160     none,
    161     /// Control flow will not allow this value to be observed.
    162     unreach,
    163     /// No more references to this value remain.
    164     /// The payload is the value of `scope_generation` at the point where the death occurred.
    165     dead: u32,
    166     /// The value is undefined.
    167     undef,
    168     /// A pointer-sized integer that fits in a register.
    169     /// If the type is a pointer, this is the pointer address in virtual address space.
    170     immediate: u64,
    171     /// The value resides in the EFLAGS register.
    172     eflags: Condition,
    173     /// The value is in a register.
    174     register: Register,
    175     /// The value is a constant offset from the value in a register.
    176     register_offset: RegisterOffset,
    177     /// The value is a tuple { wrapped, overflow } where the wrapped value is stored in the GP register and the overflow bit is described by an EFLAGS condition.
    178     register_overflow: struct { reg: Register, eflags: Condition },
    179     /// The value is in memory at a hard-coded address.
    180     /// If the type is a pointer, it means the pointer address is at this memory location.
    181     memory: u64,
    182     /// The value is in memory at a constant offset from the address in a register.
    183     indirect: RegisterOffset,
    184     /// The value is in memory.
    185     /// Payload is a symbol index.
    186     load_direct: u32,
    187     /// The value is a pointer to a value in memory.
    188     /// Payload is a symbol index.
    189     lea_direct: u32,
    190     /// The value is in memory referenced indirectly via GOT.
    191     /// Payload is a symbol index.
    192     load_got: u32,
    193     /// The value is a pointer to a value referenced indirectly via GOT.
    194     /// Payload is a symbol index.
    195     lea_got: u32,
    196     /// The value is a threadlocal variable.
    197     /// Payload is a symbol index.
    198     load_tlv: u32,
    199     /// The value is a pointer to a threadlocal variable.
    200     /// Payload is a symbol index.
    201     lea_tlv: u32,
    202     /// The value is stored at an offset from a frame index.
    203     /// Payload is a frame address.
    204     load_frame: FrameAddr,
    205     /// The address of an offset from a frame index.
    206     /// Payload is a frame address.
    207     lea_frame: FrameAddr,
    208     /// This indicates that we have already allocated a frame index for this instruction,
    209     /// but it has not been spilled there yet in the current control flow.
    210     /// Payload is a frame index.
    211     reserved_frame: FrameIndex,
    212 
            /// Returns true for `.memory`, `.indirect` and `.load_frame`.
            /// The symbol-based `load_direct` / `load_got` / `load_tlv` tags are not
            /// counted; these are exactly the tags that `mem` can turn into an operand.
    213     fn isMemory(mcv: MCValue) bool {
    214         return switch (mcv) {
    215             .memory, .indirect, .load_frame => true,
    216             else => false,
    217         };
    218     }
    219 
            /// Returns true only for `.immediate`.
    220     fn isImmediate(mcv: MCValue) bool {
    221         return switch (mcv) {
    222             .immediate => true,
    223             else => false,
    224         };
    225     }
    226 
            /// Returns true if the value is exactly the contents of a register:
            /// `.register`, or `.register_offset` whose offset is 0.
    227     fn isRegister(mcv: MCValue) bool {
    228         return switch (mcv) {
    229             .register => true,
    230             .register_offset => |reg_off| return reg_off.off == 0,
    231             else => false,
    232         };
    233     }
    234 
            /// Returns true for `.register` and for `.register_offset` with any offset.
    235     fn isRegisterOffset(mcv: MCValue) bool {
    236         return switch (mcv) {
    237             .register, .register_offset => true,
    238             else => false,
    239         };
    240     }
    241 
            /// Returns the register this value refers to, if any. That covers
            /// `.register`, `.register_offset`, `.indirect` (the base register) and
            /// `.register_overflow`; every other tag yields null.
    242     fn getReg(mcv: MCValue) ?Register {
    243         return switch (mcv) {
    244             .register => |reg| reg,
    245             .register_offset, .indirect => |ro| ro.reg,
    246             .register_overflow => |ro| ro.reg,
    247             else => null,
    248         };
    249     }
    250 
            /// Returns the EFLAGS condition carried by `.eflags` or
            /// `.register_overflow`, or null for every other tag.
    251     fn getCondition(mcv: MCValue) ?Condition {
    252         return switch (mcv) {
    253             .eflags => |cc| cc,
    254             .register_overflow => |reg_ov| reg_ov.eflags,
    255             else => null,
    256         };
    257     }
    258 
            /// Given a value that lives in memory, returns the value describing its
            /// address (`load_*` becomes `lea_*`, `.memory` becomes `.immediate`,
            /// `.indirect` becomes `.register` or `.register_offset`).
            /// Calling this on a value that is not in memory is `unreachable`.
    259     fn address(mcv: MCValue) MCValue {
    260         return switch (mcv) {
    261             .none,
    262             .unreach,
    263             .dead,
    264             .undef,
    265             .immediate,
    266             .eflags,
    267             .register,
    268             .register_offset,
    269             .register_overflow,
    270             .lea_direct,
    271             .lea_got,
    272             .lea_tlv,
    273             .lea_frame,
    274             .reserved_frame,
    275             => unreachable, // not in memory
    276             .memory => |addr| .{ .immediate = addr },
                    // A zero displacement collapses to the plain register form.
    277             .indirect => |reg_off| switch (reg_off.off) {
    278                 0 => .{ .register = reg_off.reg },
    279                 else => .{ .register_offset = reg_off },
    280             },
    281             .load_direct => |sym_index| .{ .lea_direct = sym_index },
    282             .load_got => |sym_index| .{ .lea_got = sym_index },
    283             .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
    284             .load_frame => |frame_addr| .{ .lea_frame = frame_addr },
    285         };
    286     }
    287 
            /// Treats the value as a pointer and returns the value describing the
            /// memory it points to (`lea_*` becomes `load_*`, `.immediate` becomes
            /// `.memory`, `.register` / `.register_offset` become `.indirect`).
            /// Calling this on any other tag is `unreachable`.
    288     fn deref(mcv: MCValue) MCValue {
    289         return switch (mcv) {
    290             .none,
    291             .unreach,
    292             .dead,
    293             .undef,
    294             .eflags,
    295             .register_overflow,
    296             .memory,
    297             .indirect,
    298             .load_direct,
    299             .load_got,
    300             .load_tlv,
    301             .load_frame,
    302             .reserved_frame,
    303             => unreachable, // not dereferenceable
    304             .immediate => |addr| .{ .memory = addr },
    305             .register => |reg| .{ .indirect = .{ .reg = reg } },
    306             .register_offset => |reg_off| .{ .indirect = reg_off },
    307             .lea_direct => |sym_index| .{ .load_direct = sym_index },
    308             .lea_got => |sym_index| .{ .load_got = sym_index },
    309             .lea_tlv => |sym_index| .{ .load_tlv = sym_index },
    310             .lea_frame => |frame_addr| .{ .lea_frame = frame_addr },
    311         };
    312     }
    313 
            /// Returns the value displaced by `off`.
            /// Only `.immediate`, `.register`, `.register_offset` and `.lea_frame` can
            /// be offset; every other tag is `unreachable`.
    314     fn offset(mcv: MCValue, off: i32) MCValue {
    315         return switch (mcv) {
    316             .none,
    317             .unreach,
    318             .dead,
    319             .undef,
    320             .eflags,
    321             .register_overflow,
    322             .memory,
    323             .indirect,
    324             .load_direct,
    325             .lea_direct,
    326             .load_got,
    327             .lea_got,
    328             .load_tlv,
    329             .lea_tlv,
    330             .load_frame,
    331             .reserved_frame,
    332             => unreachable, // not offsettable
                    // The immediate is reinterpreted as signed and added with wrapping (`+%`).
    333             .immediate => |imm| .{ .immediate = @bitCast(@as(i64, @bitCast(imm)) +% off) },
    334             .register => |reg| .{ .register_offset = .{ .reg = reg, .off = off } },
                    // The displacement sums below use plain `+`, so overflow is not wrapped.
    335             .register_offset => |reg_off| .{
    336                 .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off },
    337             },
    338             .lea_frame => |frame_addr| .{
    339                 .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
    340             },
    341         };
    342     }
    343 
            /// Builds a `Memory` operand of size `ptr_size` for a value that lives in
            /// memory. Only `.memory`, `.indirect` and `.load_frame` are supported (the
            /// same set `isMemory` accepts); every other tag is `unreachable`.
    344     fn mem(mcv: MCValue, ptr_size: Memory.PtrSize) Memory {
    345         return switch (mcv) {
    346             .none,
    347             .unreach,
    348             .dead,
    349             .undef,
    350             .immediate,
    351             .eflags,
    352             .register,
    353             .register_offset,
    354             .register_overflow,
    355             .load_direct,
    356             .lea_direct,
    357             .load_got,
    358             .lea_got,
    359             .load_tlv,
    360             .lea_tlv,
    361             .lea_frame,
    362             .reserved_frame,
    363             => unreachable,
                    // An address that fits in a signed 32-bit displacement uses the SIB
                    // form relative to `ds`; anything larger falls back to the moffs form.
    364             .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
    365                 Memory.sib(ptr_size, .{ .base = .{ .reg = .ds }, .disp = small_addr })
    366             else
    367                 Memory.moffs(.ds, addr),
    368             .indirect => |reg_off| Memory.sib(ptr_size, .{
    369                 .base = .{ .reg = reg_off.reg },
    370                 .disp = reg_off.off,
    371             }),
    372             .load_frame => |frame_addr| Memory.sib(ptr_size, .{
    373                 .base = .{ .frame = frame_addr.index },
    374                 .disp = frame_addr.off,
    375             }),
    376         };
    377     }
    378 
            /// `std.fmt` formatter used by the tracking logs. Memory-like values are
            /// printed in square brackets, address-like values without them.
            /// NOTE(review): `.reserved_frame` is printed with a `dead:` prefix rather
            /// than its own tag name; presumably intentional (the slot is reserved but
            /// holds no live value), but confirm.
    379     pub fn format(
    380         mcv: MCValue,
    381         comptime _: []const u8,
    382         _: std.fmt.FormatOptions,
    383         writer: anytype,
    384     ) @TypeOf(writer).Error!void {
    385         switch (mcv) {
    386             .none, .unreach, .dead, .undef => try writer.print("({s})", .{@tagName(mcv)}),
    387             .immediate => |pl| try writer.print("0x{x}", .{pl}),
    388             .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}),
    389             inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}),
    390             .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }),
    391             .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }),
    392             .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }),
    393             .load_direct => |pl| try writer.print("[direct:{d}]", .{pl}),
    394             .lea_direct => |pl| try writer.print("direct:{d}", .{pl}),
    395             .load_got => |pl| try writer.print("[got:{d}]", .{pl}),
    396             .lea_got => |pl| try writer.print("got:{d}", .{pl}),
    397             .load_tlv => |pl| try writer.print("[tlv:{d}]", .{pl}),
    398             .lea_tlv => |pl| try writer.print("tlv:{d}", .{pl}),
    399             .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }),
    400             .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }),
    401             .reserved_frame => |pl| try writer.print("(dead:{})", .{pl}),
    402         }
    403     }
    404 };
    405 
    406 const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking);
    407 const ConstTrackingMap = std.AutoArrayHashMapUnmanaged(InternPool.Index, InstTracking);
    408 const InstTracking = struct {
            // Tracks the location of one value (an AIR instruction result or an
            // interned constant, see `InstTrackingMap` / `ConstTrackingMap`).
            /// Location the value can be restored from after a spill: a constant,
            /// symbol or frame location, `.reserved_frame` when a frame slot is set
            /// aside but not filled, or `.none` when no such location exists yet.
    409     long: MCValue,
            /// Location currently used to access the value; `.dead` once it has died.
    410     short: MCValue,
    411 
            /// Creates the tracking for a freshly produced `result`.
            /// `short` is always `result`. `long` is `result` too when it is a
            /// constant, symbol or frame location, and `.none` when `result` lives in
            /// flags, a register, or register-relative memory.
            /// `.dead` and `.reserved_frame` are not valid results.
    412     fn init(result: MCValue) InstTracking {
    413         return .{ .long = switch (result) {
    414             .none,
    415             .unreach,
    416             .undef,
    417             .immediate,
    418             .memory,
    419             .load_direct,
    420             .lea_direct,
    421             .load_got,
    422             .lea_got,
    423             .load_tlv,
    424             .lea_tlv,
    425             .load_frame,
    426             .lea_frame,
    427             => result,
    428             .dead,
    429             .reserved_frame,
    430             => unreachable,
    431             .eflags,
    432             .register,
    433             .register_offset,
    434             .register_overflow,
    435             .indirect,
    436             => .none,
    437         }, .short = result };
    438     }
    439 
            /// Register referenced by the current (`short`) location, if any.
    440     fn getReg(self: InstTracking) ?Register {
    441         return self.short.getReg();
    442     }
    443 
            /// EFLAGS condition carried by the current (`short`) location, if any.
    444     fn getCondition(self: InstTracking) ?Condition {
    445         return self.short.getCondition();
    446     }
    447 
            /// Emits the code that copies the value from `short` into a frame slot and
            /// records that slot in `long`. Does nothing when `long` already equals
            /// `short`. `short` itself is not updated here; `trackSpill` does that.
    448     fn spill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void {
    449         if (std.meta.eql(self.long, self.short)) return; // Already spilled
    450         // Allocate or reuse frame index
    451         switch (self.long) {
                    // NOTE(review): the `false` argument presumably forbids a register result; confirm against `allocRegOrMem`.
    452             .none => self.long = try function.allocRegOrMem(inst, false),
    453             .load_frame => {},
    454             .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
    455             else => unreachable,
    456         }
    457         tracking_log.debug("spill %{d} from {} to {}", .{ inst, self.short, self.long });
    458         try function.genCopy(function.typeOfIndex(inst), self.long, self.short);
    459     }
    460 
            /// Makes `long` the current location: a `.reserved_frame` is first turned
            /// into the matching `.load_frame`, then `short` is set to `long`.
            /// `long` must not be a flags, register, register-relative or dead value.
    461     fn reuseFrame(self: *InstTracking) void {
    462         switch (self.long) {
    463             .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
    464             else => {},
    465         }
    466         self.short = switch (self.long) {
    467             .none,
    468             .unreach,
    469             .undef,
    470             .immediate,
    471             .memory,
    472             .load_direct,
    473             .lea_direct,
    474             .load_got,
    475             .lea_got,
    476             .load_tlv,
    477             .lea_tlv,
    478             .load_frame,
    479             .lea_frame,
    480             => self.long,
    481             .dead,
    482             .eflags,
    483             .register,
    484             .register_offset,
    485             .register_overflow,
    486             .indirect,
    487             .reserved_frame,
    488             => unreachable,
    489         };
    490     }
    491 
            /// Updates the bookkeeping for a spill without emitting any copy: frees
            /// the current `short` location and switches `short` over to `long`.
    492     fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
    493         function.freeValue(self.short);
    494         self.reuseFrame();
    495         tracking_log.debug("%{d} => {} (spilled)", .{ inst, self.* });
    496     }
    497 
            /// Sanity-checks that this tracking can be turned into `target`.
            /// A non-frame `long` must be identical in both. A frame-based `long`
            /// (`.load_frame` / `.reserved_frame`) only requires that `target.long` is
            /// `.none`, `.load_frame` or `.reserved_frame`.
    498     fn verifyMaterialize(self: *InstTracking, target: InstTracking) void {
    499         switch (self.long) {
    500             .none,
    501             .unreach,
    502             .undef,
    503             .immediate,
    504             .memory,
    505             .load_direct,
    506             .lea_direct,
    507             .load_got,
    508             .lea_got,
    509             .load_tlv,
    510             .lea_tlv,
    511             .lea_frame,
    512             => assert(std.meta.eql(self.long, target.long)),
    513             .load_frame,
    514             .reserved_frame,
    515             => switch (target.long) {
    516                 .none,
    517                 .load_frame,
    518                 .reserved_frame,
    519                 => {},
    520                 else => unreachable,
    521             },
    522             .dead,
    523             .eflags,
    524             .register,
    525             .register_offset,
    526             .register_overflow,
    527             .indirect,
    528             => unreachable,
    529         }
    530     }
    531 
            /// Checked variant of `materializeUnsafe`: runs `verifyMaterialize` first.
    532     fn materialize(
    533         self: *InstTracking,
    534         function: *Self,
    535         inst: Air.Inst.Index,
    536         target: InstTracking,
    537     ) !void {
    538         self.verifyMaterialize(target);
    539         try self.materializeUnsafe(function, inst, target);
    540     }
    541 
            /// Emits the copies that place the value where `target` expects it,
            /// without checking compatibility and without updating `self` (see
            /// `trackMaterialize` for the bookkeeping half).
    542     fn materializeUnsafe(
    543         self: *InstTracking,
    544         function: *Self,
    545         inst: Air.Inst.Index,
    546         target: InstTracking,
    547     ) !void {
    548         const ty = function.typeOfIndex(inst);
                // The target has a filled frame slot but this state does not: fill it too.
    549         if ((self.long == .none or self.long == .reserved_frame) and target.long == .load_frame)
    550             try function.genCopy(ty, target.long, self.short);
    551         try function.genCopy(ty, target.short, self.short);
    552     }
    553 
            /// Updates the bookkeeping to match `target` without emitting any code.
    554     fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void {
    555         self.verifyMaterialize(target);
    556         // Don't clobber reserved frame indices: when the target has no long location, keep our frame slot as reserved.
    557         self.long = if (target.long == .none) switch (self.long) {
    558             .load_frame => |addr| .{ .reserved_frame = addr.index },
    559             .reserved_frame => self.long,
    560             else => target.long,
    561         } else target.long;
    562         self.short = target.short;
    563         tracking_log.debug("%{d} => {} (materialize)", .{ inst, self.* });
    564     }
    565 
            /// Brings a dead value back by restoring `short` from `long`, but only if
            /// it died at a generation greater than or equal to `scope_generation`.
            /// Values that are not dead are left untouched.
    566     fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
    567         switch (self.short) {
    568             .dead => |die_generation| if (die_generation >= scope_generation) {
    569                 self.reuseFrame();
    570                 tracking_log.debug("%{d} => {} (resurrect)", .{ inst, self.* });
    571             },
    572             else => {},
    573         }
    574     }
    575 
            /// Frees the current location and marks the value dead, stamped with the
            /// function's current `scope_generation`.
    576     fn die(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
    577         function.freeValue(self.short);
    578         self.short = .{ .dead = function.scope_generation };
    579         tracking_log.debug("%{d} => {} (death)", .{ inst, self.* });
    580     }
    581 
            /// Marks the value dead like `die`, but without calling `freeValue`.
            /// NOTE(review): presumably because `new_inst` takes over the location
            /// that belonged to `old_inst`; confirm against the callers.
    582     fn reuse(
    583         self: *InstTracking,
    584         function: *Self,
    585         new_inst: Air.Inst.Index,
    586         old_inst: Air.Inst.Index,
    587     ) void {
    588         self.short = .{ .dead = function.scope_generation };
    589         tracking_log.debug("%{d} => {} (reuse %{d})", .{ new_inst, self.*, old_inst });
    590     }
    591 
            /// `std.fmt` formatter: prints `short`, preceded by `|long| ` when the
            /// two locations differ.
    592     pub fn format(
    593         self: InstTracking,
    594         comptime _: []const u8,
    595         _: std.fmt.FormatOptions,
    596         writer: anytype,
    597     ) @TypeOf(writer).Error!void {
    598         if (!std.meta.eql(self.long, self.short)) try writer.print("|{}| ", .{self.long});
    599         try writer.print("{}", .{self.short});
    600     }
    601 };
    602 
    603 const FrameAlloc = struct {
            // Size and alignment record for one frame index (element type of `frame_allocs`).
            /// ABI size of the allocation.
    604     abi_size: u31,
            /// Alignment stored as its base-2 logarithm (see `init`).
    605     abi_align: u5,
            /// Starts at 0 in `init`.
    606     ref_count: u16,
    607 
            /// Builds a `FrameAlloc` from a size and an alignment.
            /// `alignment` must be a power of two (asserted), and `size` must fit in
            /// `u31` because it is narrowed with `@intCast`.
    608     fn init(alloc_abi: struct { size: u64, alignment: u32 }) FrameAlloc {
    609         assert(math.isPowerOfTwo(alloc_abi.alignment));
    610         return .{
    611             .abi_size = @intCast(alloc_abi.size),
    612             .abi_align = math.log2_int(u32, alloc_abi.alignment),
    613             .ref_count = 0,
    614         };
    615     }
            /// Builds a `FrameAlloc` from the ABI size and ABI alignment of `ty`.
    616     fn initType(ty: Type, mod: *Module) FrameAlloc {
    617         return init(.{ .size = ty.abiSize(mod), .alignment = ty.abiAlignment(mod) });
    618     }
    619 };
    620 
    621 const StackAllocation = struct {
            /// AIR instruction associated with the allocation, if any.
    622     inst: ?Air.Inst.Index,
    623     /// Size of the allocation. TODO: is this field needed? It should be determined by `inst.ty.abiSize(mod)`.
    624     size: u32,
    625 };
    626 
    627 const BlockData = struct {
            // Per-block codegen data; stored in the `blocks` map, keyed by the block's
            // AIR instruction index.
            /// MIR instruction indices recorded for this block.
            /// NOTE(review): presumably the jumps that still have to be patched to the
            /// block's end; confirm against the code that fills this list.
    628     relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
            /// The `State` associated with the block (see `State`).
    629     state: State,
    630 
            /// Frees `relocs` using `gpa`, then sets `self.*` to `undefined`, so the
            /// value must not be used afterwards.
    631     fn deinit(self: *BlockData, gpa: Allocator) void {
    632         self.relocs.deinit(gpa);
    633         self.* = undefined;
    634     }
    635 };
    636 
    637 const Self = @This();
    638 
    639 pub fn generate(
    640     bin_file: *link.File,
    641     src_loc: Module.SrcLoc,
    642     func_index: InternPool.Index,
    643     air: Air,
    644     liveness: Liveness,
    645     code: *std.ArrayList(u8),
    646     debug_output: DebugInfoOutput,
    647 ) CodeGenError!Result {
    648     if (build_options.skip_non_native and builtin.cpu.arch != bin_file.options.target.cpu.arch) {
    649         @panic("Attempted to compile for architecture that was disabled by build configuration");
    650     }
    651 
    652     const mod = bin_file.options.module.?;
    653     const func = mod.funcInfo(func_index);
    654     const fn_owner_decl = mod.declPtr(func.owner_decl);
    655     assert(fn_owner_decl.has_tv);
    656     const fn_type = fn_owner_decl.ty;
    657 
    658     const gpa = bin_file.allocator;
    659     var function = Self{
    660         .gpa = gpa,
    661         .air = air,
    662         .liveness = liveness,
    663         .target = &bin_file.options.target,
    664         .bin_file = bin_file,
    665         .debug_output = debug_output,
    666         .owner = .{ .func_index = func_index },
    667         .err_msg = null,
    668         .args = undefined, // populated after `resolveCallingConventionValues`
    669         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
    670         .fn_type = fn_type,
    671         .arg_index = 0,
    672         .src_loc = src_loc,
    673         .end_di_line = func.rbrace_line,
    674         .end_di_column = func.rbrace_column,
    675     };
    676     defer {
    677         function.frame_allocs.deinit(gpa);
    678         function.free_frame_indices.deinit(gpa);
    679         function.frame_locs.deinit(gpa);
    680         var block_it = function.blocks.valueIterator();
    681         while (block_it.next()) |block| block.deinit(gpa);
    682         function.blocks.deinit(gpa);
    683         function.inst_tracking.deinit(gpa);
    684         function.const_tracking.deinit(gpa);
    685         function.exitlude_jump_relocs.deinit(gpa);
    686         function.mir_instructions.deinit(gpa);
    687         function.mir_extra.deinit(gpa);
    688         if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa);
    689     }
    690 
    691     wip_mir_log.debug("{}:", .{function.fmtDecl(func.owner_decl)});
    692 
    693     const ip = &mod.intern_pool;
    694 
    695     try function.frame_allocs.resize(gpa, FrameIndex.named_count);
    696     function.frame_allocs.set(
    697         @intFromEnum(FrameIndex.stack_frame),
    698         FrameAlloc.init(.{
    699             .size = 0,
    700             .alignment = @intCast(func.analysis(ip).stack_alignment.toByteUnitsOptional() orelse 1),
    701         }),
    702     );
    703     function.frame_allocs.set(
    704         @intFromEnum(FrameIndex.call_frame),
    705         FrameAlloc.init(.{ .size = 0, .alignment = 1 }),
    706     );
    707 
    708     const fn_info = mod.typeToFunc(fn_type).?;
    709     var call_info = function.resolveCallingConventionValues(fn_info, &.{}, .args_frame) catch |err| switch (err) {
    710         error.CodegenFail => return Result{ .fail = function.err_msg.? },
    711         error.OutOfRegisters => return Result{
    712             .fail = try ErrorMsg.create(
    713                 bin_file.allocator,
    714                 src_loc,
    715                 "CodeGen ran out of registers. This is a bug in the Zig compiler.",
    716                 .{},
    717             ),
    718         },
    719         else => |e| return e,
    720     };
    721     defer call_info.deinit(&function);
    722 
    723     function.args = call_info.args;
    724     function.ret_mcv = call_info.return_value;
    725     function.frame_allocs.set(@intFromEnum(FrameIndex.ret_addr), FrameAlloc.init(.{
    726         .size = Type.usize.abiSize(mod),
    727         .alignment = @min(Type.usize.abiAlignment(mod), call_info.stack_align),
    728     }));
    729     function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), FrameAlloc.init(.{
    730         .size = Type.usize.abiSize(mod),
    731         .alignment = @min(Type.usize.abiAlignment(mod) * 2, call_info.stack_align),
    732     }));
    733     function.frame_allocs.set(
    734         @intFromEnum(FrameIndex.args_frame),
    735         FrameAlloc.init(.{ .size = call_info.stack_byte_count, .alignment = call_info.stack_align }),
    736     );
    737 
    738     function.gen() catch |err| switch (err) {
    739         error.CodegenFail => return Result{ .fail = function.err_msg.? },
    740         error.OutOfRegisters => return Result{
    741             .fail = try ErrorMsg.create(bin_file.allocator, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}),
    742         },
    743         else => |e| return e,
    744     };
    745 
    746     var mir = Mir{
    747         .instructions = function.mir_instructions.toOwnedSlice(),
    748         .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator),
    749         .frame_locs = function.frame_locs.toOwnedSlice(),
    750     };
    751     defer mir.deinit(bin_file.allocator);
    752 
    753     var emit = Emit{
    754         .lower = .{
    755             .allocator = bin_file.allocator,
    756             .mir = mir,
    757             .target = &bin_file.options.target,
    758             .src_loc = src_loc,
    759         },
    760         .bin_file = bin_file,
    761         .debug_output = debug_output,
    762         .code = code,
    763         .prev_di_pc = 0,
    764         .prev_di_line = func.lbrace_line,
    765         .prev_di_column = func.lbrace_column,
    766     };
    767     defer emit.deinit();
    768     emit.emitMir() catch |err| switch (err) {
    769         error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? },
    770         error.InvalidInstruction, error.CannotEncode => |e| {
    771             const msg = switch (e) {
    772                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
    773                 error.CannotEncode => "CodeGen failed to encode the instruction.",
    774             };
    775             return Result{
    776                 .fail = try ErrorMsg.create(
    777                     bin_file.allocator,
    778                     src_loc,
    779                     "{s} This is a bug in the Zig compiler.",
    780                     .{msg},
    781                 ),
    782             };
    783         },
    784         else => |e| return e,
    785     };
    786 
    787     if (function.err_msg) |em| {
    788         return Result{ .fail = em };
    789     } else {
    790         return Result.ok;
    791     }
    792 }
    793 
    794 pub fn generateLazy(
    795     bin_file: *link.File,
    796     src_loc: Module.SrcLoc,
    797     lazy_sym: link.File.LazySymbol,
    798     code: *std.ArrayList(u8),
    799     debug_output: DebugInfoOutput,
    800 ) CodeGenError!Result {
    801     const gpa = bin_file.allocator;
    802     var function = Self{
    803         .gpa = gpa,
    804         .air = undefined,
    805         .liveness = undefined,
    806         .target = &bin_file.options.target,
    807         .bin_file = bin_file,
    808         .debug_output = debug_output,
    809         .owner = .{ .lazy_sym = lazy_sym },
    810         .err_msg = null,
    811         .args = undefined,
    812         .ret_mcv = undefined,
    813         .fn_type = undefined,
    814         .arg_index = undefined,
    815         .src_loc = src_loc,
    816         .end_di_line = undefined, // no debug info yet
    817         .end_di_column = undefined, // no debug info yet
    818     };
    819     defer {
    820         function.mir_instructions.deinit(gpa);
    821         function.mir_extra.deinit(gpa);
    822     }
    823 
    824     function.genLazy(lazy_sym) catch |err| switch (err) {
    825         error.CodegenFail => return Result{ .fail = function.err_msg.? },
    826         error.OutOfRegisters => return Result{
    827             .fail = try ErrorMsg.create(bin_file.allocator, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}),
    828         },
    829         else => |e| return e,
    830     };
    831 
    832     var mir = Mir{
    833         .instructions = function.mir_instructions.toOwnedSlice(),
    834         .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator),
    835         .frame_locs = function.frame_locs.toOwnedSlice(),
    836     };
    837     defer mir.deinit(bin_file.allocator);
    838 
    839     var emit = Emit{
    840         .lower = .{
    841             .allocator = bin_file.allocator,
    842             .mir = mir,
    843             .target = &bin_file.options.target,
    844             .src_loc = src_loc,
    845         },
    846         .bin_file = bin_file,
    847         .debug_output = debug_output,
    848         .code = code,
    849         .prev_di_pc = undefined, // no debug info yet
    850         .prev_di_line = undefined, // no debug info yet
    851         .prev_di_column = undefined, // no debug info yet
    852     };
    853     defer emit.deinit();
    854     emit.emitMir() catch |err| switch (err) {
    855         error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? },
    856         error.InvalidInstruction, error.CannotEncode => |e| {
    857             const msg = switch (e) {
    858                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
    859                 error.CannotEncode => "CodeGen failed to encode the instruction.",
    860             };
    861             return Result{
    862                 .fail = try ErrorMsg.create(
    863                     bin_file.allocator,
    864                     src_loc,
    865                     "{s} This is a bug in the Zig compiler.",
    866                     .{msg},
    867                 ),
    868             };
    869         },
    870         else => |e| return e,
    871     };
    872 
    873     if (function.err_msg) |em| {
    874         return Result{ .fail = em };
    875     } else {
    876         return Result.ok;
    877     }
    878 }
    879 
    880 const FormatDeclData = struct {
    881     mod: *Module,
    882     decl_index: Module.Decl.Index,
    883 };
    884 fn formatDecl(
    885     data: FormatDeclData,
    886     comptime _: []const u8,
    887     _: std.fmt.FormatOptions,
    888     writer: anytype,
    889 ) @TypeOf(writer).Error!void {
    890     try data.mod.declPtr(data.decl_index).renderFullyQualifiedName(data.mod, writer);
    891 }
    892 fn fmtDecl(self: *Self, decl_index: Module.Decl.Index) std.fmt.Formatter(formatDecl) {
    893     return .{ .data = .{
    894         .mod = self.bin_file.options.module.?,
    895         .decl_index = decl_index,
    896     } };
    897 }
    898 
    899 const FormatAirData = struct {
    900     self: *Self,
    901     inst: Air.Inst.Index,
    902 };
    903 fn formatAir(
    904     data: FormatAirData,
    905     comptime _: []const u8,
    906     _: std.fmt.FormatOptions,
    907     writer: anytype,
    908 ) @TypeOf(writer).Error!void {
    909     @import("../../print_air.zig").dumpInst(
    910         data.inst,
    911         data.self.bin_file.options.module.?,
    912         data.self.air,
    913         data.self.liveness,
    914     );
    915 }
    916 fn fmtAir(self: *Self, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) {
    917     return .{ .data = .{ .self = self, .inst = inst } };
    918 }
    919 
    920 const FormatWipMirData = struct {
    921     self: *Self,
    922     inst: Mir.Inst.Index,
    923 };
    924 fn formatWipMir(
    925     data: FormatWipMirData,
    926     comptime _: []const u8,
    927     _: std.fmt.FormatOptions,
    928     writer: anytype,
    929 ) @TypeOf(writer).Error!void {
    930     var lower = Lower{
    931         .allocator = data.self.gpa,
    932         .mir = .{
    933             .instructions = data.self.mir_instructions.slice(),
    934             .extra = data.self.mir_extra.items,
    935             .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(),
    936         },
    937         .target = data.self.target,
    938         .src_loc = data.self.src_loc,
    939     };
    940     for ((lower.lowerMir(data.inst) catch |err| switch (err) {
    941         error.LowerFail => {
    942             defer {
    943                 lower.err_msg.?.deinit(data.self.gpa);
    944                 lower.err_msg = null;
    945             }
    946             try writer.writeAll(lower.err_msg.?.msg);
    947             return;
    948         },
    949         error.OutOfMemory, error.InvalidInstruction, error.CannotEncode => |e| {
    950             try writer.writeAll(switch (e) {
    951                 error.OutOfMemory => "Out of memory",
    952                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
    953                 error.CannotEncode => "CodeGen failed to encode the instruction.",
    954             });
    955             return;
    956         },
    957         else => |e| return e,
    958     }).insts) |lowered_inst| try writer.print("  | {}", .{lowered_inst});
    959 }
    960 fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) {
    961     return .{ .data = .{ .self = self, .inst = inst } };
    962 }
    963 
    964 const FormatTrackingData = struct {
    965     self: *Self,
    966 };
    967 fn formatTracking(
    968     data: FormatTrackingData,
    969     comptime _: []const u8,
    970     _: std.fmt.FormatOptions,
    971     writer: anytype,
    972 ) @TypeOf(writer).Error!void {
    973     var it = data.self.inst_tracking.iterator();
    974     while (it.next()) |entry| try writer.print("\n%{d} = {}", .{ entry.key_ptr.*, entry.value_ptr.* });
    975 }
    976 fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) {
    977     return .{ .data = .{ .self = self } };
    978 }
    979 
    980 fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
    981     const gpa = self.gpa;
    982     try self.mir_instructions.ensureUnusedCapacity(gpa, 1);
    983     const result_index: Mir.Inst.Index = @intCast(self.mir_instructions.len);
    984     self.mir_instructions.appendAssumeCapacity(inst);
    985     if (inst.tag != .pseudo or switch (inst.ops) {
    986         else => true,
    987         .pseudo_dbg_prologue_end_none,
    988         .pseudo_dbg_line_line_column,
    989         .pseudo_dbg_epilogue_begin_none,
    990         .pseudo_dead_none,
    991         => false,
    992     }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)});
    993     return result_index;
    994 }
    995 
    996 fn addExtra(self: *Self, extra: anytype) Allocator.Error!u32 {
    997     const fields = std.meta.fields(@TypeOf(extra));
    998     try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len);
    999     return self.addExtraAssumeCapacity(extra);
   1000 }
   1001 
   1002 fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
   1003     const fields = std.meta.fields(@TypeOf(extra));
   1004     const result: u32 = @intCast(self.mir_extra.items.len);
   1005     inline for (fields) |field| {
   1006         self.mir_extra.appendAssumeCapacity(switch (field.type) {
   1007             u32 => @field(extra, field.name),
   1008             i32 => @bitCast(@field(extra, field.name)),
   1009             else => @compileError("bad field type: " ++ field.name ++ ": " ++ @typeName(field.type)),
   1010         });
   1011     }
   1012     return result;
   1013 }
   1014 
   1015 /// A `cc` of `.z_and_np` clobbers `reg2`!
   1016 fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void {
   1017     _ = try self.addInst(.{
   1018         .tag = switch (cc) {
   1019             else => .cmov,
   1020             .z_and_np, .nz_or_p => .pseudo,
   1021         },
   1022         .ops = switch (cc) {
   1023             else => .rr,
   1024             .z_and_np => .pseudo_cmov_z_and_np_rr,
   1025             .nz_or_p => .pseudo_cmov_nz_or_p_rr,
   1026         },
   1027         .data = .{ .rr = .{
   1028             .fixes = switch (cc) {
   1029                 else => Mir.Inst.Fixes.fromCondition(cc),
   1030                 .z_and_np, .nz_or_p => ._,
   1031             },
   1032             .r1 = reg1,
   1033             .r2 = reg2,
   1034         } },
   1035     });
   1036 }
   1037 
   1038 /// A `cc` of `.z_and_np` is not supported by this encoding!
   1039 fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void {
   1040     _ = try self.addInst(.{
   1041         .tag = switch (cc) {
   1042             else => .cmov,
   1043             .z_and_np => unreachable,
   1044             .nz_or_p => .pseudo,
   1045         },
   1046         .ops = switch (cc) {
   1047             else => switch (m) {
   1048                 .sib => .rm_sib,
   1049                 .rip => .rm_rip,
   1050                 else => unreachable,
   1051             },
   1052             .z_and_np => unreachable,
   1053             .nz_or_p => switch (m) {
   1054                 .sib => .pseudo_cmov_nz_or_p_rm_sib,
   1055                 .rip => .pseudo_cmov_nz_or_p_rm_rip,
   1056                 else => unreachable,
   1057             },
   1058         },
   1059         .data = .{ .rx = .{
   1060             .fixes = switch (cc) {
   1061                 else => Mir.Inst.Fixes.fromCondition(cc),
   1062                 .z_and_np => unreachable,
   1063                 .nz_or_p => ._,
   1064             },
   1065             .r1 = reg,
   1066             .payload = switch (m) {
   1067                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1068                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1069                 else => unreachable,
   1070             },
   1071         } },
   1072     });
   1073 }
   1074 
   1075 fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void {
   1076     _ = try self.addInst(.{
   1077         .tag = switch (cc) {
   1078             else => .set,
   1079             .z_and_np, .nz_or_p => .pseudo,
   1080         },
   1081         .ops = switch (cc) {
   1082             else => .r,
   1083             .z_and_np => .pseudo_set_z_and_np_r,
   1084             .nz_or_p => .pseudo_set_nz_or_p_r,
   1085         },
   1086         .data = switch (cc) {
   1087             else => .{ .r = .{
   1088                 .fixes = Mir.Inst.Fixes.fromCondition(cc),
   1089                 .r1 = reg,
   1090             } },
   1091             .z_and_np, .nz_or_p => .{ .rr = .{
   1092                 .r1 = reg,
   1093                 .r2 = (try self.register_manager.allocReg(null, gp)).to8(),
   1094             } },
   1095         },
   1096     });
   1097 }
   1098 
   1099 fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void {
   1100     const payload = switch (m) {
   1101         .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1102         .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1103         else => unreachable,
   1104     };
   1105     _ = try self.addInst(.{
   1106         .tag = switch (cc) {
   1107             else => .set,
   1108             .z_and_np, .nz_or_p => .pseudo,
   1109         },
   1110         .ops = switch (cc) {
   1111             else => switch (m) {
   1112                 .sib => .m_sib,
   1113                 .rip => .m_rip,
   1114                 else => unreachable,
   1115             },
   1116             .z_and_np => switch (m) {
   1117                 .sib => .pseudo_set_z_and_np_m_sib,
   1118                 .rip => .pseudo_set_z_and_np_m_rip,
   1119                 else => unreachable,
   1120             },
   1121             .nz_or_p => switch (m) {
   1122                 .sib => .pseudo_set_nz_or_p_m_sib,
   1123                 .rip => .pseudo_set_nz_or_p_m_rip,
   1124                 else => unreachable,
   1125             },
   1126         },
   1127         .data = switch (cc) {
   1128             else => .{ .x = .{
   1129                 .fixes = Mir.Inst.Fixes.fromCondition(cc),
   1130                 .payload = payload,
   1131             } },
   1132             .z_and_np, .nz_or_p => .{ .rx = .{
   1133                 .r1 = (try self.register_manager.allocReg(null, gp)).to8(),
   1134                 .payload = payload,
   1135             } },
   1136         },
   1137     });
   1138 }
   1139 
   1140 fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index {
   1141     return self.addInst(.{
   1142         .tag = .jmp,
   1143         .ops = .inst,
   1144         .data = .{ .inst = .{
   1145             .inst = target,
   1146         } },
   1147     });
   1148 }
   1149 
   1150 fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Inst.Index {
   1151     return self.addInst(.{
   1152         .tag = switch (cc) {
   1153             else => .j,
   1154             .z_and_np, .nz_or_p => .pseudo,
   1155         },
   1156         .ops = switch (cc) {
   1157             else => .inst,
   1158             .z_and_np => .pseudo_j_z_and_np_inst,
   1159             .nz_or_p => .pseudo_j_nz_or_p_inst,
   1160         },
   1161         .data = .{ .inst = .{
   1162             .fixes = switch (cc) {
   1163                 else => Mir.Inst.Fixes.fromCondition(cc),
   1164                 .z_and_np, .nz_or_p => ._,
   1165             },
   1166             .inst = target,
   1167         } },
   1168     });
   1169 }
   1170 
   1171 fn asmPlaceholder(self: *Self) !Mir.Inst.Index {
   1172     return self.addInst(.{
   1173         .tag = .pseudo,
   1174         .ops = .pseudo_dead_none,
   1175         .data = undefined,
   1176     });
   1177 }
   1178 
   1179 fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void {
   1180     _ = try self.addInst(.{
   1181         .tag = tag[1],
   1182         .ops = .none,
   1183         .data = .{ .none = .{
   1184             .fixes = tag[0],
   1185         } },
   1186     });
   1187 }
   1188 
   1189 fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void {
   1190     _ = try self.addInst(.{
   1191         .tag = .pseudo,
   1192         .ops = ops,
   1193         .data = undefined,
   1194     });
   1195 }
   1196 
   1197 fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void {
   1198     _ = try self.addInst(.{
   1199         .tag = tag[1],
   1200         .ops = .r,
   1201         .data = .{ .r = .{
   1202             .fixes = tag[0],
   1203             .r1 = reg,
   1204         } },
   1205     });
   1206 }
   1207 
   1208 fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void {
   1209     _ = try self.addInst(.{
   1210         .tag = tag[1],
   1211         .ops = switch (imm) {
   1212             .signed => .i_s,
   1213             .unsigned => .i_u,
   1214         },
   1215         .data = .{ .i = .{
   1216             .fixes = tag[0],
   1217             .i = switch (imm) {
   1218                 .signed => |s| @bitCast(s),
   1219                 .unsigned => |u| @intCast(u),
   1220             },
   1221         } },
   1222     });
   1223 }
   1224 
   1225 fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void {
   1226     _ = try self.addInst(.{
   1227         .tag = tag[1],
   1228         .ops = .rr,
   1229         .data = .{ .rr = .{
   1230             .fixes = tag[0],
   1231             .r1 = reg1,
   1232             .r2 = reg2,
   1233         } },
   1234     });
   1235 }
   1236 
   1237 fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void {
   1238     const ops: Mir.Inst.Ops = switch (imm) {
   1239         .signed => .ri_s,
   1240         .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64,
   1241     };
   1242     _ = try self.addInst(.{
   1243         .tag = tag[1],
   1244         .ops = ops,
   1245         .data = switch (ops) {
   1246             .ri_s, .ri_u => .{ .ri = .{
   1247                 .fixes = tag[0],
   1248                 .r1 = reg,
   1249                 .i = switch (imm) {
   1250                     .signed => |s| @bitCast(s),
   1251                     .unsigned => |u| @intCast(u),
   1252                 },
   1253             } },
   1254             .ri64 => .{ .rx = .{
   1255                 .fixes = tag[0],
   1256                 .r1 = reg,
   1257                 .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)),
   1258             } },
   1259             else => unreachable,
   1260         },
   1261     });
   1262 }
   1263 
   1264 fn asmRegisterRegisterRegister(
   1265     self: *Self,
   1266     tag: Mir.Inst.FixedTag,
   1267     reg1: Register,
   1268     reg2: Register,
   1269     reg3: Register,
   1270 ) !void {
   1271     _ = try self.addInst(.{
   1272         .tag = tag[1],
   1273         .ops = .rrr,
   1274         .data = .{ .rrr = .{
   1275             .fixes = tag[0],
   1276             .r1 = reg1,
   1277             .r2 = reg2,
   1278             .r3 = reg3,
   1279         } },
   1280     });
   1281 }
   1282 
   1283 fn asmRegisterRegisterRegisterRegister(
   1284     self: *Self,
   1285     tag: Mir.Inst.FixedTag,
   1286     reg1: Register,
   1287     reg2: Register,
   1288     reg3: Register,
   1289     reg4: Register,
   1290 ) !void {
   1291     _ = try self.addInst(.{
   1292         .tag = tag[1],
   1293         .ops = .rrrr,
   1294         .data = .{ .rrrr = .{
   1295             .fixes = tag[0],
   1296             .r1 = reg1,
   1297             .r2 = reg2,
   1298             .r3 = reg3,
   1299             .r4 = reg4,
   1300         } },
   1301     });
   1302 }
   1303 
   1304 fn asmRegisterRegisterRegisterImmediate(
   1305     self: *Self,
   1306     tag: Mir.Inst.FixedTag,
   1307     reg1: Register,
   1308     reg2: Register,
   1309     reg3: Register,
   1310     imm: Immediate,
   1311 ) !void {
   1312     _ = try self.addInst(.{
   1313         .tag = tag[1],
   1314         .ops = .rrri,
   1315         .data = .{ .rrri = .{
   1316             .fixes = tag[0],
   1317             .r1 = reg1,
   1318             .r2 = reg2,
   1319             .r3 = reg3,
   1320             .i = @as(u8, @intCast(imm.unsigned)),
   1321         } },
   1322     });
   1323 }
   1324 
   1325 fn asmRegisterRegisterImmediate(
   1326     self: *Self,
   1327     tag: Mir.Inst.FixedTag,
   1328     reg1: Register,
   1329     reg2: Register,
   1330     imm: Immediate,
   1331 ) !void {
   1332     _ = try self.addInst(.{
   1333         .tag = tag[1],
   1334         .ops = switch (imm) {
   1335             .signed => .rri_s,
   1336             .unsigned => .rri_u,
   1337         },
   1338         .data = .{ .rri = .{
   1339             .fixes = tag[0],
   1340             .r1 = reg1,
   1341             .r2 = reg2,
   1342             .i = switch (imm) {
   1343                 .signed => |s| @bitCast(s),
   1344                 .unsigned => |u| @intCast(u),
   1345             },
   1346         } },
   1347     });
   1348 }
   1349 
   1350 fn asmRegisterRegisterMemory(
   1351     self: *Self,
   1352     tag: Mir.Inst.FixedTag,
   1353     reg1: Register,
   1354     reg2: Register,
   1355     m: Memory,
   1356 ) !void {
   1357     _ = try self.addInst(.{
   1358         .tag = tag[1],
   1359         .ops = switch (m) {
   1360             .sib => .rrm_sib,
   1361             .rip => .rrm_rip,
   1362             else => unreachable,
   1363         },
   1364         .data = .{ .rrx = .{
   1365             .fixes = tag[0],
   1366             .r1 = reg1,
   1367             .r2 = reg2,
   1368             .payload = switch (m) {
   1369                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1370                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1371                 else => unreachable,
   1372             },
   1373         } },
   1374     });
   1375 }
   1376 
   1377 fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void {
   1378     _ = try self.addInst(.{
   1379         .tag = tag[1],
   1380         .ops = switch (m) {
   1381             .sib => .m_sib,
   1382             .rip => .m_rip,
   1383             else => unreachable,
   1384         },
   1385         .data = .{ .x = .{
   1386             .fixes = tag[0],
   1387             .payload = switch (m) {
   1388                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1389                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1390                 else => unreachable,
   1391             },
   1392         } },
   1393     });
   1394 }
   1395 
   1396 fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void {
   1397     _ = try self.addInst(.{
   1398         .tag = tag[1],
   1399         .ops = switch (m) {
   1400             .sib => .rm_sib,
   1401             .rip => .rm_rip,
   1402             else => unreachable,
   1403         },
   1404         .data = .{ .rx = .{
   1405             .fixes = tag[0],
   1406             .r1 = reg,
   1407             .payload = switch (m) {
   1408                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1409                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1410                 else => unreachable,
   1411             },
   1412         } },
   1413     });
   1414 }
   1415 
   1416 fn asmRegisterMemoryImmediate(
   1417     self: *Self,
   1418     tag: Mir.Inst.FixedTag,
   1419     reg: Register,
   1420     m: Memory,
   1421     imm: Immediate,
   1422 ) !void {
   1423     _ = try self.addInst(.{
   1424         .tag = tag[1],
   1425         .ops = switch (m) {
   1426             .sib => .rmi_sib,
   1427             .rip => .rmi_rip,
   1428             else => unreachable,
   1429         },
   1430         .data = .{ .rix = .{
   1431             .fixes = tag[0],
   1432             .r1 = reg,
   1433             .i = @as(u8, @intCast(imm.unsigned)),
   1434             .payload = switch (m) {
   1435                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1436                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1437                 else => unreachable,
   1438             },
   1439         } },
   1440     });
   1441 }
   1442 
   1443 fn asmRegisterRegisterMemoryImmediate(
   1444     self: *Self,
   1445     tag: Mir.Inst.FixedTag,
   1446     reg1: Register,
   1447     reg2: Register,
   1448     m: Memory,
   1449     imm: Immediate,
   1450 ) !void {
   1451     _ = try self.addInst(.{
   1452         .tag = tag[1],
   1453         .ops = switch (m) {
   1454             .sib => .rrmi_sib,
   1455             .rip => .rrmi_rip,
   1456             else => unreachable,
   1457         },
   1458         .data = .{ .rrix = .{
   1459             .fixes = tag[0],
   1460             .r1 = reg1,
   1461             .r2 = reg2,
   1462             .i = @intCast(imm.unsigned),
   1463             .payload = switch (m) {
   1464                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1465                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1466                 else => unreachable,
   1467             },
   1468         } },
   1469     });
   1470 }
   1471 
   1472 fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void {
   1473     _ = try self.addInst(.{
   1474         .tag = tag[1],
   1475         .ops = switch (m) {
   1476             .sib => .mr_sib,
   1477             .rip => .mr_rip,
   1478             else => unreachable,
   1479         },
   1480         .data = .{ .rx = .{
   1481             .fixes = tag[0],
   1482             .r1 = reg,
   1483             .payload = switch (m) {
   1484                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1485                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1486                 else => unreachable,
   1487             },
   1488         } },
   1489     });
   1490 }
   1491 
   1492 fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void {
   1493     const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
   1494         .signed => |s| @bitCast(s),
   1495         .unsigned => |u| @intCast(u),
   1496     } });
   1497     assert(payload + 1 == switch (m) {
   1498         .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1499         .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1500         else => unreachable,
   1501     });
   1502     _ = try self.addInst(.{
   1503         .tag = tag[1],
   1504         .ops = switch (m) {
   1505             .sib => switch (imm) {
   1506                 .signed => .mi_sib_s,
   1507                 .unsigned => .mi_sib_u,
   1508             },
   1509             .rip => switch (imm) {
   1510                 .signed => .mi_rip_s,
   1511                 .unsigned => .mi_rip_u,
   1512             },
   1513             else => unreachable,
   1514         },
   1515         .data = .{ .x = .{
   1516             .fixes = tag[0],
   1517             .payload = payload,
   1518         } },
   1519     });
   1520 }
   1521 
   1522 fn asmMemoryRegisterRegister(
   1523     self: *Self,
   1524     tag: Mir.Inst.FixedTag,
   1525     m: Memory,
   1526     reg1: Register,
   1527     reg2: Register,
   1528 ) !void {
   1529     _ = try self.addInst(.{
   1530         .tag = tag[1],
   1531         .ops = switch (m) {
   1532             .sib => .mrr_sib,
   1533             .rip => .mrr_rip,
   1534             else => unreachable,
   1535         },
   1536         .data = .{ .rrx = .{
   1537             .fixes = tag[0],
   1538             .r1 = reg1,
   1539             .r2 = reg2,
   1540             .payload = switch (m) {
   1541                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1542                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1543                 else => unreachable,
   1544             },
   1545         } },
   1546     });
   1547 }
   1548 
   1549 fn asmMemoryRegisterImmediate(
   1550     self: *Self,
   1551     tag: Mir.Inst.FixedTag,
   1552     m: Memory,
   1553     reg: Register,
   1554     imm: Immediate,
   1555 ) !void {
   1556     _ = try self.addInst(.{
   1557         .tag = tag[1],
   1558         .ops = switch (m) {
   1559             .sib => .mri_sib,
   1560             .rip => .mri_rip,
   1561             else => unreachable,
   1562         },
   1563         .data = .{ .rix = .{
   1564             .fixes = tag[0],
   1565             .r1 = reg,
   1566             .i = @intCast(imm.unsigned),
   1567             .payload = switch (m) {
   1568                 .sib => try self.addExtra(Mir.MemorySib.encode(m)),
   1569                 .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
   1570                 else => unreachable,
   1571             },
   1572         } },
   1573     });
   1574 }
   1575 
   1576 fn gen(self: *Self) InnerError!void {
   1577     const mod = self.bin_file.options.module.?;
   1578     const cc = self.fn_type.fnCallingConvention(mod);
   1579     if (cc != .Naked) {
   1580         try self.asmRegister(.{ ._, .push }, .rbp);
   1581         const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
   1582         try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
   1583         const backpatch_frame_align = try self.asmPlaceholder();
   1584         const backpatch_frame_align_extra = try self.asmPlaceholder();
   1585         const backpatch_stack_alloc = try self.asmPlaceholder();
   1586         const backpatch_stack_alloc_extra = try self.asmPlaceholder();
   1587 
   1588         switch (self.ret_mcv.long) {
   1589             .none, .unreach => {},
   1590             .indirect => {
   1591                 // The address where to store the return value for the caller is in a
   1592                 // register which the callee is free to clobber. Therefore, we purposely
   1593                 // spill it to stack immediately.
   1594                 const frame_index =
   1595                     try self.allocFrameIndex(FrameAlloc.initType(Type.usize, mod));
   1596                 try self.genSetMem(
   1597                     .{ .frame = frame_index },
   1598                     0,
   1599                     Type.usize,
   1600                     self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off),
   1601                 );
   1602                 self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } };
   1603                 tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index });
   1604             },
   1605             else => unreachable,
   1606         }
   1607 
   1608         try self.asmPseudo(.pseudo_dbg_prologue_end_none);
   1609 
   1610         try self.genBody(self.air.getMainBody());
   1611 
   1612         // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
   1613         // Example:
   1614         // pub fn main() void {
   1615         //     maybeErr() catch return;
   1616         //     unreachable;
   1617         // }
   1618         // Eliding the reloc will cause a miscompilation in this case.
   1619         for (self.exitlude_jump_relocs.items) |jmp_reloc| {
   1620             self.mir_instructions.items(.data)[jmp_reloc].inst.inst =
   1621                 @intCast(self.mir_instructions.len);
   1622         }
   1623 
   1624         try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
   1625         const backpatch_stack_dealloc = try self.asmPlaceholder();
   1626         const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
   1627         try self.asmRegister(.{ ._, .pop }, .rbp);
   1628         try self.asmOpOnly(.{ ._, .ret });
   1629 
   1630         const frame_layout = try self.computeFrameLayout();
   1631         const need_frame_align = frame_layout.stack_mask != math.maxInt(u32);
   1632         const need_stack_adjust = frame_layout.stack_adjust > 0;
   1633         const need_save_reg = frame_layout.save_reg_list.count() > 0;
   1634         if (need_frame_align) {
   1635             const page_align = @as(u32, math.maxInt(u32)) << 12;
   1636             self.mir_instructions.set(backpatch_frame_align, .{
   1637                 .tag = .@"and",
   1638                 .ops = .ri_s,
   1639                 .data = .{ .ri = .{
   1640                     .r1 = .rsp,
   1641                     .i = @max(frame_layout.stack_mask, page_align),
   1642                 } },
   1643             });
   1644             if (frame_layout.stack_mask < page_align) {
   1645                 self.mir_instructions.set(backpatch_frame_align_extra, .{
   1646                     .tag = .pseudo,
   1647                     .ops = .pseudo_probe_align_ri_s,
   1648                     .data = .{ .ri = .{
   1649                         .r1 = .rsp,
   1650                         .i = ~frame_layout.stack_mask & page_align,
   1651                     } },
   1652                 });
   1653             }
   1654         }
   1655         if (need_stack_adjust) {
   1656             const page_size: u32 = 1 << 12;
   1657             if (frame_layout.stack_adjust <= page_size) {
   1658                 self.mir_instructions.set(backpatch_stack_alloc, .{
   1659                     .tag = .sub,
   1660                     .ops = .ri_s,
   1661                     .data = .{ .ri = .{
   1662                         .r1 = .rsp,
   1663                         .i = frame_layout.stack_adjust,
   1664                     } },
   1665                 });
   1666             } else if (frame_layout.stack_adjust <
   1667                 page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
   1668             {
   1669                 self.mir_instructions.set(backpatch_stack_alloc, .{
   1670                     .tag = .pseudo,
   1671                     .ops = .pseudo_probe_adjust_unrolled_ri_s,
   1672                     .data = .{ .ri = .{
   1673                         .r1 = .rsp,
   1674                         .i = frame_layout.stack_adjust,
   1675                     } },
   1676                 });
   1677             } else {
   1678                 self.mir_instructions.set(backpatch_stack_alloc, .{
   1679                     .tag = .pseudo,
   1680                     .ops = .pseudo_probe_adjust_setup_rri_s,
   1681                     .data = .{ .rri = .{
   1682                         .r1 = .rsp,
   1683                         .r2 = .rax,
   1684                         .i = frame_layout.stack_adjust,
   1685                     } },
   1686                 });
   1687                 self.mir_instructions.set(backpatch_stack_alloc_extra, .{
   1688                     .tag = .pseudo,
   1689                     .ops = .pseudo_probe_adjust_loop_rr,
   1690                     .data = .{ .rr = .{
   1691                         .r1 = .rsp,
   1692                         .r2 = .rax,
   1693                     } },
   1694                 });
   1695             }
   1696         }
   1697         if (need_frame_align or need_stack_adjust) {
   1698             self.mir_instructions.set(backpatch_stack_dealloc, .{
   1699                 .tag = .mov,
   1700                 .ops = .rr,
   1701                 .data = .{ .rr = .{
   1702                     .r1 = .rsp,
   1703                     .r2 = .rbp,
   1704                 } },
   1705             });
   1706         }
   1707         if (need_save_reg) {
   1708             self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
   1709                 .tag = .pseudo,
   1710                 .ops = .pseudo_push_reg_list,
   1711                 .data = .{ .reg_list = frame_layout.save_reg_list },
   1712             });
   1713             self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{
   1714                 .tag = .pseudo,
   1715                 .ops = .pseudo_pop_reg_list,
   1716                 .data = .{ .reg_list = frame_layout.save_reg_list },
   1717             });
   1718         }
   1719     } else {
   1720         try self.asmPseudo(.pseudo_dbg_prologue_end_none);
   1721         try self.genBody(self.air.getMainBody());
   1722         try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
   1723     }
   1724 
   1725     // Drop them off at the rbrace.
   1726     _ = try self.addInst(.{
   1727         .tag = .pseudo,
   1728         .ops = .pseudo_dbg_line_line_column,
   1729         .data = .{ .line_column = .{
   1730             .line = self.end_di_line,
   1731             .column = self.end_di_column,
   1732         } },
   1733     });
   1734 }
   1735 
   1736 fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
   1737     const mod = self.bin_file.options.module.?;
   1738     const ip = &mod.intern_pool;
   1739     const air_tags = self.air.instructions.items(.tag);
   1740 
   1741     for (body) |inst| {
   1742         if (builtin.mode == .Debug) {
   1743             const mir_inst: Mir.Inst.Index = @intCast(self.mir_instructions.len);
   1744             try self.mir_to_air_map.put(self.gpa, mir_inst, inst);
   1745         }
   1746 
   1747         if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue;
   1748         wip_mir_log.debug("{}", .{self.fmtAir(inst)});
   1749         verbose_tracking_log.debug("{}", .{self.fmtTracking()});
   1750 
   1751         const old_air_bookkeeping = self.air_bookkeeping;
   1752         try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1);
   1753         switch (air_tags[inst]) {
   1754             // zig fmt: off
   1755             .not,
   1756             => |tag| try self.airUnOp(inst, tag),
   1757 
   1758             .add,
   1759             .add_wrap,
   1760             .sub,
   1761             .sub_wrap,
   1762             .bool_and,
   1763             .bool_or,
   1764             .bit_and,
   1765             .bit_or,
   1766             .xor,
   1767             .min,
   1768             .max,
   1769             => |tag| try self.airBinOp(inst, tag),
   1770 
   1771             .ptr_add, .ptr_sub => |tag| try self.airPtrArithmetic(inst, tag),
   1772 
   1773             .shr, .shr_exact => try self.airShlShrBinOp(inst),
   1774             .shl, .shl_exact => try self.airShlShrBinOp(inst),
   1775 
   1776             .mul             => try self.airMulDivBinOp(inst),
   1777             .mul_wrap        => try self.airMulDivBinOp(inst),
   1778             .rem             => try self.airMulDivBinOp(inst),
   1779             .mod             => try self.airMulDivBinOp(inst),
   1780 
   1781             .add_sat         => try self.airAddSat(inst),
   1782             .sub_sat         => try self.airSubSat(inst),
   1783             .mul_sat         => try self.airMulSat(inst),
   1784             .shl_sat         => try self.airShlSat(inst),
   1785             .slice           => try self.airSlice(inst),
   1786 
   1787             .sin,
   1788             .cos,
   1789             .tan,
   1790             .exp,
   1791             .exp2,
   1792             .log,
   1793             .log2,
   1794             .log10,
   1795             .round,
   1796             => try self.airUnaryMath(inst),
   1797 
   1798             .floor => try self.airRound(inst, 0b1_0_01),
   1799             .ceil => try self.airRound(inst, 0b1_0_10),
   1800             .trunc_float => try self.airRound(inst, 0b1_0_11),
   1801             .sqrt => try self.airSqrt(inst),
   1802             .neg, .fabs => try self.airFloatSign(inst),
   1803 
   1804             .add_with_overflow => try self.airAddSubWithOverflow(inst),
   1805             .sub_with_overflow => try self.airAddSubWithOverflow(inst),
   1806             .mul_with_overflow => try self.airMulWithOverflow(inst),
   1807             .shl_with_overflow => try self.airShlWithOverflow(inst),
   1808 
   1809             .div_float, .div_trunc, .div_floor, .div_exact => try self.airMulDivBinOp(inst),
   1810 
   1811             .cmp_lt  => try self.airCmp(inst, .lt),
   1812             .cmp_lte => try self.airCmp(inst, .lte),
   1813             .cmp_eq  => try self.airCmp(inst, .eq),
   1814             .cmp_gte => try self.airCmp(inst, .gte),
   1815             .cmp_gt  => try self.airCmp(inst, .gt),
   1816             .cmp_neq => try self.airCmp(inst, .neq),
   1817 
   1818             .cmp_vector => try self.airCmpVector(inst),
   1819             .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst),
   1820 
   1821             .alloc           => try self.airAlloc(inst),
   1822             .ret_ptr         => try self.airRetPtr(inst),
   1823             .arg             => try self.airArg(inst),
   1824             .assembly        => try self.airAsm(inst),
   1825             .bitcast         => try self.airBitCast(inst),
   1826             .block           => try self.airBlock(inst),
   1827             .br              => try self.airBr(inst),
   1828             .trap            => try self.airTrap(),
   1829             .breakpoint      => try self.airBreakpoint(),
   1830             .ret_addr        => try self.airRetAddr(inst),
   1831             .frame_addr      => try self.airFrameAddress(inst),
   1832             .fence           => try self.airFence(inst),
   1833             .cond_br         => try self.airCondBr(inst),
   1834             .dbg_stmt        => try self.airDbgStmt(inst),
   1835             .fptrunc         => try self.airFptrunc(inst),
   1836             .fpext           => try self.airFpext(inst),
   1837             .intcast         => try self.airIntCast(inst),
   1838             .trunc           => try self.airTrunc(inst),
   1839             .int_from_bool     => try self.airIntFromBool(inst),
   1840             .is_non_null     => try self.airIsNonNull(inst),
   1841             .is_non_null_ptr => try self.airIsNonNullPtr(inst),
   1842             .is_null         => try self.airIsNull(inst),
   1843             .is_null_ptr     => try self.airIsNullPtr(inst),
   1844             .is_non_err      => try self.airIsNonErr(inst),
   1845             .is_non_err_ptr  => try self.airIsNonErrPtr(inst),
   1846             .is_err          => try self.airIsErr(inst),
   1847             .is_err_ptr      => try self.airIsErrPtr(inst),
   1848             .load            => try self.airLoad(inst),
   1849             .loop            => try self.airLoop(inst),
   1850             .int_from_ptr        => try self.airIntFromPtr(inst),
   1851             .ret             => try self.airRet(inst),
   1852             .ret_load        => try self.airRetLoad(inst),
   1853             .store           => try self.airStore(inst, false),
   1854             .store_safe      => try self.airStore(inst, true),
   1855             .struct_field_ptr=> try self.airStructFieldPtr(inst),
   1856             .struct_field_val=> try self.airStructFieldVal(inst),
   1857             .array_to_slice  => try self.airArrayToSlice(inst),
   1858             .float_from_int    => try self.airFloatFromInt(inst),
   1859             .int_from_float    => try self.airIntFromFloat(inst),
   1860             .cmpxchg_strong  => try self.airCmpxchg(inst),
   1861             .cmpxchg_weak    => try self.airCmpxchg(inst),
   1862             .atomic_rmw      => try self.airAtomicRmw(inst),
   1863             .atomic_load     => try self.airAtomicLoad(inst),
   1864             .memcpy          => try self.airMemcpy(inst),
   1865             .memset          => try self.airMemset(inst, false),
   1866             .memset_safe     => try self.airMemset(inst, true),
   1867             .set_union_tag   => try self.airSetUnionTag(inst),
   1868             .get_union_tag   => try self.airGetUnionTag(inst),
   1869             .clz             => try self.airClz(inst),
   1870             .ctz             => try self.airCtz(inst),
   1871             .popcount        => try self.airPopcount(inst),
   1872             .byte_swap       => try self.airByteSwap(inst),
   1873             .bit_reverse     => try self.airBitReverse(inst),
   1874             .tag_name        => try self.airTagName(inst),
   1875             .error_name      => try self.airErrorName(inst),
   1876             .splat           => try self.airSplat(inst),
   1877             .select          => try self.airSelect(inst),
   1878             .shuffle         => try self.airShuffle(inst),
   1879             .reduce          => try self.airReduce(inst),
   1880             .aggregate_init  => try self.airAggregateInit(inst),
   1881             .union_init      => try self.airUnionInit(inst),
   1882             .prefetch        => try self.airPrefetch(inst),
   1883             .mul_add         => try self.airMulAdd(inst),
   1884             .addrspace_cast  => return self.fail("TODO implement addrspace_cast", .{}),
   1885 
   1886             .@"try"          => try self.airTry(inst),
   1887             .try_ptr         => try self.airTryPtr(inst),
   1888 
   1889             .dbg_var_ptr,
   1890             .dbg_var_val,
   1891             => try self.airDbgVar(inst),
   1892 
   1893             .dbg_inline_begin,
   1894             .dbg_inline_end,
   1895             => try self.airDbgInline(inst),
   1896 
   1897             .dbg_block_begin,
   1898             .dbg_block_end,
   1899             => try self.airDbgBlock(inst),
   1900 
   1901             .call              => try self.airCall(inst, .auto),
   1902             .call_always_tail  => try self.airCall(inst, .always_tail),
   1903             .call_never_tail   => try self.airCall(inst, .never_tail),
   1904             .call_never_inline => try self.airCall(inst, .never_inline),
   1905 
   1906             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
   1907             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
   1908             .atomic_store_release   => try self.airAtomicStore(inst, .Release),
   1909             .atomic_store_seq_cst   => try self.airAtomicStore(inst, .SeqCst),
   1910 
   1911             .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0),
   1912             .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1),
   1913             .struct_field_ptr_index_2 => try self.airStructFieldPtrIndex(inst, 2),
   1914             .struct_field_ptr_index_3 => try self.airStructFieldPtrIndex(inst, 3),
   1915 
   1916             .field_parent_ptr => try self.airFieldParentPtr(inst),
   1917 
   1918             .switch_br       => try self.airSwitchBr(inst),
   1919             .slice_ptr       => try self.airSlicePtr(inst),
   1920             .slice_len       => try self.airSliceLen(inst),
   1921 
   1922             .ptr_slice_len_ptr => try self.airPtrSliceLenPtr(inst),
   1923             .ptr_slice_ptr_ptr => try self.airPtrSlicePtrPtr(inst),
   1924 
   1925             .array_elem_val      => try self.airArrayElemVal(inst),
   1926             .slice_elem_val      => try self.airSliceElemVal(inst),
   1927             .slice_elem_ptr      => try self.airSliceElemPtr(inst),
   1928             .ptr_elem_val        => try self.airPtrElemVal(inst),
   1929             .ptr_elem_ptr        => try self.airPtrElemPtr(inst),
   1930 
   1931             .inferred_alloc, .inferred_alloc_comptime => unreachable,
   1932             .unreach  => if (self.wantSafety()) try self.airTrap() else self.finishAirBookkeeping(),
   1933 
   1934             .optional_payload           => try self.airOptionalPayload(inst),
   1935             .optional_payload_ptr       => try self.airOptionalPayloadPtr(inst),
   1936             .optional_payload_ptr_set   => try self.airOptionalPayloadPtrSet(inst),
   1937             .unwrap_errunion_err        => try self.airUnwrapErrUnionErr(inst),
   1938             .unwrap_errunion_payload    => try self.airUnwrapErrUnionPayload(inst),
   1939             .unwrap_errunion_err_ptr    => try self.airUnwrapErrUnionErrPtr(inst),
   1940             .unwrap_errunion_payload_ptr=> try self.airUnwrapErrUnionPayloadPtr(inst),
   1941             .errunion_payload_ptr_set   => try self.airErrUnionPayloadPtrSet(inst),
   1942             .err_return_trace           => try self.airErrReturnTrace(inst),
   1943             .set_err_return_trace       => try self.airSetErrReturnTrace(inst),
   1944             .save_err_return_trace_index=> try self.airSaveErrReturnTraceIndex(inst),
   1945 
   1946             .wrap_optional         => try self.airWrapOptional(inst),
   1947             .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
   1948             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
   1949 
   1950             .add_optimized,
   1951             .sub_optimized,
   1952             .mul_optimized,
   1953             .div_float_optimized,
   1954             .div_trunc_optimized,
   1955             .div_floor_optimized,
   1956             .div_exact_optimized,
   1957             .rem_optimized,
   1958             .mod_optimized,
   1959             .neg_optimized,
   1960             .cmp_lt_optimized,
   1961             .cmp_lte_optimized,
   1962             .cmp_eq_optimized,
   1963             .cmp_gte_optimized,
   1964             .cmp_gt_optimized,
   1965             .cmp_neq_optimized,
   1966             .cmp_vector_optimized,
   1967             .reduce_optimized,
   1968             .int_from_float_optimized,
   1969             => return self.fail("TODO implement optimized float mode", .{}),
   1970 
   1971             .add_safe,
   1972             .sub_safe,
   1973             .mul_safe,
   1974             => return self.fail("TODO implement safety_checked_instructions", .{}),
   1975 
   1976             .is_named_enum_value => return self.fail("TODO implement is_named_enum_value", .{}),
   1977             .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}),
   1978             .vector_store_elem => return self.fail("TODO implement vector_store_elem", .{}),
   1979 
   1980             .c_va_arg => return self.fail("TODO implement c_va_arg", .{}),
   1981             .c_va_copy => return self.fail("TODO implement c_va_copy", .{}),
   1982             .c_va_end => return self.fail("TODO implement c_va_end", .{}),
   1983             .c_va_start => return self.fail("TODO implement c_va_start", .{}),
   1984 
   1985             .wasm_memory_size => unreachable,
   1986             .wasm_memory_grow => unreachable,
   1987 
   1988             .work_item_id => unreachable,
   1989             .work_group_size => unreachable,
   1990             .work_group_id => unreachable,
   1991             // zig fmt: on
   1992         }
   1993 
   1994         assert(!self.register_manager.lockedRegsExist());
   1995 
   1996         if (std.debug.runtime_safety) {
   1997             if (self.air_bookkeeping < old_air_bookkeeping + 1) {
   1998                 std.debug.panic("in codegen.zig, handling of AIR instruction %{d} ('{}') did not do proper bookkeeping. Look for a missing call to finishAir.", .{ inst, air_tags[inst] });
   1999             }
   2000 
   2001             { // check consistency of tracked registers
   2002                 var it = self.register_manager.free_registers.iterator(.{ .kind = .unset });
   2003                 while (it.next()) |index| {
   2004                     const tracked_inst = self.register_manager.registers[index];
   2005                     const tracking = self.getResolvedInstValue(tracked_inst);
   2006                     assert(RegisterManager.indexOfRegIntoTracked(tracking.getReg().?).? == index);
   2007                 }
   2008             }
   2009         }
   2010     }
   2011     verbose_tracking_log.debug("{}", .{self.fmtTracking()});
   2012 }
   2013 
   2014 fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void {
   2015     const mod = self.bin_file.options.module.?;
   2016     switch (lazy_sym.ty.zigTypeTag(mod)) {
   2017         .Enum => {
   2018             const enum_ty = lazy_sym.ty;
   2019             wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(self.bin_file.options.module.?)});
   2020 
   2021             const param_regs = abi.getCAbiIntParamRegs(self.target.*);
   2022             const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*);
   2023             defer for (param_locks) |lock| self.register_manager.unlockReg(lock);
   2024 
   2025             const ret_reg = param_regs[0];
   2026             const enum_mcv = MCValue{ .register = param_regs[1] };
   2027 
   2028             var exitlude_jump_relocs = try self.gpa.alloc(u32, enum_ty.enumFieldCount(mod));
   2029             defer self.gpa.free(exitlude_jump_relocs);
   2030 
   2031             const data_reg = try self.register_manager.allocReg(null, gp);
   2032             const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
   2033             defer self.register_manager.unlockReg(data_lock);
   2034             try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty });
   2035 
   2036             var data_off: i32 = 0;
   2037             for (exitlude_jump_relocs, 0..) |*exitlude_jump_reloc, index_usize| {
   2038                 const index: u32 = @intCast(index_usize);
   2039                 const tag_name = mod.intern_pool.stringToSlice(enum_ty.enumFields(mod)[index_usize]);
   2040                 const tag_val = try mod.enumValueFieldIndex(enum_ty, index);
   2041                 const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val });
   2042                 try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
   2043                 const skip_reloc = try self.asmJccReloc(undefined, .ne);
   2044 
   2045                 try self.genSetMem(
   2046                     .{ .reg = ret_reg },
   2047                     0,
   2048                     Type.usize,
   2049                     .{ .register_offset = .{ .reg = data_reg, .off = data_off } },
   2050                 );
   2051                 try self.genSetMem(.{ .reg = ret_reg }, 8, Type.usize, .{ .immediate = tag_name.len });
   2052 
   2053                 exitlude_jump_reloc.* = try self.asmJmpReloc(undefined);
   2054                 try self.performReloc(skip_reloc);
   2055 
   2056                 data_off += @intCast(tag_name.len + 1);
   2057             }
   2058 
   2059             try self.airTrap();
   2060 
   2061             for (exitlude_jump_relocs) |reloc| try self.performReloc(reloc);
   2062             try self.asmOpOnly(.{ ._, .ret });
   2063         },
   2064         else => return self.fail(
   2065             "TODO implement {s} for {}",
   2066             .{ @tagName(lazy_sym.kind), lazy_sym.ty.fmt(self.bin_file.options.module.?) },
   2067         ),
   2068     }
   2069 }
   2070 
   2071 fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void {
   2072     const reg = value.getReg() orelse return;
   2073     if (self.register_manager.isRegFree(reg)) {
   2074         self.register_manager.getRegAssumeFree(reg, inst);
   2075     }
   2076 }
   2077 
   2078 fn freeValue(self: *Self, value: MCValue) void {
   2079     switch (value) {
   2080         .register => |reg| {
   2081             self.register_manager.freeReg(reg);
   2082         },
   2083         .register_offset => |reg_off| {
   2084             self.register_manager.freeReg(reg_off.reg);
   2085         },
   2086         .register_overflow => |reg_ov| {
   2087             self.register_manager.freeReg(reg_ov.reg);
   2088             self.eflags_inst = null;
   2089         },
   2090         .eflags => {
   2091             self.eflags_inst = null;
   2092         },
   2093         else => {}, // TODO process stack allocation death
   2094     }
   2095 }
   2096 
   2097 fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) void {
   2098     if (bt.feed()) if (Air.refToIndex(operand)) |inst| self.processDeath(inst);
   2099 }
   2100 
   2101 /// Asserts there is already capacity to insert into top branch inst_table.
   2102 fn processDeath(self: *Self, inst: Air.Inst.Index) void {
   2103     self.inst_tracking.getPtr(inst).?.die(self, inst);
   2104 }
   2105 
   2106 /// Called when there are no operands, and the instruction is always unreferenced.
   2107 fn finishAirBookkeeping(self: *Self) void {
   2108     if (std.debug.runtime_safety) {
   2109         self.air_bookkeeping += 1;
   2110     }
   2111 }
   2112 
   2113 fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void {
   2114     if (self.liveness.isUnused(inst)) switch (result) {
   2115         .none, .dead, .unreach => {},
   2116         else => unreachable, // Why didn't the result die?
   2117     } else {
   2118         tracking_log.debug("%{d} => {} (birth)", .{ inst, result });
   2119         self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result));
   2120         // In some cases, an operand may be reused as the result.
   2121         // If that operand died and was a register, it was freed by
   2122         // processDeath, so we have to "re-allocate" the register.
   2123         self.getValue(result, inst);
   2124     }
   2125     self.finishAirBookkeeping();
   2126 }
   2127 
   2128 fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void {
   2129     var tomb_bits = self.liveness.getTombBits(inst);
   2130     for (operands) |op| {
   2131         const dies = @as(u1, @truncate(tomb_bits)) != 0;
   2132         tomb_bits >>= 1;
   2133         if (!dies) continue;
   2134         self.processDeath(Air.refToIndexAllowNone(op) orelse continue);
   2135     }
   2136     self.finishAirResult(inst, result);
   2137 }
   2138 
   2139 const FrameLayout = struct {
   2140     stack_mask: u32,
   2141     stack_adjust: u32,
   2142     save_reg_list: Mir.RegisterList,
   2143 };
   2144 
   2145 fn setFrameLoc(
   2146     self: *Self,
   2147     frame_index: FrameIndex,
   2148     base: Register,
   2149     offset: *i32,
   2150     comptime aligned: bool,
   2151 ) void {
   2152     const frame_i = @intFromEnum(frame_index);
   2153     if (aligned) {
   2154         const alignment = @as(i32, 1) << self.frame_allocs.items(.abi_align)[frame_i];
   2155         offset.* = mem.alignForward(i32, offset.*, alignment);
   2156     }
   2157     self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* });
   2158     offset.* += self.frame_allocs.items(.abi_size)[frame_i];
   2159 }
   2160 
   2161 fn computeFrameLayout(self: *Self) !FrameLayout {
   2162     const frame_allocs_len = self.frame_allocs.len;
   2163     try self.frame_locs.resize(self.gpa, frame_allocs_len);
   2164     const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count);
   2165     defer self.gpa.free(stack_frame_order);
   2166 
   2167     const frame_size = self.frame_allocs.items(.abi_size);
   2168     const frame_align = self.frame_allocs.items(.abi_align);
   2169     const frame_offset = self.frame_locs.items(.disp);
   2170 
   2171     for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index|
   2172         frame_order.* = @enumFromInt(frame_index);
   2173     {
   2174         const SortContext = struct {
   2175             frame_align: @TypeOf(frame_align),
   2176             pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool {
   2177                 return context.frame_align[@intFromEnum(lhs)] > context.frame_align[@intFromEnum(rhs)];
   2178             }
   2179         };
   2180         const sort_context = SortContext{ .frame_align = frame_align };
   2181         mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan);
   2182     }
   2183 
   2184     const call_frame_align = frame_align[@intFromEnum(FrameIndex.call_frame)];
   2185     const stack_frame_align = frame_align[@intFromEnum(FrameIndex.stack_frame)];
   2186     const args_frame_align = frame_align[@intFromEnum(FrameIndex.args_frame)];
   2187     const needed_align = @max(call_frame_align, stack_frame_align);
   2188     const need_align_stack = needed_align > args_frame_align;
   2189 
   2190     // Create list of registers to save in the prologue.
   2191     // TODO handle register classes
   2192     var save_reg_list = Mir.RegisterList{};
   2193     const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*);
   2194     for (callee_preserved_regs) |reg| {
   2195         if (self.register_manager.isRegAllocated(reg)) {
   2196             save_reg_list.push(callee_preserved_regs, reg);
   2197         }
   2198     }
   2199 
   2200     var rbp_offset: i32 = @intCast(save_reg_list.count() * 8);
   2201     self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
   2202     self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
   2203     self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
   2204     const stack_frame_align_offset =
   2205         if (need_align_stack) 0 else frame_offset[@intFromEnum(FrameIndex.args_frame)];
   2206 
   2207     var rsp_offset: i32 = 0;
   2208     self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
   2209     self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true);
   2210     for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true);
   2211     rsp_offset += stack_frame_align_offset;
   2212     rsp_offset = mem.alignForward(i32, rsp_offset, @as(i32, 1) << needed_align);
   2213     rsp_offset -= stack_frame_align_offset;
   2214     frame_size[@intFromEnum(FrameIndex.call_frame)] =
   2215         @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.stack_frame)]);
   2216 
   2217     return .{
   2218         .stack_mask = @as(u32, math.maxInt(u32)) << (if (need_align_stack) needed_align else 0),
   2219         .stack_adjust = @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.call_frame)]),
   2220         .save_reg_list = save_reg_list,
   2221     };
   2222 }
   2223 
   2224 fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) u32 {
   2225     const alloc_align = @as(u32, 1) << self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_align;
   2226     return @min(alloc_align, @as(u32, @bitCast(frame_addr.off)) & (alloc_align - 1));
   2227 }
   2228 
   2229 fn getFrameAddrSize(self: *Self, frame_addr: FrameAddr) u32 {
   2230     return self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_size - @as(u31, @intCast(frame_addr.off));
   2231 }
   2232 
   2233 fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex {
   2234     const frame_allocs_slice = self.frame_allocs.slice();
   2235     const frame_size = frame_allocs_slice.items(.abi_size);
   2236     const frame_align = frame_allocs_slice.items(.abi_align);
   2237 
   2238     const stack_frame_align = &frame_align[@intFromEnum(FrameIndex.stack_frame)];
   2239     stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align);
   2240 
   2241     for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| {
   2242         const abi_size = frame_size[@intFromEnum(frame_index)];
   2243         if (abi_size != alloc.abi_size) continue;
   2244         const abi_align = &frame_align[@intFromEnum(frame_index)];
   2245         abi_align.* = @max(abi_align.*, alloc.abi_align);
   2246 
   2247         _ = self.free_frame_indices.swapRemoveAt(free_i);
   2248         return frame_index;
   2249     }
   2250     const frame_index: FrameIndex = @enumFromInt(self.frame_allocs.len);
   2251     try self.frame_allocs.append(self.gpa, alloc);
   2252     return frame_index;
   2253 }
   2254 
   2255 /// Use a pointer instruction as the basis for allocating stack memory.
   2256 fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex {
   2257     const mod = self.bin_file.options.module.?;
   2258     const ptr_ty = self.typeOfIndex(inst);
   2259     const val_ty = ptr_ty.childType(mod);
   2260     return self.allocFrameIndex(FrameAlloc.init(.{
   2261         .size = math.cast(u32, val_ty.abiSize(mod)) orelse {
   2262             return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(mod)});
   2263         },
   2264         .alignment = @max(ptr_ty.ptrAlignment(mod), 1),
   2265     }));
   2266 }
   2267 
   2268 fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
   2269     return self.allocRegOrMemAdvanced(self.typeOfIndex(inst), inst, reg_ok);
   2270 }
   2271 
   2272 fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue {
   2273     return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok);
   2274 }
   2275 
   2276 fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
   2277     const mod = self.bin_file.options.module.?;
   2278     const abi_size = math.cast(u32, ty.abiSize(mod)) orelse {
   2279         return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)});
   2280     };
   2281 
   2282     if (reg_ok) need_mem: {
   2283         if (abi_size <= @as(u32, switch (ty.zigTypeTag(mod)) {
   2284             .Float => switch (ty.floatBits(self.target.*)) {
   2285                 16, 32, 64, 128 => 16,
   2286                 80 => break :need_mem,
   2287                 else => unreachable,
   2288             },
   2289             .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   2290                 .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   2291                     16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16,
   2292                     80 => break :need_mem,
   2293                     else => unreachable,
   2294                 },
   2295                 else => if (self.hasFeature(.avx)) 32 else 16,
   2296             },
   2297             else => 8,
   2298         })) {
   2299             if (self.register_manager.tryAllocReg(inst, regClassForType(ty, mod))) |reg| {
   2300                 return MCValue{ .register = registerAlias(reg, abi_size) };
   2301             }
   2302         }
   2303     }
   2304 
   2305     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, mod));
   2306     return .{ .load_frame = .{ .index = frame_index } };
   2307 }
   2308 
   2309 fn regClassForType(ty: Type, mod: *Module) RegisterManager.RegisterBitSet {
   2310     return switch (ty.zigTypeTag(mod)) {
   2311         .Float, .Vector => sse,
   2312         else => gp,
   2313     };
   2314 }
   2315 
/// Snapshot of register allocation state, filled in by `initRetroactiveState` and
/// `saveRetroactiveState` and consumed by `restoreState`.
const State = struct {
    /// For every register index that was not free when the state was saved,
    /// the instruction recorded for it in the register manager.
    /// Entries for free registers are left unset by `saveRetroactiveState`.
    registers: RegisterManager.TrackedRegisters,
    /// For every such register index, a copy of the occupying instruction's
    /// tracking entry at save time.
    reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
    /// Copy of the register manager's `free_registers` set at save time.
    free_registers: RegisterManager.RegisterBitSet,
    /// Number of entries in `inst_tracking` when the state was initialized.
    inst_tracking_len: u32,
    /// Value of `scope_generation` when the state was initialized.
    scope_generation: u32,
};
   2323 
   2324 fn initRetroactiveState(self: *Self) State {
   2325     var state: State = undefined;
   2326     state.inst_tracking_len = @intCast(self.inst_tracking.count());
   2327     state.scope_generation = self.scope_generation;
   2328     return state;
   2329 }
   2330 
   2331 fn saveRetroactiveState(self: *Self, state: *State) !void {
   2332     try self.spillEflagsIfOccupied();
   2333     const free_registers = self.register_manager.free_registers;
   2334     var it = free_registers.iterator(.{ .kind = .unset });
   2335     while (it.next()) |index| {
   2336         const tracked_inst = self.register_manager.registers[index];
   2337         state.registers[index] = tracked_inst;
   2338         state.reg_tracking[index] = self.inst_tracking.get(tracked_inst).?;
   2339     }
   2340     state.free_registers = free_registers;
   2341 }
   2342 
   2343 fn saveState(self: *Self) !State {
   2344     var state = self.initRetroactiveState();
   2345     try self.saveRetroactiveState(&state);
   2346     return state;
   2347 }
   2348 
/// Reconciles the current register allocation with the snapshot `state`.
///
/// `deaths` are handed to `processDeath` before the registers are reconciled.
/// The comptime `opts` select which parts of the work are performed:
/// - `close_scope`: call `die` on every tracking entry added after the snapshot
///   (index >= `state.inst_tracking_len`) and drop those entries.
/// - `resurrect`: call `resurrect` with `state.scope_generation` on every tracking
///   entry that already existed at snapshot time.
/// - `emit_instructions`: for each tracked register, `spill` the current occupant
///   and `materialize` the snapshot's occupant; also `spill` `eflags_inst` if set.
/// - `update_tracking`: apply the matching bookkeeping (`trackSpill`, re-assigning
///   the register in the register manager, `trackMaterialize`), clear
///   `eflags_inst`, and, with runtime safety, assert that the result matches `state`.
fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, comptime opts: struct {
    emit_instructions: bool,
    update_tracking: bool,
    resurrect: bool,
    close_scope: bool,
}) !void {
    if (opts.close_scope) {
        // Entries past the snapshot length were added after the snapshot was taken.
        for (
            self.inst_tracking.keys()[state.inst_tracking_len..],
            self.inst_tracking.values()[state.inst_tracking_len..],
        ) |inst, *tracking| tracking.die(self, inst);
        self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
    }

    if (opts.resurrect) for (
        self.inst_tracking.keys()[0..state.inst_tracking_len],
        self.inst_tracking.values()[0..state.inst_tracking_len],
    ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
    for (deaths) |death| self.processDeath(death);

    for (0..state.registers.len) |index| {
        // A register counts as unoccupied (null) when its bit is set in the
        // corresponding free set; the `registers` entry is only read otherwise.
        const current_maybe_inst = if (self.register_manager.free_registers.isSet(index))
            null
        else
            self.register_manager.registers[index];
        const target_maybe_inst = if (state.free_registers.isSet(index))
            null
        else
            state.registers[index];
        // The snapshot's occupant must be an instruction that was already
        // tracked when the snapshot was taken.
        if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst|
            assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len);
        if (opts.emit_instructions) {
            // Spill the current occupant first, then materialize the snapshot's
            // occupant using the tracking saved for this register.
            if (current_maybe_inst) |current_inst| {
                try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst);
            }
            if (target_maybe_inst) |target_inst| {
                try self.inst_tracking.getPtr(target_inst).?.materialize(
                    self,
                    target_inst,
                    state.reg_tracking[index],
                );
            }
        }
        if (opts.update_tracking) {
            if (current_maybe_inst) |current_inst| {
                self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst);
            }
            {
                // Free the register and immediately hand it to the snapshot's
                // occupant (which may be null).
                const reg = RegisterManager.regAtTrackedIndex(@intCast(index));
                self.register_manager.freeReg(reg);
                self.register_manager.getRegAssumeFree(reg, target_maybe_inst);
            }
            if (target_maybe_inst) |target_inst| {
                self.inst_tracking.getPtr(target_inst).?.trackMaterialize(
                    target_inst,
                    state.reg_tracking[index],
                );
            }
        }
    }
    // `saveRetroactiveState` spills eflags before saving, so do the same here
    // for whatever currently occupies them.
    if (opts.emit_instructions) if (self.eflags_inst) |inst|
        try self.inst_tracking.getPtr(inst).?.spill(self, inst);
    if (opts.update_tracking) if (self.eflags_inst) |inst| {
        self.eflags_inst = null;
        self.inst_tracking.getPtr(inst).?.trackSpill(self, inst);
    };

    if (opts.update_tracking and std.debug.runtime_safety) {
        // Sanity check: the register manager must now agree with the snapshot.
        assert(self.eflags_inst == null);
        assert(self.register_manager.free_registers.eql(state.free_registers));
        var used_reg_it = state.free_registers.iterator(.{ .kind = .unset });
        while (used_reg_it.next()) |index|
            assert(self.register_manager.registers[index] == state.registers[index]);
    }
}
   2424 
   2425 pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void {
   2426     const tracking = self.inst_tracking.getPtr(inst).?;
   2427     assert(tracking.getReg().?.id() == reg.id());
   2428     try tracking.spill(self, inst);
   2429     tracking.trackSpill(self, inst);
   2430 }
   2431 
   2432 pub fn spillEflagsIfOccupied(self: *Self) !void {
   2433     if (self.eflags_inst) |inst| {
   2434         self.eflags_inst = null;
   2435         const tracking = self.inst_tracking.getPtr(inst).?;
   2436         assert(tracking.getCondition() != null);
   2437         try tracking.spill(self, inst);
   2438         tracking.trackSpill(self, inst);
   2439     }
   2440 }
   2441 
   2442 pub fn spillRegisters(self: *Self, registers: []const Register) !void {
   2443     for (registers) |reg| {
   2444         try self.register_manager.getReg(reg, null);
   2445     }
   2446 }
   2447 
   2448 /// Copies a value to a register without tracking the register. The register is not considered
   2449 /// allocated. A second call to `copyToTmpRegister` may return the same register.
   2450 /// This can have a side effect of spilling instructions to the stack to free up a register.
   2451 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
   2452     const mod = self.bin_file.options.module.?;
   2453     const reg = try self.register_manager.allocReg(null, regClassForType(ty, mod));
   2454     try self.genSetReg(reg, ty, mcv);
   2455     return reg;
   2456 }
   2457 
   2458 /// Allocates a new register and copies `mcv` into it.
   2459 /// `reg_owner` is the instruction that gets associated with the register in the register table.
   2460 /// This can have a side effect of spilling instructions to the stack to free up a register.
   2461 /// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
   2462 fn copyToRegisterWithInstTracking(
   2463     self: *Self,
   2464     reg_owner: Air.Inst.Index,
   2465     ty: Type,
   2466     mcv: MCValue,
   2467 ) !MCValue {
   2468     const mod = self.bin_file.options.module.?;
   2469     const reg: Register = try self.register_manager.allocReg(reg_owner, regClassForType(ty, mod));
   2470     try self.genSetReg(reg, ty, mcv);
   2471     return MCValue{ .register = reg };
   2472 }
   2473 
   2474 fn airAlloc(self: *Self, inst: Air.Inst.Index) !void {
   2475     const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } };
   2476     return self.finishAir(inst, result, .{ .none, .none, .none });
   2477 }
   2478 
   2479 fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
   2480     const result: MCValue = switch (self.ret_mcv.long) {
   2481         else => unreachable,
   2482         .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
   2483         .load_frame => .{ .register_offset = .{
   2484             .reg = (try self.copyToRegisterWithInstTracking(
   2485                 inst,
   2486                 self.typeOfIndex(inst),
   2487                 self.ret_mcv.long,
   2488             )).register,
   2489             .off = self.ret_mcv.short.indirect.off,
   2490         } },
   2491     };
   2492     return self.finishAir(inst, result, .{ .none, .none, .none });
   2493 }
   2494 
   2495 fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
   2496     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2497     const dst_ty = self.typeOfIndex(inst);
   2498     const dst_bits = dst_ty.floatBits(self.target.*);
   2499     const src_ty = self.typeOf(ty_op.operand);
   2500     const src_bits = src_ty.floatBits(self.target.*);
   2501 
   2502     const src_mcv = try self.resolveInst(ty_op.operand);
   2503     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   2504         src_mcv
   2505     else
   2506         try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   2507     const dst_reg = dst_mcv.getReg().?.to128();
   2508     const dst_lock = self.register_manager.lockReg(dst_reg);
   2509     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   2510 
   2511     if (dst_bits == 16 and self.hasFeature(.f16c)) {
   2512         switch (src_bits) {
   2513             32 => {
   2514                 const mat_src_reg = if (src_mcv.isRegister())
   2515                     src_mcv.getReg().?
   2516                 else
   2517                     try self.copyToTmpRegister(src_ty, src_mcv);
   2518                 try self.asmRegisterRegisterImmediate(
   2519                     .{ .v_, .cvtps2ph },
   2520                     dst_reg,
   2521                     mat_src_reg.to128(),
   2522                     Immediate.u(0b1_00),
   2523                 );
   2524             },
   2525             else => return self.fail("TODO implement airFptrunc from {} to {}", .{
   2526                 src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2527             }),
   2528         }
   2529     } else if (src_bits == 64 and dst_bits == 32) {
   2530         if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   2531             .{ .v_ss, .cvtsd2 },
   2532             dst_reg,
   2533             dst_reg,
   2534             src_mcv.mem(.qword),
   2535         ) else try self.asmRegisterRegisterRegister(
   2536             .{ .v_ss, .cvtsd2 },
   2537             dst_reg,
   2538             dst_reg,
   2539             (if (src_mcv.isRegister())
   2540                 src_mcv.getReg().?
   2541             else
   2542                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2543         ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
   2544             .{ ._ss, .cvtsd2 },
   2545             dst_reg,
   2546             src_mcv.mem(.qword),
   2547         ) else try self.asmRegisterRegister(
   2548             .{ ._ss, .cvtsd2 },
   2549             dst_reg,
   2550             (if (src_mcv.isRegister())
   2551                 src_mcv.getReg().?
   2552             else
   2553                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2554         );
   2555     } else return self.fail("TODO implement airFptrunc from {} to {}", .{
   2556         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2557     });
   2558     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2559 }
   2560 
   2561 fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
   2562     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2563     const dst_ty = self.typeOfIndex(inst);
   2564     const dst_bits = dst_ty.floatBits(self.target.*);
   2565     const src_ty = self.typeOf(ty_op.operand);
   2566     const src_bits = src_ty.floatBits(self.target.*);
   2567 
   2568     const src_mcv = try self.resolveInst(ty_op.operand);
   2569     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   2570         src_mcv
   2571     else
   2572         try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   2573     const dst_reg = dst_mcv.getReg().?.to128();
   2574     const dst_lock = self.register_manager.lockReg(dst_reg);
   2575     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   2576 
   2577     if (src_bits == 16 and self.hasFeature(.f16c)) {
   2578         const mat_src_reg = if (src_mcv.isRegister())
   2579             src_mcv.getReg().?
   2580         else
   2581             try self.copyToTmpRegister(src_ty, src_mcv);
   2582         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
   2583         switch (dst_bits) {
   2584             32 => {},
   2585             64 => try self.asmRegisterRegisterRegister(.{ .v_sd, .cvtss2 }, dst_reg, dst_reg, dst_reg),
   2586             else => return self.fail("TODO implement airFpext from {} to {}", .{
   2587                 src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2588             }),
   2589         }
   2590     } else if (src_bits == 32 and dst_bits == 64) {
   2591         if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   2592             .{ .v_sd, .cvtss2 },
   2593             dst_reg,
   2594             dst_reg,
   2595             src_mcv.mem(.dword),
   2596         ) else try self.asmRegisterRegisterRegister(
   2597             .{ .v_sd, .cvtss2 },
   2598             dst_reg,
   2599             dst_reg,
   2600             (if (src_mcv.isRegister())
   2601                 src_mcv.getReg().?
   2602             else
   2603                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2604         ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
   2605             .{ ._sd, .cvtss2 },
   2606             dst_reg,
   2607             src_mcv.mem(.dword),
   2608         ) else try self.asmRegisterRegister(
   2609             .{ ._sd, .cvtss2 },
   2610             dst_reg,
   2611             (if (src_mcv.isRegister())
   2612                 src_mcv.getReg().?
   2613             else
   2614                 try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
   2615         );
   2616     } else return self.fail("TODO implement airFpext from {} to {}", .{
   2617         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   2618     });
   2619     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2620 }
   2621 
/// Lowers an integer cast between integer types of possibly different widths and
/// signedness.
///
/// Narrowing or same-width casts return the (possibly reused) value, re-aliased to
/// the destination's ABI size when it lives in a register. Widening casts of a
/// register value call `truncateRegister` with the source type; widening casts of
/// a value in memory fix up the source's highest 64-bit limb and fill any
/// additional destination limbs with the extension value.
fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
    const mod = self.bin_file.options.module.?;
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const result: MCValue = result: {
        const src_ty = self.typeOf(ty_op.operand);
        const src_int_info = src_ty.intInfo(mod);

        const dst_ty = self.typeOfIndex(inst);
        const dst_int_info = dst_ty.intInfo(mod);
        const abi_size: u32 = @intCast(dst_ty.abiSize(mod));

        // The narrower of the two types; used when copying the source into a new destination.
        const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
        // Signedness used for extension: the destination's when the source is
        // signed, otherwise unsigned (the source's own signedness).
        const extend = switch (src_int_info.signedness) {
            .signed => dst_int_info,
            .unsigned => src_int_info,
        }.signedness;

        const src_mcv = try self.resolveInst(ty_op.operand);
        // Number of bits available at the source's current location: registers
        // count as 64 bits, frame slots as what remains of their allocation.
        const src_storage_bits = switch (src_mcv) {
            .register, .register_offset => 64,
            .load_frame => |frame_addr| self.getFrameAddrSize(frame_addr) * 8,
            else => src_int_info.bits,
        };

        // Reuse the operand's location only if the destination fits in it;
        // otherwise allocate a new location and copy the source over.
        const dst_mcv = if (dst_int_info.bits <= src_storage_bits and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
            const dst_mcv = try self.allocRegOrMem(inst, true);
            try self.genCopy(min_ty, dst_mcv, src_mcv);
            break :dst dst_mcv;
        };

        // Not a widening cast: nothing to extend.
        if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
            .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }
        else
            dst_mcv;

        if (dst_mcv.isRegister()) {
            // NOTE(review): presumably `truncateRegister` normalizes the bits above
            // the source type's width according to its signedness — confirm.
            try self.truncateRegister(src_ty, dst_mcv.getReg().?);
            break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) };
        }

        // The value lives in memory; work in units of 64-bit limbs.
        const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
        const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;

        // Load the highest limb that belongs to the source value.
        const high_mcv = dst_mcv.address().offset((src_limbs_len - 1) * 8).deref();
        const high_reg = try self.copyToTmpRegister(switch (src_int_info.signedness) {
            .signed => Type.isize,
            .unsigned => Type.usize,
        }, high_mcv);
        const high_lock = self.register_manager.lockRegAssumeUnused(high_reg);
        defer self.register_manager.unlockReg(high_lock);

        // When the source does not fill its highest limb, fix up that limb's
        // unused bits and store it back.
        const high_bits = src_int_info.bits % 64;
        if (high_bits > 0) {
            const high_ty = try mod.intType(extend, high_bits);
            try self.truncateRegister(high_ty, high_reg);
            try self.genCopy(Type.usize, high_mcv, .{ .register = high_reg });
        }

        // Fill the destination limbs beyond the source with the extension value:
        // zero for unsigned extension, or the high limb shifted right by 63 for
        // signed extension (`.sa` with `._r` — presumably an arithmetic shift).
        if (dst_limbs_len > src_limbs_len) try self.genInlineMemset(
            dst_mcv.address().offset(src_limbs_len * 8),
            switch (extend) {
                .signed => extend: {
                    const extend_mcv = MCValue{ .register = high_reg };
                    try self.genShiftBinOpMir(
                        .{ ._r, .sa },
                        Type.isize,
                        extend_mcv,
                        .{ .immediate = 63 },
                    );
                    break :extend extend_mcv;
                },
                .unsigned => .{ .immediate = 0 },
            },
            .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 },
        );

        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
   2703 
/// Lowers an integer truncation, for scalars and for a limited set of vectors.
///
/// The result always lives in a register: the operand's register when it can be
/// reused, otherwise a new register owned by `inst`. Vector truncation is only
/// implemented for 16 -> 8 and 32 -> 16 bit elements, subject to vector length and
/// CPU features; scalar truncation is only implemented for destination ABI sizes
/// of at most 8 bytes. Unsupported cases fail with a TODO error.
fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
    const mod = self.bin_file.options.module.?;
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;

    const dst_ty = self.typeOfIndex(inst);
    const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
    const src_ty = self.typeOf(ty_op.operand);
    const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));

    const result = result: {
        const src_mcv = try self.resolveInst(ty_op.operand);
        // Lock the source register, if any, for the rest of this block.
        const src_lock =
            if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
        defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);

        if (dst_ty.zigTypeTag(mod) == .Vector) {
            assert(src_ty.zigTypeTag(mod) == .Vector and dst_ty.vectorLen(mod) == src_ty.vectorLen(mod));
            const dst_info = dst_ty.childType(mod).intInfo(mod);
            const src_info = src_ty.childType(mod).intInfo(mod);
            // Pick the pack instruction from the element widths, the vector
            // length and the available features; null means "not implemented".
            const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_info.bits) {
                8 => switch (src_info.bits) {
                    16 => switch (dst_ty.vectorLen(mod)) {
                        1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
                        9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null,
                        else => null,
                    },
                    else => null,
                },
                16 => switch (src_info.bits) {
                    32 => switch (dst_ty.vectorLen(mod)) {
                        1...4 => if (self.hasFeature(.avx))
                            .{ .vp_w, .ackusd }
                        else if (self.hasFeature(.sse4_1))
                            .{ .p_w, .ackusd }
                        else
                            null,
                        5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null,
                        else => null,
                    },
                    else => null,
                },
                else => null,
            }) orelse return self.fail("TODO implement airTrunc for {}", .{
                dst_ty.fmt(self.bin_file.options.module.?),
            });

            // Build a constant vector of source-typed elements in which only the
            // low `dst_info.bits` bits of every element are set.
            const elem_ty = src_ty.childType(mod);
            const mask_val = try mod.intValue(elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits));

            // The mask vector is 256 bits wide when the source is larger than
            // 16 bytes and 128 bits wide otherwise.
            const splat_ty = try mod.vectorType(.{
                .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)),
                .child = elem_ty.ip_index,
            });
            const splat_abi_size: u32 = @intCast(splat_ty.abiSize(mod));

            const splat_val = try mod.intern(.{ .aggregate = .{
                .ty = splat_ty.ip_index,
                .storage = .{ .repeated_elem = mask_val.ip_index },
            } });

            // Obtain an address for the mask that can be dereferenced as a memory
            // operand, loading the address into a temporary register if needed.
            const splat_mcv = try self.genTypedValue(.{ .ty = splat_ty, .val = splat_val.toValue() });
            const splat_addr_mcv: MCValue = switch (splat_mcv) {
                .memory, .indirect, .load_frame => splat_mcv.address(),
                else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) },
            };

            // AND the value with the mask, then apply the selected pack
            // instruction with the destination as every operand.
            const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size);
            if (self.hasFeature(.avx)) {
                try self.asmRegisterRegisterMemory(
                    .{ .vp_, .@"and" },
                    dst_reg,
                    dst_reg,
                    splat_addr_mcv.deref().mem(Memory.PtrSize.fromSize(splat_abi_size)),
                );
                try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg);
            } else {
                try self.asmRegisterMemory(
                    .{ .p_, .@"and" },
                    dst_reg,
                    splat_addr_mcv.deref().mem(Memory.PtrSize.fromSize(splat_abi_size)),
                );
                try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg);
            }
            break :result dst_mcv;
        }

        if (dst_abi_size > 8) {
            return self.fail("TODO implement trunc for abi sizes larger than 8", .{});
        }

        // When truncating a `u16` to `u5`, for example, the top 3 bits of the result
        // have to be removed. This only happens if the dst is not a power-of-two size.
        if (self.regExtraBits(dst_ty) > 0)
            try self.truncateRegister(dst_ty, dst_mcv.register.to64());

        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
   2808 
   2809 fn airIntFromBool(self: *Self, inst: Air.Inst.Index) !void {
   2810     const un_op = self.air.instructions.items(.data)[inst].un_op;
   2811     const ty = self.typeOfIndex(inst);
   2812 
   2813     const operand = try self.resolveInst(un_op);
   2814     const dst_mcv = if (self.reuseOperand(inst, un_op, 0, operand))
   2815         operand
   2816     else
   2817         try self.copyToRegisterWithInstTracking(inst, ty, operand);
   2818 
   2819     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
   2820 }
   2821 
   2822 fn airSlice(self: *Self, inst: Air.Inst.Index) !void {
   2823     const mod = self.bin_file.options.module.?;
   2824     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   2825     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   2826 
   2827     const slice_ty = self.typeOfIndex(inst);
   2828     const ptr = try self.resolveInst(bin_op.lhs);
   2829     const ptr_ty = self.typeOf(bin_op.lhs);
   2830     const len = try self.resolveInst(bin_op.rhs);
   2831     const len_ty = self.typeOf(bin_op.rhs);
   2832 
   2833     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod));
   2834     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
   2835     try self.genSetMem(
   2836         .{ .frame = frame_index },
   2837         @intCast(ptr_ty.abiSize(mod)),
   2838         len_ty,
   2839         len,
   2840     );
   2841 
   2842     const result = MCValue{ .load_frame = .{ .index = frame_index } };
   2843     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   2844 }
   2845 
   2846 fn airUnOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
   2847     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   2848     const dst_mcv = try self.genUnOp(inst, tag, ty_op.operand);
   2849     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   2850 }
   2851 
   2852 fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
   2853     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2854     const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
   2855     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   2856 }
   2857 
   2858 fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
   2859     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   2860     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   2861     const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
   2862     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   2863 }
   2864 
   2865 fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 {
   2866     const mod = self.bin_file.options.module.?;
   2867     const air_tag = self.air.instructions.items(.tag);
   2868     const air_data = self.air.instructions.items(.data);
   2869 
   2870     const dst_ty = self.typeOf(dst_air);
   2871     const dst_info = dst_ty.intInfo(mod);
   2872     if (Air.refToIndex(dst_air)) |inst| {
   2873         switch (air_tag[inst]) {
   2874             .intcast => {
   2875                 const src_ty = self.typeOf(air_data[inst].ty_op.operand);
   2876                 const src_info = src_ty.intInfo(mod);
   2877                 return @min(switch (src_info.signedness) {
   2878                     .signed => switch (dst_info.signedness) {
   2879                         .signed => src_info.bits,
   2880                         .unsigned => src_info.bits - 1,
   2881                     },
   2882                     .unsigned => switch (dst_info.signedness) {
   2883                         .signed => src_info.bits + 1,
   2884                         .unsigned => src_info.bits,
   2885                     },
   2886                 }, dst_info.bits);
   2887             },
   2888             else => {},
   2889         }
   2890     } else if (Air.refToInterned(dst_air)) |ip_index| {
   2891         var space: Value.BigIntSpace = undefined;
   2892         const src_int = ip_index.toValue().toBigInt(&space, mod);
   2893         return @as(u16, @intCast(src_int.bitCountTwosComp())) +
   2894             @intFromBool(src_int.positive and dst_info.signedness == .signed);
   2895     }
   2896     return dst_info.bits;
   2897 }
   2898 
   2899 fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
   2900     const mod = self.bin_file.options.module.?;
   2901     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2902     const result = result: {
   2903         const tag = self.air.instructions.items(.tag)[inst];
   2904         const dst_ty = self.typeOfIndex(inst);
   2905         switch (dst_ty.zigTypeTag(mod)) {
   2906             .Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
   2907             else => {},
   2908         }
   2909 
   2910         const dst_info = dst_ty.intInfo(mod);
   2911         const src_ty = try mod.intType(dst_info.signedness, switch (tag) {
   2912             else => unreachable,
   2913             .mul, .mul_wrap => @max(
   2914                 self.activeIntBits(bin_op.lhs),
   2915                 self.activeIntBits(bin_op.rhs),
   2916                 dst_info.bits / 2,
   2917             ),
   2918             .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits,
   2919         });
   2920 
   2921         try self.spillEflagsIfOccupied();
   2922         try self.spillRegisters(&.{ .rax, .rdx });
   2923         const lhs = try self.resolveInst(bin_op.lhs);
   2924         const rhs = try self.resolveInst(bin_op.rhs);
   2925         break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs, rhs);
   2926     };
   2927     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   2928 }
   2929 
   2930 fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
   2931     const mod = self.bin_file.options.module.?;
   2932     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   2933     const ty = self.typeOf(bin_op.lhs);
   2934     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
   2935         "TODO implement addMulSat for {}",
   2936         .{ty.fmt(mod)},
   2937     );
   2938 
   2939     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   2940     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
   2941         lhs_mcv
   2942     else
   2943         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
   2944     const dst_reg = dst_mcv.register;
   2945     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   2946     defer self.register_manager.unlockReg(dst_lock);
   2947 
   2948     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   2949     const rhs_lock = switch (rhs_mcv) {
   2950         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   2951         else => null,
   2952     };
   2953     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   2954 
   2955     const limit_reg = try self.register_manager.allocReg(null, gp);
   2956     const limit_mcv = MCValue{ .register = limit_reg };
   2957     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   2958     defer self.register_manager.unlockReg(limit_lock);
   2959 
   2960     const reg_bits = self.regBitSize(ty);
   2961     const reg_extra_bits = self.regExtraBits(ty);
   2962     const cc: Condition = if (ty.isSignedInt(mod)) cc: {
   2963         if (reg_extra_bits > 0) {
   2964             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   2965         }
   2966         try self.genSetReg(limit_reg, ty, dst_mcv);
   2967         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   2968         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   2969             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
   2970         });
   2971         if (reg_extra_bits > 0) {
   2972             const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
   2973             const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
   2974             const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
   2975             defer self.register_manager.unlockReg(shifted_rhs_lock);
   2976 
   2977             try self.genShiftBinOpMir(
   2978                 .{ ._l, .sa },
   2979                 ty,
   2980                 shifted_rhs_mcv,
   2981                 .{ .immediate = reg_extra_bits },
   2982             );
   2983             try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, shifted_rhs_mcv);
   2984         } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
   2985         break :cc .o;
   2986     } else cc: {
   2987         try self.genSetReg(limit_reg, ty, .{
   2988             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - ty.bitSize(mod)),
   2989         });
   2990 
   2991         try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
   2992         if (reg_extra_bits > 0) {
   2993             try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, limit_mcv);
   2994             break :cc .a;
   2995         }
   2996         break :cc .c;
   2997     };
   2998 
   2999     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   3000     try self.asmCmovccRegisterRegister(
   3001         registerAlias(dst_reg, cmov_abi_size),
   3002         registerAlias(limit_reg, cmov_abi_size),
   3003         cc,
   3004     );
   3005 
   3006     if (reg_extra_bits > 0 and ty.isSignedInt(mod)) {
   3007         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3008     }
   3009 
   3010     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3011 }
   3012 
   3013 fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
   3014     const mod = self.bin_file.options.module.?;
   3015     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3016     const ty = self.typeOf(bin_op.lhs);
   3017     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
   3018         "TODO implement addMulSat for {}",
   3019         .{ty.fmt(mod)},
   3020     );
   3021 
   3022     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3023     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
   3024         lhs_mcv
   3025     else
   3026         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
   3027     const dst_reg = dst_mcv.register;
   3028     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   3029     defer self.register_manager.unlockReg(dst_lock);
   3030 
   3031     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3032     const rhs_lock = switch (rhs_mcv) {
   3033         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3034         else => null,
   3035     };
   3036     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3037 
   3038     const limit_reg = try self.register_manager.allocReg(null, gp);
   3039     const limit_mcv = MCValue{ .register = limit_reg };
   3040     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   3041     defer self.register_manager.unlockReg(limit_lock);
   3042 
   3043     const reg_bits = self.regBitSize(ty);
   3044     const reg_extra_bits = self.regExtraBits(ty);
   3045     const cc: Condition = if (ty.isSignedInt(mod)) cc: {
   3046         if (reg_extra_bits > 0) {
   3047             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3048         }
   3049         try self.genSetReg(limit_reg, ty, dst_mcv);
   3050         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   3051         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   3052             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
   3053         });
   3054         if (reg_extra_bits > 0) {
   3055             const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
   3056             const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
   3057             const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
   3058             defer self.register_manager.unlockReg(shifted_rhs_lock);
   3059 
   3060             try self.genShiftBinOpMir(
   3061                 .{ ._l, .sa },
   3062                 ty,
   3063                 shifted_rhs_mcv,
   3064                 .{ .immediate = reg_extra_bits },
   3065             );
   3066             try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv);
   3067         } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
   3068         break :cc .o;
   3069     } else cc: {
   3070         try self.genSetReg(limit_reg, ty, .{ .immediate = 0 });
   3071         try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
   3072         break :cc .c;
   3073     };
   3074 
   3075     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   3076     try self.asmCmovccRegisterRegister(
   3077         registerAlias(dst_reg, cmov_abi_size),
   3078         registerAlias(limit_reg, cmov_abi_size),
   3079         cc,
   3080     );
   3081 
   3082     if (reg_extra_bits > 0 and ty.isSignedInt(mod)) {
   3083         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3084     }
   3085 
   3086     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3087 }
   3088 
   3089 fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
   3090     const mod = self.bin_file.options.module.?;
   3091     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3092     const ty = self.typeOf(bin_op.lhs);
   3093     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
   3094         "TODO implement addMulSat for {}",
   3095         .{ty.fmt(mod)},
   3096     );
   3097 
   3098     try self.spillRegisters(&.{ .rax, .rdx });
   3099     const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
   3100     defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
   3101 
   3102     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3103     const lhs_lock = switch (lhs_mcv) {
   3104         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3105         else => null,
   3106     };
   3107     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   3108 
   3109     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3110     const rhs_lock = switch (rhs_mcv) {
   3111         .register => |reg| self.register_manager.lockReg(reg),
   3112         else => null,
   3113     };
   3114     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3115 
   3116     const limit_reg = try self.register_manager.allocReg(null, gp);
   3117     const limit_mcv = MCValue{ .register = limit_reg };
   3118     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   3119     defer self.register_manager.unlockReg(limit_lock);
   3120 
   3121     const reg_bits = self.regBitSize(ty);
   3122     const cc: Condition = if (ty.isSignedInt(mod)) cc: {
   3123         try self.genSetReg(limit_reg, ty, lhs_mcv);
   3124         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv);
   3125         try self.genShiftBinOpMir(.{ ._, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   3126         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   3127             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
   3128         });
   3129         break :cc .o;
   3130     } else cc: {
   3131         try self.genSetReg(limit_reg, ty, .{
   3132             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - reg_bits),
   3133         });
   3134         break :cc .c;
   3135     };
   3136 
   3137     const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
   3138     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   3139     try self.asmCmovccRegisterRegister(
   3140         registerAlias(dst_mcv.register, cmov_abi_size),
   3141         registerAlias(limit_reg, cmov_abi_size),
   3142         cc,
   3143     );
   3144 
   3145     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3146 }
   3147 
   3148 fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3149     const mod = self.bin_file.options.module.?;
   3150     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   3151     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3152     const result: MCValue = result: {
   3153         const tag = self.air.instructions.items(.tag)[inst];
   3154         const ty = self.typeOf(bin_op.lhs);
   3155         switch (ty.zigTypeTag(mod)) {
   3156             .Vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}),
   3157             .Int => {
   3158                 try self.spillEflagsIfOccupied();
   3159 
   3160                 const partial_mcv = try self.genBinOp(null, switch (tag) {
   3161                     .add_with_overflow => .add,
   3162                     .sub_with_overflow => .sub,
   3163                     else => unreachable,
   3164                 }, bin_op.lhs, bin_op.rhs);
   3165                 const int_info = ty.intInfo(mod);
   3166                 const cc: Condition = switch (int_info.signedness) {
   3167                     .unsigned => .c,
   3168                     .signed => .o,
   3169                 };
   3170 
   3171                 const tuple_ty = self.typeOfIndex(inst);
   3172                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
   3173                     switch (partial_mcv) {
   3174                         .register => |reg| {
   3175                             self.eflags_inst = inst;
   3176                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3177                         },
   3178                         else => {},
   3179                     }
   3180 
   3181                     const frame_index =
   3182                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
   3183                     try self.genSetMem(
   3184                         .{ .frame = frame_index },
   3185                         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3186                         Type.u1,
   3187                         .{ .eflags = cc },
   3188                     );
   3189                     try self.genSetMem(
   3190                         .{ .frame = frame_index },
   3191                         @intCast(tuple_ty.structFieldOffset(0, mod)),
   3192                         ty,
   3193                         partial_mcv,
   3194                     );
   3195                     break :result .{ .load_frame = .{ .index = frame_index } };
   3196                 }
   3197 
   3198                 const frame_index =
   3199                     try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
   3200                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3201                 break :result .{ .load_frame = .{ .index = frame_index } };
   3202             },
   3203             else => unreachable,
   3204         }
   3205     };
   3206     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3207 }
   3208 
   3209 fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3210     const mod = self.bin_file.options.module.?;
   3211     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   3212     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3213     const result: MCValue = result: {
   3214         const lhs_ty = self.typeOf(bin_op.lhs);
   3215         const rhs_ty = self.typeOf(bin_op.rhs);
   3216         switch (lhs_ty.zigTypeTag(mod)) {
   3217             .Vector => return self.fail("TODO implement shl with overflow for Vector type", .{}),
   3218             .Int => {
   3219                 try self.spillEflagsIfOccupied();
   3220 
   3221                 try self.register_manager.getReg(.rcx, null);
   3222                 const lhs = try self.resolveInst(bin_op.lhs);
   3223                 const rhs = try self.resolveInst(bin_op.rhs);
   3224 
   3225                 const int_info = lhs_ty.intInfo(mod);
   3226 
   3227                 const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty);
   3228                 const partial_lock = switch (partial_mcv) {
   3229                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3230                     else => null,
   3231                 };
   3232                 defer if (partial_lock) |lock| self.register_manager.unlockReg(lock);
   3233 
   3234                 const tmp_mcv = try self.genShiftBinOp(.shr, null, partial_mcv, rhs, lhs_ty, rhs_ty);
   3235                 const tmp_lock = switch (tmp_mcv) {
   3236                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3237                     else => null,
   3238                 };
   3239                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   3240 
   3241                 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs);
   3242                 const cc = Condition.ne;
   3243 
   3244                 const tuple_ty = self.typeOfIndex(inst);
   3245                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
   3246                     switch (partial_mcv) {
   3247                         .register => |reg| {
   3248                             self.eflags_inst = inst;
   3249                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3250                         },
   3251                         else => {},
   3252                     }
   3253 
   3254                     const frame_index =
   3255                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
   3256                     try self.genSetMem(
   3257                         .{ .frame = frame_index },
   3258                         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3259                         tuple_ty.structFieldType(1, mod),
   3260                         .{ .eflags = cc },
   3261                     );
   3262                     try self.genSetMem(
   3263                         .{ .frame = frame_index },
   3264                         @intCast(tuple_ty.structFieldOffset(0, mod)),
   3265                         tuple_ty.structFieldType(0, mod),
   3266                         partial_mcv,
   3267                     );
   3268                     break :result .{ .load_frame = .{ .index = frame_index } };
   3269                 }
   3270 
   3271                 const frame_index =
   3272                     try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
   3273                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3274                 break :result .{ .load_frame = .{ .index = frame_index } };
   3275             },
   3276             else => unreachable,
   3277         }
   3278     };
   3279     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3280 }
   3281 
   3282 fn genSetFrameTruncatedOverflowCompare(
   3283     self: *Self,
   3284     tuple_ty: Type,
   3285     frame_index: FrameIndex,
   3286     src_mcv: MCValue,
   3287     overflow_cc: ?Condition,
   3288 ) !void {
   3289     const mod = self.bin_file.options.module.?;
   3290     const src_lock = switch (src_mcv) {
   3291         .register => |reg| self.register_manager.lockReg(reg),
   3292         else => null,
   3293     };
   3294     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   3295 
   3296     const ty = tuple_ty.structFieldType(0, mod);
   3297     const int_info = ty.intInfo(mod);
   3298 
   3299     const hi_limb_bits = (int_info.bits - 1) % 64 + 1;
   3300     const hi_limb_ty = try mod.intType(int_info.signedness, hi_limb_bits);
   3301 
   3302     const rest_ty = try mod.intType(.unsigned, int_info.bits - hi_limb_bits);
   3303 
   3304     const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp);
   3305     const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs);
   3306     defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
   3307 
   3308     const overflow_reg = temp_regs[0];
   3309     if (overflow_cc) |cc| try self.asmSetccRegister(overflow_reg.to8(), cc);
   3310 
   3311     const scratch_reg = temp_regs[1];
   3312     const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
   3313     const hi_limb_mcv = if (hi_limb_off > 0)
   3314         src_mcv.address().offset(int_info.bits / 64 * 8).deref()
   3315     else
   3316         src_mcv;
   3317     try self.genSetReg(scratch_reg, hi_limb_ty, hi_limb_mcv);
   3318     try self.truncateRegister(hi_limb_ty, scratch_reg);
   3319     try self.genBinOpMir(.{ ._, .cmp }, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);
   3320 
   3321     const eq_reg = temp_regs[2];
   3322     if (overflow_cc) |_| {
   3323         try self.asmSetccRegister(eq_reg.to8(), .ne);
   3324         try self.genBinOpMir(
   3325             .{ ._, .@"or" },
   3326             Type.u8,
   3327             .{ .register = overflow_reg },
   3328             .{ .register = eq_reg },
   3329         );
   3330     }
   3331 
   3332     const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, mod));
   3333     if (hi_limb_off > 0) try self.genSetMem(.{ .frame = frame_index }, payload_off, rest_ty, src_mcv);
   3334     try self.genSetMem(
   3335         .{ .frame = frame_index },
   3336         payload_off + hi_limb_off,
   3337         hi_limb_ty,
   3338         .{ .register = scratch_reg },
   3339     );
   3340     try self.genSetMem(
   3341         .{ .frame = frame_index },
   3342         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3343         tuple_ty.structFieldType(1, mod),
   3344         if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
   3345     );
   3346 }
   3347 
   3348 fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3349     const mod = self.bin_file.options.module.?;
   3350     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   3351     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3352     const dst_ty = self.typeOf(bin_op.lhs);
   3353     const result: MCValue = switch (dst_ty.zigTypeTag(mod)) {
   3354         .Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}),
   3355         .Int => result: {
   3356             try self.spillEflagsIfOccupied();
   3357             try self.spillRegisters(&.{ .rax, .rdx });
   3358 
   3359             const dst_info = dst_ty.intInfo(mod);
   3360             const cc: Condition = switch (dst_info.signedness) {
   3361                 .unsigned => .c,
   3362                 .signed => .o,
   3363             };
   3364 
   3365             const lhs_active_bits = self.activeIntBits(bin_op.lhs);
   3366             const rhs_active_bits = self.activeIntBits(bin_op.rhs);
   3367             const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2);
   3368             const src_ty = try mod.intType(dst_info.signedness, src_bits);
   3369 
   3370             const lhs = try self.resolveInst(bin_op.lhs);
   3371             const rhs = try self.resolveInst(bin_op.rhs);
   3372 
   3373             const tuple_ty = self.typeOfIndex(inst);
   3374             const extra_bits = if (dst_info.bits <= 64)
   3375                 self.regExtraBits(dst_ty)
   3376             else
   3377                 dst_info.bits % 64;
   3378             const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs);
   3379 
   3380             switch (partial_mcv) {
   3381                 .register => |reg| if (extra_bits == 0) {
   3382                     self.eflags_inst = inst;
   3383                     break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3384                 } else {
   3385                     const frame_index =
   3386                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
   3387                     try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3388                     break :result .{ .load_frame = .{ .index = frame_index } };
   3389                 },
   3390                 else => {
   3391                     // For now, this is the only supported multiply that doesn't fit in a register.
   3392                     assert(dst_info.bits <= 128 and src_bits == 64);
   3393 
   3394                     const frame_index =
   3395                         try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
   3396                     if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
   3397                         try self.genSetMem(
   3398                             .{ .frame = frame_index },
   3399                             @intCast(tuple_ty.structFieldOffset(0, mod)),
   3400                             tuple_ty.structFieldType(0, mod),
   3401                             partial_mcv,
   3402                         );
   3403                         try self.genSetMem(
   3404                             .{ .frame = frame_index },
   3405                             @intCast(tuple_ty.structFieldOffset(1, mod)),
   3406                             tuple_ty.structFieldType(1, mod),
   3407                             .{ .immediate = 0 }, // cc being set is impossible
   3408                         );
   3409                     } else try self.genSetFrameTruncatedOverflowCompare(
   3410                         tuple_ty,
   3411                         frame_index,
   3412                         partial_mcv,
   3413                         null,
   3414                     );
   3415                     break :result .{ .load_frame = .{ .index = frame_index } };
   3416                 },
   3417             }
   3418         },
   3419         else => unreachable,
   3420     };
   3421     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3422 }
   3423 
   3424 /// Generates signed or unsigned integer multiplication/division.
   3425 /// Clobbers .rax and .rdx registers.
   3426 /// Quotient is saved in .rax and remainder in .rdx.
   3427 fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
   3428     const mod = self.bin_file.options.module.?;
   3429     const abi_size: u32 = @intCast(ty.abiSize(mod));
   3430     if (abi_size > 8) {
   3431         return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
   3432     }
   3433 
   3434     try self.genSetReg(.rax, ty, lhs);
   3435     switch (tag[1]) {
   3436         else => unreachable,
   3437         .mul => {},
   3438         .div => switch (tag[0]) {
   3439             ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx),
   3440             .i_ => switch (self.regBitSize(ty)) {
   3441                 8 => try self.asmOpOnly(.{ ._, .cbw }),
   3442                 16 => try self.asmOpOnly(.{ ._, .cwd }),
   3443                 32 => try self.asmOpOnly(.{ ._, .cdq }),
   3444                 64 => try self.asmOpOnly(.{ ._, .cqo }),
   3445                 else => unreachable,
   3446             },
   3447             else => unreachable,
   3448         },
   3449     }
   3450 
   3451     const mat_rhs: MCValue = switch (rhs) {
   3452         .register, .indirect, .load_frame => rhs,
   3453         else => .{ .register = try self.copyToTmpRegister(ty, rhs) },
   3454     };
   3455     switch (mat_rhs) {
   3456         .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)),
   3457         .memory, .indirect, .load_frame => try self.asmMemory(
   3458             tag,
   3459             mat_rhs.mem(Memory.PtrSize.fromSize(abi_size)),
   3460         ),
   3461         else => unreachable,
   3462     }
   3463 }
   3464 
   3465 /// Always returns a register.
   3466 /// Clobbers .rax and .rdx registers.
   3467 fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue {
   3468     const mod = self.bin_file.options.module.?;
   3469     const abi_size: u32 = @intCast(ty.abiSize(mod));
   3470     const int_info = ty.intInfo(mod);
   3471     const dividend: Register = switch (lhs) {
   3472         .register => |reg| reg,
   3473         else => try self.copyToTmpRegister(ty, lhs),
   3474     };
   3475     const dividend_lock = self.register_manager.lockReg(dividend);
   3476     defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock);
   3477 
   3478     const divisor: Register = switch (rhs) {
   3479         .register => |reg| reg,
   3480         else => try self.copyToTmpRegister(ty, rhs),
   3481     };
   3482     const divisor_lock = self.register_manager.lockReg(divisor);
   3483     defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock);
   3484 
   3485     try self.genIntMulDivOpMir(
   3486         switch (int_info.signedness) {
   3487             .signed => .{ .i_, .div },
   3488             .unsigned => .{ ._, .div },
   3489         },
   3490         ty,
   3491         .{ .register = dividend },
   3492         .{ .register = divisor },
   3493     );
   3494 
   3495     try self.asmRegisterRegister(
   3496         .{ ._, .xor },
   3497         registerAlias(divisor, abi_size),
   3498         registerAlias(dividend, abi_size),
   3499     );
   3500     try self.asmRegisterImmediate(
   3501         .{ ._r, .sa },
   3502         registerAlias(divisor, abi_size),
   3503         Immediate.u(int_info.bits - 1),
   3504     );
   3505     try self.asmRegisterRegister(
   3506         .{ ._, .@"test" },
   3507         registerAlias(.rdx, abi_size),
   3508         registerAlias(.rdx, abi_size),
   3509     );
   3510     try self.asmCmovccRegisterRegister(
   3511         registerAlias(divisor, abi_size),
   3512         registerAlias(.rdx, abi_size),
   3513         .z,
   3514     );
   3515     try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax });
   3516     return MCValue{ .register = divisor };
   3517 }
   3518 
   3519 fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
   3520     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3521 
   3522     try self.spillRegisters(&.{.rcx});
   3523 
   3524     const tag = self.air.instructions.items(.tag)[inst];
   3525     try self.register_manager.getReg(.rcx, null);
   3526     const lhs = try self.resolveInst(bin_op.lhs);
   3527     const rhs = try self.resolveInst(bin_op.rhs);
   3528     const lhs_ty = self.typeOf(bin_op.lhs);
   3529     const rhs_ty = self.typeOf(bin_op.rhs);
   3530 
   3531     const result = try self.genShiftBinOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty);
   3532 
   3533     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3534 }
   3535 
   3536 fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
   3537     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   3538     _ = bin_op;
   3539     return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
   3540     //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3541 }
   3542 
   3543 fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void {
   3544     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3545     const result: MCValue = result: {
   3546         const pl_ty = self.typeOfIndex(inst);
   3547         const opt_mcv = try self.resolveInst(ty_op.operand);
   3548 
   3549         if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) {
   3550             switch (opt_mcv) {
   3551                 .register => |reg| try self.truncateRegister(pl_ty, reg),
   3552                 .register_overflow => |ro| try self.truncateRegister(pl_ty, ro.reg),
   3553                 else => {},
   3554             }
   3555             break :result opt_mcv;
   3556         }
   3557 
   3558         const pl_mcv = try self.allocRegOrMem(inst, true);
   3559         try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) {
   3560             else => opt_mcv,
   3561             .register_overflow => |ro| .{ .register = ro.reg },
   3562         });
   3563         break :result pl_mcv;
   3564     };
   3565     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3566 }
   3567 
   3568 fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
   3569     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3570 
   3571     const dst_ty = self.typeOfIndex(inst);
   3572     const opt_mcv = try self.resolveInst(ty_op.operand);
   3573 
   3574     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
   3575         opt_mcv
   3576     else
   3577         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
   3578     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3579 }
   3580 
   3581 fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
   3582     const mod = self.bin_file.options.module.?;
   3583     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3584     const result = result: {
   3585         const dst_ty = self.typeOfIndex(inst);
   3586         const src_ty = self.typeOf(ty_op.operand);
   3587         const opt_ty = src_ty.childType(mod);
   3588         const src_mcv = try self.resolveInst(ty_op.operand);
   3589 
   3590         if (opt_ty.optionalReprIsPayload(mod)) {
   3591             break :result if (self.liveness.isUnused(inst))
   3592                 .unreach
   3593             else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3594                 src_mcv
   3595             else
   3596                 try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   3597         }
   3598 
   3599         const dst_mcv: MCValue = if (src_mcv.isRegister() and
   3600             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3601             src_mcv
   3602         else if (self.liveness.isUnused(inst))
   3603             .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) }
   3604         else
   3605             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   3606 
   3607         const pl_ty = dst_ty.childType(mod);
   3608         const pl_abi_size: i32 = @intCast(pl_ty.abiSize(mod));
   3609         try self.genSetMem(.{ .reg = dst_mcv.getReg().? }, pl_abi_size, Type.bool, .{ .immediate = 1 });
   3610         break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
   3611     };
   3612     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3613 }
   3614 
   3615 fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
   3616     const mod = self.bin_file.options.module.?;
   3617     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3618     const err_union_ty = self.typeOf(ty_op.operand);
   3619     const err_ty = err_union_ty.errorUnionSet(mod);
   3620     const payload_ty = err_union_ty.errorUnionPayload(mod);
   3621     const operand = try self.resolveInst(ty_op.operand);
   3622 
   3623     const result: MCValue = result: {
   3624         if (err_ty.errorSetIsEmpty(mod)) {
   3625             break :result MCValue{ .immediate = 0 };
   3626         }
   3627 
   3628         if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) {
   3629             break :result operand;
   3630         }
   3631 
   3632         const err_off = errUnionErrorOffset(payload_ty, mod);
   3633         switch (operand) {
   3634             .register => |reg| {
   3635                 // TODO reuse operand
   3636                 const eu_lock = self.register_manager.lockReg(reg);
   3637                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   3638 
   3639                 const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
   3640                 if (err_off > 0) try self.genShiftBinOpMir(
   3641                     .{ ._r, .sh },
   3642                     err_union_ty,
   3643                     result,
   3644                     .{ .immediate = @as(u6, @intCast(err_off * 8)) },
   3645                 ) else try self.truncateRegister(Type.anyerror, result.register);
   3646                 break :result result;
   3647             },
   3648             .load_frame => |frame_addr| break :result .{ .load_frame = .{
   3649                 .index = frame_addr.index,
   3650                 .off = frame_addr.off + @as(i32, @intCast(err_off)),
   3651             } },
   3652             else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}),
   3653         }
   3654     };
   3655     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3656 }
   3657 
   3658 fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
   3659     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3660     const err_union_ty = self.typeOf(ty_op.operand);
   3661     const operand = try self.resolveInst(ty_op.operand);
   3662     const result = try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, operand);
   3663     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3664 }
   3665 
   3666 fn genUnwrapErrorUnionPayloadMir(
   3667     self: *Self,
   3668     maybe_inst: ?Air.Inst.Index,
   3669     err_union_ty: Type,
   3670     err_union: MCValue,
   3671 ) !MCValue {
   3672     const mod = self.bin_file.options.module.?;
   3673     const payload_ty = err_union_ty.errorUnionPayload(mod);
   3674 
   3675     const result: MCValue = result: {
   3676         if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   3677 
   3678         const payload_off = errUnionPayloadOffset(payload_ty, mod);
   3679         switch (err_union) {
   3680             .load_frame => |frame_addr| break :result .{ .load_frame = .{
   3681                 .index = frame_addr.index,
   3682                 .off = frame_addr.off + @as(i32, @intCast(payload_off)),
   3683             } },
   3684             .register => |reg| {
   3685                 // TODO reuse operand
   3686                 const eu_lock = self.register_manager.lockReg(reg);
   3687                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   3688 
   3689                 const result_mcv: MCValue = if (maybe_inst) |inst|
   3690                     try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)
   3691                 else
   3692                     .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
   3693                 if (payload_off > 0) try self.genShiftBinOpMir(
   3694                     .{ ._r, .sh },
   3695                     err_union_ty,
   3696                     result_mcv,
   3697                     .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
   3698                 ) else try self.truncateRegister(payload_ty, result_mcv.register);
   3699                 break :result result_mcv;
   3700             },
   3701             else => return self.fail("TODO implement genUnwrapErrorUnionPayloadMir for {}", .{err_union}),
   3702         }
   3703     };
   3704 
   3705     return result;
   3706 }
   3707 
   3708 // *(E!T) -> E
   3709 fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   3710     const mod = self.bin_file.options.module.?;
   3711     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3712 
   3713     const src_ty = self.typeOf(ty_op.operand);
   3714     const src_mcv = try self.resolveInst(ty_op.operand);
   3715     const src_reg = switch (src_mcv) {
   3716         .register => |reg| reg,
   3717         else => try self.copyToTmpRegister(src_ty, src_mcv),
   3718     };
   3719     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3720     defer self.register_manager.unlockReg(src_lock);
   3721 
   3722     const dst_reg = try self.register_manager.allocReg(inst, gp);
   3723     const dst_mcv = MCValue{ .register = dst_reg };
   3724     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   3725     defer self.register_manager.unlockReg(dst_lock);
   3726 
   3727     const eu_ty = src_ty.childType(mod);
   3728     const pl_ty = eu_ty.errorUnionPayload(mod);
   3729     const err_ty = eu_ty.errorUnionSet(mod);
   3730     const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   3731     const err_abi_size: u32 = @intCast(err_ty.abiSize(mod));
   3732     try self.asmRegisterMemory(
   3733         .{ ._, .mov },
   3734         registerAlias(dst_reg, err_abi_size),
   3735         Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{
   3736             .base = .{ .reg = src_reg },
   3737             .disp = err_off,
   3738         }),
   3739     );
   3740 
   3741     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3742 }
   3743 
   3744 // *(E!T) -> *T
   3745 fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
   3746     const mod = self.bin_file.options.module.?;
   3747     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3748 
   3749     const src_ty = self.typeOf(ty_op.operand);
   3750     const src_mcv = try self.resolveInst(ty_op.operand);
   3751     const src_reg = switch (src_mcv) {
   3752         .register => |reg| reg,
   3753         else => try self.copyToTmpRegister(src_ty, src_mcv),
   3754     };
   3755     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3756     defer self.register_manager.unlockReg(src_lock);
   3757 
   3758     const dst_ty = self.typeOfIndex(inst);
   3759     const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3760         src_reg
   3761     else
   3762         try self.register_manager.allocReg(inst, gp);
   3763     const dst_mcv = MCValue{ .register = dst_reg };
   3764     const dst_lock = self.register_manager.lockReg(dst_reg);
   3765     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3766 
   3767     const eu_ty = src_ty.childType(mod);
   3768     const pl_ty = eu_ty.errorUnionPayload(mod);
   3769     const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   3770     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   3771     try self.asmRegisterMemory(
   3772         .{ ._, .lea },
   3773         registerAlias(dst_reg, dst_abi_size),
   3774         Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
   3775     );
   3776 
   3777     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3778 }
   3779 
   3780 fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
   3781     const mod = self.bin_file.options.module.?;
   3782     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3783     const result: MCValue = result: {
   3784         const src_ty = self.typeOf(ty_op.operand);
   3785         const src_mcv = try self.resolveInst(ty_op.operand);
   3786         const src_reg = switch (src_mcv) {
   3787             .register => |reg| reg,
   3788             else => try self.copyToTmpRegister(src_ty, src_mcv),
   3789         };
   3790         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3791         defer self.register_manager.unlockReg(src_lock);
   3792 
   3793         const eu_ty = src_ty.childType(mod);
   3794         const pl_ty = eu_ty.errorUnionPayload(mod);
   3795         const err_ty = eu_ty.errorUnionSet(mod);
   3796         const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   3797         const err_abi_size: u32 = @intCast(err_ty.abiSize(mod));
   3798         try self.asmMemoryImmediate(
   3799             .{ ._, .mov },
   3800             Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{
   3801                 .base = .{ .reg = src_reg },
   3802                 .disp = err_off,
   3803             }),
   3804             Immediate.u(0),
   3805         );
   3806 
   3807         if (self.liveness.isUnused(inst)) break :result .unreach;
   3808 
   3809         const dst_ty = self.typeOfIndex(inst);
   3810         const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3811             src_reg
   3812         else
   3813             try self.register_manager.allocReg(inst, gp);
   3814         const dst_lock = self.register_manager.lockReg(dst_reg);
   3815         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3816 
   3817         const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   3818         const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   3819         try self.asmRegisterMemory(
   3820             .{ ._, .lea },
   3821             registerAlias(dst_reg, dst_abi_size),
   3822             Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
   3823         );
   3824         break :result .{ .register = dst_reg };
   3825     };
   3826     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3827 }
   3828 
   3829 fn airErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
   3830     _ = inst;
   3831     return self.fail("TODO implement airErrReturnTrace for {}", .{self.target.cpu.arch});
   3832     //return self.finishAir(inst, result, .{ .none, .none, .none });
   3833 }
   3834 
   3835 fn airSetErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
   3836     _ = inst;
   3837     return self.fail("TODO implement airSetErrReturnTrace for {}", .{self.target.cpu.arch});
   3838 }
   3839 
   3840 fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void {
   3841     _ = inst;
   3842     return self.fail("TODO implement airSaveErrReturnTraceIndex for {}", .{self.target.cpu.arch});
   3843 }
   3844 
   3845 fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void {
   3846     const mod = self.bin_file.options.module.?;
   3847     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3848     const result: MCValue = result: {
   3849         const pl_ty = self.typeOf(ty_op.operand);
   3850         if (!pl_ty.hasRuntimeBits(mod)) break :result .{ .immediate = 1 };
   3851 
   3852         const opt_ty = self.typeOfIndex(inst);
   3853         const pl_mcv = try self.resolveInst(ty_op.operand);
   3854         const same_repr = opt_ty.optionalReprIsPayload(mod);
   3855         if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv;
   3856 
   3857         const pl_lock: ?RegisterLock = switch (pl_mcv) {
   3858             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3859             else => null,
   3860         };
   3861         defer if (pl_lock) |lock| self.register_manager.unlockReg(lock);
   3862 
   3863         const opt_mcv = try self.allocRegOrMem(inst, true);
   3864         try self.genCopy(pl_ty, opt_mcv, pl_mcv);
   3865 
   3866         if (!same_repr) {
   3867             const pl_abi_size: i32 = @intCast(pl_ty.abiSize(mod));
   3868             switch (opt_mcv) {
   3869                 else => unreachable,
   3870 
   3871                 .register => |opt_reg| try self.asmRegisterImmediate(
   3872                     .{ ._s, .bt },
   3873                     opt_reg,
   3874                     Immediate.u(@as(u6, @intCast(pl_abi_size * 8))),
   3875                 ),
   3876 
   3877                 .load_frame => |frame_addr| try self.asmMemoryImmediate(
   3878                     .{ ._, .mov },
   3879                     Memory.sib(.byte, .{
   3880                         .base = .{ .frame = frame_addr.index },
   3881                         .disp = frame_addr.off + pl_abi_size,
   3882                     }),
   3883                     Immediate.u(1),
   3884                 ),
   3885             }
   3886         }
   3887         break :result opt_mcv;
   3888     };
   3889     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3890 }
   3891 
   3892 /// T to E!T
   3893 fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
   3894     const mod = self.bin_file.options.module.?;
   3895     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3896 
   3897     const eu_ty = self.air.getRefType(ty_op.ty);
   3898     const pl_ty = eu_ty.errorUnionPayload(mod);
   3899     const err_ty = eu_ty.errorUnionSet(mod);
   3900     const operand = try self.resolveInst(ty_op.operand);
   3901 
   3902     const result: MCValue = result: {
   3903         if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .{ .immediate = 0 };
   3904 
   3905         const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod));
   3906         const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   3907         const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   3908         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand);
   3909         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 });
   3910         break :result .{ .load_frame = .{ .index = frame_index } };
   3911     };
   3912     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3913 }
   3914 
   3915 /// E to E!T
   3916 fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
   3917     const mod = self.bin_file.options.module.?;
   3918     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3919 
   3920     const eu_ty = self.air.getRefType(ty_op.ty);
   3921     const pl_ty = eu_ty.errorUnionPayload(mod);
   3922     const err_ty = eu_ty.errorUnionSet(mod);
   3923 
   3924     const result: MCValue = result: {
   3925         if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result try self.resolveInst(ty_op.operand);
   3926 
   3927         const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod));
   3928         const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   3929         const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   3930         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef);
   3931         const operand = try self.resolveInst(ty_op.operand);
   3932         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand);
   3933         break :result .{ .load_frame = .{ .index = frame_index } };
   3934     };
   3935     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3936 }
   3937 
   3938 fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
   3939     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3940     const result = result: {
   3941         const src_mcv = try self.resolveInst(ty_op.operand);
   3942         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
   3943 
   3944         const dst_mcv = try self.allocRegOrMem(inst, true);
   3945         const dst_ty = self.typeOfIndex(inst);
   3946         try self.genCopy(dst_ty, dst_mcv, src_mcv);
   3947         break :result dst_mcv;
   3948     };
   3949     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3950 }
   3951 
   3952 fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
   3953     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3954 
   3955     const operand = try self.resolveInst(ty_op.operand);
   3956     const dst_mcv: MCValue = blk: {
   3957         switch (operand) {
   3958             .load_frame => |frame_addr| break :blk .{ .load_frame = .{
   3959                 .index = frame_addr.index,
   3960                 .off = frame_addr.off + 8,
   3961             } },
   3962             else => return self.fail("TODO implement slice_len for {}", .{operand}),
   3963         }
   3964     };
   3965 
   3966     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3967 }
   3968 
   3969 fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void {
   3970     const mod = self.bin_file.options.module.?;
   3971     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   3972 
   3973     const src_ty = self.typeOf(ty_op.operand);
   3974     const src_mcv = try self.resolveInst(ty_op.operand);
   3975     const src_reg = switch (src_mcv) {
   3976         .register => |reg| reg,
   3977         else => try self.copyToTmpRegister(src_ty, src_mcv),
   3978     };
   3979     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   3980     defer self.register_manager.unlockReg(src_lock);
   3981 
   3982     const dst_ty = self.typeOfIndex(inst);
   3983     const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3984         src_reg
   3985     else
   3986         try self.register_manager.allocReg(inst, gp);
   3987     const dst_mcv = MCValue{ .register = dst_reg };
   3988     const dst_lock = self.register_manager.lockReg(dst_reg);
   3989     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3990 
   3991     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   3992     try self.asmRegisterMemory(
   3993         .{ ._, .lea },
   3994         registerAlias(dst_reg, dst_abi_size),
   3995         Memory.sib(.qword, .{
   3996             .base = .{ .reg = src_reg },
   3997             .disp = @divExact(self.target.ptrBitWidth(), 8),
   3998         }),
   3999     );
   4000 
   4001     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4002 }
   4003 
   4004 fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
   4005     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4006 
   4007     const dst_ty = self.typeOfIndex(inst);
   4008     const opt_mcv = try self.resolveInst(ty_op.operand);
   4009 
   4010     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
   4011         opt_mcv
   4012     else
   4013         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
   4014     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4015 }
   4016 
   4017 fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register {
   4018     const reg: Register = blk: {
   4019         switch (index) {
   4020             .immediate => |imm| {
   4021                 // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
   4022                 // and set the register directly to the scaled offset as an immediate.
   4023                 const reg = try self.register_manager.allocReg(null, gp);
   4024                 try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size });
   4025                 break :blk reg;
   4026             },
   4027             else => {
   4028                 const reg = try self.copyToTmpRegister(index_ty, index);
   4029                 try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
   4030                 break :blk reg;
   4031             },
   4032         }
   4033     };
   4034     return reg;
   4035 }
   4036 
   4037 fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
   4038     const mod = self.bin_file.options.module.?;
   4039     const slice_ty = self.typeOf(lhs);
   4040     const slice_mcv = try self.resolveInst(lhs);
   4041     const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) {
   4042         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4043         else => null,
   4044     };
   4045     defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   4046 
   4047     const elem_ty = slice_ty.childType(mod);
   4048     const elem_size = elem_ty.abiSize(mod);
   4049     const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod);
   4050 
   4051     const index_ty = self.typeOf(rhs);
   4052     const index_mcv = try self.resolveInst(rhs);
   4053     const index_mcv_lock: ?RegisterLock = switch (index_mcv) {
   4054         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4055         else => null,
   4056     };
   4057     defer if (index_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   4058 
   4059     const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
   4060     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   4061     defer self.register_manager.unlockReg(offset_reg_lock);
   4062 
   4063     const addr_reg = try self.register_manager.allocReg(null, gp);
   4064     try self.genSetReg(addr_reg, Type.usize, slice_mcv);
   4065     // TODO we could allocate register here, but need to expect addr register and potentially
   4066     // offset register.
   4067     try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{
   4068         .register = offset_reg,
   4069     });
   4070     return MCValue{ .register = addr_reg.to64() };
   4071 }
   4072 
   4073 fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
   4074     const mod = self.bin_file.options.module.?;
   4075     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   4076     const slice_ty = self.typeOf(bin_op.lhs);
   4077 
   4078     const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod);
   4079     const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
   4080     const dst_mcv = try self.allocRegOrMem(inst, false);
   4081     try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
   4082 
   4083     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   4084 }
   4085 
   4086 fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
   4087     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   4088     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
   4089     const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs);
   4090     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
   4091 }
   4092 
   4093 fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
   4094     const mod = self.bin_file.options.module.?;
   4095     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   4096 
   4097     const array_ty = self.typeOf(bin_op.lhs);
   4098     const array = try self.resolveInst(bin_op.lhs);
   4099     const array_lock: ?RegisterLock = switch (array) {
   4100         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4101         else => null,
   4102     };
   4103     defer if (array_lock) |lock| self.register_manager.unlockReg(lock);
   4104 
   4105     const elem_ty = array_ty.childType(mod);
   4106     const elem_abi_size = elem_ty.abiSize(mod);
   4107 
   4108     const index_ty = self.typeOf(bin_op.rhs);
   4109     const index = try self.resolveInst(bin_op.rhs);
   4110     const index_lock: ?RegisterLock = switch (index) {
   4111         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4112         else => null,
   4113     };
   4114     defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   4115 
   4116     const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size);
   4117     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   4118     defer self.register_manager.unlockReg(offset_reg_lock);
   4119 
   4120     const addr_reg = try self.register_manager.allocReg(null, gp);
   4121     switch (array) {
   4122         .register => {
   4123             const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod));
   4124             try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array);
   4125             try self.asmRegisterMemory(
   4126                 .{ ._, .lea },
   4127                 addr_reg,
   4128                 Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }),
   4129             );
   4130         },
   4131         .load_frame => |frame_addr| try self.asmRegisterMemory(
   4132             .{ ._, .lea },
   4133             addr_reg,
   4134             Memory.sib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off }),
   4135         ),
   4136         .memory,
   4137         .load_direct,
   4138         .load_got,
   4139         .load_tlv,
   4140         => try self.genSetReg(addr_reg, Type.usize, array.address()),
   4141         .lea_direct, .lea_tlv => unreachable,
   4142         else => return self.fail("TODO implement array_elem_val when array is {}", .{array}),
   4143     }
   4144 
   4145     // TODO we could allocate register here, but need to expect addr register and potentially
   4146     // offset register.
   4147     const dst_mcv = try self.allocRegOrMem(inst, false);
   4148     try self.genBinOpMir(
   4149         .{ ._, .add },
   4150         Type.usize,
   4151         .{ .register = addr_reg },
   4152         .{ .register = offset_reg },
   4153     );
   4154     try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
   4155 
   4156     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   4157 }
   4158 
   4159 fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void {
   4160     const mod = self.bin_file.options.module.?;
   4161     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   4162     const ptr_ty = self.typeOf(bin_op.lhs);
   4163 
   4164     // this is identical to the `airPtrElemPtr` codegen expect here an
   4165     // additional `mov` is needed at the end to get the actual value
   4166 
   4167     const elem_ty = ptr_ty.elemType2(mod);
   4168     const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
   4169     const index_ty = self.typeOf(bin_op.rhs);
   4170     const index_mcv = try self.resolveInst(bin_op.rhs);
   4171     const index_lock = switch (index_mcv) {
   4172         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4173         else => null,
   4174     };
   4175     defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   4176 
   4177     const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
   4178     const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   4179     defer self.register_manager.unlockReg(offset_lock);
   4180 
   4181     const ptr_mcv = try self.resolveInst(bin_op.lhs);
   4182     const elem_ptr_reg = if (ptr_mcv.isRegister() and self.liveness.operandDies(inst, 0))
   4183         ptr_mcv.register
   4184     else
   4185         try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   4186     const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg);
   4187     defer self.register_manager.unlockReg(elem_ptr_lock);
   4188     try self.asmRegisterRegister(
   4189         .{ ._, .add },
   4190         elem_ptr_reg,
   4191         offset_reg,
   4192     );
   4193 
   4194     const dst_mcv = try self.allocRegOrMem(inst, true);
   4195     const dst_lock = switch (dst_mcv) {
   4196         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4197         else => null,
   4198     };
   4199     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4200     try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg });
   4201 
   4202     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   4203 }
   4204 
   4205 fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void {
   4206     const mod = self.bin_file.options.module.?;
   4207     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   4208     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
   4209 
   4210     const ptr_ty = self.typeOf(extra.lhs);
   4211     const ptr = try self.resolveInst(extra.lhs);
   4212     const ptr_lock: ?RegisterLock = switch (ptr) {
   4213         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4214         else => null,
   4215     };
   4216     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   4217 
   4218     const elem_ty = ptr_ty.elemType2(mod);
   4219     const elem_abi_size = elem_ty.abiSize(mod);
   4220     const index_ty = self.typeOf(extra.rhs);
   4221     const index = try self.resolveInst(extra.rhs);
   4222     const index_lock: ?RegisterLock = switch (index) {
   4223         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4224         else => null,
   4225     };
   4226     defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   4227 
   4228     const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size);
   4229     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   4230     defer self.register_manager.unlockReg(offset_reg_lock);
   4231 
   4232     const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr);
   4233     try self.genBinOpMir(.{ ._, .add }, ptr_ty, dst_mcv, .{ .register = offset_reg });
   4234 
   4235     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
   4236 }
   4237 
   4238 fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
   4239     const mod = self.bin_file.options.module.?;
   4240     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   4241     const ptr_union_ty = self.typeOf(bin_op.lhs);
   4242     const union_ty = ptr_union_ty.childType(mod);
   4243     const tag_ty = self.typeOf(bin_op.rhs);
   4244     const layout = union_ty.unionGetLayout(mod);
   4245 
   4246     if (layout.tag_size == 0) {
   4247         return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   4248     }
   4249 
   4250     const ptr = try self.resolveInst(bin_op.lhs);
   4251     const ptr_lock: ?RegisterLock = switch (ptr) {
   4252         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4253         else => null,
   4254     };
   4255     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   4256 
   4257     const tag = try self.resolveInst(bin_op.rhs);
   4258     const tag_lock: ?RegisterLock = switch (tag) {
   4259         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4260         else => null,
   4261     };
   4262     defer if (tag_lock) |lock| self.register_manager.unlockReg(lock);
   4263 
   4264     const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: {
   4265         // TODO reusing the operand
   4266         const reg = try self.copyToTmpRegister(ptr_union_ty, ptr);
   4267         try self.genBinOpMir(
   4268             .{ ._, .add },
   4269             ptr_union_ty,
   4270             .{ .register = reg },
   4271             .{ .immediate = layout.payload_size },
   4272         );
   4273         break :blk MCValue{ .register = reg };
   4274     } else ptr;
   4275 
   4276     const ptr_tag_ty = try mod.adjustPtrTypeChild(ptr_union_ty, tag_ty);
   4277     try self.store(ptr_tag_ty, adjusted_ptr, tag);
   4278 
   4279     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   4280 }
   4281 
   4282 fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
   4283     const mod = self.bin_file.options.module.?;
   4284     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4285 
   4286     const tag_ty = self.typeOfIndex(inst);
   4287     const union_ty = self.typeOf(ty_op.operand);
   4288     const layout = union_ty.unionGetLayout(mod);
   4289 
   4290     if (layout.tag_size == 0) {
   4291         return self.finishAir(inst, .none, .{ ty_op.operand, .none, .none });
   4292     }
   4293 
   4294     // TODO reusing the operand
   4295     const operand = try self.resolveInst(ty_op.operand);
   4296     const operand_lock: ?RegisterLock = switch (operand) {
   4297         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4298         else => null,
   4299     };
   4300     defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
   4301 
   4302     const tag_abi_size = tag_ty.abiSize(mod);
   4303     const dst_mcv: MCValue = blk: {
   4304         switch (operand) {
   4305             .load_frame => |frame_addr| {
   4306                 if (tag_abi_size <= 8) {
   4307                     const off: i32 = if (layout.tag_align < layout.payload_align)
   4308                         @intCast(layout.payload_size)
   4309                     else
   4310                         0;
   4311                     break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{
   4312                         .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
   4313                     });
   4314                 }
   4315 
   4316                 return self.fail("TODO implement get_union_tag for ABI larger than 8 bytes and operand {}", .{operand});
   4317             },
   4318             .register => {
   4319                 const shift: u6 = if (layout.tag_align < layout.payload_align)
   4320                     @intCast(layout.payload_size * 8)
   4321                 else
   4322                     0;
   4323                 const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand);
   4324                 try self.genShiftBinOpMir(.{ ._r, .sh }, Type.usize, result, .{ .immediate = shift });
   4325                 break :blk MCValue{
   4326                     .register = registerAlias(result.register, @intCast(layout.tag_size)),
   4327                 };
   4328             },
   4329             else => return self.fail("TODO implement get_union_tag for {}", .{operand}),
   4330         }
   4331     };
   4332 
   4333     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4334 }
   4335 
   4336 fn airClz(self: *Self, inst: Air.Inst.Index) !void {
   4337     const mod = self.bin_file.options.module.?;
   4338     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4339     const result = result: {
   4340         const dst_ty = self.typeOfIndex(inst);
   4341         const src_ty = self.typeOf(ty_op.operand);
   4342         if (src_ty.zigTypeTag(mod) == .Vector) return self.fail("TODO implement airClz for {}", .{
   4343             src_ty.fmt(mod),
   4344         });
   4345 
   4346         const src_mcv = try self.resolveInst(ty_op.operand);
   4347         const mat_src_mcv = switch (src_mcv) {
   4348             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   4349             else => src_mcv,
   4350         };
   4351         const mat_src_lock = switch (mat_src_mcv) {
   4352             .register => |reg| self.register_manager.lockReg(reg),
   4353             else => null,
   4354         };
   4355         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   4356 
   4357         const dst_reg = try self.register_manager.allocReg(inst, gp);
   4358         const dst_mcv = MCValue{ .register = dst_reg };
   4359         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4360         defer self.register_manager.unlockReg(dst_lock);
   4361 
   4362         const src_bits = src_ty.bitSize(mod);
   4363         if (self.hasFeature(.lzcnt)) {
   4364             if (src_bits <= 8) {
   4365                 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   4366                 try self.truncateRegister(src_ty, wide_reg);
   4367                 try self.genBinOpMir(.{ ._, .lzcnt }, Type.u32, dst_mcv, .{ .register = wide_reg });
   4368                 try self.genBinOpMir(
   4369                     .{ ._, .sub },
   4370                     dst_ty,
   4371                     dst_mcv,
   4372                     .{ .immediate = 8 + self.regExtraBits(src_ty) },
   4373                 );
   4374             } else if (src_bits <= 64) {
   4375                 try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
   4376                 const extra_bits = self.regExtraBits(src_ty);
   4377                 if (extra_bits > 0) {
   4378                     try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits });
   4379                 }
   4380             } else if (src_bits <= 128) {
   4381                 const tmp_reg = try self.register_manager.allocReg(null, gp);
   4382                 const tmp_mcv = MCValue{ .register = tmp_reg };
   4383                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   4384                 defer self.register_manager.unlockReg(tmp_lock);
   4385 
   4386                 try self.genBinOpMir(.{ ._, .lzcnt }, Type.u64, dst_mcv, mat_src_mcv);
   4387                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
   4388                 try self.genBinOpMir(
   4389                     .{ ._, .lzcnt },
   4390                     Type.u64,
   4391                     tmp_mcv,
   4392                     mat_src_mcv.address().offset(8).deref(),
   4393                 );
   4394                 try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
   4395 
   4396                 if (src_bits < 128) {
   4397                     try self.genBinOpMir(
   4398                         .{ ._, .sub },
   4399                         dst_ty,
   4400                         dst_mcv,
   4401                         .{ .immediate = 128 - src_bits },
   4402                     );
   4403                 }
   4404             } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4405             break :result dst_mcv;
   4406         }
   4407 
   4408         if (src_bits > 64)
   4409             return self.fail("TODO airClz of {}", .{src_ty.fmt(mod)});
   4410         if (math.isPowerOfTwo(src_bits)) {
   4411             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
   4412                 .immediate = src_bits ^ (src_bits - 1),
   4413             });
   4414             const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
   4415             defer self.register_manager.unlockReg(imm_lock);
   4416 
   4417             if (src_bits <= 8) {
   4418                 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   4419                 const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
   4420                 defer self.register_manager.unlockReg(wide_lock);
   4421 
   4422                 try self.truncateRegister(src_ty, wide_reg);
   4423                 try self.genBinOpMir(.{ ._, .bsr }, Type.u16, dst_mcv, .{ .register = wide_reg });
   4424             } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
   4425 
   4426             const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
   4427             try self.asmCmovccRegisterRegister(
   4428                 registerAlias(dst_reg, cmov_abi_size),
   4429                 registerAlias(imm_reg, cmov_abi_size),
   4430                 .z,
   4431             );
   4432 
   4433             try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 });
   4434         } else {
   4435             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
   4436                 .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)),
   4437             });
   4438             const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
   4439             defer self.register_manager.unlockReg(imm_lock);
   4440 
   4441             const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   4442             const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
   4443             defer self.register_manager.unlockReg(wide_lock);
   4444 
   4445             try self.truncateRegister(src_ty, wide_reg);
   4446             try self.genBinOpMir(
   4447                 .{ ._, .bsr },
   4448                 if (src_bits <= 8) Type.u16 else src_ty,
   4449                 dst_mcv,
   4450                 .{ .register = wide_reg },
   4451             );
   4452 
   4453             const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
   4454             try self.asmCmovccRegisterRegister(
   4455                 registerAlias(imm_reg, cmov_abi_size),
   4456                 registerAlias(dst_reg, cmov_abi_size),
   4457                 .nz,
   4458             );
   4459 
   4460             try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 });
   4461             try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg });
   4462         }
   4463         break :result dst_mcv;
   4464     };
   4465     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4466 }
   4467 
   4468 fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
   4469     const mod = self.bin_file.options.module.?;
   4470     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4471     const result = result: {
   4472         const dst_ty = self.typeOfIndex(inst);
   4473         const src_ty = self.typeOf(ty_op.operand);
   4474         const src_bits = src_ty.bitSize(mod);
   4475 
   4476         const src_mcv = try self.resolveInst(ty_op.operand);
   4477         const mat_src_mcv = switch (src_mcv) {
   4478             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   4479             else => src_mcv,
   4480         };
   4481         const mat_src_lock = switch (mat_src_mcv) {
   4482             .register => |reg| self.register_manager.lockReg(reg),
   4483             else => null,
   4484         };
   4485         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   4486 
   4487         const dst_reg = try self.register_manager.allocReg(inst, gp);
   4488         const dst_mcv = MCValue{ .register = dst_reg };
   4489         const dst_lock = self.register_manager.lockReg(dst_reg);
   4490         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4491 
   4492         if (self.hasFeature(.bmi)) {
   4493             if (src_bits <= 64) {
   4494                 const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
   4495                 const wide_ty = if (src_bits <= 8) Type.u16 else src_ty;
   4496                 const masked_mcv = if (extra_bits > 0) masked: {
   4497                     const tmp_mcv = tmp: {
   4498                         if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
   4499                             break :tmp src_mcv;
   4500                         try self.genSetReg(dst_reg, wide_ty, src_mcv);
   4501                         break :tmp dst_mcv;
   4502                     };
   4503                     try self.genBinOpMir(
   4504                         .{ ._, .@"or" },
   4505                         wide_ty,
   4506                         tmp_mcv,
   4507                         .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(64 - extra_bits)) <<
   4508                             @intCast(src_bits) },
   4509                     );
   4510                     break :masked tmp_mcv;
   4511                 } else mat_src_mcv;
   4512                 try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv);
   4513             } else if (src_bits <= 128) {
   4514                 const tmp_reg = try self.register_manager.allocReg(null, gp);
   4515                 const tmp_mcv = MCValue{ .register = tmp_reg };
   4516                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   4517                 defer self.register_manager.unlockReg(tmp_lock);
   4518 
   4519                 const masked_mcv = if (src_bits < 128) masked: {
   4520                     try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref());
   4521                     try self.genBinOpMir(
   4522                         .{ ._, .@"or" },
   4523                         Type.u64,
   4524                         dst_mcv,
   4525                         .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(src_bits - 64) },
   4526                     );
   4527                     break :masked dst_mcv;
   4528                 } else mat_src_mcv.address().offset(8).deref();
   4529                 try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv);
   4530                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
   4531                 try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, mat_src_mcv);
   4532                 try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
   4533             } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4534             break :result dst_mcv;
   4535         }
   4536 
   4537         if (src_bits > 64)
   4538             return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   4539 
   4540         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
   4541         const width_lock = self.register_manager.lockRegAssumeUnused(width_reg);
   4542         defer self.register_manager.unlockReg(width_lock);
   4543 
   4544         if (src_bits <= 8 or !math.isPowerOfTwo(src_bits)) {
   4545             const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   4546             const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
   4547             defer self.register_manager.unlockReg(wide_lock);
   4548 
   4549             try self.truncateRegister(src_ty, wide_reg);
   4550             try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg });
   4551         } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
   4552 
   4553         const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
   4554         try self.asmCmovccRegisterRegister(
   4555             registerAlias(dst_reg, cmov_abi_size),
   4556             registerAlias(width_reg, cmov_abi_size),
   4557             .z,
   4558         );
   4559         break :result dst_mcv;
   4560     };
   4561     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4562 }
   4563 
/// Lowers the `popcount` AIR instruction for scalar integers whose ABI size is
/// at most 8 bytes; vectors and larger integers report a TODO error.
///
/// Uses the `popcnt` instruction when the target has that feature. Otherwise
/// it emits a shift/mask/multiply sequence; the inline comments in that
/// sequence describe the register contents after each emitted instruction.
fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
    const mod = self.bin_file.options.module.?;
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const result: MCValue = result: {
        const src_ty = self.typeOf(ty_op.operand);
        const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
        if (src_ty.zigTypeTag(mod) == .Vector or src_abi_size > 8)
            return self.fail("TODO implement airPopcount for {}", .{src_ty.fmt(mod)});
        const src_mcv = try self.resolveInst(ty_op.operand);

        if (self.hasFeature(.popcnt)) {
            // Materialize immediates into a temporary register first.
            const mat_src_mcv = switch (src_mcv) {
                .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
                else => src_mcv,
            };
            const mat_src_lock = switch (mat_src_mcv) {
                .register => |reg| self.register_manager.lockReg(reg),
                else => null,
            };
            defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);

            // Reuse the operand's register for the result when possible.
            const dst_mcv: MCValue =
                if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                src_mcv
            else
                .{ .register = try self.register_manager.allocReg(inst, gp) };

            // Byte-sized operands are counted as 16-bit values.
            const popcnt_ty = if (src_abi_size > 1) src_ty else Type.u16;
            try self.genBinOpMir(.{ ._, .popcnt }, popcnt_ty, dst_mcv, mat_src_mcv);
            break :result dst_mcv;
        }

        // Fallback constants: `mask` has the low `src_abi_size * 8` bits set,
        // and dividing it yields the repeating patterns 0x55...55 (mask / 3),
        // 0x33...33 (mask / 5), 0x0f...0f (mask / 17) and 0x01...01 (mask / 255).
        const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8);
        const imm_0_1 = Immediate.u(mask / 0b1_1);
        const imm_00_11 = Immediate.u(mask / 0b01_01);
        const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
        const imm_0000_0001 = Immediate.u(mask / 0b1111_1111);

        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv);
        const dst_reg = dst_mcv.register;
        const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
        defer self.register_manager.unlockReg(dst_lock);

        const tmp_reg = try self.register_manager.allocReg(null, gp);
        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
        defer self.register_manager.unlockReg(tmp_lock);

        {
            const dst = registerAlias(dst_reg, src_abi_size);
            const tmp = registerAlias(tmp_reg, src_abi_size);
            // `imm` is only read in the `src_abi_size > 4` branches below,
            // where each mask is first moved into this register instead of
            // being used as an immediate operand.
            // NOTE(review): presumably because 64-bit masks cannot be encoded
            // as an immediate of `and`/`imul` - confirm.
            const imm = if (src_abi_size > 4)
                try self.register_manager.allocReg(null, gp)
            else
                undefined;

            // dst = operand
            try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
            // tmp = operand
            try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1));
            // tmp = operand >> 1
            if (src_abi_size > 4) {
                try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
                try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
            } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
            // tmp = (operand >> 1) & 0x55...55
            try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp);
            // dst = temp1 = operand - ((operand >> 1) & 0x55...55)
            try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
            // tmp = temp1
            try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2));
            // dst = temp1 >> 2
            if (src_abi_size > 4) {
                try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
                try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
                try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
            } else {
                try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
                try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
            }
            // tmp = temp1 & 0x33...33
            // dst = (temp1 >> 2) & 0x33...33
            try self.asmRegisterRegister(.{ ._, .add }, tmp, dst);
            // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33)
            try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
            // dst = temp2
            try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4));
            // tmp = temp2 >> 4
            try self.asmRegisterRegister(.{ ._, .add }, dst, tmp);
            // dst = temp2 + (temp2 >> 4)
            if (src_abi_size > 4) {
                try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
                try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001);
                try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
                try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp);
            } else {
                try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
                // A single byte needs neither the multiply nor the final shift.
                if (src_abi_size > 1) {
                    try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001);
                }
            }
            // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
            // dst = temp3 * 0x01...01
            if (src_abi_size > 1) {
                try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8));
            }
            // dst = (temp3 * 0x01...01) >> (bits - 8)
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
   4678 
/// Emits code that reverses the byte order of `src_mcv` (of type `src_ty`) on
/// behalf of `inst` and returns the location holding the swapped value.
///
/// `mem_ok` states whether the caller accepts a result that is not in a
/// register: when it is `false`, only a register operand is reused in place
/// and new destinations are always registers.
///
/// Vectors, types with an ABI size above 8 bytes, and register bit sizes other
/// than 8, 16, 32 or 64 report a TODO error. The function does not call
/// `finishAir`; that is left to the caller.
fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, mem_ok: bool) !MCValue {
    const mod = self.bin_file.options.module.?;
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;

    if (src_ty.zigTypeTag(mod) == .Vector or src_ty.abiSize(mod) > 8) return self.fail(
        "TODO implement byteSwap for {}",
        .{src_ty.fmt(mod)},
    );
    const src_bits = self.regBitSize(src_ty);
    const src_lock = switch (src_mcv) {
        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
        else => null,
    };
    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

    // Fast paths: operate directly on the operand when it can be reused.
    switch (src_bits) {
        else => return self.fail("TODO implement byteSwap for {}", .{
            src_ty.fmt(mod),
        }),
        // A single byte has nothing to swap; just produce the value.
        8 => return if ((mem_ok or src_mcv.isRegister()) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv),
        // Rotating a 16-bit value by 8 exchanges its two bytes.
        16 => if ((mem_ok or src_mcv.isRegister()) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
        {
            try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 });
            return src_mcv;
        },
        // `bswap` is only emitted on a reused register operand here.
        32, 64 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
            try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv);
            return src_mcv;
        },
    }

    // The operand could not be reused: a fresh destination is needed.
    if (src_mcv.isRegister()) {
        const dst_mcv: MCValue = if (mem_ok)
            try self.allocRegOrMem(inst, true)
        else
            .{ .register = try self.register_manager.allocReg(inst, gp) };
        if (dst_mcv.isRegister()) {
            const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
            defer self.register_manager.unlockReg(dst_lock);

            // Copy into the destination register, then swap in place.
            try self.genSetReg(dst_mcv.register, src_ty, src_mcv);
            switch (src_bits) {
                // 8-bit values and unsupported sizes already returned above.
                else => unreachable,
                16 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
                32, 64 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
            }
        } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
        return dst_mcv;
    }

    // Non-register source: `movbe` it into a newly allocated register.
    // NOTE(review): no `hasFeature(.movbe)` check is visible in this
    // function - confirm availability is guaranteed elsewhere.
    const dst_reg = try self.register_manager.allocReg(inst, gp);
    const dst_mcv = MCValue{ .register = dst_reg };
    const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
    defer self.register_manager.unlockReg(dst_lock);

    try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
    return dst_mcv;
}
   4742 
   4743 fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a byte-swap AIR instruction: resolves the `ty_op` operand, swaps its
            // bytes through `byteSwap`, then finishes the instruction with the result.
   4744     const mod = self.bin_file.options.module.?;
   4745     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4746 
   4747     const src_ty = self.typeOf(ty_op.operand);
   4748     const src_mcv = try self.resolveInst(ty_op.operand);
   4749 
            // The trailing `true` is presumably the `mem_ok` flag of `byteSwap`, letting the
            // result live in memory as well as in a register — confirm against its signature.
   4750     const dst_mcv = try self.byteSwap(inst, src_ty, src_mcv, true);
            // When `regExtraBits` reports a non-zero count for the type, the swapped value is
            // shifted right (`._r`) by that many bits: the `.sa` form for signed integers,
            // the `.sh` form otherwise.
   4751     switch (self.regExtraBits(src_ty)) {
   4752         0 => {},
   4753         else => |extra| try self.genBinOpMir(
   4754             if (src_ty.isSignedInt(mod)) .{ ._r, .sa } else .{ ._r, .sh },
   4755             src_ty,
   4756             dst_mcv,
   4757             .{ .immediate = extra },
   4758         ),
   4759     }
   4760 
   4761     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4762 }
   4763 
   4764 fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a bit-reverse AIR instruction. The operand is first byte-swapped into a
            // register, then nibbles, bit pairs and single bits are exchanged within each
            // byte using three shift-and-mask rounds. In the emission block below, each
            // `// reg = ...` comment describes the register state produced by the
            // instruction(s) emitted just above it.
   4765     const mod = self.bin_file.options.module.?;
   4766     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   4767 
   4768     const src_ty = self.typeOf(ty_op.operand);
   4769     const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
   4770     const src_mcv = try self.resolveInst(ty_op.operand);
   4771 
            // The result is read through `.register` right below, so the byte swap is asked
            // for with `false` as its last argument (presumably `mem_ok` — confirm).
   4772     const dst_mcv = try self.byteSwap(inst, src_ty, src_mcv, false);
   4773     const dst_reg = dst_mcv.register;
   4774     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4775     defer self.register_manager.unlockReg(dst_lock);
   4776 
   4777     const tmp_reg = try self.register_manager.allocReg(null, gp);
   4778     const tmp_lock = self.register_manager.lockReg(tmp_reg);
   4779     defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   4780 
   4781     {
   4782         const dst = registerAlias(dst_reg, src_abi_size);
   4783         const tmp = registerAlias(tmp_reg, src_abi_size);
                // A third register is only allocated for operands wider than 4 bytes, where the
                // masks are moved into a register before the `and`. It stays `undefined`
                // otherwise and is never read, because every use is behind the same size check.
                // NOTE(review): presumably needed because the wide masks do not fit an
                // immediate operand of `and` — confirm.
   4784         const imm = if (src_abi_size > 4)
   4785             try self.register_manager.allocReg(null, gp)
   4786         else
   4787             undefined;
   4788 
                // `mask` has the low `src_abi_size * 8` bits set. Dividing it by 0x11, 0x5 and
                // 0x3 gives the repeating 0x0F..0F, 0x33..33 and 0x55..55 patterns.
   4789         const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8);
   4790         const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
   4791         const imm_00_11 = Immediate.u(mask / 0b01_01);
   4792         const imm_0_1 = Immediate.u(mask / 0b1_1);
   4793 
   4794         // dst = temp1 = bswap(operand)
   4795         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   4796         // tmp = temp1
   4797         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4));
   4798         // dst = temp1 >> 4
   4799         if (src_abi_size > 4) {
   4800             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
   4801             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4802             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4803         } else {
   4804             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111);
   4805             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
   4806         }
   4807         // tmp = temp1 & 0x0F...0F
   4808         // dst = (temp1 >> 4) & 0x0F...0F
   4809         try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4));
   4810         // tmp = (temp1 & 0x0F...0F) << 4
   4811         try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp);
   4812         // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4)
   4813         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   4814         // tmp = temp2
   4815         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2));
   4816         // dst = temp2 >> 2
   4817         if (src_abi_size > 4) {
   4818             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
   4819             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4820             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4821         } else {
   4822             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
   4823             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
   4824         }
   4825         // tmp = temp2 & 0x33...33
   4826         // dst = (temp2 >> 2) & 0x33...33
                // The `lea` below computes `dst + tmp * 4` in one instruction, i.e. it performs
                // the left shift by 2 (scale 4) and the combining add together.
   4827         try self.asmRegisterMemory(
   4828             .{ ._, .lea },
   4829             if (src_abi_size > 4) tmp.to64() else tmp.to32(),
   4830             Memory.sib(.qword, .{
   4831                 .base = .{ .reg = dst.to64() },
   4832                 .scale_index = .{ .index = tmp.to64(), .scale = 1 << 2 },
   4833             }),
   4834         );
   4835         // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2)
   4836         try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
   4837         // dst = temp3
   4838         try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1));
   4839         // tmp = temp3 >> 1
   4840         if (src_abi_size > 4) {
   4841             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
   4842             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   4843             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   4844         } else {
   4845             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1);
   4846             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
   4847         }
   4848         // dst = temp3 & 0x55...55
   4849         // tmp = (temp3 >> 1) & 0x55...55
                // Same trick as above with scale 2: `tmp + dst * 2` performs the shift by 1
                // and the combining add, leaving the final value in `dst`.
   4850         try self.asmRegisterMemory(
   4851             .{ ._, .lea },
   4852             if (src_abi_size > 4) dst.to64() else dst.to32(),
   4853             Memory.sib(.qword, .{
   4854                 .base = .{ .reg = tmp.to64() },
   4855                 .scale_index = .{ .index = dst.to64(), .scale = 1 << 1 },
   4856             }),
   4857         );
   4858         // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1)
   4859     }
   4860 
            // Same post-adjustment as in `airByteSwap`: shift right by the count reported by
            // `regExtraBits`, using the `.sa` form for signed types and `.sh` otherwise.
   4861     switch (self.regExtraBits(src_ty)) {
   4862         0 => {},
   4863         else => |extra| try self.genBinOpMir(
   4864             if (src_ty.isSignedInt(mod)) .{ ._r, .sa } else .{ ._r, .sh },
   4865             src_ty,
   4866             dst_mcv,
   4867             .{ .immediate = extra },
   4868         ),
   4869     }
   4870 
   4871     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4872 }
   4873 
   4874 fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers the `.neg` and `.fabs` AIR tags for floats and float vectors by
            // combining the operand with a constant bit mask: XOR with a mask built from
            // `minInt` for `.neg`, AND with a mask built from `maxInt` for `.fabs`.
            // Any other tag is `unreachable`.
   4875     const mod = self.bin_file.options.module.?;
   4876     const tag = self.air.instructions.items(.tag)[inst];
   4877     const un_op = self.air.instructions.items(.data)[inst].un_op;
   4878     const ty = self.typeOf(un_op);
            // Round the operand size up to 16 or 32 bytes; anything larger is not
            // implemented and reported through `self.fail`.
   4879     const abi_size: u32 = switch (ty.abiSize(mod)) {
   4880         1...16 => 16,
   4881         17...32 => 32,
   4882         else => return self.fail("TODO implement airFloatSign for {}", .{
   4883             ty.fmt(mod),
   4884         }),
   4885     };
   4886     const scalar_bits = ty.scalarType(mod).floatBits(self.target.*);
            // 80-bit scalars are rejected here, so the `80 =>` arms in the instruction
            // selection switches further down are never reached.
   4887     if (scalar_bits == 80) return self.fail("TODO implement airFloatSign for {}", .{
   4888         ty.fmt(mod),
   4889     });
   4890 
   4891     const src_mcv = try self.resolveInst(un_op);
   4892     const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   4893     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   4894 
            // Destination choice: reuse the operand's register when it dies here; otherwise,
            // with AVX, allocate a fresh SSE register (the three-operand form below supplies
            // the source separately); without AVX, copy the operand into a new register
            // because the two-operand form modifies its destination in place.
   4895     const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   4896         src_mcv
   4897     else if (self.hasFeature(.avx))
   4898         .{ .register = try self.register_manager.allocReg(inst, sse) }
   4899     else
   4900         try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   4901     const dst_reg = dst_mcv.getReg().?;
   4902     const dst_lock = self.register_manager.lockReg(dst_reg);
   4903     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4904 
            // The mask is a vector of signed integers, each as wide as one float scalar,
            // covering the whole rounded-up `abi_size`.
   4905     const vec_ty = try mod.vectorType(.{
   4906         .len = @divExact(abi_size * 8, scalar_bits),
   4907         .child = (try mod.intType(.signed, scalar_bits)).ip_index,
   4908     });
   4909 
   4910     const sign_val = switch (tag) {
   4911         .neg => try vec_ty.minInt(mod, vec_ty),
   4912         .fabs => try vec_ty.maxInt(mod, vec_ty),
   4913         else => unreachable,
   4914     };
   4915 
            // Use the constant as a memory operand directly when possible; otherwise load
            // its address into a temporary register and address it through that.
   4916     const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
   4917     const sign_mem = if (sign_mcv.isMemory())
   4918         sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
   4919     else
   4920         Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
   4921             .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
   4922         });
   4923 
            // AVX path: register, register, memory form. For 16- and 128-bit scalars the
            // `.vp_` (integer) form is used when the operand fits 16 bytes or AVX2 is
            // available, and the `.v_ps` form otherwise.
   4924     if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
   4925         switch (scalar_bits) {
   4926             16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
   4927                 .neg => .{ .vp_, .xor },
   4928                 .fabs => .{ .vp_, .@"and" },
   4929                 else => unreachable,
   4930             } else switch (tag) {
   4931                 .neg => .{ .v_ps, .xor },
   4932                 .fabs => .{ .v_ps, .@"and" },
   4933                 else => unreachable,
   4934             },
   4935             32 => switch (tag) {
   4936                 .neg => .{ .v_ps, .xor },
   4937                 .fabs => .{ .v_ps, .@"and" },
   4938                 else => unreachable,
   4939             },
   4940             64 => switch (tag) {
   4941                 .neg => .{ .v_pd, .xor },
   4942                 .fabs => .{ .v_pd, .@"and" },
   4943                 else => unreachable,
   4944             },
   4945             80 => return self.fail("TODO implement airFloatSign for {}", .{
   4946                 ty.fmt(self.bin_file.options.module.?),
   4947             }),
   4948             else => unreachable,
   4949         },
   4950         registerAlias(dst_reg, abi_size),
   4951         registerAlias(if (src_mcv.isRegister())
   4952             src_mcv.getReg().?
   4953         else
   4954             try self.copyToTmpRegister(ty, src_mcv), abi_size),
   4955         sign_mem,
            // Non-AVX path: register, memory form operating on `dst_reg` in place.
   4956     ) else try self.asmRegisterMemory(
   4957         switch (scalar_bits) {
   4958             16, 128 => switch (tag) {
   4959                 .neg => .{ .p_, .xor },
   4960                 .fabs => .{ .p_, .@"and" },
   4961                 else => unreachable,
   4962             },
   4963             32 => switch (tag) {
   4964                 .neg => .{ ._ps, .xor },
   4965                 .fabs => .{ ._ps, .@"and" },
   4966                 else => unreachable,
   4967             },
   4968             64 => switch (tag) {
   4969                 .neg => .{ ._pd, .xor },
   4970                 .fabs => .{ ._pd, .@"and" },
   4971                 else => unreachable,
   4972             },
   4973             80 => return self.fail("TODO implement airFloatSign for {}", .{
   4974                 ty.fmt(self.bin_file.options.module.?),
   4975             }),
   4976             else => unreachable,
   4977         },
   4978         registerAlias(dst_reg, abi_size),
   4979         sign_mem,
   4980     );
   4981     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
   4982 }
   4983 
   4984 fn airRound(self: *Self, inst: Air.Inst.Index, mode: u4) !void {
            // Lowers a unary rounding AIR instruction. `mode` is forwarded unchanged to
            // `genRound`, which emits it as the instruction's immediate operand.
   4985     const un_op = self.air.instructions.items(.data)[inst].un_op;
   4986     const ty = self.typeOf(un_op);
   4987 
   4988     const src_mcv = try self.resolveInst(un_op);
            // Take over the operand's register when it is a register and dies at this
            // instruction; otherwise copy the operand into a register tracked for `inst`.
   4989     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   4990         src_mcv
   4991     else
   4992         try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   4993     const dst_reg = dst_mcv.getReg().?;
            // Keep the destination locked while `genRound` runs, since it may allocate a
            // temporary register for the source.
   4994     const dst_lock = self.register_manager.lockReg(dst_reg);
   4995     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4996     try self.genRound(ty, dst_reg, src_mcv, mode);
   4997     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
   4998 }
   4999 
   5000 fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4) !void {
            // Emits a `round` instruction that rounds `src_mcv` (of type `ty`) into
            // `dst_reg`, with `mode` as the immediate operand. Fails with a TODO message
            // when the target lacks `sse4_1` or when no instruction is selected for `ty`.
   5001     const mod = self.bin_file.options.module.?;
   5002     if (!self.hasFeature(.sse4_1))
   5003         return self.fail("TODO implement genRound without sse4_1 feature", .{});
   5004 
            // Select the instruction from the float width and, for vectors, the element
            // count. `null` means "not implemented" and becomes a `self.fail` below.
            // Vector lengths needing more than 16 bytes (5...8 x f32, 3...4 x f64) are only
            // selected when AVX is available.
   5005     const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
   5006         .Float => switch (ty.floatBits(self.target.*)) {
   5007             32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
   5008             64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
   5009             16, 80, 128 => null,
   5010             else => unreachable,
   5011         },
   5012         .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   5013             .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   5014                 32 => switch (ty.vectorLen(mod)) {
   5015                     1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
   5016                     2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
   5017                     5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
   5018                     else => null,
   5019                 },
   5020                 64 => switch (ty.vectorLen(mod)) {
   5021                     1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
   5022                     2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
   5023                     3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
   5024                     else => null,
   5025                 },
   5026                 16, 80, 128 => null,
   5027                 else => unreachable,
   5028             },
   5029             else => null,
   5030         },
   5031         else => unreachable,
   5032     }) orelse return self.fail("TODO implement genRound for {}", .{
   5033         ty.fmt(self.bin_file.options.module.?),
   5034     });
   5035     const abi_size: u32 = @intCast(ty.abiSize(mod));
   5036     const dst_alias = registerAlias(dst_reg, abi_size);
   5037     switch (mir_tag[0]) {
                // The AVX scalar forms take an extra register operand; `dst_alias` is passed
                // for both the destination and that first source.
   5038         .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   5039             mir_tag,
   5040             dst_alias,
   5041             dst_alias,
   5042             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   5043             Immediate.u(mode),
   5044         ) else try self.asmRegisterRegisterRegisterImmediate(
   5045             mir_tag,
   5046             dst_alias,
   5047             dst_alias,
   5048             registerAlias(if (src_mcv.isRegister())
   5049                 src_mcv.getReg().?
   5050             else
   5051                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   5052             Immediate.u(mode),
   5053         ),
                // All other forms take destination, source and immediate. A source that is
                // neither memory nor a register is first copied into a temporary register.
   5054         else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   5055             mir_tag,
   5056             dst_alias,
   5057             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   5058             Immediate.u(mode),
   5059         ) else try self.asmRegisterRegisterImmediate(
   5060             mir_tag,
   5061             dst_alias,
   5062             registerAlias(if (src_mcv.isRegister())
   5063                 src_mcv.getReg().?
   5064             else
   5065                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   5066             Immediate.u(mode),
   5067         ),
   5068     }
   5069 }
   5070 
   5071 fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a square-root AIR instruction for floats and float vectors.
            // 32- and 64-bit elements map to a single `sqrt` instruction. 16-bit elements
            // are handled only with the `f16c` feature, by converting to a wider format,
            // taking the square root there, and converting back. Unsupported types fail
            // with a TODO message.
   5072     const mod = self.bin_file.options.module.?;
   5073     const un_op = self.air.instructions.items(.data)[inst].un_op;
   5074     const ty = self.typeOf(un_op);
   5075     const abi_size: u32 = @intCast(ty.abiSize(mod));
   5076 
   5077     const src_mcv = try self.resolveInst(un_op);
            // Take over the operand's register when it dies here, otherwise copy the
            // operand into a register tracked for `inst`.
   5078     const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   5079         src_mcv
   5080     else
   5081         try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   5082     const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
   5083     const dst_lock = self.register_manager.lockReg(dst_reg);
   5084     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5085 
   5086     const result: MCValue = result: {
                // The switch either yields the instruction to emit, yields `null` (not
                // implemented), or, for the f16 paths, emits its own sequence and leaves
                // through `break :result`.
   5087         const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
   5088             .Float => switch (ty.floatBits(self.target.*)) {
   5089                 16 => if (self.hasFeature(.f16c)) {
                            // Scalar f16: convert (`cvtph2`), scalar sqrt, convert back
                            // (`cvtps2ph`), all within `dst_reg`.
   5090                     const mat_src_reg = if (src_mcv.isRegister())
   5091                         src_mcv.getReg().?
   5092                     else
   5093                         try self.copyToTmpRegister(ty, src_mcv);
   5094                     try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
   5095                     try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
   5096                     try self.asmRegisterRegisterImmediate(
   5097                         .{ .v_, .cvtps2ph },
   5098                         dst_reg,
   5099                         dst_reg,
   5100                         Immediate.u(0b1_00),
   5101                     );
   5102                     break :result dst_mcv;
   5103                 } else null,
   5104                 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
   5105                 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
   5106                 80, 128 => null,
   5107                 else => unreachable,
   5108             },
   5109             .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   5110                 .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   5111                     16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(mod)) {
   5112                         1 => {
                                    // One-element f16 vector: same sequence as the scalar case.
   5113                             try self.asmRegisterRegister(
   5114                                 .{ .v_ps, .cvtph2 },
   5115                                 dst_reg,
   5116                                 (if (src_mcv.isRegister())
   5117                                     src_mcv.getReg().?
   5118                                 else
   5119                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
   5120                             );
   5121                             try self.asmRegisterRegisterRegister(
   5122                                 .{ .v_ss, .sqrt },
   5123                                 dst_reg,
   5124                                 dst_reg,
   5125                                 dst_reg,
   5126                             );
   5127                             try self.asmRegisterRegisterImmediate(
   5128                                 .{ .v_, .cvtps2ph },
   5129                                 dst_reg,
   5130                                 dst_reg,
   5131                                 Immediate.u(0b1_00),
   5132                             );
   5133                             break :result dst_mcv;
   5134                         },
   5135                         2...8 => {
                                    // The widened elements need twice the space, so the
                                    // conversion and the packed sqrt run on an alias of the
                                    // destination that is twice `abi_size` wide.
   5136                             const wide_reg = registerAlias(dst_reg, abi_size * 2);
   5137                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   5138                                 .{ .v_ps, .cvtph2 },
   5139                                 wide_reg,
   5140                                 src_mcv.mem(Memory.PtrSize.fromSize(
   5141                                     @intCast(@divExact(wide_reg.bitSize(), 16)),
   5142                                 )),
   5143                             ) else try self.asmRegisterRegister(
   5144                                 .{ .v_ps, .cvtph2 },
   5145                                 wide_reg,
   5146                                 (if (src_mcv.isRegister())
   5147                                     src_mcv.getReg().?
   5148                                 else
   5149                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
   5150                             );
   5151                             try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
   5152                             try self.asmRegisterRegisterImmediate(
   5153                                 .{ .v_, .cvtps2ph },
   5154                                 dst_reg,
   5155                                 wide_reg,
   5156                                 Immediate.u(0b1_00),
   5157                             );
   5158                             break :result dst_mcv;
   5159                         },
   5160                         else => null,
   5161                     } else null,
   5162                     32 => switch (ty.vectorLen(mod)) {
   5163                         1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
   5164                         2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
   5165                         5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
   5166                         else => null,
   5167                     },
   5168                     64 => switch (ty.vectorLen(mod)) {
   5169                         1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
   5170                         2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
   5171                         3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
   5172                         else => null,
   5173                     },
   5174                     80, 128 => null,
   5175                     else => unreachable,
   5176                 },
   5177                 else => unreachable,
   5178             },
   5179             else => unreachable,
   5180         }) orelse return self.fail("TODO implement airSqrt for {}", .{
   5181             ty.fmt(mod),
   5182         });
   5183         switch (mir_tag[0]) {
                    // The AVX scalar forms take an extra register operand; `dst_reg` is
                    // passed for both the destination and that first source.
   5184             .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   5185                 mir_tag,
   5186                 dst_reg,
   5187                 dst_reg,
   5188                 src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   5189             ) else try self.asmRegisterRegisterRegister(
   5190                 mir_tag,
   5191                 dst_reg,
   5192                 dst_reg,
   5193                 registerAlias(if (src_mcv.isRegister())
   5194                     src_mcv.getReg().?
   5195                 else
   5196                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
   5197             ),
                    // Remaining forms take just destination and source.
   5198             else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
   5199                 mir_tag,
   5200                 dst_reg,
   5201                 src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   5202             ) else try self.asmRegisterRegister(
   5203                 mir_tag,
   5204                 dst_reg,
   5205                 registerAlias(if (src_mcv.isRegister())
   5206                     src_mcv.getReg().?
   5207                 else
   5208                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
   5209             ),
   5210         }
   5211         break :result dst_mcv;
   5212     };
   5213     return self.finishAir(inst, result, .{ un_op, .none, .none });
   5214 }
   5215 
   5216 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
            // Placeholder for unary math AIR instructions: always fails with a TODO message
            // that names the instruction's tag. The operand is read and discarded.
   5217     const un_op = self.air.instructions.items(.data)[inst].un_op;
   5218     _ = un_op;
   5219     return self.fail("TODO implement airUnaryMath for {}", .{
   5220         self.air.instructions.items(.tag)[inst],
   5221     });
   5222     //return self.finishAir(inst, result, .{ un_op, .none, .none });
   5223 }
   5224 
   5225 fn reuseOperand(
   5226     self: *Self,
   5227     inst: Air.Inst.Index,
   5228     operand: Air.Inst.Ref,
   5229     op_index: Liveness.OperandInt,
   5230     mcv: MCValue,
   5231 ) bool {
            // Convenience form of `reuseOperandAdvanced` in which the instruction that takes
            // over the operand's storage is `inst` itself. Returns whether reuse happened.
   5232     return self.reuseOperandAdvanced(inst, operand, op_index, mcv, inst);
   5233 }
   5234 
   5235 fn reuseOperandAdvanced(
   5236     self: *Self,
   5237     inst: Air.Inst.Index,
   5238     operand: Air.Inst.Ref,
   5239     op_index: Liveness.OperandInt,
   5240     mcv: MCValue,
   5241     tracked_inst: Air.Inst.Index,
   5242 ) bool {
            // Tries to hand the storage of operand `op_index` of `inst`, currently `mcv`,
            // over to `tracked_inst`. Returns `true` when the storage was taken over and
            // `false` when the caller must allocate its own destination.
            // Reuse is only possible when liveness says the operand dies at `inst`.
   5243     if (!self.liveness.operandDies(inst, op_index))
   5244         return false;
   5245 
   5246     switch (mcv) {
   5247         .register => |reg| {
   5248             // If it's in the registers table, need to associate the register with the
   5249             // new instruction.
   5250             if (!self.register_manager.isRegFree(reg)) {
   5251                 if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
   5252                     self.register_manager.registers[index] = tracked_inst;
   5253                 }
   5254             }
   5255         },
                // Frame loads are reusable unless the frame index is a named one.
   5256         .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
                // Every other kind of value is never reused.
   5257         else => return false,
   5258     }
   5259 
   5260     // Prevent the operand deaths processing code from deallocating it.
   5261     self.liveness.clearOperandDeath(inst, op_index);
            // `.?` asserts that the operand refers to an instruction.
   5262     const op_inst = Air.refToIndex(operand).?;
   5263     self.getResolvedInstValue(op_inst).reuse(self, tracked_inst, op_inst);
   5264 
   5265     return true;
   5266 }
   5267 
   5268 fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
            // Loads a bit-packed value through `ptr_mcv` (a pointer of type `ptr_ty` that
            // carries a packed bit offset) into `dst_mcv`. Values larger than 8 bytes are not
            // implemented and fail with a TODO message.
   5269     const mod = self.bin_file.options.module.?;
   5270     const ptr_info = ptr_ty.ptrInfo(mod);
   5271 
   5272     const val_ty = ptr_info.child.toType();
   5273     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
            // The host memory is addressed in limbs of at most 8 bytes. `val_byte_off` is the
            // byte offset of the limb holding the value's first bit, and `val_bit_off` is
            // the bit position inside that limb.
   5274     const limb_abi_size: u32 = @min(val_abi_size, 8);
   5275     const limb_abi_bits = limb_abi_size * 8;
   5276     const val_byte_off: i32 = @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size);
   5277     const val_bit_off = ptr_info.packed_offset.bit_offset % limb_abi_bits;
            // NOTE(review): presumably the number of unused high bits when the value sits in
            // its ABI-sized container — confirm against `regExtraBits`.
   5278     const val_extra_bits = self.regExtraBits(val_ty);
   5279 
   5280     if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{
   5281         val_ty.fmt(self.bin_file.options.module.?),
   5282     });
   5283 
   5284     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5285     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   5286     defer self.register_manager.unlockReg(ptr_lock);
   5287 
            // Work directly in the destination when it is a register, otherwise in a scratch
            // register that is copied to `dst_mcv` at the end.
   5288     const dst_reg = switch (dst_mcv) {
   5289         .register => |reg| reg,
   5290         else => try self.register_manager.allocReg(null, gp),
   5291     };
   5292     const dst_lock = self.register_manager.lockReg(dst_reg);
   5293     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5294 
            // If the bit offset is smaller than the spare bits, a single load of the value's
            // own size covers all of its bits; otherwise twice that size is needed.
   5295     const load_abi_size =
   5296         if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
   5297     if (load_abi_size <= 8) {
                // Everything fits one register: load, then shift the value down to bit 0.
   5298         const load_reg = registerAlias(dst_reg, load_abi_size);
   5299         try self.asmRegisterMemory(
   5300             .{ ._, .mov },
   5301             load_reg,
   5302             Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{
   5303                 .base = .{ .reg = ptr_reg },
   5304                 .disp = val_byte_off,
   5305             }),
   5306         );
   5307         try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off));
   5308     } else {
                // The value straddles two limbs: load the low limb into the destination and
                // the following limb into a temporary, then use a double-register right shift
                // so the temporary supplies the bits shifted into the top.
   5309         const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size);
   5310         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5311         defer self.register_manager.unlockReg(tmp_lock);
   5312 
   5313         const dst_alias = registerAlias(dst_reg, val_abi_size);
   5314         try self.asmRegisterMemory(
   5315             .{ ._, .mov },
   5316             dst_alias,
   5317             Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{
   5318                 .base = .{ .reg = ptr_reg },
   5319                 .disp = val_byte_off,
   5320             }),
   5321         );
   5322         try self.asmRegisterMemory(
   5323             .{ ._, .mov },
   5324             tmp_reg,
   5325             Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{
   5326                 .base = .{ .reg = ptr_reg },
                        // The high part must come from the limb right after the first one,
                        // i.e. one whole limb further, not one byte further. On this path
                        // `val_abi_size` is above 4 and at most 8, so it equals `limb_abi_size`.
   5327                 .disp = val_byte_off + @as(i32, @intCast(limb_abi_size)),
   5328             }),
   5329         );
   5330         try self.asmRegisterRegisterImmediate(
   5331             .{ ._rd, .sh },
   5332             dst_alias,
   5333             tmp_reg,
   5334             Immediate.u(val_bit_off),
   5335         );
   5336     }
   5337 
            // Clear any bits above the value's width, then move the result to `dst_mcv`.
   5338     if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
   5339     try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg });
   5340 }
   5341 
   5342 fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
            // Copies the value pointed to by `ptr_mcv` (of pointer type `ptr_ty`) into
            // `dst_mcv`. The copied type is the pointer's child type.
   5343     const mod = self.bin_file.options.module.?;
   5344     const dst_ty = ptr_ty.childType(mod);
   5345     switch (ptr_mcv) {
   5346         .none,
   5347         .unreach,
   5348         .dead,
   5349         .undef,
   5350         .eflags,
   5351         .register_overflow,
   5352         .reserved_frame,
   5353         => unreachable, // not a valid pointer
                // The pointer value can be dereferenced symbolically: copy straight from
                // `ptr_mcv.deref()`.
   5354         .immediate,
   5355         .register,
   5356         .register_offset,
   5357         .lea_direct,
   5358         .lea_got,
   5359         .lea_tlv,
   5360         .lea_frame,
   5361         => try self.genCopy(dst_ty, dst_mcv, ptr_mcv.deref()),
                // The pointer is itself stored somewhere: first bring it into a temporary
                // register, keep that register locked, and copy through it indirectly.
   5362         .memory,
   5363         .indirect,
   5364         .load_direct,
   5365         .load_got,
   5366         .load_tlv,
   5367         .load_frame,
   5368         => {
   5369             const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5370             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   5371             defer self.register_manager.unlockReg(addr_lock);
   5372 
   5373             try self.genCopy(dst_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
   5374         },
   5375     }
   5376 }
   5377 
   5378 fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a load AIR instruction: picks a destination for the loaded value and
            // dispatches to `packedLoad` or `load` depending on the pointer type.
   5379     const mod = self.bin_file.options.module.?;
   5380     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   5381     const elem_ty = self.typeOfIndex(inst);
   5382     const result: MCValue = result: {
                // Nothing to load for types without runtime bits.
   5383         if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   5384 
                // rdi, rsi and rcx are spilled and kept locked until the end of this block, so
                // nothing below gets allocated into them.
                // NOTE(review): presumably they are reserved for a memory-copy sequence that
                // `genCopy` may emit — confirm.
   5385         try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
   5386         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
   5387         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
   5388 
   5389         const ptr_ty = self.typeOf(ty_op.operand);
   5390         const elem_size = elem_ty.abiSize(mod);
   5391 
   5392         const elem_rc = regClassForType(elem_ty, mod);
   5393         const ptr_rc = regClassForType(ptr_ty, mod);
   5394 
   5395         const ptr_mcv = try self.resolveInst(ty_op.operand);
                // The pointer's own storage is reused for the result only when the element is
                // at most 8 bytes, the element's register class covers the pointer's, and the
                // pointer operand dies at this instruction.
   5396         const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and
   5397             self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
   5398             // The MCValue that holds the pointer can be re-used as the value.
   5399             ptr_mcv
   5400         else
   5401             try self.allocRegOrMem(inst, true);
   5402 
                // A non-zero `host_size` marks a pointer into a bit-packed host integer.
   5403         if (ptr_ty.ptrInfo(mod).packed_offset.host_size > 0) {
   5404             try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
   5405         } else {
   5406             try self.load(dst_mcv, ptr_ty, ptr_mcv);
   5407         }
   5408         break :result dst_mcv;
   5409     };
   5410     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   5411 }
   5412 
   5413 fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
   5414     const mod = self.bin_file.options.module.?;
   5415     const ptr_info = ptr_ty.ptrInfo(mod);
   5416     const src_ty = ptr_ty.childType(mod);
   5417 
   5418     const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
   5419     const limb_abi_bits = limb_abi_size * 8;
   5420     const limb_ty = try mod.intType(.unsigned, limb_abi_bits);
   5421 
   5422     const src_bit_size = src_ty.bitSize(mod);
   5423     const src_byte_off: i32 = @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size);
   5424     const src_bit_off = ptr_info.packed_offset.bit_offset % limb_abi_bits;
   5425 
   5426     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5427     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   5428     defer self.register_manager.unlockReg(ptr_lock);
   5429 
   5430     var limb_i: u16 = 0;
   5431     while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
   5432         const part_bit_off = if (limb_i == 0) src_bit_off else 0;
   5433         const part_bit_size =
   5434             @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
   5435         const limb_mem = Memory.sib(Memory.PtrSize.fromSize(limb_abi_size), .{
   5436             .base = .{ .reg = ptr_reg },
   5437             .disp = src_byte_off + limb_i * limb_abi_bits,
   5438         });
   5439 
   5440         const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
   5441             @intCast(part_bit_off);
   5442         const part_mask_not = part_mask ^ (@as(u64, math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
   5443         if (limb_abi_size <= 4) {
   5444             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not));
   5445         } else if (math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
   5446             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small));
   5447         } else {
   5448             const part_mask_reg = try self.register_manager.allocReg(null, gp);
   5449             try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not));
   5450             try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
   5451         }
   5452 
   5453         if (src_bit_size <= 64) {
   5454             const tmp_reg = try self.register_manager.allocReg(null, gp);
   5455             const tmp_mcv = MCValue{ .register = tmp_reg };
   5456             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5457             defer self.register_manager.unlockReg(tmp_lock);
   5458 
   5459             try self.genSetReg(tmp_reg, limb_ty, src_mcv);
   5460             switch (limb_i) {
   5461                 0 => try self.genShiftBinOpMir(
   5462                     .{ ._l, .sh },
   5463                     limb_ty,
   5464                     tmp_mcv,
   5465                     .{ .immediate = src_bit_off },
   5466                 ),
   5467                 1 => try self.genShiftBinOpMir(
   5468                     .{ ._r, .sh },
   5469                     limb_ty,
   5470                     tmp_mcv,
   5471                     .{ .immediate = limb_abi_bits - src_bit_off },
   5472                 ),
   5473                 else => unreachable,
   5474             }
   5475             try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
   5476             try self.asmMemoryRegister(
   5477                 .{ ._, .@"or" },
   5478                 limb_mem,
   5479                 registerAlias(tmp_reg, limb_abi_size),
   5480             );
   5481         } else return self.fail("TODO: implement packed store of {}", .{
   5482             src_ty.fmt(self.bin_file.options.module.?),
   5483         });
   5484     }
   5485 }
   5486 
   5487 fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
   5488     const mod = self.bin_file.options.module.?;
   5489     const src_ty = ptr_ty.childType(mod);
   5490     switch (ptr_mcv) {
   5491         .none,
   5492         .unreach,
   5493         .dead,
   5494         .undef,
   5495         .eflags,
   5496         .register_overflow,
   5497         .reserved_frame,
   5498         => unreachable, // not a valid pointer
   5499         .immediate,
   5500         .register,
   5501         .register_offset,
   5502         .lea_direct,
   5503         .lea_got,
   5504         .lea_tlv,
   5505         .lea_frame,
   5506         => try self.genCopy(src_ty, ptr_mcv.deref(), src_mcv),
   5507         .memory,
   5508         .indirect,
   5509         .load_direct,
   5510         .load_got,
   5511         .load_tlv,
   5512         .load_frame,
   5513         => {
   5514             const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5515             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   5516             defer self.register_manager.unlockReg(addr_lock);
   5517 
   5518             try self.genCopy(src_ty, .{ .indirect = .{ .reg = addr_reg } }, src_mcv);
   5519         },
   5520     }
   5521 }
   5522 
   5523 fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
   5524     const mod = self.bin_file.options.module.?;
   5525     if (safety) {
   5526         // TODO if the value is undef, write 0xaa bytes to dest
   5527     } else {
   5528         // TODO if the value is undef, don't lower this instruction
   5529     }
   5530     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   5531     const ptr_mcv = try self.resolveInst(bin_op.lhs);
   5532     const ptr_ty = self.typeOf(bin_op.lhs);
   5533     const src_mcv = try self.resolveInst(bin_op.rhs);
   5534     if (ptr_ty.ptrInfo(mod).packed_offset.host_size > 0) {
   5535         try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
   5536     } else {
   5537         try self.store(ptr_ty, ptr_mcv, src_mcv);
   5538     }
   5539     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   5540 }
   5541 
   5542 fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void {
   5543     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   5544     const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
   5545     const result = try self.fieldPtr(inst, extra.struct_operand, extra.field_index);
   5546     return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
   5547 }
   5548 
   5549 fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void {
   5550     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
   5551     const result = try self.fieldPtr(inst, ty_op.operand, index);
   5552     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   5553 }
   5554 
   5555 fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue {
   5556     const mod = self.bin_file.options.module.?;
   5557     const ptr_field_ty = self.typeOfIndex(inst);
   5558     const ptr_container_ty = self.typeOf(operand);
   5559     const ptr_container_ty_info = ptr_container_ty.ptrInfo(mod);
   5560     const container_ty = ptr_container_ty.childType(mod);
   5561 
   5562     const field_offset: i32 = @intCast(switch (container_ty.containerLayout(mod)) {
   5563         .Auto, .Extern => container_ty.structFieldOffset(index, mod),
   5564         .Packed => if (container_ty.zigTypeTag(mod) == .Struct and
   5565             ptr_field_ty.ptrInfo(mod).packed_offset.host_size == 0)
   5566             container_ty.packedStructFieldByteOffset(index, mod) + @divExact(ptr_container_ty_info.packed_offset.bit_offset, 8)
   5567         else
   5568             0,
   5569     });
   5570 
   5571     const src_mcv = try self.resolveInst(operand);
   5572     const dst_mcv = if (switch (src_mcv) {
   5573         .immediate, .lea_frame => true,
   5574         .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv),
   5575         else => false,
   5576     }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv);
   5577     return dst_mcv.offset(field_offset);
   5578 }
   5579 
   5580 fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
   5581     const mod = self.bin_file.options.module.?;
   5582     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   5583     const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
   5584     const result: MCValue = result: {
   5585         const operand = extra.struct_operand;
   5586         const index = extra.field_index;
   5587 
   5588         const container_ty = self.typeOf(operand);
   5589         const container_rc = regClassForType(container_ty, mod);
   5590         const field_ty = container_ty.structFieldType(index, mod);
   5591         if (!field_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   5592         const field_rc = regClassForType(field_ty, mod);
   5593         const field_is_gp = field_rc.supersetOf(gp);
   5594 
   5595         const src_mcv = try self.resolveInst(operand);
   5596         const field_off: u32 = switch (container_ty.containerLayout(mod)) {
   5597             .Auto, .Extern => @intCast(container_ty.structFieldOffset(index, mod) * 8),
   5598             .Packed => if (mod.typeToStruct(container_ty)) |struct_obj|
   5599                 struct_obj.packedFieldBitOffset(mod, index)
   5600             else
   5601                 0,
   5602         };
   5603 
   5604         switch (src_mcv) {
   5605             .load_frame => |frame_addr| {
   5606                 const field_abi_size: u32 = @intCast(field_ty.abiSize(mod));
   5607                 if (field_off % 8 == 0) {
   5608                     const off_mcv =
   5609                         src_mcv.address().offset(@intCast(@divExact(field_off, 8))).deref();
   5610 
   5611                     if (field_abi_size <= 8) {
   5612                         const int_ty = try mod.intType(
   5613                             if (field_ty.isAbiInt(mod)) field_ty.intInfo(mod).signedness else .unsigned,
   5614                             @intCast(field_ty.bitSize(mod)),
   5615                         );
   5616 
   5617                         const dst_reg =
   5618                             try self.register_manager.allocReg(if (field_is_gp) inst else null, gp);
   5619                         const dst_mcv = MCValue{ .register = dst_reg };
   5620                         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   5621                         defer self.register_manager.unlockReg(dst_lock);
   5622 
   5623                         try self.genCopy(int_ty, dst_mcv, off_mcv);
   5624                         if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(int_ty, dst_reg);
   5625                         break :result if (field_is_gp)
   5626                             dst_mcv
   5627                         else
   5628                             try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   5629                     }
   5630 
   5631                     if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv;
   5632 
   5633                     const dst_mcv = try self.allocRegOrMem(inst, true);
   5634                     try self.genCopy(field_ty, dst_mcv, off_mcv);
   5635                     break :result dst_mcv;
   5636                 }
   5637 
   5638                 const limb_abi_size: u32 = @min(field_abi_size, 8);
   5639                 const limb_abi_bits = limb_abi_size * 8;
   5640                 const field_byte_off: i32 = @intCast(field_off / limb_abi_bits * limb_abi_size);
   5641                 const field_bit_off = field_off % limb_abi_bits;
   5642 
   5643                 if (field_abi_size > 8) {
   5644                     return self.fail("TODO implement struct_field_val with large packed field", .{});
   5645                 }
   5646 
   5647                 const dst_reg = try self.register_manager.allocReg(if (field_is_gp) inst else null, gp);
   5648                 const field_extra_bits = self.regExtraBits(field_ty);
   5649                 const load_abi_size =
   5650                     if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2;
   5651                 if (load_abi_size <= 8) {
   5652                     const load_reg = registerAlias(dst_reg, load_abi_size);
   5653                     try self.asmRegisterMemory(
   5654                         .{ ._, .mov },
   5655                         load_reg,
   5656                         Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{
   5657                             .base = .{ .frame = frame_addr.index },
   5658                             .disp = frame_addr.off + field_byte_off,
   5659                         }),
   5660                     );
   5661                     try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off));
   5662                 } else {
   5663                     const tmp_reg = registerAlias(
   5664                         try self.register_manager.allocReg(null, gp),
   5665                         field_abi_size,
   5666                     );
   5667                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5668                     defer self.register_manager.unlockReg(tmp_lock);
   5669 
   5670                     const dst_alias = registerAlias(dst_reg, field_abi_size);
   5671                     try self.asmRegisterMemory(
   5672                         .{ ._, .mov },
   5673                         dst_alias,
   5674                         Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{
   5675                             .base = .{ .frame = frame_addr.index },
   5676                             .disp = frame_addr.off + field_byte_off,
   5677                         }),
   5678                     );
   5679                     try self.asmRegisterMemory(
   5680                         .{ ._, .mov },
   5681                         tmp_reg,
   5682                         Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{
   5683                             .base = .{ .frame = frame_addr.index },
   5684                             .disp = frame_addr.off + field_byte_off + @as(i32, @intCast(limb_abi_size)),
   5685                         }),
   5686                     );
   5687                     try self.asmRegisterRegisterImmediate(
   5688                         .{ ._rd, .sh },
   5689                         dst_alias,
   5690                         tmp_reg,
   5691                         Immediate.u(field_bit_off),
   5692                     );
   5693                 }
   5694 
   5695                 if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg);
   5696 
   5697                 const dst_mcv = MCValue{ .register = dst_reg };
   5698                 break :result if (field_is_gp)
   5699                     dst_mcv
   5700                 else
   5701                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   5702             },
   5703             .register => |reg| {
   5704                 const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
   5705                 defer self.register_manager.unlockReg(reg_lock);
   5706 
   5707                 const dst_reg = if (src_mcv.isRegister() and field_rc.supersetOf(container_rc) and
   5708                     self.reuseOperand(inst, operand, 0, src_mcv))
   5709                     src_mcv.getReg().?
   5710                 else
   5711                     try self.copyToTmpRegister(Type.usize, .{ .register = reg.to64() });
   5712                 const dst_mcv = MCValue{ .register = dst_reg };
   5713                 const dst_lock = self.register_manager.lockReg(dst_reg);
   5714                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5715 
   5716                 try self.genShiftBinOpMir(
   5717                     .{ ._r, .sh },
   5718                     Type.usize,
   5719                     dst_mcv,
   5720                     .{ .immediate = field_off },
   5721                 );
   5722                 if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(field_ty, dst_reg);
   5723 
   5724                 break :result if (field_rc.supersetOf(gp))
   5725                     dst_mcv
   5726                 else
   5727                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   5728             },
   5729             .register_overflow => |ro| {
   5730                 switch (index) {
   5731                     // Get wrapped value for overflow operation.
   5732                     0 => break :result if (self.liveness.operandDies(inst, 0))
   5733                         .{ .register = ro.reg }
   5734                     else
   5735                         try self.copyToRegisterWithInstTracking(
   5736                             inst,
   5737                             Type.usize,
   5738                             .{ .register = ro.reg },
   5739                         ),
   5740                     // Get overflow bit.
   5741                     1 => if (self.liveness.operandDies(inst, 0)) {
   5742                         self.eflags_inst = inst;
   5743                         break :result .{ .eflags = ro.eflags };
   5744                     } else {
   5745                         const dst_reg = try self.register_manager.allocReg(inst, gp);
   5746                         try self.asmSetccRegister(dst_reg.to8(), ro.eflags);
   5747                         break :result .{ .register = dst_reg.to8() };
   5748                     },
   5749                     else => unreachable,
   5750                 }
   5751             },
   5752             else => return self.fail("TODO implement codegen struct_field_val for {}", .{src_mcv}),
   5753         }
   5754     };
   5755     return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
   5756 }
   5757 
   5758 fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void {
   5759     const mod = self.bin_file.options.module.?;
   5760     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   5761     const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
   5762 
   5763     const inst_ty = self.typeOfIndex(inst);
   5764     const parent_ty = inst_ty.childType(mod);
   5765     const field_offset: i32 = @intCast(parent_ty.structFieldOffset(extra.field_index, mod));
   5766 
   5767     const src_mcv = try self.resolveInst(extra.field_ptr);
   5768     const dst_mcv = if (src_mcv.isRegisterOffset() and
   5769         self.reuseOperand(inst, extra.field_ptr, 0, src_mcv))
   5770         src_mcv
   5771     else
   5772         try self.copyToRegisterWithInstTracking(inst, inst_ty, src_mcv);
   5773     const result = dst_mcv.offset(-field_offset);
   5774     return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none });
   5775 }
   5776 
   5777 fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
   5778     const mod = self.bin_file.options.module.?;
   5779     const src_ty = self.typeOf(src_air);
   5780     const src_mcv = try self.resolveInst(src_air);
   5781     if (src_ty.zigTypeTag(mod) == .Vector) {
   5782         return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(self.bin_file.options.module.?)});
   5783     }
   5784 
   5785     switch (src_mcv) {
   5786         .eflags => |cc| switch (tag) {
   5787             .not => return .{ .eflags = cc.negate() },
   5788             else => {},
   5789         },
   5790         else => {},
   5791     }
   5792 
   5793     const src_lock = switch (src_mcv) {
   5794         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5795         else => null,
   5796     };
   5797     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   5798 
   5799     const dst_mcv: MCValue = dst: {
   5800         if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv;
   5801 
   5802         const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true);
   5803         try self.genCopy(src_ty, dst_mcv, src_mcv);
   5804         break :dst dst_mcv;
   5805     };
   5806     const dst_lock = switch (dst_mcv) {
   5807         .register => |reg| self.register_manager.lockReg(reg),
   5808         else => null,
   5809     };
   5810     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5811 
   5812     switch (tag) {
   5813         .not => {
   5814             const limb_abi_size: u16 = @intCast(@min(src_ty.abiSize(mod), 8));
   5815             const int_info = if (src_ty.ip_index == .bool_type)
   5816                 std.builtin.Type.Int{ .signedness = .unsigned, .bits = 1 }
   5817             else
   5818                 src_ty.intInfo(mod);
   5819             var byte_off: i32 = 0;
   5820             while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) {
   5821                 const limb_bits: u16 = @intCast(@min(int_info.bits - byte_off * 8, limb_abi_size * 8));
   5822                 const limb_ty = try mod.intType(int_info.signedness, limb_bits);
   5823                 const limb_mcv = switch (byte_off) {
   5824                     0 => dst_mcv,
   5825                     else => dst_mcv.address().offset(byte_off).deref(),
   5826                 };
   5827 
   5828                 if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) {
   5829                     const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - limb_bits);
   5830                     try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask });
   5831                 } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv);
   5832             }
   5833         },
   5834         .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv),
   5835         else => unreachable,
   5836     }
   5837     return dst_mcv;
   5838 }
   5839 
   5840 fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void {
   5841     const mod = self.bin_file.options.module.?;
   5842     const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   5843     if (abi_size > 8) return self.fail("TODO implement {} for {}", .{
   5844         mir_tag,
   5845         dst_ty.fmt(self.bin_file.options.module.?),
   5846     });
   5847     switch (dst_mcv) {
   5848         .none,
   5849         .unreach,
   5850         .dead,
   5851         .undef,
   5852         .immediate,
   5853         .register_offset,
   5854         .eflags,
   5855         .register_overflow,
   5856         .lea_direct,
   5857         .lea_got,
   5858         .lea_tlv,
   5859         .lea_frame,
   5860         .reserved_frame,
   5861         => unreachable, // unmodifiable destination
   5862         .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)),
   5863         .memory, .load_got, .load_direct, .load_tlv => {
   5864             const addr_reg = try self.register_manager.allocReg(null, gp);
   5865             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   5866             defer self.register_manager.unlockReg(addr_reg_lock);
   5867 
   5868             try self.genSetReg(addr_reg, Type.usize, dst_mcv.address());
   5869             try self.asmMemory(
   5870                 mir_tag,
   5871                 Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }),
   5872             );
   5873         },
   5874         .indirect, .load_frame => try self.asmMemory(
   5875             mir_tag,
   5876             dst_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   5877         ),
   5878     }
   5879 }
   5880 
   5881 /// Clobbers .rcx for non-immediate shift value.
   5882 fn genShiftBinOpMir(
   5883     self: *Self,
   5884     tag: Mir.Inst.FixedTag,
   5885     ty: Type,
   5886     lhs_mcv: MCValue,
   5887     shift_mcv: MCValue,
   5888 ) !void {
   5889     const mod = self.bin_file.options.module.?;
   5890     const rhs_mcv: MCValue = rhs: {
   5891         switch (shift_mcv) {
   5892             .immediate => |imm| switch (imm) {
   5893                 0 => return,
   5894                 else => break :rhs shift_mcv,
   5895             },
   5896             .register => |shift_reg| if (shift_reg == .rcx) break :rhs shift_mcv,
   5897             else => {},
   5898         }
   5899         self.register_manager.getRegAssumeFree(.rcx, null);
   5900         try self.genSetReg(.cl, Type.u8, shift_mcv);
   5901         break :rhs .{ .register = .rcx };
   5902     };
   5903 
   5904     const abi_size: u32 = @intCast(ty.abiSize(mod));
   5905     if (abi_size <= 8) {
   5906         switch (lhs_mcv) {
   5907             .register => |lhs_reg| switch (rhs_mcv) {
   5908                 .immediate => |rhs_imm| try self.asmRegisterImmediate(
   5909                     tag,
   5910                     registerAlias(lhs_reg, abi_size),
   5911                     Immediate.u(rhs_imm),
   5912                 ),
   5913                 .register => |rhs_reg| try self.asmRegisterRegister(
   5914                     tag,
   5915                     registerAlias(lhs_reg, abi_size),
   5916                     registerAlias(rhs_reg, 1),
   5917                 ),
   5918                 else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5919                     @tagName(lhs_mcv),
   5920                     @tagName(rhs_mcv),
   5921                 }),
   5922             },
   5923             .memory, .indirect, .load_frame => {
   5924                 const lhs_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (lhs_mcv) {
   5925                     .memory => |addr| .{
   5926                         .base = .{ .reg = .ds },
   5927                         .disp = math.cast(i32, @as(i64, @bitCast(addr))) orelse
   5928                             return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5929                             @tagName(lhs_mcv),
   5930                             @tagName(rhs_mcv),
   5931                         }),
   5932                     },
   5933                     .indirect => |reg_off| .{
   5934                         .base = .{ .reg = reg_off.reg },
   5935                         .disp = reg_off.off,
   5936                     },
   5937                     .load_frame => |frame_addr| .{
   5938                         .base = .{ .frame = frame_addr.index },
   5939                         .disp = frame_addr.off,
   5940                     },
   5941                     else => unreachable,
   5942                 });
   5943                 switch (rhs_mcv) {
   5944                     .immediate => |rhs_imm| try self.asmMemoryImmediate(
   5945                         tag,
   5946                         lhs_mem,
   5947                         Immediate.u(rhs_imm),
   5948                     ),
   5949                     .register => |rhs_reg| try self.asmMemoryRegister(
   5950                         tag,
   5951                         lhs_mem,
   5952                         registerAlias(rhs_reg, 1),
   5953                     ),
   5954                     else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5955                         @tagName(lhs_mcv),
   5956                         @tagName(rhs_mcv),
   5957                     }),
   5958                 }
   5959             },
   5960             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   5961                 @tagName(lhs_mcv),
   5962                 @tagName(rhs_mcv),
   5963             }),
   5964         }
   5965     } else if (abi_size <= 16) {
   5966         const tmp_reg = try self.register_manager.allocReg(null, gp);
   5967         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5968         defer self.register_manager.unlockReg(tmp_lock);
   5969 
   5970         const info: struct { offsets: [2]i32, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) {
   5971             ._l => .{ .offsets = .{ 0, 8 }, .double_tag = .{ ._ld, .sh } },
   5972             ._r => .{ .offsets = .{ 8, 0 }, .double_tag = .{ ._rd, .sh } },
   5973             else => unreachable,
   5974         };
   5975         switch (lhs_mcv) {
   5976             .load_frame => |dst_frame_addr| switch (rhs_mcv) {
   5977                 .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) {
   5978                     try self.asmRegisterMemory(
   5979                         .{ ._, .mov },
   5980                         tmp_reg,
   5981                         Memory.sib(.qword, .{
   5982                             .base = .{ .frame = dst_frame_addr.index },
   5983                             .disp = dst_frame_addr.off + info.offsets[0],
   5984                         }),
   5985                     );
   5986                     try self.asmMemoryRegisterImmediate(
   5987                         info.double_tag,
   5988                         Memory.sib(.qword, .{
   5989                             .base = .{ .frame = dst_frame_addr.index },
   5990                             .disp = dst_frame_addr.off + info.offsets[1],
   5991                         }),
   5992                         tmp_reg,
   5993                         Immediate.u(rhs_imm),
   5994                     );
   5995                     try self.asmMemoryImmediate(
   5996                         tag,
   5997                         Memory.sib(.qword, .{
   5998                             .base = .{ .frame = dst_frame_addr.index },
   5999                             .disp = dst_frame_addr.off + info.offsets[0],
   6000                         }),
   6001                         Immediate.u(rhs_imm),
   6002                     );
   6003                 } else {
   6004                     assert(rhs_imm < 128);
   6005                     try self.asmRegisterMemory(
   6006                         .{ ._, .mov },
   6007                         tmp_reg,
   6008                         Memory.sib(.qword, .{
   6009                             .base = .{ .frame = dst_frame_addr.index },
   6010                             .disp = dst_frame_addr.off + info.offsets[0],
   6011                         }),
   6012                     );
   6013                     if (rhs_imm > 64) {
   6014                         try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64));
   6015                     }
   6016                     try self.asmMemoryRegister(
   6017                         .{ ._, .mov },
   6018                         Memory.sib(.qword, .{
   6019                             .base = .{ .frame = dst_frame_addr.index },
   6020                             .disp = dst_frame_addr.off + info.offsets[1],
   6021                         }),
   6022                         tmp_reg,
   6023                     );
   6024                     if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate(
   6025                         tag,
   6026                         Memory.sib(.qword, .{
   6027                             .base = .{ .frame = dst_frame_addr.index },
   6028                             .disp = dst_frame_addr.off + info.offsets[0],
   6029                         }),
   6030                         Immediate.u(63),
   6031                     ) else {
   6032                         try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32());
   6033                         try self.asmMemoryRegister(
   6034                             .{ ._, .mov },
   6035                             Memory.sib(.qword, .{
   6036                                 .base = .{ .frame = dst_frame_addr.index },
   6037                                 .disp = dst_frame_addr.off + info.offsets[0],
   6038                             }),
   6039                             tmp_reg,
   6040                         );
   6041                     }
   6042                 },
   6043                 else => {
   6044                     const first_reg = try self.register_manager.allocReg(null, gp);
   6045                     const first_lock = self.register_manager.lockRegAssumeUnused(first_reg);
   6046                     defer self.register_manager.unlockReg(first_lock);
   6047 
   6048                     const second_reg = try self.register_manager.allocReg(null, gp);
   6049                     const second_lock = self.register_manager.lockRegAssumeUnused(second_reg);
   6050                     defer self.register_manager.unlockReg(second_lock);
   6051 
   6052                     try self.genSetReg(.cl, Type.u8, rhs_mcv);
   6053                     try self.asmRegisterMemory(
   6054                         .{ ._, .mov },
   6055                         first_reg,
   6056                         Memory.sib(.qword, .{
   6057                             .base = .{ .frame = dst_frame_addr.index },
   6058                             .disp = dst_frame_addr.off + info.offsets[0],
   6059                         }),
   6060                     );
   6061                     try self.asmRegisterMemory(
   6062                         .{ ._, .mov },
   6063                         second_reg,
   6064                         Memory.sib(.qword, .{
   6065                             .base = .{ .frame = dst_frame_addr.index },
   6066                             .disp = dst_frame_addr.off + info.offsets[1],
   6067                         }),
   6068                     );
   6069                     if (tag[0] == ._r and tag[1] == .sa) {
   6070                         try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg);
   6071                         try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63));
   6072                     } else try self.asmRegisterRegister(
   6073                         .{ ._, .xor },
   6074                         tmp_reg.to32(),
   6075                         tmp_reg.to32(),
   6076                     );
   6077                     try self.asmRegisterRegisterRegister(info.double_tag, second_reg, first_reg, .cl);
   6078                     try self.asmRegisterRegister(tag, first_reg, .cl);
   6079                     try self.asmRegisterImmediate(.{ ._, .cmp }, .cl, Immediate.u(64));
   6080                     try self.asmCmovccRegisterRegister(second_reg, first_reg, .ae);
   6081                     try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae);
   6082                     try self.asmMemoryRegister(
   6083                         .{ ._, .mov },
   6084                         Memory.sib(.qword, .{
   6085                             .base = .{ .frame = dst_frame_addr.index },
   6086                             .disp = dst_frame_addr.off + info.offsets[1],
   6087                         }),
   6088                         second_reg,
   6089                     );
   6090                     try self.asmMemoryRegister(
   6091                         .{ ._, .mov },
   6092                         Memory.sib(.qword, .{
   6093                             .base = .{ .frame = dst_frame_addr.index },
   6094                             .disp = dst_frame_addr.off + info.offsets[0],
   6095                         }),
   6096                         first_reg,
   6097                     );
   6098                 },
   6099             },
   6100             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   6101                 @tagName(lhs_mcv),
   6102                 @tagName(rhs_mcv),
   6103             }),
   6104         }
   6105     } else return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   6106         @tagName(lhs_mcv),
   6107         @tagName(rhs_mcv),
   6108     });
   6109 }
   6110 
   6111 /// Result is always a register.
   6112 /// Clobbers .rcx for non-immediate rhs, therefore care is needed to spill .rcx upfront.
   6113 /// Asserts .rcx is free.
   6114 fn genShiftBinOp(
   6115     self: *Self,
   6116     air_tag: Air.Inst.Tag,
   6117     maybe_inst: ?Air.Inst.Index,
   6118     lhs_mcv: MCValue,
   6119     rhs_mcv: MCValue,
   6120     lhs_ty: Type,
   6121     rhs_ty: Type,
   6122 ) !MCValue {
   6123     const mod = self.bin_file.options.module.?;
   6124     if (lhs_ty.zigTypeTag(mod) == .Vector) return self.fail("TODO implement genShiftBinOp for {}", .{
   6125         lhs_ty.fmt(mod),
   6126     });
   6127 
   6128     assert(rhs_ty.abiSize(mod) == 1);
   6129 
   6130     const lhs_abi_size = lhs_ty.abiSize(mod);
   6131     if (lhs_abi_size > 16) return self.fail("TODO implement genShiftBinOp for {}", .{
   6132         lhs_ty.fmt(mod),
   6133     });
   6134 
   6135     try self.register_manager.getReg(.rcx, null);
   6136     const rcx_lock = self.register_manager.lockRegAssumeUnused(.rcx);
   6137     defer self.register_manager.unlockReg(rcx_lock);
   6138 
   6139     const lhs_lock = switch (lhs_mcv) {
   6140         .register => |reg| self.register_manager.lockReg(reg),
   6141         else => null,
   6142     };
   6143     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6144 
   6145     const rhs_lock = switch (rhs_mcv) {
   6146         .register => |reg| self.register_manager.lockReg(reg),
   6147         else => null,
   6148     };
   6149     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6150 
   6151     const dst_mcv: MCValue = dst: {
   6152         if (maybe_inst) |inst| {
   6153             const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   6154             if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv;
   6155         }
   6156         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
   6157         try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
   6158         break :dst dst_mcv;
   6159     };
   6160 
   6161     const signedness = lhs_ty.intInfo(mod).signedness;
   6162     try self.genShiftBinOpMir(switch (air_tag) {
   6163         .shl, .shl_exact => switch (signedness) {
   6164             .signed => .{ ._l, .sa },
   6165             .unsigned => .{ ._l, .sh },
   6166         },
   6167         .shr, .shr_exact => switch (signedness) {
   6168             .signed => .{ ._r, .sa },
   6169             .unsigned => .{ ._r, .sh },
   6170         },
   6171         else => unreachable,
   6172     }, lhs_ty, dst_mcv, rhs_mcv);
   6173     return dst_mcv;
   6174 }
   6175 
   6176 /// Result is always a register.
   6177 /// Clobbers .rax and .rdx therefore care is needed to spill .rax and .rdx upfront.
   6178 /// Asserts .rax and .rdx are free.
   6179 fn genMulDivBinOp(
   6180     self: *Self,
   6181     tag: Air.Inst.Tag,
   6182     maybe_inst: ?Air.Inst.Index,
   6183     dst_ty: Type,
   6184     src_ty: Type,
   6185     lhs: MCValue,
   6186     rhs: MCValue,
   6187 ) !MCValue {
   6188     const mod = self.bin_file.options.module.?;
   6189     if (dst_ty.zigTypeTag(mod) == .Vector or dst_ty.zigTypeTag(mod) == .Float) return self.fail(
   6190         "TODO implement genMulDivBinOp for {}",
   6191         .{dst_ty.fmt(mod)},
   6192     );
   6193     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   6194     const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
   6195     if (switch (tag) {
   6196         else => unreachable,
   6197         .mul, .mul_wrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2,
   6198         .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size,
   6199     } or src_abi_size > 8) return self.fail("TODO implement genMulDivBinOp from {} to {}", .{
   6200         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
   6201     });
   6202     const ty = if (dst_abi_size <= 8) dst_ty else src_ty;
   6203     const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size;
   6204 
   6205     assert(self.register_manager.isRegFree(.rax));
   6206     assert(self.register_manager.isRegFree(.rdx));
   6207 
   6208     const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
   6209     defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
   6210 
   6211     const signedness = ty.intInfo(mod).signedness;
   6212     switch (tag) {
   6213         .mul,
   6214         .mul_wrap,
   6215         .rem,
   6216         .div_trunc,
   6217         .div_exact,
   6218         => {
   6219             const track_inst_rax = switch (tag) {
   6220                 .mul, .mul_wrap => if (dst_abi_size <= 8) maybe_inst else null,
   6221                 .div_exact, .div_trunc => maybe_inst,
   6222                 else => null,
   6223             };
   6224             const track_inst_rdx = switch (tag) {
   6225                 .rem => maybe_inst,
   6226                 else => null,
   6227             };
   6228             try self.register_manager.getReg(.rax, track_inst_rax);
   6229             try self.register_manager.getReg(.rdx, track_inst_rdx);
   6230 
   6231             try self.genIntMulDivOpMir(switch (signedness) {
   6232                 .signed => switch (tag) {
   6233                     .mul, .mul_wrap => .{ .i_, .mul },
   6234                     .div_trunc, .div_exact, .rem => .{ .i_, .div },
   6235                     else => unreachable,
   6236                 },
   6237                 .unsigned => switch (tag) {
   6238                     .mul, .mul_wrap => .{ ._, .mul },
   6239                     .div_trunc, .div_exact, .rem => .{ ._, .div },
   6240                     else => unreachable,
   6241                 },
   6242             }, ty, lhs, rhs);
   6243 
   6244             if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
   6245                 .mul, .mul_wrap, .div_trunc, .div_exact => .rax,
   6246                 .rem => .rdx,
   6247                 else => unreachable,
   6248             }, dst_abi_size) };
   6249 
   6250             const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
   6251             try self.asmMemoryRegister(
   6252                 .{ ._, .mov },
   6253                 Memory.sib(.qword, .{
   6254                     .base = .{ .frame = dst_mcv.load_frame.index },
   6255                     .disp = dst_mcv.load_frame.off,
   6256                 }),
   6257                 .rax,
   6258             );
   6259             try self.asmMemoryRegister(
   6260                 .{ ._, .mov },
   6261                 Memory.sib(.qword, .{
   6262                     .base = .{ .frame = dst_mcv.load_frame.index },
   6263                     .disp = dst_mcv.load_frame.off + 8,
   6264                 }),
   6265                 .rdx,
   6266             );
   6267             return dst_mcv;
   6268         },
   6269 
   6270         .mod => {
   6271             try self.register_manager.getReg(.rax, null);
   6272             try self.register_manager.getReg(.rdx, if (signedness == .unsigned) maybe_inst else null);
   6273 
   6274             switch (signedness) {
   6275                 .signed => {
   6276                     const lhs_lock = switch (lhs) {
   6277                         .register => |reg| self.register_manager.lockReg(reg),
   6278                         else => null,
   6279                     };
   6280                     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6281                     const rhs_lock = switch (rhs) {
   6282                         .register => |reg| self.register_manager.lockReg(reg),
   6283                         else => null,
   6284                     };
   6285                     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6286 
   6287                     // hack around hazard between rhs and div_floor by copying rhs to another register
   6288                     const rhs_copy = try self.copyToTmpRegister(ty, rhs);
   6289                     const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy);
   6290                     defer self.register_manager.unlockReg(rhs_copy_lock);
   6291 
   6292                     const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs);
   6293                     try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy });
   6294                     const div_floor_lock = self.register_manager.lockReg(div_floor.register);
   6295                     defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock);
   6296 
   6297                     const result: MCValue = if (maybe_inst) |inst|
   6298                         try self.copyToRegisterWithInstTracking(inst, ty, lhs)
   6299                     else
   6300                         .{ .register = try self.copyToTmpRegister(ty, lhs) };
   6301                     try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor);
   6302 
   6303                     return result;
   6304                 },
   6305                 .unsigned => {
   6306                     try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, rhs);
   6307                     return .{ .register = registerAlias(.rdx, abi_size) };
   6308                 },
   6309             }
   6310         },
   6311 
   6312         .div_floor => {
   6313             try self.register_manager.getReg(.rax, if (signedness == .unsigned) maybe_inst else null);
   6314             try self.register_manager.getReg(.rdx, null);
   6315 
   6316             const lhs_lock: ?RegisterLock = switch (lhs) {
   6317                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   6318                 else => null,
   6319             };
   6320             defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6321 
   6322             const actual_rhs: MCValue = blk: {
   6323                 switch (signedness) {
   6324                     .signed => {
   6325                         const rhs_lock: ?RegisterLock = switch (rhs) {
   6326                             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   6327                             else => null,
   6328                         };
   6329                         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6330 
   6331                         if (maybe_inst) |inst| {
   6332                             break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs);
   6333                         }
   6334                         break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs) };
   6335                     },
   6336                     .unsigned => break :blk rhs,
   6337                 }
   6338             };
   6339             const rhs_lock: ?RegisterLock = switch (actual_rhs) {
   6340                 .register => |reg| self.register_manager.lockReg(reg),
   6341                 else => null,
   6342             };
   6343             defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6344 
   6345             switch (signedness) {
   6346                 .signed => return try self.genInlineIntDivFloor(ty, lhs, actual_rhs),
   6347                 .unsigned => {
   6348                     try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, actual_rhs);
   6349                     return .{ .register = registerAlias(.rax, abi_size) };
   6350                 },
   6351             }
   6352         },
   6353 
   6354         else => unreachable,
   6355     }
   6356 }
   6357 
   6358 fn genBinOp(
   6359     self: *Self,
   6360     maybe_inst: ?Air.Inst.Index,
   6361     air_tag: Air.Inst.Tag,
   6362     lhs_air: Air.Inst.Ref,
   6363     rhs_air: Air.Inst.Ref,
   6364 ) !MCValue {
   6365     const mod = self.bin_file.options.module.?;
   6366     const lhs_ty = self.typeOf(lhs_air);
   6367     const rhs_ty = self.typeOf(rhs_air);
   6368     const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
   6369     if ((lhs_ty.scalarType(mod).isRuntimeFloat() and
   6370         lhs_ty.scalarType(mod).floatBits(self.target.*) == 80) or
   6371         lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16))
   6372         return self.fail("TODO implement genBinOp for {s} {}", .{
   6373             @tagName(air_tag), lhs_ty.fmt(mod),
   6374         });
   6375 
   6376     const maybe_mask_reg = switch (air_tag) {
   6377         else => null,
   6378         .max, .min => if (lhs_ty.scalarType(mod).isRuntimeFloat()) registerAlias(
   6379             if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
   6380                 try self.register_manager.getReg(.xmm0, null);
   6381                 break :mask .xmm0;
   6382             } else try self.register_manager.allocReg(null, sse),
   6383             abi_size,
   6384         ) else null,
   6385         .rem, .mod => return self.fail("TODO implement genBinOp for {s} {}", .{
   6386             @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   6387         }),
   6388     };
   6389     const mask_lock =
   6390         if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
   6391     defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
   6392 
   6393     const lhs_mcv = try self.resolveInst(lhs_air);
   6394     const rhs_mcv = try self.resolveInst(rhs_air);
   6395     switch (lhs_mcv) {
   6396         .immediate => |imm| switch (imm) {
   6397             0 => switch (air_tag) {
   6398                 .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, rhs_air),
   6399                 else => {},
   6400             },
   6401             else => {},
   6402         },
   6403         else => {},
   6404     }
   6405 
   6406     const is_commutative = switch (air_tag) {
   6407         .add,
   6408         .add_wrap,
   6409         .mul,
   6410         .bool_or,
   6411         .bit_or,
   6412         .bool_and,
   6413         .bit_and,
   6414         .xor,
   6415         .min,
   6416         .max,
   6417         => true,
   6418 
   6419         else => false,
   6420     };
   6421     const vec_op = switch (lhs_ty.zigTypeTag(mod)) {
   6422         else => false,
   6423         .Float, .Vector => true,
   6424     };
   6425 
   6426     const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
   6427         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   6428         else => null,
   6429     };
   6430     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   6431 
   6432     const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
   6433         .register => |reg| self.register_manager.lockReg(reg),
   6434         else => null,
   6435     };
   6436     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   6437 
   6438     var flipped = false;
   6439     var copied_to_dst = true;
   6440     const dst_mcv: MCValue = dst: {
   6441         if (maybe_inst) |inst| {
   6442             if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) {
   6443                 break :dst lhs_mcv;
   6444             }
   6445             if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and
   6446                 self.reuseOperand(inst, rhs_air, 1, rhs_mcv))
   6447             {
   6448                 flipped = true;
   6449                 break :dst rhs_mcv;
   6450             }
   6451         }
   6452         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
   6453         if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
   6454             copied_to_dst = false
   6455         else
   6456             try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
   6457         break :dst dst_mcv;
   6458     };
   6459     const dst_lock: ?RegisterLock = switch (dst_mcv) {
   6460         .register => |reg| self.register_manager.lockReg(reg),
   6461         else => null,
   6462     };
   6463     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   6464 
   6465     const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   6466     const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
   6467         if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
   6468             self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
   6469             try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv);
   6470             break :src .{ .register = mask_reg };
   6471         }
   6472     else
   6473         unmat_src_mcv;
   6474 
   6475     if (!vec_op) {
   6476         switch (air_tag) {
   6477             .add,
   6478             .add_wrap,
   6479             => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv),
   6480 
   6481             .sub,
   6482             .sub_wrap,
   6483             => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv),
   6484 
   6485             .ptr_add,
   6486             .ptr_sub,
   6487             => {
   6488                 const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
   6489                 const tmp_mcv = MCValue{ .register = tmp_reg };
   6490                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6491                 defer self.register_manager.unlockReg(tmp_lock);
   6492 
   6493                 const elem_size = lhs_ty.elemType2(mod).abiSize(mod);
   6494                 try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
   6495                 try self.genBinOpMir(
   6496                     switch (air_tag) {
   6497                         .ptr_add => .{ ._, .add },
   6498                         .ptr_sub => .{ ._, .sub },
   6499                         else => unreachable,
   6500                     },
   6501                     lhs_ty,
   6502                     dst_mcv,
   6503                     tmp_mcv,
   6504                 );
   6505             },
   6506 
   6507             .bool_or,
   6508             .bit_or,
   6509             => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv),
   6510 
   6511             .bool_and,
   6512             .bit_and,
   6513             => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv),
   6514 
   6515             .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv),
   6516 
   6517             .min,
   6518             .max,
   6519             => {
   6520                 const mat_src_mcv: MCValue = if (switch (src_mcv) {
   6521                     .immediate,
   6522                     .eflags,
   6523                     .register_offset,
   6524                     .load_direct,
   6525                     .lea_direct,
   6526                     .load_got,
   6527                     .lea_got,
   6528                     .load_tlv,
   6529                     .lea_tlv,
   6530                     .lea_frame,
   6531                     => true,
   6532                     .memory => |addr| math.cast(i32, @as(i64, @bitCast(addr))) == null,
   6533                     else => false,
   6534                 }) .{ .register = try self.copyToTmpRegister(rhs_ty, src_mcv) } else src_mcv;
   6535                 const mat_mcv_lock = switch (mat_src_mcv) {
   6536                     .register => |reg| self.register_manager.lockReg(reg),
   6537                     else => null,
   6538                 };
   6539                 defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   6540 
   6541                 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv);
   6542 
   6543                 const int_info = lhs_ty.intInfo(mod);
   6544                 const cc: Condition = switch (int_info.signedness) {
   6545                     .unsigned => switch (air_tag) {
   6546                         .min => .a,
   6547                         .max => .b,
   6548                         else => unreachable,
   6549                     },
   6550                     .signed => switch (air_tag) {
   6551                         .min => .g,
   6552                         .max => .l,
   6553                         else => unreachable,
   6554                     },
   6555                 };
   6556 
   6557                 const cmov_abi_size = @max(@as(u32, @intCast(lhs_ty.abiSize(mod))), 2);
   6558                 const tmp_reg = switch (dst_mcv) {
   6559                     .register => |reg| reg,
   6560                     else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
   6561                 };
   6562                 const tmp_lock = self.register_manager.lockReg(tmp_reg);
   6563                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   6564                 switch (mat_src_mcv) {
   6565                     .none,
   6566                     .unreach,
   6567                     .dead,
   6568                     .undef,
   6569                     .immediate,
   6570                     .eflags,
   6571                     .register_offset,
   6572                     .register_overflow,
   6573                     .load_direct,
   6574                     .lea_direct,
   6575                     .load_got,
   6576                     .lea_got,
   6577                     .load_tlv,
   6578                     .lea_tlv,
   6579                     .lea_frame,
   6580                     .reserved_frame,
   6581                     => unreachable,
   6582                     .register => |src_reg| try self.asmCmovccRegisterRegister(
   6583                         registerAlias(tmp_reg, cmov_abi_size),
   6584                         registerAlias(src_reg, cmov_abi_size),
   6585                         cc,
   6586                     ),
   6587                     .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
   6588                         registerAlias(tmp_reg, cmov_abi_size),
   6589                         Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), switch (mat_src_mcv) {
   6590                             .memory => |addr| .{
   6591                                 .base = .{ .reg = .ds },
   6592                                 .disp = @intCast(@as(i64, @bitCast(addr))),
   6593                             },
   6594                             .indirect => |reg_off| .{
   6595                                 .base = .{ .reg = reg_off.reg },
   6596                                 .disp = reg_off.off,
   6597                             },
   6598                             .load_frame => |frame_addr| .{
   6599                                 .base = .{ .frame = frame_addr.index },
   6600                                 .disp = frame_addr.off,
   6601                             },
   6602                             else => unreachable,
   6603                         }),
   6604                         cc,
   6605                     ),
   6606                 }
   6607                 try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg });
   6608             },
   6609 
   6610             else => return self.fail("TODO implement genBinOp for {s} {}", .{
   6611                 @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   6612             }),
   6613         }
   6614         return dst_mcv;
   6615     }
   6616 
   6617     const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
   6618     const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   6619         else => unreachable,
   6620         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   6621             16 => if (self.hasFeature(.f16c)) {
   6622                 const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6623                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6624                 defer self.register_manager.unlockReg(tmp_lock);
   6625 
   6626                 if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   6627                     .{ .vp_w, .insr },
   6628                     dst_reg,
   6629                     dst_reg,
   6630                     src_mcv.mem(.word),
   6631                     Immediate.u(1),
   6632                 ) else try self.asmRegisterRegisterRegister(
   6633                     .{ .vp_, .unpcklwd },
   6634                     dst_reg,
   6635                     dst_reg,
   6636                     (if (src_mcv.isRegister())
   6637                         src_mcv.getReg().?
   6638                     else
   6639                         try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6640                 );
   6641                 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   6642                 try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
   6643                 try self.asmRegisterRegisterRegister(
   6644                     switch (air_tag) {
   6645                         .add => .{ .v_ss, .add },
   6646                         .sub => .{ .v_ss, .sub },
   6647                         .mul => .{ .v_ss, .mul },
   6648                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
   6649                         .max => .{ .v_ss, .max },
   6650                         .min => .{ .v_ss, .max },
   6651                         else => unreachable,
   6652                     },
   6653                     dst_reg,
   6654                     dst_reg,
   6655                     tmp_reg,
   6656                 );
   6657                 try self.asmRegisterRegisterImmediate(
   6658                     .{ .v_, .cvtps2ph },
   6659                     dst_reg,
   6660                     dst_reg,
   6661                     Immediate.u(0b1_00),
   6662                 );
   6663                 return dst_mcv;
   6664             } else null,
   6665             32 => switch (air_tag) {
   6666                 .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
   6667                 .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
   6668                 .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
   6669                 .div_float,
   6670                 .div_trunc,
   6671                 .div_floor,
   6672                 .div_exact,
   6673                 => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
   6674                 .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
   6675                 .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
   6676                 else => unreachable,
   6677             },
   6678             64 => switch (air_tag) {
   6679                 .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
   6680                 .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
   6681                 .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
   6682                 .div_float,
   6683                 .div_trunc,
   6684                 .div_floor,
   6685                 .div_exact,
   6686                 => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
   6687                 .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
   6688                 .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
   6689                 else => unreachable,
   6690             },
   6691             80, 128 => null,
   6692             else => unreachable,
   6693         },
   6694         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   6695             else => null,
   6696             .Int => switch (lhs_ty.childType(mod).intInfo(mod).bits) {
   6697                 8 => switch (lhs_ty.vectorLen(mod)) {
   6698                     1...16 => switch (air_tag) {
   6699                         .add,
   6700                         .add_wrap,
   6701                         => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add },
   6702                         .sub,
   6703                         .sub_wrap,
   6704                         => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
   6705                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
   6706                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   6707                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   6708                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6709                             .signed => if (self.hasFeature(.avx))
   6710                                 .{ .vp_b, .mins }
   6711                             else if (self.hasFeature(.sse4_1))
   6712                                 .{ .p_b, .mins }
   6713                             else
   6714                                 null,
   6715                             .unsigned => if (self.hasFeature(.avx))
   6716                                 .{ .vp_b, .minu }
   6717                             else if (self.hasFeature(.sse4_1))
   6718                                 .{ .p_b, .minu }
   6719                             else
   6720                                 null,
   6721                         },
   6722                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6723                             .signed => if (self.hasFeature(.avx))
   6724                                 .{ .vp_b, .maxs }
   6725                             else if (self.hasFeature(.sse4_1))
   6726                                 .{ .p_b, .maxs }
   6727                             else
   6728                                 null,
   6729                             .unsigned => if (self.hasFeature(.avx))
   6730                                 .{ .vp_b, .maxu }
   6731                             else if (self.hasFeature(.sse4_1))
   6732                                 .{ .p_b, .maxu }
   6733                             else
   6734                                 null,
   6735                         },
   6736                         else => null,
   6737                     },
   6738                     17...32 => switch (air_tag) {
   6739                         .add,
   6740                         .add_wrap,
   6741                         => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
   6742                         .sub,
   6743                         .sub_wrap,
   6744                         => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
   6745                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   6746                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   6747                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   6748                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6749                             .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
   6750                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
   6751                         },
   6752                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6753                             .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
   6754                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
   6755                         },
   6756                         else => null,
   6757                     },
   6758                     else => null,
   6759                 },
   6760                 16 => switch (lhs_ty.vectorLen(mod)) {
   6761                     1...8 => switch (air_tag) {
   6762                         .add,
   6763                         .add_wrap,
   6764                         => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add },
   6765                         .sub,
   6766                         .sub_wrap,
   6767                         => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
   6768                         .mul,
   6769                         .mul_wrap,
   6770                         => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull },
   6771                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
   6772                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   6773                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   6774                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6775                             .signed => if (self.hasFeature(.avx))
   6776                                 .{ .vp_w, .mins }
   6777                             else
   6778                                 .{ .p_w, .mins },
   6779                             .unsigned => if (self.hasFeature(.avx))
   6780                                 .{ .vp_w, .minu }
   6781                             else
   6782                                 .{ .p_w, .minu },
   6783                         },
   6784                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6785                             .signed => if (self.hasFeature(.avx))
   6786                                 .{ .vp_w, .maxs }
   6787                             else
   6788                                 .{ .p_w, .maxs },
   6789                             .unsigned => if (self.hasFeature(.avx))
   6790                                 .{ .vp_w, .maxu }
   6791                             else
   6792                                 .{ .p_w, .maxu },
   6793                         },
   6794                         else => null,
   6795                     },
   6796                     9...16 => switch (air_tag) {
   6797                         .add,
   6798                         .add_wrap,
   6799                         => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
   6800                         .sub,
   6801                         .sub_wrap,
   6802                         => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
   6803                         .mul,
   6804                         .mul_wrap,
   6805                         => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
   6806                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   6807                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   6808                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   6809                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6810                             .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
   6811                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
   6812                         },
   6813                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6814                             .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
   6815                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
   6816                         },
   6817                         else => null,
   6818                     },
   6819                     else => null,
   6820                 },
   6821                 32 => switch (lhs_ty.vectorLen(mod)) {
   6822                     1...4 => switch (air_tag) {
   6823                         .add,
   6824                         .add_wrap,
   6825                         => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add },
   6826                         .sub,
   6827                         .sub_wrap,
   6828                         => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
   6829                         .mul,
   6830                         .mul_wrap,
   6831                         => if (self.hasFeature(.avx))
   6832                             .{ .vp_d, .mull }
   6833                         else if (self.hasFeature(.sse4_1))
   6834                             .{ .p_d, .mull }
   6835                         else
   6836                             null,
   6837                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
   6838                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   6839                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   6840                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6841                             .signed => if (self.hasFeature(.avx))
   6842                                 .{ .vp_d, .mins }
   6843                             else if (self.hasFeature(.sse4_1))
   6844                                 .{ .p_d, .mins }
   6845                             else
   6846                                 null,
   6847                             .unsigned => if (self.hasFeature(.avx))
   6848                                 .{ .vp_d, .minu }
   6849                             else if (self.hasFeature(.sse4_1))
   6850                                 .{ .p_d, .minu }
   6851                             else
   6852                                 null,
   6853                         },
   6854                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6855                             .signed => if (self.hasFeature(.avx))
   6856                                 .{ .vp_d, .maxs }
   6857                             else if (self.hasFeature(.sse4_1))
   6858                                 .{ .p_d, .maxs }
   6859                             else
   6860                                 null,
   6861                             .unsigned => if (self.hasFeature(.avx))
   6862                                 .{ .vp_d, .maxu }
   6863                             else if (self.hasFeature(.sse4_1))
   6864                                 .{ .p_d, .maxu }
   6865                             else
   6866                                 null,
   6867                         },
   6868                         else => null,
   6869                     },
   6870                     5...8 => switch (air_tag) {
   6871                         .add,
   6872                         .add_wrap,
   6873                         => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
   6874                         .sub,
   6875                         .sub_wrap,
   6876                         => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
   6877                         .mul,
   6878                         .mul_wrap,
   6879                         => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
   6880                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   6881                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   6882                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   6883                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6884                             .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
   6885                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
   6886                         },
   6887                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   6888                             .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
   6889                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
   6890                         },
   6891                         else => null,
   6892                     },
   6893                     else => null,
   6894                 },
   6895                 64 => switch (lhs_ty.vectorLen(mod)) {
   6896                     1...2 => switch (air_tag) {
   6897                         .add,
   6898                         .add_wrap,
   6899                         => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add },
   6900                         .sub,
   6901                         .sub_wrap,
   6902                         => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
   6903                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
   6904                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   6905                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   6906                         else => null,
   6907                     },
   6908                     3...4 => switch (air_tag) {
   6909                         .add,
   6910                         .add_wrap,
   6911                         => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
   6912                         .sub,
   6913                         .sub_wrap,
   6914                         => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
   6915                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   6916                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   6917                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   6918                         else => null,
   6919                     },
   6920                     else => null,
   6921                 },
   6922                 else => null,
   6923             },
   6924             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   6925                 16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen(mod)) {
   6926                     1 => {
   6927                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6928                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6929                         defer self.register_manager.unlockReg(tmp_lock);
   6930 
   6931                         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   6932                             .{ .vp_w, .insr },
   6933                             dst_reg,
   6934                             dst_reg,
   6935                             src_mcv.mem(.word),
   6936                             Immediate.u(1),
   6937                         ) else try self.asmRegisterRegisterRegister(
   6938                             .{ .vp_, .unpcklwd },
   6939                             dst_reg,
   6940                             dst_reg,
   6941                             (if (src_mcv.isRegister())
   6942                                 src_mcv.getReg().?
   6943                             else
   6944                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6945                         );
   6946                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   6947                         try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
   6948                         try self.asmRegisterRegisterRegister(
   6949                             switch (air_tag) {
   6950                                 .add => .{ .v_ss, .add },
   6951                                 .sub => .{ .v_ss, .sub },
   6952                                 .mul => .{ .v_ss, .mul },
   6953                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
   6954                                 .max => .{ .v_ss, .max },
   6955                                 .min => .{ .v_ss, .max },
   6956                                 else => unreachable,
   6957                             },
   6958                             dst_reg,
   6959                             dst_reg,
   6960                             tmp_reg,
   6961                         );
   6962                         try self.asmRegisterRegisterImmediate(
   6963                             .{ .v_, .cvtps2ph },
   6964                             dst_reg,
   6965                             dst_reg,
   6966                             Immediate.u(0b1_00),
   6967                         );
   6968                         return dst_mcv;
   6969                     },
   6970                     2 => {
   6971                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   6972                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6973                         defer self.register_manager.unlockReg(tmp_lock);
   6974 
   6975                         if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   6976                             .{ .vp_d, .insr },
   6977                             dst_reg,
   6978                             src_mcv.mem(.dword),
   6979                             Immediate.u(1),
   6980                         ) else try self.asmRegisterRegisterRegister(
   6981                             .{ .v_ps, .unpckl },
   6982                             dst_reg,
   6983                             dst_reg,
   6984                             (if (src_mcv.isRegister())
   6985                                 src_mcv.getReg().?
   6986                             else
   6987                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   6988                         );
   6989                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   6990                         try self.asmRegisterRegisterRegister(
   6991                             .{ .v_ps, .movhl },
   6992                             tmp_reg,
   6993                             dst_reg,
   6994                             dst_reg,
   6995                         );
   6996                         try self.asmRegisterRegisterRegister(
   6997                             switch (air_tag) {
   6998                                 .add => .{ .v_ps, .add },
   6999                                 .sub => .{ .v_ps, .sub },
   7000                                 .mul => .{ .v_ps, .mul },
   7001                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   7002                                 .max => .{ .v_ps, .max },
   7003                                 .min => .{ .v_ps, .max },
   7004                                 else => unreachable,
   7005                             },
   7006                             dst_reg,
   7007                             dst_reg,
   7008                             tmp_reg,
   7009                         );
   7010                         try self.asmRegisterRegisterImmediate(
   7011                             .{ .v_, .cvtps2ph },
   7012                             dst_reg,
   7013                             dst_reg,
   7014                             Immediate.u(0b1_00),
   7015                         );
   7016                         return dst_mcv;
   7017                     },
   7018                     3...4 => {
   7019                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
   7020                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7021                         defer self.register_manager.unlockReg(tmp_lock);
   7022 
   7023                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   7024                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   7025                             .{ .v_ps, .cvtph2 },
   7026                             tmp_reg,
   7027                             src_mcv.mem(.qword),
   7028                         ) else try self.asmRegisterRegister(
   7029                             .{ .v_ps, .cvtph2 },
   7030                             tmp_reg,
   7031                             (if (src_mcv.isRegister())
   7032                                 src_mcv.getReg().?
   7033                             else
   7034                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   7035                         );
   7036                         try self.asmRegisterRegisterRegister(
   7037                             switch (air_tag) {
   7038                                 .add => .{ .v_ps, .add },
   7039                                 .sub => .{ .v_ps, .sub },
   7040                                 .mul => .{ .v_ps, .mul },
   7041                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   7042                                 .max => .{ .v_ps, .max },
   7043                                 .min => .{ .v_ps, .max },
   7044                                 else => unreachable,
   7045                             },
   7046                             dst_reg,
   7047                             dst_reg,
   7048                             tmp_reg,
   7049                         );
   7050                         try self.asmRegisterRegisterImmediate(
   7051                             .{ .v_, .cvtps2ph },
   7052                             dst_reg,
   7053                             dst_reg,
   7054                             Immediate.u(0b1_00),
   7055                         );
   7056                         return dst_mcv;
   7057                     },
   7058                     5...8 => {
   7059                         const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256();
   7060                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7061                         defer self.register_manager.unlockReg(tmp_lock);
   7062 
   7063                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
   7064                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   7065                             .{ .v_ps, .cvtph2 },
   7066                             tmp_reg,
   7067                             src_mcv.mem(.xword),
   7068                         ) else try self.asmRegisterRegister(
   7069                             .{ .v_ps, .cvtph2 },
   7070                             tmp_reg,
   7071                             (if (src_mcv.isRegister())
   7072                                 src_mcv.getReg().?
   7073                             else
   7074                                 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   7075                         );
   7076                         try self.asmRegisterRegisterRegister(
   7077                             switch (air_tag) {
   7078                                 .add => .{ .v_ps, .add },
   7079                                 .sub => .{ .v_ps, .sub },
   7080                                 .mul => .{ .v_ps, .mul },
   7081                                 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   7082                                 .max => .{ .v_ps, .max },
   7083                                 .min => .{ .v_ps, .max },
   7084                                 else => unreachable,
   7085                             },
   7086                             dst_reg.to256(),
   7087                             dst_reg.to256(),
   7088                             tmp_reg,
   7089                         );
   7090                         try self.asmRegisterRegisterImmediate(
   7091                             .{ .v_, .cvtps2ph },
   7092                             dst_reg,
   7093                             dst_reg.to256(),
   7094                             Immediate.u(0b1_00),
   7095                         );
   7096                         return dst_mcv;
   7097                     },
   7098                     else => null,
   7099                 } else null,
   7100                 32 => switch (lhs_ty.vectorLen(mod)) {
   7101                     1 => switch (air_tag) {
   7102                         .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
   7103                         .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
   7104                         .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
   7105                         .div_float,
   7106                         .div_trunc,
   7107                         .div_floor,
   7108                         .div_exact,
   7109                         => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
   7110                         .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
   7111                         .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
   7112                         else => unreachable,
   7113                     },
   7114                     2...4 => switch (air_tag) {
   7115                         .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
   7116                         .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
   7117                         .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
   7118                         .div_float,
   7119                         .div_trunc,
   7120                         .div_floor,
   7121                         .div_exact,
   7122                         => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
   7123                         .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
   7124                         .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
   7125                         else => unreachable,
   7126                     },
   7127                     5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
   7128                         .add => .{ .v_ps, .add },
   7129                         .sub => .{ .v_ps, .sub },
   7130                         .mul => .{ .v_ps, .mul },
   7131                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   7132                         .max => .{ .v_ps, .max },
   7133                         .min => .{ .v_ps, .min },
   7134                         else => unreachable,
   7135                     } else null,
   7136                     else => null,
   7137                 },
   7138                 64 => switch (lhs_ty.vectorLen(mod)) {
   7139                     1 => switch (air_tag) {
   7140                         .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
   7141                         .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
   7142                         .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
   7143                         .div_float,
   7144                         .div_trunc,
   7145                         .div_floor,
   7146                         .div_exact,
   7147                         => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
   7148                         .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
   7149                         .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
   7150                         else => unreachable,
   7151                     },
   7152                     2 => switch (air_tag) {
   7153                         .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
   7154                         .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
   7155                         .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
   7156                         .div_float,
   7157                         .div_trunc,
   7158                         .div_floor,
   7159                         .div_exact,
   7160                         => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
   7161                         .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
   7162                         .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
   7163                         else => unreachable,
   7164                     },
   7165                     3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
   7166                         .add => .{ .v_pd, .add },
   7167                         .sub => .{ .v_pd, .sub },
   7168                         .mul => .{ .v_pd, .mul },
   7169                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
   7170                         .max => .{ .v_pd, .max },
   7171                         .min => .{ .v_pd, .min },
   7172                         else => unreachable,
   7173                     } else null,
   7174                     else => null,
   7175                 },
   7176                 80, 128 => null,
   7177                 else => unreachable,
   7178             },
   7179         },
   7180     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7181         @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7182     });
   7183 
   7184     const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
   7185         if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
   7186         abi_size,
   7187     ) else null;
   7188     const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
   7189     defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
   7190 
   7191     if (self.hasFeature(.avx)) {
   7192         const lhs_reg =
   7193             if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
   7194         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   7195             mir_tag,
   7196             dst_reg,
   7197             lhs_reg,
   7198             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   7199         ) else try self.asmRegisterRegisterRegister(
   7200             mir_tag,
   7201             dst_reg,
   7202             lhs_reg,
   7203             registerAlias(if (src_mcv.isRegister())
   7204                 src_mcv.getReg().?
   7205             else
   7206                 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   7207         );
   7208     } else {
   7209         assert(copied_to_dst);
   7210         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   7211             mir_tag,
   7212             dst_reg,
   7213             src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
   7214         ) else try self.asmRegisterRegister(
   7215             mir_tag,
   7216             dst_reg,
   7217             registerAlias(if (src_mcv.isRegister())
   7218                 src_mcv.getReg().?
   7219             else
   7220                 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   7221         );
   7222     }
   7223 
   7224     switch (air_tag) {
   7225         .add, .add_wrap, .sub, .sub_wrap, .mul, .mul_wrap, .div_float, .div_exact => {},
   7226         .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) try self.genRound(
   7227             lhs_ty,
   7228             dst_reg,
   7229             .{ .register = dst_reg },
   7230             switch (air_tag) {
   7231                 .div_trunc => 0b1_0_11,
   7232                 .div_floor => 0b1_0_01,
   7233                 else => unreachable,
   7234             },
   7235         ) else return self.fail("TODO implement genBinOp for {s} {} without sse4_1 feature", .{
   7236             @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7237         }),
   7238         .bit_and, .bit_or, .xor => {},
   7239         .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
   7240             const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
   7241 
   7242             try self.asmRegisterRegisterRegisterImmediate(
   7243                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7244                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7245                         32 => .{ .v_ss, .cmp },
   7246                         64 => .{ .v_sd, .cmp },
   7247                         16, 80, 128 => null,
   7248                         else => unreachable,
   7249                     },
   7250                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7251                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7252                             32 => switch (lhs_ty.vectorLen(mod)) {
   7253                                 1 => .{ .v_ss, .cmp },
   7254                                 2...8 => .{ .v_ps, .cmp },
   7255                                 else => null,
   7256                             },
   7257                             64 => switch (lhs_ty.vectorLen(mod)) {
   7258                                 1 => .{ .v_sd, .cmp },
   7259                                 2...4 => .{ .v_pd, .cmp },
   7260                                 else => null,
   7261                             },
   7262                             16, 80, 128 => null,
   7263                             else => unreachable,
   7264                         },
   7265                         else => unreachable,
   7266                     },
   7267                     else => unreachable,
   7268                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7269                     @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7270                 }),
   7271                 mask_reg,
   7272                 rhs_copy_reg,
   7273                 rhs_copy_reg,
   7274                 Immediate.u(3), // unord
   7275             );
   7276             try self.asmRegisterRegisterRegisterRegister(
   7277                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7278                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7279                         32 => .{ .v_ps, .blendv },
   7280                         64 => .{ .v_pd, .blendv },
   7281                         16, 80, 128 => null,
   7282                         else => unreachable,
   7283                     },
   7284                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7285                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7286                             32 => switch (lhs_ty.vectorLen(mod)) {
   7287                                 1...8 => .{ .v_ps, .blendv },
   7288                                 else => null,
   7289                             },
   7290                             64 => switch (lhs_ty.vectorLen(mod)) {
   7291                                 1...4 => .{ .v_pd, .blendv },
   7292                                 else => null,
   7293                             },
   7294                             16, 80, 128 => null,
   7295                             else => unreachable,
   7296                         },
   7297                         else => unreachable,
   7298                     },
   7299                     else => unreachable,
   7300                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7301                     @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7302                 }),
   7303                 dst_reg,
   7304                 dst_reg,
   7305                 lhs_copy_reg.?,
   7306                 mask_reg,
   7307             );
   7308         } else {
   7309             const has_blend = self.hasFeature(.sse4_1);
   7310             try self.asmRegisterRegisterImmediate(
   7311                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7312                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7313                         32 => .{ ._ss, .cmp },
   7314                         64 => .{ ._sd, .cmp },
   7315                         16, 80, 128 => null,
   7316                         else => unreachable,
   7317                     },
   7318                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7319                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7320                             32 => switch (lhs_ty.vectorLen(mod)) {
   7321                                 1 => .{ ._ss, .cmp },
   7322                                 2...4 => .{ ._ps, .cmp },
   7323                                 else => null,
   7324                             },
   7325                             64 => switch (lhs_ty.vectorLen(mod)) {
   7326                                 1 => .{ ._sd, .cmp },
   7327                                 2 => .{ ._pd, .cmp },
   7328                                 else => null,
   7329                             },
   7330                             16, 80, 128 => null,
   7331                             else => unreachable,
   7332                         },
   7333                         else => unreachable,
   7334                     },
   7335                     else => unreachable,
   7336                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7337                     @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7338                 }),
   7339                 mask_reg,
   7340                 mask_reg,
   7341                 Immediate.u(if (has_blend) 3 else 7), // unord, ord
   7342             );
   7343             if (has_blend) try self.asmRegisterRegisterRegister(
   7344                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7345                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7346                         32 => .{ ._ps, .blendv },
   7347                         64 => .{ ._pd, .blendv },
   7348                         16, 80, 128 => null,
   7349                         else => unreachable,
   7350                     },
   7351                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7352                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7353                             32 => switch (lhs_ty.vectorLen(mod)) {
   7354                                 1...4 => .{ ._ps, .blendv },
   7355                                 else => null,
   7356                             },
   7357                             64 => switch (lhs_ty.vectorLen(mod)) {
   7358                                 1...2 => .{ ._pd, .blendv },
   7359                                 else => null,
   7360                             },
   7361                             16, 80, 128 => null,
   7362                             else => unreachable,
   7363                         },
   7364                         else => unreachable,
   7365                     },
   7366                     else => unreachable,
   7367                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7368                     @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7369                 }),
   7370                 dst_reg,
   7371                 lhs_copy_reg.?,
   7372                 mask_reg,
   7373             ) else {
   7374                 try self.asmRegisterRegister(
   7375                     @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7376                         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7377                             32 => .{ ._ps, .@"and" },
   7378                             64 => .{ ._pd, .@"and" },
   7379                             16, 80, 128 => null,
   7380                             else => unreachable,
   7381                         },
   7382                         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7383                             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7384                                 32 => switch (lhs_ty.vectorLen(mod)) {
   7385                                     1...4 => .{ ._ps, .@"and" },
   7386                                     else => null,
   7387                                 },
   7388                                 64 => switch (lhs_ty.vectorLen(mod)) {
   7389                                     1...2 => .{ ._pd, .@"and" },
   7390                                     else => null,
   7391                                 },
   7392                                 16, 80, 128 => null,
   7393                                 else => unreachable,
   7394                             },
   7395                             else => unreachable,
   7396                         },
   7397                         else => unreachable,
   7398                     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7399                         @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7400                     }),
   7401                     dst_reg,
   7402                     mask_reg,
   7403                 );
   7404                 try self.asmRegisterRegister(
   7405                     @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7406                         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7407                             32 => .{ ._ps, .andn },
   7408                             64 => .{ ._pd, .andn },
   7409                             16, 80, 128 => null,
   7410                             else => unreachable,
   7411                         },
   7412                         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7413                             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7414                                 32 => switch (lhs_ty.vectorLen(mod)) {
   7415                                     1...4 => .{ ._ps, .andn },
   7416                                     else => null,
   7417                                 },
   7418                                 64 => switch (lhs_ty.vectorLen(mod)) {
   7419                                     1...2 => .{ ._pd, .andn },
   7420                                     else => null,
   7421                                 },
   7422                                 16, 80, 128 => null,
   7423                                 else => unreachable,
   7424                             },
   7425                             else => unreachable,
   7426                         },
   7427                         else => unreachable,
   7428                     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7429                         @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7430                     }),
   7431                     mask_reg,
   7432                     lhs_copy_reg.?,
   7433                 );
   7434                 try self.asmRegisterRegister(
   7435                     @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   7436                         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   7437                             32 => .{ ._ps, .@"or" },
   7438                             64 => .{ ._pd, .@"or" },
   7439                             16, 80, 128 => null,
   7440                             else => unreachable,
   7441                         },
   7442                         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   7443                             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   7444                                 32 => switch (lhs_ty.vectorLen(mod)) {
   7445                                     1...4 => .{ ._ps, .@"or" },
   7446                                     else => null,
   7447                                 },
   7448                                 64 => switch (lhs_ty.vectorLen(mod)) {
   7449                                     1...2 => .{ ._pd, .@"or" },
   7450                                     else => null,
   7451                                 },
   7452                                 16, 80, 128 => null,
   7453                                 else => unreachable,
   7454                             },
   7455                             else => unreachable,
   7456                         },
   7457                         else => unreachable,
   7458                     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   7459                         @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
   7460                     }),
   7461                     dst_reg,
   7462                     mask_reg,
   7463                 );
   7464             }
   7465         },
   7466         else => unreachable,
   7467     }
   7468 
   7469     return dst_mcv;
   7470 }
   7471 
   7472 fn genBinOpMir(
   7473     self: *Self,
   7474     mir_tag: Mir.Inst.FixedTag,
   7475     ty: Type,
   7476     dst_mcv: MCValue,
   7477     src_mcv: MCValue,
   7478 ) !void {
            // Emits the two-operand instruction `mir_tag` with `dst_mcv` as the destination
            // operand and `src_mcv` as the source operand, both treated as values of type `ty`.
            //
            // * Register destinations are handled with a single instruction; sources that
            //   cannot be encoded directly are first copied into a temporary register.
            // * Memory-like destinations are processed in limbs of at most 8 bytes, lowest
            //   offset first.
            //
            // Fails with a "TODO" error when a multi-limb operation is requested for an
            // instruction other than add/sub/cmp/or/and/xor.
   7479     const mod = self.bin_file.options.module.?;
   7480     const abi_size: u32 = @intCast(ty.abiSize(mod));
   7481     switch (dst_mcv) {
   7482         .none,
   7483         .unreach,
   7484         .dead,
   7485         .undef,
   7486         .immediate,
   7487         .eflags,
   7488         .register_overflow,
   7489         .lea_direct,
   7490         .lea_got,
   7491         .lea_tlv,
   7492         .lea_frame,
   7493         .reserved_frame,
   7494         => unreachable, // unmodifiable destination
   7495         .register, .register_offset => {
                    // NOTE(review): presumably `isRegister` rejects a `.register_offset` with a
                    // nonzero offset, so only a plain register gets here - confirm.
   7496             assert(dst_mcv.isRegister());
   7497             const dst_reg = dst_mcv.getReg().?;
   7498             const dst_alias = registerAlias(dst_reg, abi_size);
   7499             switch (src_mcv) {
   7500                 .none,
   7501                 .unreach,
   7502                 .dead,
   7503                 .undef,
   7504                 .register_overflow,
   7505                 .reserved_frame,
   7506                 => unreachable,
   7507                 .register => |src_reg| try self.asmRegisterRegister(
   7508                     mir_tag,
   7509                     dst_alias,
   7510                     registerAlias(src_reg, abi_size),
   7511                 ),
                        // The immediate is emitted as signed when its i64 bit pattern fits the
                        // operand width, and as unsigned otherwise (the `@intCast` asserts that
                        // it then fits the unsigned width).
   7512                 .immediate => |imm| switch (self.regBitSize(ty)) {
   7513                     8 => try self.asmRegisterImmediate(
   7514                         mir_tag,
   7515                         dst_alias,
   7516                         if (math.cast(i8, @as(i64, @bitCast(imm)))) |small|
   7517                             Immediate.s(small)
   7518                         else
   7519                             Immediate.u(@as(u8, @intCast(imm))),
   7520                     ),
   7521                     16 => try self.asmRegisterImmediate(
   7522                         mir_tag,
   7523                         dst_alias,
   7524                         if (math.cast(i16, @as(i64, @bitCast(imm)))) |small|
   7525                             Immediate.s(small)
   7526                         else
   7527                             Immediate.u(@as(u16, @intCast(imm))),
   7528                     ),
   7529                     32 => try self.asmRegisterImmediate(
   7530                         mir_tag,
   7531                         dst_alias,
   7532                         if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
   7533                             Immediate.s(small)
   7534                         else
   7535                             Immediate.u(@as(u32, @intCast(imm))),
   7536                     ),
                            // A 64-bit operand only takes an immediate that fits a signed 32-bit
                            // value; anything wider goes through a temporary register.
   7537                     64 => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
   7538                         try self.asmRegisterImmediate(mir_tag, dst_alias, Immediate.s(small))
   7539                     else
   7540                         try self.asmRegisterRegister(mir_tag, dst_alias, registerAlias(
   7541                             try self.copyToTmpRegister(ty, src_mcv),
   7542                             abi_size,
   7543                         )),
   7544                     else => unreachable,
   7545                 },
   7546                 .eflags,
   7547                 .register_offset,
   7548                 .memory,
   7549                 .indirect,
   7550                 .load_direct,
   7551                 .lea_direct,
   7552                 .load_got,
   7553                 .lea_got,
   7554                 .load_tlv,
   7555                 .lea_tlv,
   7556                 .load_frame,
   7557                 .lea_frame,
   7558                 => {
                            // Fast path: an absolute address that fits an i32 displacement, a
                            // register-indirect location, or a frame slot is used directly as the
                            // memory operand. Every other source breaks out to the fallback below.
   7559                     blk: {
   7560                         return self.asmRegisterMemory(
   7561                             mir_tag,
   7562                             registerAlias(dst_reg, abi_size),
   7563                             Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
   7564                                 .memory => |addr| .{
   7565                                     .base = .{ .reg = .ds },
   7566                                     .disp = math.cast(i32, addr) orelse break :blk,
   7567                                 },
   7568                                 .indirect => |reg_off| .{
   7569                                     .base = .{ .reg = reg_off.reg },
   7570                                     .disp = reg_off.off,
   7571                                 },
   7572                                 .load_frame => |frame_addr| .{
   7573                                     .base = .{ .frame = frame_addr.index },
   7574                                     .disp = frame_addr.off,
   7575                                 },
   7576                                 else => break :blk,
   7577                             }),
   7578                         );
   7579                     }
   7580 
                            // Fallback. The destination is locked for the rest of this scope,
                            // presumably so the temporary allocated below cannot be `dst_reg`.
   7581                     const dst_reg_lock = self.register_manager.lockReg(dst_reg);
   7582                     defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
   7583 
   7584                     switch (src_mcv) {
                                // Value-like sources: copy the value into a temporary register and
                                // retry with a register source.
   7585                         .eflags,
   7586                         .register_offset,
   7587                         .lea_direct,
   7588                         .lea_got,
   7589                         .lea_tlv,
   7590                         .lea_frame,
   7591                         => {
   7592                             const reg = try self.copyToTmpRegister(ty, src_mcv);
   7593                             return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
   7594                         },
                                // Memory-like sources: copy the address into a temporary register and
                                // retry with a register-indirect source.
   7595                         .memory,
   7596                         .load_direct,
   7597                         .load_got,
   7598                         .load_tlv,
   7599                         => {
   7600                             const ptr_ty = try mod.singleConstPtrType(ty);
   7601                             const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address());
   7602                             return self.genBinOpMir(mir_tag, ty, dst_mcv, .{
   7603                                 .indirect = .{ .reg = addr_reg },
   7604                             });
   7605                         },
   7606                         else => unreachable,
   7607                     }
   7608                 },
   7609             }
   7610         },
   7611         .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => {
                    // Address register plus its lock for an operand whose address has to be
                    // loaded into a register first; null when no such register is needed.
   7612             const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
                    // Size in bytes of one limb: the whole value if it fits in 8 bytes, else 8.
   7613             const limb_abi_size: u32 = @min(abi_size, 8);
   7614 
   7615             const dst_info: OpInfo = switch (dst_mcv) {
   7616                 else => unreachable,
                        // These destinations get their address loaded into a freshly allocated,
                        // locked register from the `gp` class.
   7617                 .memory, .load_got, .load_direct, .load_tlv => dst: {
   7618                     const dst_addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
   7619                     const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
   7620                     errdefer self.register_manager.unlockReg(dst_addr_lock);
   7621 
   7622                     try self.genSetReg(dst_addr_reg, Type.usize, dst_mcv.address());
   7623                     break :dst .{
   7624                         .addr_reg = dst_addr_reg,
   7625                         .addr_lock = dst_addr_lock,
   7626                     };
   7627                 },
                        // Register-indirect and frame destinations are addressed directly when
                        // `dst_limb_mem` is built below, so they need no address register.
                        // `.indirect` must be listed here: the enclosing case accepts it, and it
                        // would otherwise fall into `else => unreachable` above.
   7628                 .indirect, .load_frame => null,
   7629             };
   7630             defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
   7631 
   7632             const src_info: OpInfo = switch (src_mcv) {
   7633                 .none,
   7634                 .unreach,
   7635                 .dead,
   7636                 .undef,
   7637                 .register_overflow,
   7638                 .reserved_frame,
   7639                 => unreachable,
   7640                 .immediate,
   7641                 .register,
   7642                 .register_offset,
   7643                 .eflags,
   7644                 .indirect,
   7645                 .lea_direct,
   7646                 .lea_got,
   7647                 .lea_tlv,
   7648                 .load_frame,
   7649                 .lea_frame,
   7650                 => null,
   7651                 .memory, .load_got, .load_direct, .load_tlv => src: {
   7652                     switch (src_mcv) {
                                // An absolute address needs no address register when both it and the
                                // address `abi_size - limb_abi_size` bytes further fit in an i32.
   7653                         .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr))) != null and
   7654                             math.cast(i32, @as(i64, @bitCast(addr)) + abi_size - limb_abi_size) != null)
   7655                             break :src null,
   7656                         .load_got, .load_direct, .load_tlv => {},
   7657                         else => unreachable,
   7658                     }
   7659 
   7660                     const src_addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
   7661                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
   7662                     errdefer self.register_manager.unlockReg(src_addr_lock);
   7663 
   7664                     try self.genSetReg(src_addr_reg, Type.usize, src_mcv.address());
   7665                     break :src .{
   7666                         .addr_reg = src_addr_reg,
   7667                         .addr_lock = src_addr_lock,
   7668                     };
   7669                 },
   7670             };
   7671             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
   7672 
                    // Signedness controls how an immediate source is extended into the upper
                    // limbs; types that are not ABI integers are treated as unsigned.
   7673             const ty_signedness =
   7674                 if (ty.isAbiInt(mod)) ty.intInfo(mod).signedness else .unsigned;
                    // NOTE(review): the mapping below looks swapped (signed => usize,
                    // unsigned => isize). `limb_ty` is only passed to `regBitSize` and
                    // `copyToTmpRegister` in this function; left unchanged - confirm whether
                    // either of them depends on signedness before touching it.
   7675             const limb_ty = if (abi_size <= 8) ty else switch (ty_signedness) {
   7676                 .signed => Type.usize,
   7677                 .unsigned => Type.isize,
   7678             };
   7679             var off: i32 = 0;
   7680             while (off < abi_size) : (off += 8) {
                        // The first limb uses the requested instruction. Later limbs of add and
                        // sub/cmp continue with adc and sbb; bitwise operations are applied to
                        // each limb unchanged.
                        // NOTE(review): sbb stores its result whereas cmp does not - presumably
                        // callers never issue a multi-limb cmp on a destination that must be
                        // preserved; confirm.
   7681                 const mir_limb_tag: Mir.Inst.FixedTag = switch (off) {
   7682                     0 => mir_tag,
   7683                     else => switch (mir_tag[1]) {
   7684                         .add => .{ ._, .adc },
   7685                         .sub, .cmp => .{ ._, .sbb },
   7686                         .@"or", .@"and", .xor => mir_tag,
   7687                         else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
   7688                             @tagName(mir_tag[1]),
   7689                         }),
   7690                     },
   7691                 };
                        // Memory operand for the destination limb at byte offset `off`.
   7692                 const dst_limb_mem = Memory.sib(
   7693                     Memory.PtrSize.fromSize(limb_abi_size),
   7694                     switch (dst_mcv) {
   7695                         .memory,
   7696                         .load_got,
   7697                         .load_direct,
   7698                         .load_tlv,
   7699                         => .{ .base = .{ .reg = dst_info.?.addr_reg }, .disp = off },
   7700                         .indirect => |reg_off| .{
   7701                             .base = .{ .reg = reg_off.reg },
   7702                             .disp = reg_off.off + off,
   7703                         },
   7704                         .load_frame => |frame_addr| .{
   7705                             .base = .{ .frame = frame_addr.index },
   7706                             .disp = frame_addr.off + off,
   7707                         },
   7708                         else => unreachable,
   7709                     },
   7710                 );
   7711                 switch (src_mcv) {
   7712                     .none,
   7713                     .unreach,
   7714                     .dead,
   7715                     .undef,
   7716                     .register_overflow,
   7717                     .reserved_frame,
   7718                     => unreachable,
                            // A register source may only supply the first limb; reaching a later
                            // limb with a register source is treated as impossible.
   7719                     .register => |src_reg| switch (off) {
   7720                         0 => try self.asmMemoryRegister(
   7721                             mir_limb_tag,
   7722                             dst_limb_mem,
   7723                             registerAlias(src_reg, limb_abi_size),
   7724                         ),
   7725                         else => unreachable,
   7726                     },
   7727                     .immediate => |src_imm| {
                                // Upper limbs of an immediate are its sign extension (bit 63
                                // replicated) for signed types and zero for unsigned types.
   7728                         const imm: u64 = switch (off) {
   7729                             0 => src_imm,
   7730                             else => switch (ty_signedness) {
   7731                                 .signed => @bitCast(@as(i64, @bitCast(src_imm)) >> 63),
   7732                                 .unsigned => 0,
   7733                             },
   7734                         };
                                // Same signed-if-it-fits immediate selection as the register
                                // destination path, keyed on the limb width.
   7735                         switch (self.regBitSize(limb_ty)) {
   7736                             8 => try self.asmMemoryImmediate(
   7737                                 mir_limb_tag,
   7738                                 dst_limb_mem,
   7739                                 if (math.cast(i8, @as(i64, @bitCast(imm)))) |small|
   7740                                     Immediate.s(small)
   7741                                 else
   7742                                     Immediate.u(@as(u8, @intCast(imm))),
   7743                             ),
   7744                             16 => try self.asmMemoryImmediate(
   7745                                 mir_limb_tag,
   7746                                 dst_limb_mem,
   7747                                 if (math.cast(i16, @as(i64, @bitCast(imm)))) |small|
   7748                                     Immediate.s(small)
   7749                                 else
   7750                                     Immediate.u(@as(u16, @intCast(imm))),
   7751                             ),
   7752                             32 => try self.asmMemoryImmediate(
   7753                                 mir_limb_tag,
   7754                                 dst_limb_mem,
   7755                                 if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
   7756                                     Immediate.s(small)
   7757                                 else
   7758                                     Immediate.u(@as(u32, @intCast(imm))),
   7759                             ),
   7760                             64 => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
   7761                                 try self.asmMemoryImmediate(
   7762                                     mir_limb_tag,
   7763                                     dst_limb_mem,
   7764                                     Immediate.s(small),
   7765                                 )
   7766                             else
   7767                                 try self.asmMemoryRegister(
   7768                                     mir_limb_tag,
   7769                                     dst_limb_mem,
   7770                                     registerAlias(
   7771                                         try self.copyToTmpRegister(limb_ty, .{ .immediate = imm }),
   7772                                         limb_abi_size,
   7773                                     ),
   7774                                 ),
   7775                             else => unreachable,
   7776                         }
   7777                     },
   7778                     .register_offset,
   7779                     .eflags,
   7780                     .memory,
   7781                     .indirect,
   7782                     .load_direct,
   7783                     .lea_direct,
   7784                     .load_got,
   7785                     .lea_got,
   7786                     .load_tlv,
   7787                     .lea_tlv,
   7788                     .load_frame,
   7789                     .lea_frame,
   7790                     => {
                                // Load the source limb at `off` into a temporary register, then apply
                                // it to the destination limb. With an address register the limb is
                                // read through it; value-like sources provide their value for the
                                // first limb and zero for all later limbs; the remaining memory-like
                                // sources are re-addressed at `off`.
   7791                         const src_limb_reg = try self.copyToTmpRegister(limb_ty, if (src_info) |info| .{
   7792                             .indirect = .{ .reg = info.addr_reg, .off = off },
   7793                         } else switch (src_mcv) {
   7794                             .eflags,
   7795                             .register_offset,
   7796                             .lea_direct,
   7797                             .lea_got,
   7798                             .lea_tlv,
   7799                             .lea_frame,
   7800                             => switch (off) {
   7801                                 0 => src_mcv,
   7802                                 else => .{ .immediate = 0 },
   7803                             },
   7804                             .memory => |addr| .{ .memory = @bitCast(@as(i64, @bitCast(addr)) + off) },
   7805                             .indirect => |reg_off| .{ .indirect = .{
   7806                                 .reg = reg_off.reg,
   7807                                 .off = reg_off.off + off,
   7808                             } },
   7809                             .load_frame => |frame_addr| .{ .load_frame = .{
   7810                                 .index = frame_addr.index,
   7811                                 .off = frame_addr.off + off,
   7812                             } },
   7813                             else => unreachable,
   7814                         });
   7815                         try self.asmMemoryRegister(
   7816                             mir_limb_tag,
   7817                             dst_limb_mem,
   7818                             registerAlias(src_limb_reg, limb_abi_size),
   7819                         );
   7820                     },
   7821                 }
   7822             }
   7823         },
   7824     }
   7825 }
   7826 
   7827 /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
   7828 /// Does not support byte-size operands.
   7829 fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
   7830     const mod = self.bin_file.options.module.?;
   7831     const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   7832     switch (dst_mcv) {
   7833         .none,
   7834         .unreach,
   7835         .dead,
   7836         .undef,
   7837         .immediate,
   7838         .register_offset,
   7839         .eflags,
   7840         .register_overflow,
   7841         .lea_direct,
   7842         .lea_got,
   7843         .lea_tlv,
   7844         .lea_frame,
   7845         .reserved_frame,
   7846         => unreachable, // unmodifiable destination
   7847         .register => |dst_reg| {
   7848             const dst_alias = registerAlias(dst_reg, abi_size);
   7849             const dst_lock = self.register_manager.lockReg(dst_reg);
   7850             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7851 
   7852             switch (src_mcv) {
   7853                 .none,
   7854                 .unreach,
   7855                 .dead,
   7856                 .undef,
   7857                 .register_overflow,
   7858                 .reserved_frame,
   7859                 => unreachable,
   7860                 .register => |src_reg| try self.asmRegisterRegister(
   7861                     .{ .i_, .mul },
   7862                     dst_alias,
   7863                     registerAlias(src_reg, abi_size),
   7864                 ),
   7865                 .immediate => |imm| {
   7866                     if (math.cast(i32, imm)) |small| {
   7867                         try self.asmRegisterRegisterImmediate(
   7868                             .{ .i_, .mul },
   7869                             dst_alias,
   7870                             dst_alias,
   7871                             Immediate.s(small),
   7872                         );
   7873                     } else {
   7874                         const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv);
   7875                         return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
   7876                     }
   7877                 },
   7878                 .register_offset,
   7879                 .eflags,
   7880                 .load_direct,
   7881                 .lea_direct,
   7882                 .load_got,
   7883                 .lea_got,
   7884                 .load_tlv,
   7885                 .lea_tlv,
   7886                 .lea_frame,
   7887                 => try self.asmRegisterRegister(
   7888                     .{ .i_, .mul },
   7889                     dst_alias,
   7890                     registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size),
   7891                 ),
   7892                 .memory, .indirect, .load_frame => try self.asmRegisterMemory(
   7893                     .{ .i_, .mul },
   7894                     dst_alias,
   7895                     Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
   7896                         .memory => |addr| .{
   7897                             .base = .{ .reg = .ds },
   7898                             .disp = math.cast(i32, @as(i64, @bitCast(addr))) orelse
   7899                                 return self.asmRegisterRegister(
   7900                                 .{ .i_, .mul },
   7901                                 dst_alias,
   7902                                 registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size),
   7903                             ),
   7904                         },
   7905                         .indirect => |reg_off| .{
   7906                             .base = .{ .reg = reg_off.reg },
   7907                             .disp = reg_off.off,
   7908                         },
   7909                         .load_frame => |frame_addr| .{
   7910                             .base = .{ .frame = frame_addr.index },
   7911                             .disp = frame_addr.off,
   7912                         },
   7913                         else => unreachable,
   7914                     }),
   7915                 ),
   7916             }
   7917         },
   7918         .memory, .indirect, .load_direct, .load_got, .load_tlv, .load_frame => {
   7919             const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
   7920             const tmp_mcv = MCValue{ .register = tmp_reg };
   7921             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7922             defer self.register_manager.unlockReg(tmp_lock);
   7923 
   7924             try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv);
   7925             try self.genCopy(dst_ty, dst_mcv, tmp_mcv);
   7926         },
   7927     }
   7928 }
   7929 
   7930 fn airArg(self: *Self, inst: Air.Inst.Index) !void {
   7931     const mod = self.bin_file.options.module.?;
   7932     // skip zero-bit arguments as they don't have a corresponding arg instruction
   7933     var arg_index = self.arg_index;
   7934     while (self.args[arg_index] == .none) arg_index += 1;
   7935     self.arg_index = arg_index + 1;
   7936 
   7937     const result: MCValue = if (self.liveness.isUnused(inst)) .unreach else result: {
   7938         const dst_mcv = self.args[arg_index];
   7939         switch (dst_mcv) {
   7940             .register => |reg| self.register_manager.getRegAssumeFree(reg, inst),
   7941             .load_frame => {},
   7942             else => return self.fail("TODO implement arg for {}", .{dst_mcv}),
   7943         }
   7944 
   7945         const ty = self.typeOfIndex(inst);
   7946         const src_index = self.air.instructions.items(.data)[inst].arg.src_index;
   7947         const name = mod.getParamName(self.owner.func_index, src_index);
   7948         try self.genArgDbgInfo(ty, name, dst_mcv);
   7949 
   7950         break :result dst_mcv;
   7951     };
   7952     return self.finishAir(inst, result, .{ .none, .none, .none });
   7953 }
   7954 
   7955 fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void {
   7956     const mod = self.bin_file.options.module.?;
   7957     switch (self.debug_output) {
   7958         .dwarf => |dw| {
   7959             const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
   7960                 .register => |reg| .{ .register = reg.dwarfNum() },
   7961                 // TODO use a frame index
   7962                 .load_frame => return,
   7963                 //.stack_offset => |off| .{
   7964                 //    .stack = .{
   7965                 //        // TODO handle -fomit-frame-pointer
   7966                 //        .fp_register = Register.rbp.dwarfNum(),
   7967                 //        .offset = -off,
   7968                 //    },
   7969                 //},
   7970                 else => unreachable, // not a valid function parameter
   7971             };
   7972             // TODO: this might need adjusting like the linkers do.
   7973             // Instead of flattening the owner and passing Decl.Index here we may
   7974             // want to special case LazySymbol in DWARF linker too.
   7975             try dw.genArgDbgInfo(name, ty, self.owner.getDecl(mod), loc);
   7976         },
   7977         .plan9 => {},
   7978         .none => {},
   7979     }
   7980 }
   7981 
   7982 fn genVarDbgInfo(
   7983     self: Self,
   7984     tag: Air.Inst.Tag,
   7985     ty: Type,
   7986     mcv: MCValue,
   7987     name: [:0]const u8,
   7988 ) !void {
   7989     const mod = self.bin_file.options.module.?;
   7990     const is_ptr = switch (tag) {
   7991         .dbg_var_ptr => true,
   7992         .dbg_var_val => false,
   7993         else => unreachable,
   7994     };
   7995 
   7996     switch (self.debug_output) {
   7997         .dwarf => |dw| {
   7998             const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
   7999                 .register => |reg| .{ .register = reg.dwarfNum() },
   8000                 // TODO use a frame index
   8001                 .load_frame, .lea_frame => return,
   8002                 //=> |off| .{ .stack = .{
   8003                 //    .fp_register = Register.rbp.dwarfNum(),
   8004                 //    .offset = -off,
   8005                 //} },
   8006                 .memory => |address| .{ .memory = address },
   8007                 .load_got => |sym_index| .{ .linker_load = .{ .type = .got, .sym_index = sym_index } },
   8008                 .load_direct => |sym_index| .{ .linker_load = .{ .type = .direct, .sym_index = sym_index } },
   8009                 .immediate => |x| .{ .immediate = x },
   8010                 .undef => .undef,
   8011                 .none => .none,
   8012                 else => blk: {
   8013                     log.debug("TODO generate debug info for {}", .{mcv});
   8014                     break :blk .nop;
   8015                 },
   8016             };
   8017             // TODO: this might need adjusting like the linkers do.
   8018             // Instead of flattening the owner and passing Decl.Index here we may
   8019             // want to special case LazySymbol in DWARF linker too.
   8020             try dw.genVarDbgInfo(name, ty, self.owner.getDecl(mod), is_ptr, loc);
   8021         },
   8022         .plan9 => {},
   8023         .none => {},
   8024     }
   8025 }
   8026 
   8027 fn airTrap(self: *Self) !void {
   8028     try self.asmOpOnly(.{ ._, .ud2 });
   8029     return self.finishAirBookkeeping();
   8030 }
   8031 
   8032 fn airBreakpoint(self: *Self) !void {
   8033     try self.asmOpOnly(.{ ._, .int3 });
   8034     return self.finishAirBookkeeping();
   8035 }
   8036 
   8037 fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void {
   8038     const dst_mcv = try self.allocRegOrMem(inst, true);
   8039     try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } });
   8040     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
   8041 }
   8042 
   8043 fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
   8044     const dst_mcv = try self.allocRegOrMem(inst, true);
   8045     try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } });
   8046     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
   8047 }
   8048 
   8049 fn airFence(self: *Self, inst: Air.Inst.Index) !void {
   8050     const order = self.air.instructions.items(.data)[inst].fence;
   8051     switch (order) {
   8052         .Unordered, .Monotonic => unreachable,
   8053         .Acquire, .Release, .AcqRel => {},
   8054         .SeqCst => try self.asmOpOnly(.{ ._, .mfence }),
   8055     }
   8056     return self.finishAirBookkeeping();
   8057 }
   8058 
   8059 fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
   8060     const mod = self.bin_file.options.module.?;
   8061     if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{});
   8062     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   8063     const callee = pl_op.operand;
   8064     const extra = self.air.extraData(Air.Call, pl_op.payload);
   8065     const args: []const Air.Inst.Ref = @ptrCast(self.air.extra[extra.end..][0..extra.data.args_len]);
   8066     const ty = self.typeOf(callee);
   8067 
   8068     const fn_ty = switch (ty.zigTypeTag(mod)) {
   8069         .Fn => ty,
   8070         .Pointer => ty.childType(mod),
   8071         else => unreachable,
   8072     };
   8073 
   8074     const fn_info = mod.typeToFunc(fn_ty).?;
   8075 
   8076     var info = try self.resolveCallingConventionValues(fn_info, args[fn_info.param_types.len..], .call_frame);
   8077     defer info.deinit(self);
   8078 
   8079     // We need a properly aligned and sized call frame to be able to call this function.
   8080     {
   8081         const needed_call_frame =
   8082             FrameAlloc.init(.{ .size = info.stack_byte_count, .alignment = info.stack_align });
   8083         const frame_allocs_slice = self.frame_allocs.slice();
   8084         const stack_frame_size =
   8085             &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)];
   8086         stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
   8087         const stack_frame_align =
   8088             &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)];
   8089         stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align);
   8090     }
   8091 
   8092     try self.spillEflagsIfOccupied();
   8093     try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*));
   8094 
   8095     // set stack arguments first because this can clobber registers
   8096     // also clobber spill arguments as we go
   8097     switch (info.return_value.long) {
   8098         .none, .unreach => {},
   8099         .indirect => |reg_off| try self.spillRegisters(&.{reg_off.reg}),
   8100         else => unreachable,
   8101     }
   8102     for (args, info.args) |arg, mc_arg| {
   8103         const arg_ty = self.typeOf(arg);
   8104         const arg_mcv = try self.resolveInst(arg);
   8105         switch (mc_arg) {
   8106             .none => {},
   8107             .register => |reg| try self.spillRegisters(&.{reg}),
   8108             .load_frame => try self.genCopy(arg_ty, mc_arg, arg_mcv),
   8109             else => unreachable,
   8110         }
   8111     }
   8112 
   8113     // now we are free to set register arguments
   8114     const ret_lock = switch (info.return_value.long) {
   8115         .none, .unreach => null,
   8116         .indirect => |reg_off| lock: {
   8117             const ret_ty = fn_info.return_type.toType();
   8118             const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, mod));
   8119             try self.genSetReg(reg_off.reg, Type.usize, .{
   8120                 .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
   8121             });
   8122             info.return_value.short = .{ .load_frame = .{ .index = frame_index } };
   8123             break :lock self.register_manager.lockRegAssumeUnused(reg_off.reg);
   8124         },
   8125         else => unreachable,
   8126     };
   8127     defer if (ret_lock) |lock| self.register_manager.unlockReg(lock);
   8128 
   8129     for (args, info.args) |arg, mc_arg| {
   8130         const arg_ty = self.typeOf(arg);
   8131         const arg_mcv = try self.resolveInst(arg);
   8132         switch (mc_arg) {
   8133             .none, .load_frame => {},
   8134             .register => try self.genCopy(arg_ty, mc_arg, arg_mcv),
   8135             else => unreachable,
   8136         }
   8137     }
   8138 
   8139     // Due to incremental compilation, how function calls are generated depends
   8140     // on linking.
   8141     if (try self.air.value(callee, mod)) |func_value| {
   8142         const func_key = mod.intern_pool.indexToKey(func_value.ip_index);
   8143         if (switch (func_key) {
   8144             .func => |func| func.owner_decl,
   8145             .ptr => |ptr| switch (ptr.addr) {
   8146                 .decl => |decl| decl,
   8147                 else => null,
   8148             },
   8149             else => null,
   8150         }) |owner_decl| {
   8151             if (self.bin_file.cast(link.File.Elf)) |elf_file| {
   8152                 const atom_index = try elf_file.getOrCreateAtomForDecl(owner_decl);
   8153                 const atom = elf_file.getAtom(atom_index);
   8154                 _ = try atom.getOrCreateOffsetTableEntry(elf_file);
   8155                 const got_addr = atom.getOffsetTableAddress(elf_file);
   8156                 try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{
   8157                     .base = .{ .reg = .ds },
   8158                     .disp = @intCast(got_addr),
   8159                 }));
   8160             } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
   8161                 const atom = try coff_file.getOrCreateAtomForDecl(owner_decl);
   8162                 const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
   8163                 try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
   8164                 try self.asmRegister(.{ ._, .call }, .rax);
   8165             } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
   8166                 const atom = try macho_file.getOrCreateAtomForDecl(owner_decl);
   8167                 const sym_index = macho_file.getAtom(atom).getSymbolIndex().?;
   8168                 try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
   8169                 try self.asmRegister(.{ ._, .call }, .rax);
   8170             } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
   8171                 const atom_index = try p9.seeDecl(owner_decl);
   8172                 const atom = p9.getAtom(atom_index);
   8173                 try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{
   8174                     .base = .{ .reg = .ds },
   8175                     .disp = @intCast(atom.getOffsetTableAddress(p9)),
   8176                 }));
   8177             } else unreachable;
   8178         } else if (func_value.getExternFunc(mod)) |extern_func| {
   8179             const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name);
   8180             const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name);
   8181             if (self.bin_file.cast(link.File.Coff)) |coff_file| {
   8182                 const atom_index = try self.owner.getSymbolIndex(self);
   8183                 const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name);
   8184                 _ = try self.addInst(.{
   8185                     .tag = .mov,
   8186                     .ops = .import_reloc,
   8187                     .data = .{ .rx = .{
   8188                         .r1 = .rax,
   8189                         .payload = try self.addExtra(Mir.Reloc{
   8190                             .atom_index = atom_index,
   8191                             .sym_index = sym_index,
   8192                         }),
   8193                     } },
   8194                 });
   8195                 try self.asmRegister(.{ ._, .call }, .rax);
   8196             } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
   8197                 const atom_index = try self.owner.getSymbolIndex(self);
   8198                 const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name);
   8199                 _ = try self.addInst(.{
   8200                     .tag = .call,
   8201                     .ops = .extern_fn_reloc,
   8202                     .data = .{ .reloc = .{
   8203                         .atom_index = atom_index,
   8204                         .sym_index = sym_index,
   8205                     } },
   8206                 });
   8207             } else {
   8208                 return self.fail("TODO implement calling extern functions", .{});
   8209             }
   8210         } else {
   8211             return self.fail("TODO implement calling bitcasted functions", .{});
   8212         }
   8213     } else {
   8214         assert(ty.zigTypeTag(mod) == .Pointer);
   8215         const mcv = try self.resolveInst(callee);
   8216         try self.genSetReg(.rax, Type.usize, mcv);
   8217         try self.asmRegister(.{ ._, .call }, .rax);
   8218     }
   8219 
   8220     var bt = self.liveness.iterateBigTomb(inst);
   8221     self.feed(&bt, callee);
   8222     for (args) |arg| self.feed(&bt, arg);
   8223 
   8224     const result = if (self.liveness.isUnused(inst)) .unreach else info.return_value.short;
   8225     return self.finishAirResult(inst, result);
   8226 }
   8227 
   8228 fn airRet(self: *Self, inst: Air.Inst.Index) !void {
   8229     const mod = self.bin_file.options.module.?;
   8230     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8231     const operand = try self.resolveInst(un_op);
   8232     const ret_ty = self.fn_type.fnReturnType(mod);
   8233     switch (self.ret_mcv.short) {
   8234         .none => {},
   8235         .register => try self.genCopy(ret_ty, self.ret_mcv.short, operand),
   8236         .indirect => |reg_off| {
   8237             try self.register_manager.getReg(reg_off.reg, null);
   8238             const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg);
   8239             defer self.register_manager.unlockReg(lock);
   8240 
   8241             try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long);
   8242             try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, operand);
   8243         },
   8244         else => unreachable,
   8245     }
   8246     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
   8247     // which is available if the jump is 127 bytes or less forward.
   8248     const jmp_reloc = try self.asmJmpReloc(undefined);
   8249     try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
   8250     return self.finishAir(inst, .unreach, .{ un_op, .none, .none });
   8251 }
   8252 
   8253 fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
   8254     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8255     const ptr = try self.resolveInst(un_op);
   8256     const ptr_ty = self.typeOf(un_op);
   8257     switch (self.ret_mcv.short) {
   8258         .none => {},
   8259         .register => try self.load(self.ret_mcv.short, ptr_ty, ptr),
   8260         .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr),
   8261         else => unreachable,
   8262     }
   8263     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
   8264     // which is available if the jump is 127 bytes or less forward.
   8265     const jmp_reloc = try self.asmJmpReloc(undefined);
   8266     try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
   8267     return self.finishAir(inst, .unreach, .{ un_op, .none, .none });
   8268 }
   8269 
   8270 fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
   8271     const mod = self.bin_file.options.module.?;
   8272     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
   8273     const ty = self.typeOf(bin_op.lhs);
   8274 
   8275     try self.spillEflagsIfOccupied();
   8276     self.eflags_inst = inst;
   8277 
   8278     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   8279     const lhs_lock = switch (lhs_mcv) {
   8280         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   8281         else => null,
   8282     };
   8283     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   8284 
   8285     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   8286     const rhs_lock = switch (rhs_mcv) {
   8287         .register => |reg| self.register_manager.lockReg(reg),
   8288         else => null,
   8289     };
   8290     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   8291 
   8292     const result = MCValue{
   8293         .eflags = switch (ty.zigTypeTag(mod)) {
   8294             else => result: {
   8295                 const abi_size: u16 = @intCast(ty.abiSize(mod));
   8296                 const may_flip: enum {
   8297                     may_flip,
   8298                     must_flip,
   8299                     must_not_flip,
   8300                 } = if (abi_size > 8) switch (op) {
   8301                     .lt, .gte => .must_not_flip,
   8302                     .lte, .gt => .must_flip,
   8303                     .eq, .neq => .may_flip,
   8304                 } else .may_flip;
   8305 
   8306                 const flipped = switch (may_flip) {
   8307                     .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isMemory(),
   8308                     .must_flip => true,
   8309                     .must_not_flip => false,
   8310                 };
   8311                 const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
   8312                 const dst_mcv = if (unmat_dst_mcv.isRegister() or
   8313                     (abi_size <= 8 and unmat_dst_mcv.isMemory())) unmat_dst_mcv else dst: {
   8314                     const dst_mcv = try self.allocTempRegOrMem(ty, true);
   8315                     try self.genCopy(ty, dst_mcv, unmat_dst_mcv);
   8316                     break :dst dst_mcv;
   8317                 };
   8318                 const dst_lock =
   8319                     if (dst_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   8320                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   8321 
   8322                 const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   8323                 const src_lock =
   8324                     if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   8325                 defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   8326 
   8327                 break :result Condition.fromCompareOperator(
   8328                     if (ty.isAbiInt(mod)) ty.intInfo(mod).signedness else .unsigned,
   8329                     result_op: {
   8330                         const flipped_op = if (flipped) op.reverse() else op;
   8331                         if (abi_size > 8) switch (flipped_op) {
   8332                             .lt, .gte => {},
   8333                             .lte, .gt => unreachable,
   8334                             .eq, .neq => {
   8335                                 const dst_addr_mcv: MCValue = switch (dst_mcv) {
   8336                                     .memory, .indirect, .load_frame => dst_mcv.address(),
   8337                                     else => .{ .register = try self.copyToTmpRegister(
   8338                                         Type.usize,
   8339                                         dst_mcv.address(),
   8340                                     ) },
   8341                                 };
   8342                                 const dst_addr_lock = if (dst_addr_mcv.getReg()) |reg|
   8343                                     self.register_manager.lockReg(reg)
   8344                                 else
   8345                                     null;
   8346                                 defer if (dst_addr_lock) |lock| self.register_manager.unlockReg(lock);
   8347 
   8348                                 const src_addr_mcv: MCValue = switch (src_mcv) {
   8349                                     .memory, .indirect, .load_frame => src_mcv.address(),
   8350                                     else => .{ .register = try self.copyToTmpRegister(
   8351                                         Type.usize,
   8352                                         src_mcv.address(),
   8353                                     ) },
   8354                                 };
   8355                                 const src_addr_lock = if (src_addr_mcv.getReg()) |reg|
   8356                                     self.register_manager.lockReg(reg)
   8357                                 else
   8358                                     null;
   8359                                 defer if (src_addr_lock) |lock| self.register_manager.unlockReg(lock);
   8360 
   8361                                 const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp);
   8362                                 const acc_reg = regs[0].to64();
   8363                                 const locks = self.register_manager.lockRegsAssumeUnused(2, regs);
   8364                                 defer for (locks) |lock| self.register_manager.unlockReg(lock);
   8365 
   8366                                 const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable;
   8367                                 var limb_i: u16 = 0;
   8368                                 while (limb_i < limbs_len) : (limb_i += 1) {
   8369                                     const tmp_reg = regs[@min(limb_i, 1)].to64();
   8370                                     try self.genSetReg(
   8371                                         tmp_reg,
   8372                                         Type.usize,
   8373                                         dst_addr_mcv.offset(limb_i * 8).deref(),
   8374                                     );
   8375                                     try self.genBinOpMir(
   8376                                         .{ ._, .xor },
   8377                                         Type.usize,
   8378                                         .{ .register = tmp_reg },
   8379                                         src_addr_mcv.offset(limb_i * 8).deref(),
   8380                                     );
   8381                                     if (limb_i > 0) try self.asmRegisterRegister(
   8382                                         .{ ._, .@"or" },
   8383                                         acc_reg,
   8384                                         tmp_reg,
   8385                                     );
   8386                                 }
   8387                                 try self.asmRegisterRegister(.{ ._, .@"test" }, acc_reg, acc_reg);
   8388                                 break :result_op flipped_op;
   8389                             },
   8390                         };
   8391                         try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv);
   8392                         break :result_op flipped_op;
   8393                     },
   8394                 );
   8395             },
   8396             .Float => result: {
   8397                 const flipped = switch (op) {
   8398                     .lt, .lte => true,
   8399                     .eq, .gte, .gt, .neq => false,
   8400                 };
   8401 
   8402                 const dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
   8403                 const dst_reg = if (dst_mcv.isRegister())
   8404                     dst_mcv.getReg().?
   8405                 else
   8406                     try self.copyToTmpRegister(ty, dst_mcv);
   8407                 const dst_lock = self.register_manager.lockReg(dst_reg);
   8408                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   8409                 const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   8410 
   8411                 switch (ty.floatBits(self.target.*)) {
   8412                     16 => if (self.hasFeature(.f16c)) {
   8413                         const tmp1_reg = (try self.register_manager.allocReg(null, sse)).to128();
   8414                         const tmp1_mcv = MCValue{ .register = tmp1_reg };
   8415                         const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg);
   8416                         defer self.register_manager.unlockReg(tmp1_lock);
   8417 
   8418                         const tmp2_reg = (try self.register_manager.allocReg(null, sse)).to128();
   8419                         const tmp2_mcv = MCValue{ .register = tmp2_reg };
   8420                         const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
   8421                         defer self.register_manager.unlockReg(tmp2_lock);
   8422 
   8423                         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   8424                             .{ .vp_w, .insr },
   8425                             tmp1_reg,
   8426                             dst_reg.to128(),
   8427                             src_mcv.mem(.word),
   8428                             Immediate.u(1),
   8429                         ) else try self.asmRegisterRegisterRegister(
   8430                             .{ .vp_, .unpcklwd },
   8431                             tmp1_reg,
   8432                             dst_reg.to128(),
   8433                             (if (src_mcv.isRegister())
   8434                                 src_mcv.getReg().?
   8435                             else
   8436                                 try self.copyToTmpRegister(ty, src_mcv)).to128(),
   8437                         );
   8438                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
   8439                         try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
   8440                         try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
   8441                     } else return self.fail("TODO implement airCmp for {}", .{
   8442                         ty.fmt(mod),
   8443                     }),
   8444                     32 => try self.genBinOpMir(
   8445                         .{ ._ss, .ucomi },
   8446                         ty,
   8447                         .{ .register = dst_reg },
   8448                         src_mcv,
   8449                     ),
   8450                     64 => try self.genBinOpMir(
   8451                         .{ ._sd, .ucomi },
   8452                         ty,
   8453                         .{ .register = dst_reg },
   8454                         src_mcv,
   8455                     ),
   8456                     else => return self.fail("TODO implement airCmp for {}", .{
   8457                         ty.fmt(mod),
   8458                     }),
   8459                 }
   8460 
   8461                 break :result switch (if (flipped) op.reverse() else op) {
   8462                     .lt, .lte => unreachable, // required to have been canonicalized to gt(e)
   8463                     .gt => .a,
   8464                     .gte => .ae,
   8465                     .eq => .z_and_np,
   8466                     .neq => .nz_or_p,
   8467                 };
   8468             },
   8469         },
   8470     };
   8471     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   8472 }
   8473 
   8474 fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void {
   8475     _ = inst;
   8476     return self.fail("TODO implement airCmpVector for {}", .{self.target.cpu.arch});
   8477 }
   8478 
   8479 fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
   8480     const mod = self.bin_file.options.module.?;
   8481     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8482 
   8483     const addr_reg = try self.register_manager.allocReg(null, gp);
   8484     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   8485     defer self.register_manager.unlockReg(addr_lock);
   8486     try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod));
   8487 
   8488     try self.spillEflagsIfOccupied();
   8489     self.eflags_inst = inst;
   8490 
   8491     const op_ty = self.typeOf(un_op);
   8492     const op_abi_size: u32 = @intCast(op_ty.abiSize(mod));
   8493     const op_mcv = try self.resolveInst(un_op);
   8494     const dst_reg = switch (op_mcv) {
   8495         .register => |reg| reg,
   8496         else => try self.copyToTmpRegister(op_ty, op_mcv),
   8497     };
   8498     try self.asmRegisterMemory(
   8499         .{ ._, .cmp },
   8500         registerAlias(dst_reg, op_abi_size),
   8501         Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }),
   8502     );
   8503     const result = MCValue{ .eflags = .b };
   8504     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8505 }
   8506 
   8507 fn airTry(self: *Self, inst: Air.Inst.Index) !void {
   8508     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   8509     const extra = self.air.extraData(Air.Try, pl_op.payload);
   8510     const body = self.air.extra[extra.end..][0..extra.data.body_len];
   8511     const err_union_ty = self.typeOf(pl_op.operand);
   8512     const result = try self.genTry(inst, pl_op.operand, body, err_union_ty, false);
   8513     return self.finishAir(inst, result, .{ .none, .none, .none });
   8514 }
   8515 
   8516 fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void {
   8517     const mod = self.bin_file.options.module.?;
   8518     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   8519     const extra = self.air.extraData(Air.TryPtr, ty_pl.payload);
   8520     const body = self.air.extra[extra.end..][0..extra.data.body_len];
   8521     const err_union_ty = self.typeOf(extra.data.ptr).childType(mod);
   8522     const result = try self.genTry(inst, extra.data.ptr, body, err_union_ty, true);
   8523     return self.finishAir(inst, result, .{ .none, .none, .none });
   8524 }
   8525 
        /// Shared lowering for `airTry` and `airTryPtr`.
        ///
        /// Tests `err_union` for an error, emits a conditional jump around `body`
        /// (taken when the value is not an error), generates `body`, and then patches
        /// the jump so it lands just past the body. Returns `.unreach` when liveness
        /// reports `inst` as unused, otherwise the value produced by
        /// `genUnwrapErrorUnionPayloadMir`.
        ///
        /// `operand_is_ptr == true` is not implemented and fails with a TODO message.
   8526 fn genTry(
   8527     self: *Self,
   8528     inst: Air.Inst.Index,
   8529     err_union: Air.Inst.Ref,
   8530     body: []const Air.Inst.Index,
   8531     err_union_ty: Type,
   8532     operand_is_ptr: bool,
   8533 ) !MCValue {
   8534     if (operand_is_ptr) {
   8535         return self.fail("TODO genTry for pointers", .{});
   8536     }
   8537     const liveness_cond_br = self.liveness.getCondBr(inst);
   8538 
   8539     const err_union_mcv = try self.resolveInst(err_union);
            // Passing `null` means `isErr` does not record an owner for EFLAGS.
   8540     const is_err_mcv = try self.isErr(null, err_union_ty, err_union_mcv);
   8541 
            // Jump over `body` when the operand is not an error; patched below.
   8542     const reloc = try self.genCondBrMir(Type.anyerror, is_err_mcv);
   8543 
            // Handle the operand's death up front, before the body is generated.
   8544     if (self.liveness.operandDies(inst, 0)) {
   8545         if (Air.refToIndex(err_union)) |err_union_inst| self.processDeath(err_union_inst);
   8546     }
   8547 
            // Snapshot the state so it can be restored after the body.
   8548     self.scope_generation += 1;
   8549     const state = try self.saveState();
   8550 
   8551     for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand);
   8552     try self.genBody(body);
            // Roll tracking back to the snapshot without emitting any instructions.
   8553     try self.restoreState(state, &.{}, .{
   8554         .emit_instructions = false,
   8555         .update_tracking = true,
   8556         .resurrect = true,
   8557         .close_scope = true,
   8558     });
   8559 
            // The jump emitted above lands here, right after the body.
   8560     try self.performReloc(reloc);
   8561 
   8562     for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand);
   8563 
   8564     const result = if (self.liveness.isUnused(inst))
   8565         .unreach
   8566     else
   8567         try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, err_union_mcv);
   8568     return result;
   8569 }
   8570 
   8571 fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void {
   8572     const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt;
   8573     _ = try self.addInst(.{
   8574         .tag = .pseudo,
   8575         .ops = .pseudo_dbg_line_line_column,
   8576         .data = .{ .line_column = .{
   8577             .line = dbg_stmt.line,
   8578             .column = dbg_stmt.column,
   8579         } },
   8580     });
   8581     return self.finishAirBookkeeping();
   8582 }
   8583 
   8584 fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void {
   8585     const ty_fn = self.air.instructions.items(.data)[inst].ty_fn;
   8586     const mod = self.bin_file.options.module.?;
   8587     const func = mod.funcInfo(ty_fn.func);
   8588     // TODO emit debug info for function change
   8589     _ = func;
   8590     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   8591 }
   8592 
   8593 fn airDbgBlock(self: *Self, inst: Air.Inst.Index) !void {
   8594     // TODO emit debug info lexical block
   8595     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   8596 }
   8597 
   8598 fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void {
   8599     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   8600     const operand = pl_op.operand;
   8601     const ty = self.typeOf(operand);
   8602     const mcv = try self.resolveInst(operand);
   8603 
   8604     const name = self.air.nullTerminatedString(pl_op.payload);
   8605 
   8606     const tag = self.air.instructions.items(.tag)[inst];
   8607     try self.genVarDbgInfo(tag, ty, mcv, name);
   8608 
   8609     return self.finishAir(inst, .unreach, .{ operand, .none, .none });
   8610 }
   8611 
   8612 fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 {
   8613     const mod = self.bin_file.options.module.?;
   8614     const abi_size = ty.abiSize(mod);
   8615     switch (mcv) {
   8616         .eflags => |cc| {
   8617             // Here we map the opposites since the jump is to the false branch.
   8618             return self.asmJccReloc(undefined, cc.negate());
   8619         },
   8620         .register => |reg| {
   8621             try self.spillEflagsIfOccupied();
   8622             try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1));
   8623             return self.asmJccReloc(undefined, .e);
   8624         },
   8625         .immediate,
   8626         .load_frame,
   8627         => {
   8628             try self.spillEflagsIfOccupied();
   8629             if (abi_size <= 8) {
   8630                 const reg = try self.copyToTmpRegister(ty, mcv);
   8631                 return self.genCondBrMir(ty, .{ .register = reg });
   8632             }
   8633             return self.fail("TODO implement condbr when condition is {} with abi larger than 8 bytes", .{mcv});
   8634         },
   8635         else => return self.fail("TODO implement condbr when condition is {s}", .{@tagName(mcv)}),
   8636     }
   8637     return 0; // TODO
   8638 }
   8639 
        /// Lowers a conditional branch.
        ///
        /// Emits a jump that skips the "then" body when the condition is false,
        /// generates the "then" body, restores the saved state, patches the jump to
        /// the start of the "else" body, generates that body, and restores the state
        /// again. Both bodies are decoded from the `Air.CondBr` extra data and each
        /// starts by processing its own liveness deaths.
   8640 fn airCondBr(self: *Self, inst: Air.Inst.Index) !void {
   8641     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   8642     const cond = try self.resolveInst(pl_op.operand);
   8643     const cond_ty = self.typeOf(pl_op.operand);
   8644     const extra = self.air.extraData(Air.CondBr, pl_op.payload);
            // The "else" body is stored directly after the "then" body.
   8645     const then_body = self.air.extra[extra.end..][0..extra.data.then_body_len];
   8646     const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len];
   8647     const liveness_cond_br = self.liveness.getCondBr(inst);
   8648 
            // Jump to the "else" body when the condition is false; patched below.
   8649     const reloc = try self.genCondBrMir(cond_ty, cond);
   8650 
   8651     // If the condition dies here in this condbr instruction, process
   8652     // that death now instead of later as this has an effect on
   8653     // whether it needs to be spilled in the branches
   8654     if (self.liveness.operandDies(inst, 0)) {
   8655         if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst);
   8656     }
   8657 
            // Snapshot the state; it is restored after each of the two bodies.
   8658     self.scope_generation += 1;
   8659     const state = try self.saveState();
   8660 
   8661     for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand);
   8662     try self.genBody(then_body);
   8663     try self.restoreState(state, &.{}, .{
   8664         .emit_instructions = false,
   8665         .update_tracking = true,
   8666         .resurrect = true,
   8667         .close_scope = true,
   8668     });
   8669 
            // The "else" body starts here.
   8670     try self.performReloc(reloc);
   8671 
   8672     for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand);
   8673     try self.genBody(else_body);
   8674     try self.restoreState(state, &.{}, .{
   8675         .emit_instructions = false,
   8676         .update_tracking = true,
   8677         .resurrect = true,
   8678         .close_scope = true,
   8679     });
   8680 
   8681     // We already took care of pl_op.operand earlier, so we're going
   8682     // to pass .none here
   8683     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   8684 }
   8685 
        /// Emits a null check for the optional `opt_mcv` of type `opt_ty` and returns
        /// an `.eflags` value whose condition holds when the optional is null.
        ///
        /// A `.register_overflow` operand is answered from its recorded flags without
        /// emitting code. Otherwise EFLAGS is spilled if occupied and `inst` is
        /// recorded as its owner before the check is emitted.
   8686 fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue {
   8687     const mod = self.bin_file.options.module.?;
   8688     switch (opt_mcv) {
   8689         .register_overflow => |ro| return .{ .eflags = ro.eflags.negate() },
   8690         else => {},
   8691     }
   8692 
   8693     try self.spillEflagsIfOccupied();
   8694     self.eflags_inst = inst;
   8695 
   8696     const pl_ty = opt_ty.optionalChild(mod);
   8697 
            // Where to look and how wide: when the optional is represented by its
            // payload, the payload itself (the pointer field for slices) is tested at
            // offset 0; otherwise a bool located at the payload's ABI size is tested.
   8698     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod))
   8699         .{ .off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty }
   8700     else
   8701         .{ .off = @intCast(pl_ty.abiSize(mod)), .ty = Type.bool };
   8702 
   8703     switch (opt_mcv) {
   8704         .none,
   8705         .unreach,
   8706         .dead,
   8707         .undef,
   8708         .immediate,
   8709         .eflags,
   8710         .register_offset,
   8711         .register_overflow,
   8712         .lea_direct,
   8713         .lea_got,
   8714         .lea_tlv,
   8715         .lea_frame,
   8716         .reserved_frame,
   8717         => unreachable,
   8718 
   8719         .register => |opt_reg| {
   8720             if (some_info.off == 0) {
                        // Payload representation: an all-zero register is reported as null.
   8721                 const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
   8722                 const alias_reg = registerAlias(opt_reg, some_abi_size);
   8723                 assert(some_abi_size * 8 == alias_reg.bitSize());
   8724                 try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg);
   8725                 return .{ .eflags = .z };
   8726             }
                    // Separate bool: test bit `off * 8`, i.e. the lowest bit of the byte at
                    // offset `off` within the register; a clear bit is reported as null.
   8727             assert(some_info.ty.ip_index == .bool_type);
   8728             const opt_abi_size: u32 = @intCast(opt_ty.abiSize(mod));
   8729             try self.asmRegisterImmediate(
   8730                 .{ ._, .bt },
   8731                 registerAlias(opt_reg, opt_abi_size),
   8732                 Immediate.u(@as(u6, @intCast(some_info.off * 8))),
   8733             );
   8734             return .{ .eflags = .nc };
   8735         },
   8736 
   8737         .memory,
   8738         .load_got,
   8739         .load_direct,
   8740         .load_tlv,
   8741         => {
                    // Load the operand's address into a scratch register, then compare
                    // the field at `off` against zero.
   8742             const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
   8743             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   8744             defer self.register_manager.unlockReg(addr_reg_lock);
   8745 
   8746             try self.genSetReg(addr_reg, Type.usize, opt_mcv.address());
   8747             const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
   8748             try self.asmMemoryImmediate(
   8749                 .{ ._, .cmp },
   8750                 Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{
   8751                     .base = .{ .reg = addr_reg },
   8752                     .disp = some_info.off,
   8753                 }),
   8754                 Immediate.u(0),
   8755             );
   8756             return .{ .eflags = .e };
   8757         },
   8758 
   8759         .indirect, .load_frame => {
                    // These operands can be addressed directly, so no scratch register
                    // is needed for the comparison against zero.
   8760             const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
   8761             try self.asmMemoryImmediate(
   8762                 .{ ._, .cmp },
   8763                 Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) {
   8764                     .indirect => |reg_off| .{
   8765                         .base = .{ .reg = reg_off.reg },
   8766                         .disp = reg_off.off + some_info.off,
   8767                     },
   8768                     .load_frame => |frame_addr| .{
   8769                         .base = .{ .frame = frame_addr.index },
   8770                         .disp = frame_addr.off + some_info.off,
   8771                     },
   8772                     else => unreachable,
   8773                 }),
   8774                 Immediate.u(0),
   8775             );
   8776             return .{ .eflags = .e };
   8777         },
   8778     }
   8779 }
   8780 
   8781 fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
   8782     const mod = self.bin_file.options.module.?;
   8783     try self.spillEflagsIfOccupied();
   8784     self.eflags_inst = inst;
   8785 
   8786     const opt_ty = ptr_ty.childType(mod);
   8787     const pl_ty = opt_ty.optionalChild(mod);
   8788 
   8789     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod))
   8790         .{ .off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty }
   8791     else
   8792         .{ .off = @intCast(pl_ty.abiSize(mod)), .ty = Type.bool };
   8793 
   8794     const ptr_reg = switch (ptr_mcv) {
   8795         .register => |reg| reg,
   8796         else => try self.copyToTmpRegister(ptr_ty, ptr_mcv),
   8797     };
   8798     const ptr_lock = self.register_manager.lockReg(ptr_reg);
   8799     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   8800 
   8801     const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
   8802     try self.asmMemoryImmediate(
   8803         .{ ._, .cmp },
   8804         Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{
   8805             .base = .{ .reg = ptr_reg },
   8806             .disp = some_info.off,
   8807         }),
   8808         Immediate.u(0),
   8809     );
   8810     return .{ .eflags = .e };
   8811 }
   8812 
        /// Emits a comparison of the error field of the error union `operand` (of
        /// type `ty`) against zero and returns a value that is true when an error is
        /// present.
        ///
        /// Returns `.{ .immediate = 0 }` without emitting code when the error set is
        /// empty, otherwise `.{ .eflags = .a }`. When `maybe_inst` is non-null it is
        /// recorded as the owner of EFLAGS. Only `.register` and `.load_frame`
        /// operands are implemented; anything else fails with a TODO message.
   8813 fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !MCValue {
   8814     const mod = self.bin_file.options.module.?;
   8815     const err_type = ty.errorUnionSet(mod);
   8816 
   8817     if (err_type.errorSetIsEmpty(mod)) {
   8818         return MCValue{ .immediate = 0 }; // always false
   8819     }
   8820 
   8821     try self.spillEflagsIfOccupied();
   8822     if (maybe_inst) |inst| {
   8823         self.eflags_inst = inst;
   8824     }
   8825 
            // Byte offset of the error value inside the error union.
   8826     const err_off = errUnionErrorOffset(ty.errorUnionPayload(mod), mod);
   8827     switch (operand) {
   8828         .register => |reg| {
                    // Keep the operand register locked while a scratch copy is made.
   8829             const eu_lock = self.register_manager.lockReg(reg);
   8830             defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   8831 
                    // Isolate the error value in the copy: shift it down when it sits at
                    // a non-zero offset, otherwise truncate the copy to `anyerror`.
   8832             const tmp_reg = try self.copyToTmpRegister(ty, operand);
   8833             if (err_off > 0) {
   8834                 try self.genShiftBinOpMir(
   8835                     .{ ._r, .sh },
   8836                     ty,
   8837                     .{ .register = tmp_reg },
   8838                     .{ .immediate = @as(u6, @intCast(err_off * 8)) },
   8839                 );
   8840             } else {
   8841                 try self.truncateRegister(Type.anyerror, tmp_reg);
   8842             }
   8843             try self.genBinOpMir(
   8844                 .{ ._, .cmp },
   8845                 Type.anyerror,
   8846                 .{ .register = tmp_reg },
   8847                 .{ .immediate = 0 },
   8848             );
   8849         },
                // In a frame slot the error value is compared in place at its offset.
   8850         .load_frame => |frame_addr| try self.genBinOpMir(
   8851             .{ ._, .cmp },
   8852             Type.anyerror,
   8853             .{ .load_frame = .{
   8854                 .index = frame_addr.index,
   8855                 .off = frame_addr.off + @as(i32, @intCast(err_off)),
   8856             } },
   8857             .{ .immediate = 0 },
   8858         ),
   8859         else => return self.fail("TODO implement isErr for {}", .{operand}),
   8860     }
   8861 
            // "Above" after an unsigned compare with 0 means the error value is non-zero.
   8862     return MCValue{ .eflags = .a };
   8863 }
   8864 
   8865 fn isNonErr(self: *Self, inst: Air.Inst.Index, ty: Type, operand: MCValue) !MCValue {
   8866     const is_err_res = try self.isErr(inst, ty, operand);
   8867     switch (is_err_res) {
   8868         .eflags => |cc| {
   8869             assert(cc == .a);
   8870             return MCValue{ .eflags = cc.negate() };
   8871         },
   8872         .immediate => |imm| {
   8873             assert(imm == 0);
   8874             return MCValue{ .immediate = 1 };
   8875         },
   8876         else => unreachable,
   8877     }
   8878 }
   8879 
   8880 fn airIsNull(self: *Self, inst: Air.Inst.Index) !void {
   8881     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8882     const operand = try self.resolveInst(un_op);
   8883     const ty = self.typeOf(un_op);
   8884     const result = try self.isNull(inst, ty, operand);
   8885     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8886 }
   8887 
   8888 fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void {
   8889     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8890     const operand = try self.resolveInst(un_op);
   8891     const ty = self.typeOf(un_op);
   8892     const result = try self.isNullPtr(inst, ty, operand);
   8893     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8894 }
   8895 
   8896 fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void {
   8897     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8898     const operand = try self.resolveInst(un_op);
   8899     const ty = self.typeOf(un_op);
   8900     const result = switch (try self.isNull(inst, ty, operand)) {
   8901         .eflags => |cc| .{ .eflags = cc.negate() },
   8902         else => unreachable,
   8903     };
   8904     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8905 }
   8906 
   8907 fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void {
   8908     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8909     const operand = try self.resolveInst(un_op);
   8910     const ty = self.typeOf(un_op);
   8911     const result = switch (try self.isNullPtr(inst, ty, operand)) {
   8912         .eflags => |cc| .{ .eflags = cc.negate() },
   8913         else => unreachable,
   8914     };
   8915     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8916 }
   8917 
   8918 fn airIsErr(self: *Self, inst: Air.Inst.Index) !void {
   8919     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8920     const operand = try self.resolveInst(un_op);
   8921     const ty = self.typeOf(un_op);
   8922     const result = try self.isErr(inst, ty, operand);
   8923     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8924 }
   8925 
   8926 fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   8927     const mod = self.bin_file.options.module.?;
   8928     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8929 
   8930     const operand_ptr = try self.resolveInst(un_op);
   8931     const operand_ptr_lock: ?RegisterLock = switch (operand_ptr) {
   8932         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   8933         else => null,
   8934     };
   8935     defer if (operand_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   8936 
   8937     const operand: MCValue = blk: {
   8938         if (self.reuseOperand(inst, un_op, 0, operand_ptr)) {
   8939             // The MCValue that holds the pointer can be re-used as the value.
   8940             break :blk operand_ptr;
   8941         } else {
   8942             break :blk try self.allocRegOrMem(inst, true);
   8943         }
   8944     };
   8945     const ptr_ty = self.typeOf(un_op);
   8946     try self.load(operand, ptr_ty, operand_ptr);
   8947 
   8948     const result = try self.isErr(inst, ptr_ty.childType(mod), operand);
   8949 
   8950     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8951 }
   8952 
   8953 fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void {
   8954     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8955     const operand = try self.resolveInst(un_op);
   8956     const ty = self.typeOf(un_op);
   8957     const result = try self.isNonErr(inst, ty, operand);
   8958     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8959 }
   8960 
   8961 fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   8962     const mod = self.bin_file.options.module.?;
   8963     const un_op = self.air.instructions.items(.data)[inst].un_op;
   8964 
   8965     const operand_ptr = try self.resolveInst(un_op);
   8966     const operand_ptr_lock: ?RegisterLock = switch (operand_ptr) {
   8967         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   8968         else => null,
   8969     };
   8970     defer if (operand_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   8971 
   8972     const operand: MCValue = blk: {
   8973         if (self.reuseOperand(inst, un_op, 0, operand_ptr)) {
   8974             // The MCValue that holds the pointer can be re-used as the value.
   8975             break :blk operand_ptr;
   8976         } else {
   8977             break :blk try self.allocRegOrMem(inst, true);
   8978         }
   8979     };
   8980     const ptr_ty = self.typeOf(un_op);
   8981     try self.load(operand, ptr_ty, operand_ptr);
   8982 
   8983     const result = try self.isNonErr(inst, ptr_ty.childType(mod), operand);
   8984 
   8985     return self.finishAir(inst, result, .{ un_op, .none, .none });
   8986 }
   8987 
   8988 fn airLoop(self: *Self, inst: Air.Inst.Index) !void {
   8989     // A loop is a setup to be able to jump back to the beginning.
   8990     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   8991     const loop = self.air.extraData(Air.Block, ty_pl.payload);
   8992     const body = self.air.extra[loop.end..][0..loop.data.body_len];
   8993     const jmp_target: u32 = @intCast(self.mir_instructions.len);
   8994 
   8995     self.scope_generation += 1;
   8996     const state = try self.saveState();
   8997 
   8998     try self.genBody(body);
   8999     try self.restoreState(state, &.{}, .{
   9000         .emit_instructions = true,
   9001         .update_tracking = false,
   9002         .resurrect = false,
   9003         .close_scope = true,
   9004     });
   9005     _ = try self.asmJmpReloc(jmp_target);
   9006 
   9007     return self.finishAirBookkeeping();
   9008 }
   9009 
        /// Lowers a block: registers the block so jumps recorded against it can be
        /// collected while the body is generated, then patches every recorded jump
        /// to land at the end of the block.
        ///
        /// The block's own tracking entry starts out as `.unreach`; it is killed when
        /// liveness reports the block's result as unused.
   9010 fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
   9011     // A block is a setup to be able to jump to the end.
            // Remember the slot of this block's tracking entry; it is re-checked and
            // used again after the body has been generated.
   9012     const inst_tracking_i = self.inst_tracking.count();
   9013     self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(.unreach));
   9014 
   9015     self.scope_generation += 1;
   9016     try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() });
   9017     const liveness = self.liveness.getBlock(inst);
   9018 
   9019     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   9020     const extra = self.air.extraData(Air.Block, ty_pl.payload);
   9021     const body = self.air.extra[extra.end..][0..extra.data.body_len];
   9022     try self.genBody(body);
   9023 
            // The block is finished: take its bookkeeping out of the map and free it
            // when this function returns.
   9024     var block_data = self.blocks.fetchRemove(inst).?;
   9025     defer block_data.value.deinit(self.gpa);
            // Only restore state and patch jumps when at least one jump was recorded.
   9026     if (block_data.value.relocs.items.len > 0) {
   9027         try self.restoreState(block_data.value.state, liveness.deaths, .{
   9028             .emit_instructions = false,
   9029             .update_tracking = true,
   9030             .resurrect = true,
   9031             .close_scope = true,
   9032         });
   9033         for (block_data.value.relocs.items) |reloc| try self.performReloc(reloc);
   9034     }
   9035 
            // The tracking entry must still be in the slot recorded above.
   9036     if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i);
   9037     const tracking = &self.inst_tracking.values()[inst_tracking_i];
   9038     if (self.liveness.isUnused(inst)) tracking.die(self, inst);
   9039     self.getValue(tracking.short, inst);
   9040     self.finishAirBookkeeping();
   9041 }
   9042 
   9043 fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void {
   9044     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
   9045     const condition = try self.resolveInst(pl_op.operand);
   9046     const condition_ty = self.typeOf(pl_op.operand);
   9047     const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload);
   9048     var extra_index: usize = switch_br.end;
   9049     var case_i: u32 = 0;
   9050     const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.data.cases_len + 1);
   9051     defer self.gpa.free(liveness.deaths);
   9052 
   9053     // If the condition dies here in this switch instruction, process
   9054     // that death now instead of later as this has an effect on
   9055     // whether it needs to be spilled in the branches
   9056     if (self.liveness.operandDies(inst, 0)) {
   9057         if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst);
   9058     }
   9059 
   9060     self.scope_generation += 1;
   9061     const state = try self.saveState();
   9062 
   9063     while (case_i < switch_br.data.cases_len) : (case_i += 1) {
   9064         const case = self.air.extraData(Air.SwitchBr.Case, extra_index);
   9065         const items: []const Air.Inst.Ref =
   9066             @ptrCast(self.air.extra[case.end..][0..case.data.items_len]);
   9067         const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len];
   9068         extra_index = case.end + items.len + case_body.len;
   9069 
   9070         var relocs = try self.gpa.alloc(u32, items.len);
   9071         defer self.gpa.free(relocs);
   9072 
   9073         try self.spillEflagsIfOccupied();
   9074         for (items, relocs, 0..) |item, *reloc, i| {
   9075             const item_mcv = try self.resolveInst(item);
   9076             try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv);
   9077             reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne);
   9078         }
   9079 
   9080         for (liveness.deaths[case_i]) |operand| self.processDeath(operand);
   9081 
   9082         for (relocs[0 .. relocs.len - 1]) |reloc| try self.performReloc(reloc);
   9083         try self.genBody(case_body);
   9084         try self.restoreState(state, &.{}, .{
   9085             .emit_instructions = false,
   9086             .update_tracking = true,
   9087             .resurrect = true,
   9088             .close_scope = true,
   9089         });
   9090 
   9091         try self.performReloc(relocs[relocs.len - 1]);
   9092     }
   9093 
   9094     if (switch_br.data.else_body_len > 0) {
   9095         const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len];
   9096 
   9097         const else_deaths = liveness.deaths.len - 1;
   9098         for (liveness.deaths[else_deaths]) |operand| self.processDeath(operand);
   9099 
   9100         try self.genBody(else_body);
   9101         try self.restoreState(state, &.{}, .{
   9102             .emit_instructions = false,
   9103             .update_tracking = true,
   9104             .resurrect = true,
   9105             .close_scope = true,
   9106         });
   9107     }
   9108 
   9109     // We already took care of pl_op.operand earlier, so we're going to pass .none here
   9110     return self.finishAir(inst, .unreach, .{ .none, .none, .none });
   9111 }
   9112 
   9113 fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void {
   9114     const next_inst: u32 = @intCast(self.mir_instructions.len);
   9115     switch (self.mir_instructions.items(.tag)[reloc]) {
   9116         .j, .jmp => {},
   9117         .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) {
   9118             .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {},
   9119             else => unreachable,
   9120         },
   9121         else => unreachable,
   9122     }
   9123     self.mir_instructions.items(.data)[reloc].inst.inst = next_inst;
   9124 }
   9125 
/// Lowers an AIR `br` instruction: places the operand where the target block's
/// result lives, reconciles state with the block's saved state, and emits a
/// jump whose target is recorded in the block's `relocs` list for later
/// patching.
fn airBr(self: *Self, inst: Air.Inst.Index) !void {
    const mod = self.bin_file.options.module.?;
    const br = self.air.instructions.items(.data)[inst].br;
    const src_mcv = try self.resolveInst(br.operand);

    const block_ty = self.typeOfIndex(br.block_inst);
    // No result needs to be produced if the block type has no runtime bits or
    // liveness reports the block instruction as unused.
    const block_unused =
        !block_ty.hasRuntimeBitsIgnoreComptime(mod) or self.liveness.isUnused(br.block_inst);
    const block_tracking = self.inst_tracking.getPtr(br.block_inst).?;
    const block_data = self.blocks.getPtr(br.block_inst).?;
    // This function appends one reloc per `br` (see the end of the function),
    // so an empty list means no earlier `br` to this block has been lowered.
    const first_br = block_data.relocs.items.len == 0;
    const block_result = result: {
        if (block_unused) break :result .none;

        if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) {
            // On the first `br` the reused operand location becomes the block result.
            if (first_br) break :result src_mcv;

            // Otherwise the value has to end up in the location already
            // recorded in `block_tracking`.
            if (block_tracking.getReg()) |block_reg|
                try self.register_manager.getReg(block_reg, br.block_inst);
            // .long = .none to avoid merging operand and block result stack frames.
            var current_tracking = InstTracking{ .long = .none, .short = src_mcv };
            try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*);
            if (src_mcv.getReg()) |src_reg| self.register_manager.freeReg(src_reg);
            break :result block_tracking.short;
        }

        // The operand could not be reused: allocate a destination on the first
        // `br`, or reclaim the block's existing location, then copy into it.
        const dst_mcv = if (first_br) try self.allocRegOrMem(br.block_inst, true) else dst: {
            self.getValue(block_tracking.short, br.block_inst);
            break :dst block_tracking.short;
        };
        try self.genCopy(block_ty, dst_mcv, src_mcv);
        break :result dst_mcv;
    };

    // Process operand death so that it is properly accounted for in the State below.
    if (self.liveness.operandDies(inst, 0)) {
        if (Air.refToIndex(br.operand)) |op_inst| self.processDeath(op_inst);
    }

    // The first `br` defines the block's result location and saved state;
    // every later `br` is restored to that saved state, emitting instructions
    // as needed.
    if (first_br) {
        block_tracking.* = InstTracking.init(block_result);
        try self.saveRetroactiveState(&block_data.state);
    } else try self.restoreState(block_data.state, &.{}, .{
        .emit_instructions = true,
        .update_tracking = false,
        .resurrect = false,
        .close_scope = false,
    });

    // Stop tracking block result without forgetting tracking info
    self.freeValue(block_tracking.short);

    // Emit a jump with a relocation. It will be patched up after the block ends.
    // Leave the jump offset undefined
    const jmp_reloc = try self.asmJmpReloc(undefined);
    try block_data.relocs.append(self.gpa, jmp_reloc);

    self.finishAirBookkeeping();
}
   9185 
   9186 fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
   9187     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
   9188     const extra = self.air.extraData(Air.Asm, ty_pl.payload);
   9189     const clobbers_len: u31 = @truncate(extra.data.flags);
   9190     var extra_i: usize = extra.end;
   9191     const outputs: []const Air.Inst.Ref =
   9192         @ptrCast(self.air.extra[extra_i..][0..extra.data.outputs_len]);
   9193     extra_i += outputs.len;
   9194     const inputs: []const Air.Inst.Ref = @ptrCast(self.air.extra[extra_i..][0..extra.data.inputs_len]);
   9195     extra_i += inputs.len;
   9196 
   9197     var result: MCValue = .none;
   9198     var args = std.StringArrayHashMap(MCValue).init(self.gpa);
   9199     try args.ensureTotalCapacity(outputs.len + inputs.len + clobbers_len);
   9200     defer {
   9201         for (args.values()) |arg| switch (arg) {
   9202             .register => |reg| self.register_manager.unlockReg(.{ .register = reg }),
   9203             else => {},
   9204         };
   9205         args.deinit();
   9206     }
   9207 
   9208     if (outputs.len > 1) {
   9209         return self.fail("TODO implement codegen for asm with more than 1 output", .{});
   9210     }
   9211 
   9212     for (outputs) |output| {
   9213         if (output != .none) {
   9214             return self.fail("TODO implement codegen for non-expr asm", .{});
   9215         }
   9216         const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
   9217         const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
   9218         const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
   9219         // This equation accounts for the fact that even if we have exactly 4 bytes
   9220         // for the string, we still use the next u32 for the null terminator.
   9221         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
   9222 
   9223         const mcv: MCValue = if (mem.eql(u8, constraint, "=r"))
   9224             .{ .register = self.register_manager.tryAllocReg(inst, gp) orelse
   9225                 return self.fail("ran out of registers lowering inline asm", .{}) }
   9226         else if (mem.startsWith(u8, constraint, "={") and mem.endsWith(u8, constraint, "}"))
   9227             .{ .register = parseRegName(constraint["={".len .. constraint.len - "}".len]) orelse
   9228                 return self.fail("unrecognized register constraint: '{s}'", .{constraint}) }
   9229         else
   9230             return self.fail("unrecognized constraint: '{s}'", .{constraint});
   9231         args.putAssumeCapacity(name, mcv);
   9232         switch (mcv) {
   9233             .register => |reg| _ = if (RegisterManager.indexOfRegIntoTracked(reg)) |_|
   9234                 self.register_manager.lockRegAssumeUnused(reg),
   9235             else => {},
   9236         }
   9237         if (output == .none) result = mcv;
   9238     }
   9239 
   9240     for (inputs) |input| {
   9241         const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
   9242         const constraint = std.mem.sliceTo(input_bytes, 0);
   9243         const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
   9244         // This equation accounts for the fact that even if we have exactly 4 bytes
   9245         // for the string, we still use the next u32 for the null terminator.
   9246         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
   9247 
   9248         if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
   9249             return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
   9250         }
   9251         const reg_name = constraint[1 .. constraint.len - 1];
   9252         const reg = parseRegName(reg_name) orelse
   9253             return self.fail("unrecognized register: '{s}'", .{reg_name});
   9254 
   9255         const arg_mcv = try self.resolveInst(input);
   9256         try self.register_manager.getReg(reg, null);
   9257         try self.genSetReg(reg, self.typeOf(input), arg_mcv);
   9258     }
   9259 
   9260     {
   9261         var clobber_i: u32 = 0;
   9262         while (clobber_i < clobbers_len) : (clobber_i += 1) {
   9263             const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
   9264             // This equation accounts for the fact that even if we have exactly 4 bytes
   9265             // for the string, we still use the next u32 for the null terminator.
   9266             extra_i += clobber.len / 4 + 1;
   9267 
   9268             // TODO honor these
   9269         }
   9270     }
   9271 
   9272     const asm_source = mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len];
   9273     var line_it = mem.tokenizeAny(u8, asm_source, "\n\r;");
   9274     while (line_it.next()) |line| {
   9275         var mnem_it = mem.tokenizeAny(u8, line, " \t");
   9276         const mnem_str = mnem_it.next() orelse continue;
   9277         if (mem.startsWith(u8, mnem_str, "#")) continue;
   9278 
   9279         const mnem_size: ?Memory.PtrSize = if (mem.endsWith(u8, mnem_str, "b"))
   9280             .byte
   9281         else if (mem.endsWith(u8, mnem_str, "w"))
   9282             .word
   9283         else if (mem.endsWith(u8, mnem_str, "l"))
   9284             .dword
   9285         else if (mem.endsWith(u8, mnem_str, "q"))
   9286             .qword
   9287         else
   9288             null;
   9289         const mnem_tag = Mir.Inst.FixedTag{ ._, mnem: {
   9290             if (mnem_size) |_| {
   9291                 if (std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. mnem_str.len - 1])) |mnem| {
   9292                     break :mnem mnem;
   9293                 }
   9294             }
   9295             break :mnem std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse
   9296                 return self.fail("Invalid mnemonic: '{s}'", .{mnem_str});
   9297         } };
   9298 
   9299         var op_it = mem.tokenizeScalar(u8, mnem_it.rest(), ',');
   9300         var ops = [1]encoder.Instruction.Operand{.none} ** 4;
   9301         for (&ops) |*op| {
   9302             const op_str = mem.trim(u8, op_it.next() orelse break, " \t");
   9303             if (mem.startsWith(u8, op_str, "#")) break;
   9304             if (mem.startsWith(u8, op_str, "%%")) {
   9305                 const colon = mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':');
   9306                 const reg = parseRegName(op_str["%%".len .. colon orelse op_str.len]) orelse
   9307                     return self.fail("Invalid register: '{s}'", .{op_str});
   9308                 if (colon) |colon_pos| {
   9309                     const disp = std.fmt.parseInt(i32, op_str[colon_pos + 1 ..], 0) catch
   9310                         return self.fail("Invalid displacement: '{s}'", .{op_str});
   9311                     op.* = .{ .mem = Memory.sib(
   9312                         mnem_size orelse return self.fail("Unknown size: '{s}'", .{op_str}),
   9313                         .{ .base = .{ .reg = reg }, .disp = disp },
   9314                     ) };
   9315                 } else {
   9316                     if (mnem_size) |size| if (reg.bitSize() != size.bitSize())
   9317                         return self.fail("Invalid register size: '{s}'", .{op_str});
   9318                     op.* = .{ .reg = reg };
   9319                 }
   9320             } else if (mem.startsWith(u8, op_str, "%[") and mem.endsWith(u8, op_str, "]")) {
   9321                 switch (args.get(op_str["%[".len .. op_str.len - "]".len]) orelse
   9322                     return self.fail("No matching constraint: '{s}'", .{op_str})) {
   9323                     .register => |reg| op.* = .{ .reg = reg },
   9324                     else => return self.fail("Invalid constraint: '{s}'", .{op_str}),
   9325                 }
   9326             } else if (mem.startsWith(u8, op_str, "$")) {
   9327                 if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
   9328                     if (mnem_size) |size| {
   9329                         const max = @as(u64, math.maxInt(u64)) >> @intCast(64 - (size.bitSize() - 1));
   9330                         if ((if (s < 0) ~s else s) > max)
   9331                             return self.fail("Invalid immediate size: '{s}'", .{op_str});
   9332                     }
   9333                     op.* = .{ .imm = Immediate.s(s) };
   9334                 } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
   9335                     if (mnem_size) |size| {
   9336                         const max = @as(u64, math.maxInt(u64)) >> @intCast(64 - size.bitSize());
   9337                         if (u > max)
   9338                             return self.fail("Invalid immediate size: '{s}'", .{op_str});
   9339                     }
   9340                     op.* = .{ .imm = Immediate.u(u) };
   9341                 } else |_| return self.fail("Invalid immediate: '{s}'", .{op_str});
   9342             } else return self.fail("Invalid operand: '{s}'", .{op_str});
   9343         } else if (op_it.next()) |op_str| return self.fail("Extra operand: '{s}'", .{op_str});
   9344 
   9345         (switch (ops[0]) {
   9346             .none => self.asmOpOnly(mnem_tag),
   9347             .reg => |reg0| switch (ops[1]) {
   9348                 .none => self.asmRegister(mnem_tag, reg0),
   9349                 .reg => |reg1| switch (ops[2]) {
   9350                     .none => self.asmRegisterRegister(mnem_tag, reg1, reg0),
   9351                     .reg => |reg2| switch (ops[3]) {
   9352                         .none => self.asmRegisterRegisterRegister(mnem_tag, reg2, reg1, reg0),
   9353                         else => error.InvalidInstruction,
   9354                     },
   9355                     .mem => |mem2| switch (ops[3]) {
   9356                         .none => self.asmMemoryRegisterRegister(mnem_tag, mem2, reg1, reg0),
   9357                         else => error.InvalidInstruction,
   9358                     },
   9359                     else => error.InvalidInstruction,
   9360                 },
   9361                 .mem => |mem1| switch (ops[2]) {
   9362                     .none => self.asmMemoryRegister(mnem_tag, mem1, reg0),
   9363                     else => error.InvalidInstruction,
   9364                 },
   9365                 else => error.InvalidInstruction,
   9366             },
   9367             .mem => |mem0| switch (ops[1]) {
   9368                 .none => self.asmMemory(mnem_tag, mem0),
   9369                 .reg => |reg1| switch (ops[2]) {
   9370                     .none => self.asmRegisterMemory(mnem_tag, reg1, mem0),
   9371                     else => error.InvalidInstruction,
   9372                 },
   9373                 else => error.InvalidInstruction,
   9374             },
   9375             .imm => |imm0| switch (ops[1]) {
   9376                 .none => self.asmImmediate(mnem_tag, imm0),
   9377                 .reg => |reg1| switch (ops[2]) {
   9378                     .none => self.asmRegisterImmediate(mnem_tag, reg1, imm0),
   9379                     .reg => |reg2| switch (ops[3]) {
   9380                         .none => self.asmRegisterRegisterImmediate(mnem_tag, reg2, reg1, imm0),
   9381                         else => error.InvalidInstruction,
   9382                     },
   9383                     .mem => |mem2| switch (ops[3]) {
   9384                         .none => self.asmMemoryRegisterImmediate(mnem_tag, mem2, reg1, imm0),
   9385                         else => error.InvalidInstruction,
   9386                     },
   9387                     else => error.InvalidInstruction,
   9388                 },
   9389                 .mem => |mem1| switch (ops[2]) {
   9390                     .none => self.asmMemoryImmediate(mnem_tag, mem1, imm0),
   9391                     else => error.InvalidInstruction,
   9392                 },
   9393                 else => error.InvalidInstruction,
   9394             },
   9395         }) catch |err| switch (err) {
   9396             error.InvalidInstruction => return self.fail(
   9397                 "Invalid instruction: '{s} {s} {s} {s} {s}'",
   9398                 .{
   9399                     @tagName(mnem_tag[1]),
   9400                     @tagName(ops[0]),
   9401                     @tagName(ops[1]),
   9402                     @tagName(ops[2]),
   9403                     @tagName(ops[3]),
   9404                 },
   9405             ),
   9406             else => |e| return e,
   9407         };
   9408     }
   9409 
   9410     simple: {
   9411         var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
   9412         var buf_index: usize = 0;
   9413         for (outputs) |output| {
   9414             if (output == .none) continue;
   9415 
   9416             if (buf_index >= buf.len) break :simple;
   9417             buf[buf_index] = output;
   9418             buf_index += 1;
   9419         }
   9420         if (buf_index + inputs.len > buf.len) break :simple;
   9421         @memcpy(buf[buf_index..][0..inputs.len], inputs);
   9422         return self.finishAir(inst, result, buf);
   9423     }
   9424     var bt = self.liveness.iterateBigTomb(inst);
   9425     for (outputs) |output| if (output != .none) self.feed(&bt, output);
   9426     for (inputs) |input| self.feed(&bt, input);
   9427     return self.finishAirResult(inst, result);
   9428 }
   9429 
/// The instruction selection returned by `moveStrategy` for moving a value of
/// a given type.
const MoveStrategy = union(enum) {
    /// A single instruction tag is used for the move.
    move: Mir.Inst.FixedTag,
    /// A pair of insert/extract instruction tags; `moveStrategy` selects this
    /// variant when the `.avx` feature is not available.
    insert_extract: InsertExtract,
    /// A pair of insert/extract instruction tags; `moveStrategy` selects this
    /// variant when the `.avx` feature is available.
    vex_insert_extract: InsertExtract,

    /// The two instruction tags making up an insert/extract strategy.
    const InsertExtract = struct {
        insert: Mir.Inst.FixedTag,
        extract: Mir.Inst.FixedTag,
    };
};
   9440 fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
   9441     const mod = self.bin_file.options.module.?;
   9442     switch (ty.zigTypeTag(mod)) {
   9443         else => return .{ .move = .{ ._, .mov } },
   9444         .Float => switch (ty.floatBits(self.target.*)) {
   9445             16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   9446                 .insert = .{ .vp_w, .insr },
   9447                 .extract = .{ .vp_w, .extr },
   9448             } } else .{ .insert_extract = .{
   9449                 .insert = .{ .p_w, .insr },
   9450                 .extract = .{ .p_w, .extr },
   9451             } },
   9452             32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
   9453             64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
   9454             128 => return .{ .move = if (self.hasFeature(.avx))
   9455                 if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9456             else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9457             else => {},
   9458         },
   9459         .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   9460             .Int => switch (ty.childType(mod).intInfo(mod).bits) {
   9461                 8 => switch (ty.vectorLen(mod)) {
   9462                     1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
   9463                         .insert = .{ .vp_b, .insr },
   9464                         .extract = .{ .vp_b, .extr },
   9465                     } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
   9466                         .insert = .{ .p_b, .insr },
   9467                         .extract = .{ .p_b, .extr },
   9468                     } },
   9469                     2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   9470                         .insert = .{ .vp_w, .insr },
   9471                         .extract = .{ .vp_w, .extr },
   9472                     } } else .{ .insert_extract = .{
   9473                         .insert = .{ .p_w, .insr },
   9474                         .extract = .{ .p_w, .extr },
   9475                     } },
   9476                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   9477                         .{ .v_d, .mov }
   9478                     else
   9479                         .{ ._d, .mov } },
   9480                     5...8 => return .{ .move = if (self.hasFeature(.avx))
   9481                         .{ .v_q, .mov }
   9482                     else
   9483                         .{ ._q, .mov } },
   9484                     9...16 => return .{ .move = if (self.hasFeature(.avx))
   9485                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9486                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9487                     17...32 => if (self.hasFeature(.avx))
   9488                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9489                     else => {},
   9490                 },
   9491                 16 => switch (ty.vectorLen(mod)) {
   9492                     1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   9493                         .insert = .{ .vp_w, .insr },
   9494                         .extract = .{ .vp_w, .extr },
   9495                     } } else .{ .insert_extract = .{
   9496                         .insert = .{ .p_w, .insr },
   9497                         .extract = .{ .p_w, .extr },
   9498                     } },
   9499                     2 => return .{ .move = if (self.hasFeature(.avx))
   9500                         .{ .v_d, .mov }
   9501                     else
   9502                         .{ ._d, .mov } },
   9503                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   9504                         .{ .v_q, .mov }
   9505                     else
   9506                         .{ ._q, .mov } },
   9507                     5...8 => return .{ .move = if (self.hasFeature(.avx))
   9508                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9509                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9510                     9...16 => if (self.hasFeature(.avx))
   9511                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9512                     else => {},
   9513                 },
   9514                 32 => switch (ty.vectorLen(mod)) {
   9515                     1 => return .{ .move = if (self.hasFeature(.avx))
   9516                         .{ .v_d, .mov }
   9517                     else
   9518                         .{ ._d, .mov } },
   9519                     2 => return .{ .move = if (self.hasFeature(.avx))
   9520                         .{ .v_q, .mov }
   9521                     else
   9522                         .{ ._q, .mov } },
   9523                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   9524                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9525                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9526                     5...8 => if (self.hasFeature(.avx))
   9527                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9528                     else => {},
   9529                 },
   9530                 64 => switch (ty.vectorLen(mod)) {
   9531                     1 => return .{ .move = if (self.hasFeature(.avx))
   9532                         .{ .v_q, .mov }
   9533                     else
   9534                         .{ ._q, .mov } },
   9535                     2 => return .{ .move = if (self.hasFeature(.avx))
   9536                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9537                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9538                     3...4 => if (self.hasFeature(.avx))
   9539                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9540                     else => {},
   9541                 },
   9542                 128 => switch (ty.vectorLen(mod)) {
   9543                     1 => return .{ .move = if (self.hasFeature(.avx))
   9544                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9545                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9546                     2 => if (self.hasFeature(.avx))
   9547                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9548                     else => {},
   9549                 },
   9550                 256 => switch (ty.vectorLen(mod)) {
   9551                     1 => if (self.hasFeature(.avx))
   9552                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9553                     else => {},
   9554                 },
   9555                 else => {},
   9556             },
   9557             .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   9558                 16 => switch (ty.vectorLen(mod)) {
   9559                     1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
   9560                         .insert = .{ .vp_w, .insr },
   9561                         .extract = .{ .vp_w, .extr },
   9562                     } } else .{ .insert_extract = .{
   9563                         .insert = .{ .p_w, .insr },
   9564                         .extract = .{ .p_w, .extr },
   9565                     } },
   9566                     2 => return .{ .move = if (self.hasFeature(.avx))
   9567                         .{ .v_d, .mov }
   9568                     else
   9569                         .{ ._d, .mov } },
   9570                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   9571                         .{ .v_q, .mov }
   9572                     else
   9573                         .{ ._q, .mov } },
   9574                     5...8 => return .{ .move = if (self.hasFeature(.avx))
   9575                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9576                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9577                     9...16 => if (self.hasFeature(.avx))
   9578                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9579                     else => {},
   9580                 },
   9581                 32 => switch (ty.vectorLen(mod)) {
   9582                     1 => return .{ .move = if (self.hasFeature(.avx))
   9583                         .{ .v_ss, .mov }
   9584                     else
   9585                         .{ ._ss, .mov } },
   9586                     2 => return .{ .move = if (self.hasFeature(.avx))
   9587                         .{ .v_sd, .mov }
   9588                     else
   9589                         .{ ._sd, .mov } },
   9590                     3...4 => return .{ .move = if (self.hasFeature(.avx))
   9591                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
   9592                     else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
   9593                     5...8 => if (self.hasFeature(.avx))
   9594                         return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
   9595                     else => {},
   9596                 },
   9597                 64 => switch (ty.vectorLen(mod)) {
   9598                     1 => return .{ .move = if (self.hasFeature(.avx))
   9599                         .{ .v_sd, .mov }
   9600                     else
   9601                         .{ ._sd, .mov } },
   9602                     2 => return .{ .move = if (self.hasFeature(.avx))
   9603                         if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
   9604                     else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
   9605                     3...4 => if (self.hasFeature(.avx))
   9606                         return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } },
   9607                     else => {},
   9608                 },
   9609                 128 => switch (ty.vectorLen(mod)) {
   9610                     1 => return .{ .move = if (self.hasFeature(.avx))
   9611                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
   9612                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
   9613                     2 => if (self.hasFeature(.avx))
   9614                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
   9615                     else => {},
   9616                 },
   9617                 else => {},
   9618             },
   9619             else => {},
   9620         },
   9621     }
   9622     return self.fail("TODO moveStrategy for {}", .{ty.fmt(self.bin_file.options.module.?)});
   9623 }
   9624 
   9625 fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
   9626     const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   9627     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   9628 
   9629     switch (dst_mcv) {
   9630         .none,
   9631         .unreach,
   9632         .dead,
   9633         .undef,
   9634         .immediate,
   9635         .eflags,
   9636         .register_overflow,
   9637         .lea_direct,
   9638         .lea_got,
   9639         .lea_tlv,
   9640         .lea_frame,
   9641         .reserved_frame,
   9642         => unreachable, // unmodifiable destination
   9643         .register => |reg| try self.genSetReg(reg, ty, src_mcv),
   9644         .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) {
   9645             .none,
   9646             .unreach,
   9647             .dead,
   9648             .undef,
   9649             .register_overflow,
   9650             .reserved_frame,
   9651             => unreachable,
   9652             .immediate,
   9653             .register,
   9654             .register_offset,
   9655             .lea_frame,
   9656             => src_mcv.offset(-dst_reg_off.off),
   9657             else => .{ .register_offset = .{
   9658                 .reg = try self.copyToTmpRegister(ty, src_mcv),
   9659                 .off = -dst_reg_off.off,
   9660             } },
   9661         }),
   9662         .indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv),
   9663         .memory, .load_direct, .load_got, .load_tlv => {
   9664             switch (dst_mcv) {
   9665                 .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
   9666                     return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv),
   9667                 .load_direct, .load_got, .load_tlv => {},
   9668                 else => unreachable,
   9669             }
   9670 
   9671             const addr_reg = try self.copyToTmpRegister(Type.usize, dst_mcv.address());
   9672             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   9673             defer self.register_manager.unlockReg(addr_lock);
   9674 
   9675             try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv);
   9676         },
   9677         .load_frame => |frame_addr| try self.genSetMem(
   9678             .{ .frame = frame_addr.index },
   9679             frame_addr.off,
   9680             ty,
   9681             src_mcv,
   9682         ),
   9683     }
   9684 }
   9685 
/// Emits the instructions that place the value `src_mcv`, of type `ty`, in `dst_reg`.
///
/// Fails with a codegen error when the ABI size of `ty`, in bits, exceeds
/// `dst_reg.bitSize()`, and with a TODO error for the combinations noted
/// below. The source values `.none`, `.unreach`, `.dead`, `.register_overflow`
/// and `.reserved_frame` are unreachable.
fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void {
    const mod = self.bin_file.options.module.?;
    const abi_size: u32 = @intCast(ty.abiSize(mod));
    if (abi_size * 8 > dst_reg.bitSize())
        return self.fail("genSetReg called with a value larger than dst_reg", .{});
    switch (src_mcv) {
        .none,
        .unreach,
        .dead,
        .register_overflow,
        .reserved_frame,
        => unreachable,
        // Nothing is emitted unless safety is wanted; in that case the 64-bit
        // alias of the destination is filled with the 0xaa byte pattern.
        .undef => if (self.wantSafety())
            try self.genSetReg(dst_reg.to64(), Type.usize, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
        // The condition code is written to the 8-bit alias of the destination.
        .eflags => |cc| try self.asmSetccRegister(dst_reg.to8(), cc),
        .immediate => |imm| {
            if (imm == 0) {
                // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
                // register is the fastest way to zero a register.
                try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
            } else if (abi_size > 4 and math.cast(u32, imm) != null) {
                // 32-bit moves zero-extend to 64-bit.
                try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm));
            } else if (abi_size <= 4 and @as(i64, @bitCast(imm)) < 0) {
                // A value that is negative when reinterpreted as i64 is
                // emitted as a signed immediate for the sized alias.
                try self.asmRegisterImmediate(
                    .{ ._, .mov },
                    registerAlias(dst_reg, abi_size),
                    Immediate.s(@intCast(@as(i64, @bitCast(imm)))),
                );
            } else {
                try self.asmRegisterImmediate(
                    .{ ._, .mov },
                    registerAlias(dst_reg, abi_size),
                    Immediate.u(imm),
                );
            }
        },
        // Register-to-register: nothing is emitted when both sides have the
        // same register id. Otherwise dispatch on the destination register
        // class first and the source register class second.
        .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
            .general_purpose => switch (src_reg.class()) {
                .general_purpose => try self.asmRegisterRegister(
                    .{ ._, .mov },
                    registerAlias(dst_reg, abi_size),
                    registerAlias(src_reg, abi_size),
                ),
                .segment => try self.asmRegisterRegister(
                    .{ ._, .mov },
                    registerAlias(dst_reg, abi_size),
                    src_reg,
                ),
                // SSE -> general purpose: the `d` or `q` mov variant is picked
                // by size (the `v_` forms when AVX is available). The general
                // purpose side uses at least its 4-byte alias.
                .sse => try self.asmRegisterRegister(
                    switch (abi_size) {
                        1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
                        5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
                        else => unreachable,
                    },
                    registerAlias(dst_reg, @max(abi_size, 4)),
                    src_reg.to128(),
                ),
                .x87, .mmx => unreachable,
            },
            .segment => try self.asmRegisterRegister(
                .{ ._, .mov },
                dst_reg,
                switch (src_reg.class()) {
                    .general_purpose, .segment => registerAlias(src_reg, abi_size),
                    // An SSE source goes through a temporary register obtained
                    // from `copyToTmpRegister`.
                    .sse => try self.copyToTmpRegister(ty, src_mcv),
                    .x87, .mmx => unreachable,
                },
            ),
            .sse => switch (src_reg.class()) {
                // General purpose -> SSE: mirror image of the SSE -> general
                // purpose case above.
                .general_purpose => try self.asmRegisterRegister(
                    switch (abi_size) {
                        1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
                        5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
                        else => unreachable,
                    },
                    dst_reg.to128(),
                    registerAlias(src_reg, @max(abi_size, 4)),
                ),
                // Segment -> SSE: copy to a temporary register first, then
                // recurse with that register as the source.
                .segment => try self.genSetReg(
                    dst_reg,
                    ty,
                    .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
                ),
                // SSE -> SSE: the instruction depends on the scalar type and
                // the size. A `null` tag means no instruction is selected here
                // and is reported as a TODO failure.
                .sse => try self.asmRegisterRegister(
                    @as(?Mir.Inst.FixedTag, switch (ty.scalarType(mod).zigTypeTag(mod)) {
                        else => switch (abi_size) {
                            1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
                            5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
                            9...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
                            17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
                            else => null,
                        },
                        .Float => switch (ty.scalarType(mod).floatBits(self.target.*)) {
                            16, 128 => switch (abi_size) {
                                2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
                                5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
                                9...16 => if (self.hasFeature(.avx))
                                    .{ .v_, .movdqa }
                                else
                                    .{ ._, .movdqa },
                                17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
                                else => null,
                            },
                            32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
                            64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
                            80 => null,
                            else => unreachable,
                        },
                    }) orelse return self.fail("TODO implement genSetReg for {}", .{
                        ty.fmt(self.bin_file.options.module.?),
                    }),
                    registerAlias(dst_reg, abi_size),
                    registerAlias(src_reg, abi_size),
                ),
                .x87, .mmx => unreachable,
            },
            .x87, .mmx => unreachable,
        },
        // Sources expressed as a base (register or frame index) plus a
        // displacement. `.register_offset` and `.lea_frame` are address
        // computations and use `lea`; `.indirect` and `.load_frame` are loads
        // whose instruction is chosen by `moveStrategy`.
        .register_offset,
        .indirect,
        .load_frame,
        .lea_frame,
        => {
            const dst_alias = registerAlias(dst_reg, abi_size);
            const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
                .register_offset, .indirect => |reg_off| .{
                    .base = .{ .reg = reg_off.reg },
                    .disp = reg_off.off,
                },
                .load_frame, .lea_frame => |frame_addr| .{
                    .base = .{ .frame = frame_addr.index },
                    .disp = frame_addr.off,
                },
                else => unreachable,
            });
            switch (@as(MoveStrategy, switch (src_mcv) {
                .register_offset => |reg_off| switch (reg_off.off) {
                    // A zero offset is just a plain register copy.
                    0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
                    else => .{ .move = .{ ._, .lea } },
                },
                .indirect => try self.moveStrategy(ty, false),
                // The second argument is whether the frame address alignment is
                // at least the ABI alignment of `ty`.
                .load_frame => |frame_addr| try self.moveStrategy(
                    ty,
                    self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(mod),
                ),
                .lea_frame => .{ .move = .{ ._, .lea } },
                else => unreachable,
            })) {
                .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
                .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    src_mem,
                    Immediate.u(0),
                ),
                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    dst_alias,
                    src_mem,
                    Immediate.u(0),
                ),
            }
        },
        .memory, .load_direct, .load_got, .load_tlv => {
            switch (src_mcv) {
                // An absolute address that fits in an `i32` is loaded directly
                // through a `ds`-based displacement. The alignment passed to
                // `moveStrategy` is derived from the address itself.
                .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| {
                    const dst_alias = registerAlias(dst_reg, abi_size);
                    const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
                        .base = .{ .reg = .ds },
                        .disp = small_addr,
                    });
                    switch (try self.moveStrategy(ty, mem.isAlignedGeneric(
                        u32,
                        @as(u32, @bitCast(small_addr)),
                        ty.abiAlignment(mod),
                    ))) {
                        .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
                        .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
                            ie.insert,
                            dst_alias,
                            src_mem,
                            Immediate.u(0),
                        ),
                        .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                            ie.insert,
                            dst_alias,
                            dst_alias,
                            src_mem,
                            Immediate.u(0),
                        ),
                    }
                },
                // Non-float, non-vector values are loaded with a single `mov`
                // carrying a direct relocation into the 64-bit alias. Floats
                // and vectors fall through to the generic path below.
                .load_direct => |sym_index| switch (ty.zigTypeTag(mod)) {
                    else => {
                        const atom_index = try self.owner.getSymbolIndex(self);
                        _ = try self.addInst(.{
                            .tag = .mov,
                            .ops = .direct_reloc,
                            .data = .{ .rx = .{
                                .r1 = dst_reg.to64(),
                                .payload = try self.addExtra(Mir.Reloc{
                                    .atom_index = atom_index,
                                    .sym_index = sym_index,
                                }),
                            } },
                        });
                        return;
                    },
                    .Float, .Vector => {},
                },
                .load_got, .load_tlv => {},
                else => unreachable,
            }

            // Generic path: put the address of the source in a temporary
            // register and load through it, telling `moveStrategy` that the
            // access is not known to be aligned.
            const addr_reg = try self.copyToTmpRegister(Type.usize, src_mcv.address());
            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
            defer self.register_manager.unlockReg(addr_lock);

            const dst_alias = registerAlias(dst_reg, abi_size);
            const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
                .base = .{ .reg = addr_reg },
            });
            switch (try self.moveStrategy(ty, false)) {
                .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
                .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    src_mem,
                    Immediate.u(0),
                ),
                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    dst_alias,
                    src_mem,
                    Immediate.u(0),
                ),
            }
        },
        // Symbol addresses: `lea` with a direct relocation, or `mov` with a
        // GOT relocation, always into the 64-bit alias of the destination.
        .lea_direct, .lea_got => |sym_index| {
            const atom_index = try self.owner.getSymbolIndex(self);
            _ = try self.addInst(.{
                .tag = switch (src_mcv) {
                    .lea_direct => .lea,
                    .lea_got => .mov,
                    else => unreachable,
                },
                .ops = switch (src_mcv) {
                    .lea_direct => .direct_reloc,
                    .lea_got => .got_reloc,
                    else => unreachable,
                },
                .data = .{ .rx = .{
                    .r1 = dst_reg.to64(),
                    .payload = try self.addExtra(Mir.Reloc{
                        .atom_index = atom_index,
                        .sym_index = sym_index,
                    }),
                } },
            });
        },
        // Thread-local variable address. Only implemented for MachO: `lea`
        // with a TLV relocation into rdi, an indirect call through [rdi], and
        // then the value of rax is copied into the destination.
        .lea_tlv => |sym_index| {
            const atom_index = try self.owner.getSymbolIndex(self);
            if (self.bin_file.cast(link.File.MachO)) |_| {
                _ = try self.addInst(.{
                    .tag = .lea,
                    .ops = .tlv_reloc,
                    .data = .{ .rx = .{
                        .r1 = .rdi,
                        .payload = try self.addExtra(Mir.Reloc{
                            .atom_index = atom_index,
                            .sym_index = sym_index,
                        }),
                    } },
                });
                // TODO: spill registers before calling
                try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } }));
                try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax });
            } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{
                @tagName(self.bin_file.tag),
            });
        },
    }
}
   9972 
   9973 fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCValue) InnerError!void {
   9974     const mod = self.bin_file.options.module.?;
   9975     const abi_size: u32 = @intCast(ty.abiSize(mod));
   9976     const dst_ptr_mcv: MCValue = switch (base) {
   9977         .none => .{ .immediate = @bitCast(@as(i64, disp)) },
   9978         .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } },
   9979         .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } },
   9980     };
   9981     switch (src_mcv) {
   9982         .none, .unreach, .dead, .reserved_frame => unreachable,
   9983         .undef => if (self.wantSafety())
   9984             try self.genInlineMemset(dst_ptr_mcv, .{ .immediate = 0xaa }, .{ .immediate = abi_size }),
   9985         .immediate => |imm| switch (abi_size) {
   9986             1, 2, 4 => {
   9987                 const immediate = if (ty.isSignedInt(mod))
   9988                     Immediate.s(@truncate(@as(i64, @bitCast(imm))))
   9989                 else
   9990                     Immediate.u(@as(u32, @intCast(imm)));
   9991                 try self.asmMemoryImmediate(
   9992                     .{ ._, .mov },
   9993                     Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
   9994                     immediate,
   9995                 );
   9996             },
   9997             3, 5...7 => unreachable,
   9998             else => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| {
   9999                 try self.asmMemoryImmediate(
  10000                     .{ ._, .mov },
  10001                     Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
  10002                     Immediate.s(small),
  10003                 );
  10004             } else {
  10005                 var offset: i32 = 0;
  10006                 while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate(
  10007                     .{ ._, .mov },
  10008                     Memory.sib(.dword, .{ .base = base, .disp = disp + offset }),
  10009                     if (ty.isSignedInt(mod))
  10010                         Immediate.s(
  10011                             @truncate(@as(i64, @bitCast(imm)) >> (math.cast(u6, offset * 8) orelse 63)),
  10012                         )
  10013                     else
  10014                         Immediate.u(@as(
  10015                             u32,
  10016                             @truncate(if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0),
  10017                         )),
  10018                 );
  10019             },
  10020         },
  10021         .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc),
  10022         .register => |src_reg| {
  10023             const dst_mem = Memory.sib(
  10024                 Memory.PtrSize.fromSize(abi_size),
  10025                 .{ .base = base, .disp = disp },
  10026             );
  10027             const src_alias = registerAlias(src_reg, abi_size);
  10028             switch (try self.moveStrategy(ty, switch (base) {
  10029                 .none => mem.isAlignedGeneric(
  10030                     u32,
  10031                     @as(u32, @bitCast(disp)),
  10032                     ty.abiAlignment(mod),
  10033                 ),
  10034                 .reg => |reg| switch (reg) {
  10035                     .es, .cs, .ss, .ds => mem.isAlignedGeneric(
  10036                         u32,
  10037                         @as(u32, @bitCast(disp)),
  10038                         ty.abiAlignment(mod),
  10039                     ),
  10040                     else => false,
  10041                 },
  10042                 .frame => |frame_index| self.getFrameAddrAlignment(
  10043                     .{ .index = frame_index, .off = disp },
  10044                 ) >= ty.abiAlignment(mod),
  10045             })) {
  10046                 .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_alias),
  10047                 .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate(
  10048                     ie.extract,
  10049                     dst_mem,
  10050                     src_alias,
  10051                     Immediate.u(0),
  10052                 ),
  10053             }
  10054         },
  10055         .register_overflow => |ro| {
  10056             try self.genSetMem(
  10057                 base,
  10058                 disp + @as(i32, @intCast(ty.structFieldOffset(0, mod))),
  10059                 ty.structFieldType(0, mod),
  10060                 .{ .register = ro.reg },
  10061             );
  10062             try self.genSetMem(
  10063                 base,
  10064                 disp + @as(i32, @intCast(ty.structFieldOffset(1, mod))),
  10065                 ty.structFieldType(1, mod),
  10066                 .{ .eflags = ro.eflags },
  10067             );
  10068         },
  10069         .register_offset,
  10070         .memory,
  10071         .indirect,
  10072         .load_direct,
  10073         .lea_direct,
  10074         .load_got,
  10075         .lea_got,
  10076         .load_tlv,
  10077         .lea_tlv,
  10078         .load_frame,
  10079         .lea_frame,
  10080         => switch (abi_size) {
  10081             0 => {},
  10082             1, 2, 4, 8 => {
  10083                 const src_reg = try self.copyToTmpRegister(ty, src_mcv);
  10084                 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  10085                 defer self.register_manager.unlockReg(src_lock);
  10086 
  10087                 try self.genSetMem(base, disp, ty, .{ .register = src_reg });
  10088             },
  10089             else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }),
  10090         },
  10091     }
  10092 }
  10093 
  10094 fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void {
  10095     try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
  10096     try self.genSetReg(.rdi, Type.usize, dst_ptr);
  10097     try self.genSetReg(.rsi, Type.usize, src_ptr);
  10098     try self.genSetReg(.rcx, Type.usize, len);
  10099     try self.asmOpOnly(.{ .@"rep _sb", .mov });
  10100 }
  10101 
  10102 fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void {
  10103     try self.spillRegisters(&.{ .rdi, .al, .rcx });
  10104     try self.genSetReg(.rdi, Type.usize, dst_ptr);
  10105     try self.genSetReg(.al, Type.u8, value);
  10106     try self.genSetReg(.rcx, Type.usize, len);
  10107     try self.asmOpOnly(.{ .@"rep _sb", .sto });
  10108 }
  10109 
  10110 fn genLazySymbolRef(
  10111     self: *Self,
  10112     comptime tag: Mir.Inst.Tag,
  10113     reg: Register,
  10114     lazy_sym: link.File.LazySymbol,
  10115 ) InnerError!void {
  10116     if (self.bin_file.cast(link.File.Elf)) |elf_file| {
  10117         const atom_index = elf_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
  10118             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  10119         const atom = elf_file.getAtom(atom_index);
  10120         _ = try atom.getOrCreateOffsetTableEntry(elf_file);
  10121         const got_addr = atom.getOffsetTableAddress(elf_file);
  10122         const got_mem =
  10123             Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(got_addr) });
  10124         switch (tag) {
  10125             .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem),
  10126             .call => try self.asmMemory(.{ ._, .call }, got_mem),
  10127             else => unreachable,
  10128         }
  10129         switch (tag) {
  10130             .lea, .call => {},
  10131             .mov => try self.asmRegisterMemory(
  10132                 .{ ._, tag },
  10133                 reg.to64(),
  10134                 Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }),
  10135             ),
  10136             else => unreachable,
  10137         }
  10138     } else if (self.bin_file.cast(link.File.Plan9)) |p9_file| {
  10139         const atom_index = p9_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
  10140             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  10141         var atom = p9_file.getAtom(atom_index);
  10142         _ = atom.getOrCreateOffsetTableEntry(p9_file);
  10143         const got_addr = atom.getOffsetTableAddress(p9_file);
  10144         const got_mem =
  10145             Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(got_addr) });
  10146         switch (tag) {
  10147             .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem),
  10148             .call => try self.asmMemory(.{ ._, .call }, got_mem),
  10149             else => unreachable,
  10150         }
  10151         switch (tag) {
  10152             .lea, .call => {},
  10153             .mov => try self.asmRegisterMemory(
  10154                 .{ ._, tag },
  10155                 reg.to64(),
  10156                 Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }),
  10157             ),
  10158             else => unreachable,
  10159         }
  10160     } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
  10161         const atom_index = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
  10162             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  10163         const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
  10164         switch (tag) {
  10165             .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }),
  10166             .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }),
  10167             else => unreachable,
  10168         }
  10169         switch (tag) {
  10170             .lea, .mov => {},
  10171             .call => try self.asmRegister(.{ ._, .call }, reg),
  10172             else => unreachable,
  10173         }
  10174     } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
  10175         const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
  10176             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  10177         const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?;
  10178         switch (tag) {
  10179             .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }),
  10180             .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }),
  10181             else => unreachable,
  10182         }
  10183         switch (tag) {
  10184             .lea, .mov => {},
  10185             .call => try self.asmRegister(.{ ._, .call }, reg),
  10186             else => unreachable,
  10187         }
  10188     } else {
  10189         return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)});
  10190     }
  10191 }
  10192 
  10193 fn airIntFromPtr(self: *Self, inst: Air.Inst.Index) !void {
  10194     const un_op = self.air.instructions.items(.data)[inst].un_op;
  10195     const result = result: {
  10196         // TODO: handle case where the operand is a slice not a raw pointer
  10197         const src_mcv = try self.resolveInst(un_op);
  10198         if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv;
  10199 
  10200         const dst_mcv = try self.allocRegOrMem(inst, true);
  10201         const dst_ty = self.typeOfIndex(inst);
  10202         try self.genCopy(dst_ty, dst_mcv, src_mcv);
  10203         break :result dst_mcv;
  10204     };
  10205     return self.finishAir(inst, result, .{ un_op, .none, .none });
  10206 }
  10207 
  10208 fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
  10209     const mod = self.bin_file.options.module.?;
  10210     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  10211     const dst_ty = self.typeOfIndex(inst);
  10212     const src_ty = self.typeOf(ty_op.operand);
  10213 
  10214     const result = result: {
  10215         const dst_rc = regClassForType(dst_ty, mod);
  10216         const src_rc = regClassForType(src_ty, mod);
  10217         const src_mcv = try self.resolveInst(ty_op.operand);
  10218 
  10219         const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  10220         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  10221 
  10222         const dst_mcv = if (dst_rc.supersetOf(src_rc) and
  10223             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10224             src_mcv
  10225         else dst: {
  10226             const dst_mcv = try self.allocRegOrMem(inst, true);
  10227             try self.genCopy(
  10228                 if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
  10229                 dst_mcv,
  10230                 src_mcv,
  10231             );
  10232             break :dst dst_mcv;
  10233         };
  10234 
  10235         const dst_signedness =
  10236             if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned;
  10237         const src_signedness =
  10238             if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
  10239         if (dst_signedness == src_signedness) break :result dst_mcv;
  10240 
  10241         const abi_size: u16 = @intCast(dst_ty.abiSize(mod));
  10242         const bit_size: u16 = @intCast(dst_ty.bitSize(mod));
  10243         if (abi_size * 8 <= bit_size) break :result dst_mcv;
  10244 
  10245         const dst_limbs_len = math.divCeil(i32, bit_size, 64) catch unreachable;
  10246         const high_reg = if (dst_mcv.isRegister())
  10247             dst_mcv.getReg().?
  10248         else
  10249             try self.copyToTmpRegister(
  10250                 Type.usize,
  10251                 dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
  10252             );
  10253         const high_lock = self.register_manager.lockReg(high_reg);
  10254         defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
  10255 
  10256         const high_ty = try mod.intType(dst_signedness, bit_size % 64);
  10257 
  10258         try self.truncateRegister(high_ty, high_reg);
  10259         if (!dst_mcv.isRegister()) try self.genCopy(
  10260             Type.usize,
  10261             dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
  10262             .{ .register = high_reg },
  10263         );
  10264         break :result dst_mcv;
  10265     };
  10266     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  10267 }
  10268 
  10269 fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
  10270     const mod = self.bin_file.options.module.?;
  10271     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  10272 
  10273     const slice_ty = self.typeOfIndex(inst);
  10274     const ptr_ty = self.typeOf(ty_op.operand);
  10275     const ptr = try self.resolveInst(ty_op.operand);
  10276     const array_ty = ptr_ty.childType(mod);
  10277     const array_len = array_ty.arrayLen(mod);
  10278 
  10279     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod));
  10280     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
  10281     try self.genSetMem(
  10282         .{ .frame = frame_index },
  10283         @intCast(ptr_ty.abiSize(mod)),
  10284         Type.usize,
  10285         .{ .immediate = array_len },
  10286     );
  10287 
  10288     const result = MCValue{ .load_frame = .{ .index = frame_index } };
  10289     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  10290 }
  10291 
  10292 fn airFloatFromInt(self: *Self, inst: Air.Inst.Index) !void {
  10293     const mod = self.bin_file.options.module.?;
  10294     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  10295 
  10296     const src_ty = self.typeOf(ty_op.operand);
  10297     const src_bits: u32 = @intCast(src_ty.bitSize(mod));
  10298     const src_signedness =
  10299         if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
  10300     const dst_ty = self.typeOfIndex(inst);
  10301 
  10302     const src_size = math.divCeil(u32, @max(switch (src_signedness) {
  10303         .signed => src_bits,
  10304         .unsigned => src_bits + 1,
  10305     }, 32), 8) catch unreachable;
  10306     if (src_size > 8) return self.fail("TODO implement airFloatFromInt from {} to {}", .{
  10307         src_ty.fmt(mod), dst_ty.fmt(mod),
  10308     });
  10309 
  10310     const src_mcv = try self.resolveInst(ty_op.operand);
  10311     const src_reg = if (src_mcv.isRegister())
  10312         src_mcv.getReg().?
  10313     else
  10314         try self.copyToTmpRegister(src_ty, src_mcv);
  10315     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  10316     defer self.register_manager.unlockReg(src_lock);
  10317 
  10318     if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);
  10319 
  10320     const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty, mod));
  10321     const dst_mcv = MCValue{ .register = dst_reg };
  10322     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  10323     defer self.register_manager.unlockReg(dst_lock);
  10324 
  10325     const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(mod)) {
  10326         .Float => switch (dst_ty.floatBits(self.target.*)) {
  10327             32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 },
  10328             64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 },
  10329             16, 80, 128 => null,
  10330             else => unreachable,
  10331         },
  10332         else => null,
  10333     }) orelse return self.fail("TODO implement airFloatFromInt from {} to {}", .{
  10334         src_ty.fmt(mod), dst_ty.fmt(mod),
  10335     });
  10336     const dst_alias = dst_reg.to128();
  10337     const src_alias = registerAlias(src_reg, src_size);
  10338     switch (mir_tag[0]) {
  10339         .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias),
  10340         else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias),
  10341     }
  10342 
  10343     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  10344 }
  10345 
  10346 fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void {
  10347     const mod = self.bin_file.options.module.?;
  10348     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  10349 
  10350     const src_ty = self.typeOf(ty_op.operand);
  10351     const dst_ty = self.typeOfIndex(inst);
  10352     const dst_bits: u32 = @intCast(dst_ty.bitSize(mod));
  10353     const dst_signedness =
  10354         if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned;
  10355 
  10356     const dst_size = math.divCeil(u32, @max(switch (dst_signedness) {
  10357         .signed => dst_bits,
  10358         .unsigned => dst_bits + 1,
  10359     }, 32), 8) catch unreachable;
  10360     if (dst_size > 8) return self.fail("TODO implement airIntFromFloat from {} to {}", .{
  10361         src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
  10362     });
  10363 
  10364     const src_mcv = try self.resolveInst(ty_op.operand);
  10365     const src_reg = if (src_mcv.isRegister())
  10366         src_mcv.getReg().?
  10367     else
  10368         try self.copyToTmpRegister(src_ty, src_mcv);
  10369     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  10370     defer self.register_manager.unlockReg(src_lock);
  10371 
  10372     const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty, mod));
  10373     const dst_mcv = MCValue{ .register = dst_reg };
  10374     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  10375     defer self.register_manager.unlockReg(dst_lock);
  10376 
  10377     try self.asmRegisterRegister(
  10378         @as(?Mir.Inst.FixedTag, switch (src_ty.zigTypeTag(mod)) {
  10379             .Float => switch (src_ty.floatBits(self.target.*)) {
  10380                 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si },
  10381                 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si },
  10382                 16, 80, 128 => null,
  10383                 else => unreachable,
  10384             },
  10385             else => null,
  10386         }) orelse return self.fail("TODO implement airIntFromFloat from {} to {}", .{
  10387             src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
  10388         }),
  10389         registerAlias(dst_reg, dst_size),
  10390         src_reg.to128(),
  10391     );
  10392 
  10393     if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg);
  10394 
  10395     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  10396 }
  10397 
  10398 fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
  10399     const mod = self.bin_file.options.module.?;
  10400     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
  10401     const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data;
  10402 
  10403     const ptr_ty = self.typeOf(extra.ptr);
  10404     const val_ty = self.typeOf(extra.expected_value);
  10405     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
  10406 
  10407     try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
  10408     const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
  10409     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
  10410 
  10411     const exp_mcv = try self.resolveInst(extra.expected_value);
  10412     if (val_abi_size > 8) {
  10413         const exp_addr_mcv: MCValue = switch (exp_mcv) {
  10414             .memory, .indirect, .load_frame => exp_mcv.address(),
  10415             else => .{ .register = try self.copyToTmpRegister(Type.usize, exp_mcv.address()) },
  10416         };
  10417         const exp_addr_lock =
  10418             if (exp_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  10419         defer if (exp_addr_lock) |lock| self.register_manager.unlockReg(lock);
  10420 
  10421         try self.genSetReg(.rax, Type.usize, exp_addr_mcv.deref());
  10422         try self.genSetReg(.rdx, Type.usize, exp_addr_mcv.offset(8).deref());
  10423     } else try self.genSetReg(.rax, val_ty, exp_mcv);
  10424 
  10425     const new_mcv = try self.resolveInst(extra.new_value);
  10426     const new_reg = if (val_abi_size > 8) new: {
  10427         const new_addr_mcv: MCValue = switch (new_mcv) {
  10428             .memory, .indirect, .load_frame => new_mcv.address(),
  10429             else => .{ .register = try self.copyToTmpRegister(Type.usize, new_mcv.address()) },
  10430         };
  10431         const new_addr_lock =
  10432             if (new_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  10433         defer if (new_addr_lock) |lock| self.register_manager.unlockReg(lock);
  10434 
  10435         try self.genSetReg(.rbx, Type.usize, new_addr_mcv.deref());
  10436         try self.genSetReg(.rcx, Type.usize, new_addr_mcv.offset(8).deref());
  10437         break :new null;
  10438     } else try self.copyToTmpRegister(val_ty, new_mcv);
  10439     const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
  10440     defer if (new_lock) |lock| self.register_manager.unlockReg(lock);
  10441 
  10442     const ptr_mcv = try self.resolveInst(extra.ptr);
  10443     const ptr_size = Memory.PtrSize.fromSize(val_abi_size);
  10444     const ptr_mem = switch (ptr_mcv) {
  10445         .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size),
  10446         else => Memory.sib(ptr_size, .{
  10447             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
  10448         }),
  10449     };
  10450     switch (ptr_mem) {
  10451         .sib, .rip => {},
  10452         .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
  10453     }
  10454     const ptr_lock = switch (ptr_mem.base()) {
  10455         .none, .frame => null,
  10456         .reg => |reg| self.register_manager.lockReg(reg),
  10457     };
  10458     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  10459 
  10460     try self.spillEflagsIfOccupied();
  10461     if (val_abi_size <= 8) try self.asmMemoryRegister(
  10462         .{ .@"lock _", .cmpxchg },
  10463         ptr_mem,
  10464         registerAlias(new_reg.?, val_abi_size),
  10465     ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
  10466 
  10467     const result: MCValue = result: {
  10468         if (self.liveness.isUnused(inst)) break :result .unreach;
  10469 
  10470         if (val_abi_size <= 8) {
  10471             self.eflags_inst = inst;
  10472             break :result .{ .register_overflow = .{ .reg = .rax, .eflags = .ne } };
  10473         }
  10474 
  10475         const dst_mcv = try self.allocRegOrMem(inst, false);
  10476         try self.genCopy(Type.usize, dst_mcv, .{ .register = .rax });
  10477         try self.genCopy(Type.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx });
  10478         try self.genCopy(Type.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne });
  10479         break :result dst_mcv;
  10480     };
  10481     return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
  10482 }
  10483 
  10484 fn atomicOp(
  10485     self: *Self,
  10486     ptr_mcv: MCValue,
  10487     val_mcv: MCValue,
  10488     ptr_ty: Type,
  10489     val_ty: Type,
  10490     unused: bool,
  10491     rmw_op: ?std.builtin.AtomicRmwOp,
  10492     order: std.builtin.AtomicOrder,
  10493 ) InnerError!MCValue {
  10494     const mod = self.bin_file.options.module.?;
  10495     const ptr_lock = switch (ptr_mcv) {
  10496         .register => |reg| self.register_manager.lockReg(reg),
  10497         else => null,
  10498     };
  10499     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  10500 
  10501     const val_lock = switch (val_mcv) {
  10502         .register => |reg| self.register_manager.lockReg(reg),
  10503         else => null,
  10504     };
  10505     defer if (val_lock) |lock| self.register_manager.unlockReg(lock);
  10506 
  10507     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
  10508     const ptr_size = Memory.PtrSize.fromSize(val_abi_size);
  10509     const ptr_mem = switch (ptr_mcv) {
  10510         .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size),
  10511         else => Memory.sib(ptr_size, .{
  10512             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
  10513         }),
  10514     };
  10515     switch (ptr_mem) {
  10516         .sib, .rip => {},
  10517         .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
  10518     }
  10519     const mem_lock = switch (ptr_mem.base()) {
  10520         .none, .frame => null,
  10521         .reg => |reg| self.register_manager.lockReg(reg),
  10522     };
  10523     defer if (mem_lock) |lock| self.register_manager.unlockReg(lock);
  10524 
  10525     const method: enum { lock, loop, libcall } = if (val_ty.isRuntimeFloat())
  10526         .loop
  10527     else switch (rmw_op orelse .Xchg) {
  10528         .Xchg,
  10529         .Add,
  10530         .Sub,
  10531         => if (val_abi_size <= 8) .lock else if (val_abi_size <= 16) .loop else .libcall,
  10532         .And,
  10533         .Or,
  10534         .Xor,
  10535         => if (val_abi_size <= 8 and unused) .lock else if (val_abi_size <= 16) .loop else .libcall,
  10536         .Nand,
  10537         .Max,
  10538         .Min,
  10539         => if (val_abi_size <= 16) .loop else .libcall,
  10540     };
  10541     switch (method) {
  10542         .lock => {
  10543             const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) {
  10544                 .Xchg => if (unused) .mov else .xchg,
  10545                 .Add => if (unused) .add else .xadd,
  10546                 .Sub => if (unused) .sub else .xadd,
  10547                 .And => .@"and",
  10548                 .Or => .@"or",
  10549                 .Xor => .xor,
  10550                 else => unreachable,
  10551             } else switch (order) {
  10552                 .Unordered, .Monotonic, .Release, .AcqRel => .mov,
  10553                 .Acquire => unreachable,
  10554                 .SeqCst => .xchg,
  10555             };
  10556 
  10557             const dst_reg = try self.register_manager.allocReg(null, gp);
  10558             const dst_mcv = MCValue{ .register = dst_reg };
  10559             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  10560             defer self.register_manager.unlockReg(dst_lock);
  10561 
  10562             try self.genSetReg(dst_reg, val_ty, val_mcv);
  10563             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
  10564                 try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv);
  10565             }
  10566             try self.asmMemoryRegister(
  10567                 switch (tag) {
  10568                     .mov, .xchg => .{ ._, tag },
  10569                     .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag },
  10570                     else => unreachable,
  10571                 },
  10572                 ptr_mem,
  10573                 registerAlias(dst_reg, val_abi_size),
  10574             );
  10575 
  10576             return if (unused) .unreach else dst_mcv;
  10577         },
  10578         .loop => _ = if (val_abi_size <= 8) {
  10579             const tmp_reg = try self.register_manager.allocReg(null, gp);
  10580             const tmp_mcv = MCValue{ .register = tmp_reg };
  10581             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  10582             defer self.register_manager.unlockReg(tmp_lock);
  10583 
  10584             try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem);
  10585             const loop: u32 = @intCast(self.mir_instructions.len);
  10586             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
  10587                 try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax });
  10588             }
  10589             if (rmw_op) |op| switch (op) {
  10590                 .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv),
  10591                 .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv),
  10592                 .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv),
  10593                 .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv),
  10594                 .Nand => {
  10595                     try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv);
  10596                     try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv);
  10597                 },
  10598                 .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv),
  10599                 .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv),
  10600                 .Min, .Max => {
  10601                     const cc: Condition = switch (if (val_ty.isAbiInt(mod))
  10602                         val_ty.intInfo(mod).signedness
  10603                     else
  10604                         .unsigned) {
  10605                         .unsigned => switch (op) {
  10606                             .Min => .a,
  10607                             .Max => .b,
  10608                             else => unreachable,
  10609                         },
  10610                         .signed => switch (op) {
  10611                             .Min => .g,
  10612                             .Max => .l,
  10613                             else => unreachable,
  10614                         },
  10615                     };
  10616 
  10617                     try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
  10618                     const cmov_abi_size = @max(val_abi_size, 2);
  10619                     switch (val_mcv) {
  10620                         .register => |val_reg| try self.asmCmovccRegisterRegister(
  10621                             registerAlias(tmp_reg, cmov_abi_size),
  10622                             registerAlias(val_reg, cmov_abi_size),
  10623                             cc,
  10624                         ),
  10625                         .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
  10626                             registerAlias(tmp_reg, cmov_abi_size),
  10627                             val_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
  10628                             cc,
  10629                         ),
  10630                         else => {
  10631                             const val_reg = try self.copyToTmpRegister(val_ty, val_mcv);
  10632                             try self.asmCmovccRegisterRegister(
  10633                                 registerAlias(tmp_reg, cmov_abi_size),
  10634                                 registerAlias(val_reg, cmov_abi_size),
  10635                                 cc,
  10636                             );
  10637                         },
  10638                     }
  10639                 },
  10640             };
  10641             try self.asmMemoryRegister(
  10642                 .{ .@"lock _", .cmpxchg },
  10643                 ptr_mem,
  10644                 registerAlias(tmp_reg, val_abi_size),
  10645             );
  10646             _ = try self.asmJccReloc(loop, .ne);
  10647             return if (unused) .unreach else .{ .register = .rax };
  10648         } else {
  10649             try self.asmRegisterMemory(.{ ._, .mov }, .rax, Memory.sib(.qword, .{
  10650                 .base = ptr_mem.sib.base,
  10651                 .scale_index = ptr_mem.scaleIndex(),
  10652                 .disp = ptr_mem.sib.disp + 0,
  10653             }));
  10654             try self.asmRegisterMemory(.{ ._, .mov }, .rdx, Memory.sib(.qword, .{
  10655                 .base = ptr_mem.sib.base,
  10656                 .scale_index = ptr_mem.scaleIndex(),
  10657                 .disp = ptr_mem.sib.disp + 8,
  10658             }));
  10659             const loop: u32 = @intCast(self.mir_instructions.len);
  10660             const val_mem_mcv: MCValue = switch (val_mcv) {
  10661                 .memory, .indirect, .load_frame => val_mcv,
  10662                 else => .{ .indirect = .{
  10663                     .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()),
  10664                 } },
  10665             };
  10666             const val_lo_mem = val_mem_mcv.mem(.qword);
  10667             const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword);
  10668             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
  10669                 try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax);
  10670                 try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx);
  10671             }
  10672             if (rmw_op) |op| switch (op) {
  10673                 .Xchg => {
  10674                     try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem);
  10675                     try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem);
  10676                 },
  10677                 .Add => {
  10678                     try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem);
  10679                     try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem);
  10680                 },
  10681                 .Sub => {
  10682                     try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem);
  10683                     try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem);
  10684                 },
  10685                 .And => {
  10686                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
  10687                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
  10688                 },
  10689                 .Nand => {
  10690                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
  10691                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
  10692                     try self.asmRegister(.{ ._, .not }, .rbx);
  10693                     try self.asmRegister(.{ ._, .not }, .rcx);
  10694                 },
  10695                 .Or => {
  10696                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem);
  10697                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem);
  10698                 },
  10699                 .Xor => {
  10700                     try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem);
  10701                     try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem);
  10702                 },
  10703                 else => return self.fail("TODO implement x86 atomic loop for {} {s}", .{
  10704                     val_ty.fmt(self.bin_file.options.module.?), @tagName(op),
  10705                 }),
  10706             };
  10707             try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
  10708             _ = try self.asmJccReloc(loop, .ne);
  10709 
  10710             if (unused) return .unreach;
  10711             const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
  10712             try self.asmMemoryRegister(
  10713                 .{ ._, .mov },
  10714                 Memory.sib(.qword, .{
  10715                     .base = .{ .frame = dst_mcv.load_frame.index },
  10716                     .disp = dst_mcv.load_frame.off + 0,
  10717                 }),
  10718                 .rax,
  10719             );
  10720             try self.asmMemoryRegister(
  10721                 .{ ._, .mov },
  10722                 Memory.sib(.qword, .{
  10723                     .base = .{ .frame = dst_mcv.load_frame.index },
  10724                     .disp = dst_mcv.load_frame.off + 8,
  10725                 }),
  10726                 .rdx,
  10727             );
  10728             return dst_mcv;
  10729         },
  10730         .libcall => return self.fail("TODO implement x86 atomic libcall", .{}),
  10731     }
  10732 }
  10733 
  10734 fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
  10735     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
  10736     const extra = self.air.extraData(Air.AtomicRmw, pl_op.payload).data;
  10737 
  10738     try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
  10739     const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
  10740     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
  10741 
  10742     const unused = self.liveness.isUnused(inst);
  10743 
  10744     const ptr_ty = self.typeOf(pl_op.operand);
  10745     const ptr_mcv = try self.resolveInst(pl_op.operand);
  10746 
  10747     const val_ty = self.typeOf(extra.operand);
  10748     const val_mcv = try self.resolveInst(extra.operand);
  10749 
  10750     const result =
  10751         try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
  10752     return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none });
  10753 }
  10754 
  10755 fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
  10756     const atomic_load = self.air.instructions.items(.data)[inst].atomic_load;
  10757 
  10758     const ptr_ty = self.typeOf(atomic_load.ptr);
  10759     const ptr_mcv = try self.resolveInst(atomic_load.ptr);
  10760     const ptr_lock = switch (ptr_mcv) {
  10761         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10762         else => null,
  10763     };
  10764     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  10765 
  10766     const dst_mcv =
  10767         if (self.reuseOperand(inst, atomic_load.ptr, 0, ptr_mcv))
  10768         ptr_mcv
  10769     else
  10770         try self.allocRegOrMem(inst, true);
  10771 
  10772     try self.load(dst_mcv, ptr_ty, ptr_mcv);
  10773     return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none });
  10774 }
  10775 
  10776 fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
  10777     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
  10778 
  10779     const ptr_ty = self.typeOf(bin_op.lhs);
  10780     const ptr_mcv = try self.resolveInst(bin_op.lhs);
  10781 
  10782     const val_ty = self.typeOf(bin_op.rhs);
  10783     const val_mcv = try self.resolveInst(bin_op.rhs);
  10784 
  10785     const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
  10786     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  10787 }
  10788 
  10789 fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
  10790     const mod = self.bin_file.options.module.?;
  10791     if (safety) {
  10792         // TODO if the value is undef, write 0xaa bytes to dest
  10793     } else {
  10794         // TODO if the value is undef, don't lower this instruction
  10795     }
  10796 
  10797     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
  10798 
  10799     const dst_ptr = try self.resolveInst(bin_op.lhs);
  10800     const dst_ptr_ty = self.typeOf(bin_op.lhs);
  10801     const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) {
  10802         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10803         else => null,
  10804     };
  10805     defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);
  10806 
  10807     const src_val = try self.resolveInst(bin_op.rhs);
  10808     const elem_ty = self.typeOf(bin_op.rhs);
  10809     const src_val_lock: ?RegisterLock = switch (src_val) {
  10810         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10811         else => null,
  10812     };
  10813     defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock);
  10814 
  10815     const elem_abi_size: u31 = @intCast(elem_ty.abiSize(mod));
  10816 
  10817     if (elem_abi_size == 1) {
  10818         const ptr: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
  10819             // TODO: this only handles slices stored in the stack
  10820             .Slice => dst_ptr,
  10821             .One => dst_ptr,
  10822             .C, .Many => unreachable,
  10823         };
  10824         const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
  10825             // TODO: this only handles slices stored in the stack
  10826             .Slice => dst_ptr.address().offset(8).deref(),
  10827             .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) },
  10828             .C, .Many => unreachable,
  10829         };
  10830         const len_lock: ?RegisterLock = switch (len) {
  10831             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10832             else => null,
  10833         };
  10834         defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
  10835 
  10836         try self.genInlineMemset(ptr, src_val, len);
  10837         return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  10838     }
  10839 
  10840     // Store the first element, and then rely on memcpy copying forwards.
  10841     // Length zero requires a runtime check - so we handle arrays specially
  10842     // here to elide it.
  10843     switch (dst_ptr_ty.ptrSize(mod)) {
  10844         .Slice => {
  10845             const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(mod);
  10846 
  10847             // TODO: this only handles slices stored in the stack
  10848             const ptr = dst_ptr;
  10849             const len = dst_ptr.address().offset(8).deref();
  10850 
  10851             // Used to store the number of elements for comparison.
  10852             // After comparison, updated to store number of bytes needed to copy.
  10853             const len_reg = try self.register_manager.allocReg(null, gp);
  10854             const len_mcv: MCValue = .{ .register = len_reg };
  10855             const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
  10856             defer self.register_manager.unlockReg(len_lock);
  10857 
  10858             try self.genSetReg(len_reg, Type.usize, len);
  10859 
  10860             const skip_reloc = try self.asmJccReloc(undefined, .z);
  10861             try self.store(slice_ptr_ty, ptr, src_val);
  10862 
  10863             const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
  10864             const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
  10865             const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
  10866             defer self.register_manager.unlockReg(second_elem_ptr_lock);
  10867 
  10868             try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
  10869                 .reg = try self.copyToTmpRegister(Type.usize, ptr),
  10870                 .off = elem_abi_size,
  10871             } });
  10872 
  10873             try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 });
  10874             try self.asmRegisterRegisterImmediate(
  10875                 .{ .i_, .mul },
  10876                 len_reg,
  10877                 len_reg,
  10878                 Immediate.u(elem_abi_size),
  10879             );
  10880             try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv);
  10881 
  10882             try self.performReloc(skip_reloc);
  10883         },
  10884         .One => {
  10885             const elem_ptr_ty = try mod.singleMutPtrType(elem_ty);
  10886 
  10887             const len = dst_ptr_ty.childType(mod).arrayLen(mod);
  10888 
  10889             assert(len != 0); // prevented by Sema
  10890             try self.store(elem_ptr_ty, dst_ptr, src_val);
  10891 
  10892             const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
  10893             const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
  10894             const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
  10895             defer self.register_manager.unlockReg(second_elem_ptr_lock);
  10896 
  10897             try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
  10898                 .reg = try self.copyToTmpRegister(Type.usize, dst_ptr),
  10899                 .off = elem_abi_size,
  10900             } });
  10901 
  10902             const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
  10903             try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy);
  10904         },
  10905         .C, .Many => unreachable,
  10906     }
  10907 
  10908     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  10909 }
  10910 
  10911 fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers the AIR memcpy instruction. `bin_op.lhs` is the destination, either a
            // slice or a single-item pointer to an array; `bin_op.rhs` is the source pointer.
            // The copy is emitted by `genInlineMemcpy`, which is handed a length in bytes, so
            // the element count of the destination is scaled by the element ABI size first.
  10912     const mod = self.bin_file.options.module.?;
  10913     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
  10914 
  10915     const dst_ptr = try self.resolveInst(bin_op.lhs);
  10916     const dst_ptr_ty = self.typeOf(bin_op.lhs);
            // Operand registers stay locked so temporaries allocated below cannot reuse them.
  10917     const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) {
  10918         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10919         else => null,
  10920     };
  10921     defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);
  10922 
  10923     const src_ptr = try self.resolveInst(bin_op.rhs);
  10924     const src_ptr_lock: ?RegisterLock = switch (src_ptr) {
  10925         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10926         else => null,
  10927     };
  10928     defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock);
  10929 
            // Number of bytes to copy.
  10930     const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
  10931         .Slice => len: {
                    // The slice stores its length in elements at offset 8; load it into a
                    // temporary and multiply by the element size, as airMemset does.
                    const elem_abi_size: u31 = @intCast(dst_ptr_ty.childType(mod).abiSize(mod));
                    const len_reg = try self.copyToTmpRegister(
                        Type.usize,
                        dst_ptr.address().offset(8).deref(),
                    );
                    if (elem_abi_size != 1) try self.asmRegisterRegisterImmediate(
                        .{ .i_, .mul },
                        len_reg,
                        len_reg,
                        Immediate.u(elem_abi_size),
                    );
                    break :len .{ .register = len_reg };
                },
  10932         .One => len: {
                    // Pointer to array: the byte count is known at compile time.
                    const array_ty = dst_ptr_ty.childType(mod);
                    break :len .{
                        .immediate = array_ty.arrayLen(mod) * array_ty.childType(mod).abiSize(mod),
                    };
                },
  10933         .C, .Many => unreachable,
  10934     };
  10935     const len_lock: ?RegisterLock = switch (len) {
  10936         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  10937         else => null,
  10938     };
  10939     defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
  10940 
  10941     // TODO: dst_ptr and src_ptr could be slices rather than raw pointers
  10942     try self.genInlineMemcpy(dst_ptr, src_ptr, len);
  10943 
  10944     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  10945 }
  10946 
  10947 fn airTagName(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers the AIR instruction whose `un_op` operand is an enum value by calling a
            // lazily generated code symbol keyed on the enum type's owner decl. The result is
            // written to memory allocated for `inst`, whose address is passed to the callee.
  10948     const mod = self.bin_file.options.module.?;
  10949     const un_op = self.air.instructions.items(.data)[inst].un_op;
  10950     const inst_ty = self.typeOfIndex(inst);
  10951     const enum_ty = self.typeOf(un_op);
  10952 
  10953     // We need a properly aligned and sized call frame to be able to call this function.
  10954     {
                // Grow the `call_frame` allocation so it is at least as large and as aligned as
                // a frame allocation for the result type.
  10955         const needed_call_frame = FrameAlloc.init(.{
  10956             .size = inst_ty.abiSize(mod),
  10957             .alignment = inst_ty.abiAlignment(mod),
  10958         });
  10959         const frame_allocs_slice = self.frame_allocs.slice();
  10960         const stack_frame_size =
  10961             &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)];
  10962         stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
  10963         const stack_frame_align =
  10964             &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)];
  10965         stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align);
  10966     }
  10967 
            // Spill eflags (if occupied) and the registers reported by
            // `abi.getCallerPreservedRegs` before the call is emitted.
  10968     try self.spillEflagsIfOccupied();
  10969     try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*));
  10970 
  10971     const param_regs = abi.getCAbiIntParamRegs(self.target.*);
  10972 
            // First integer parameter register: address of the result memory.
  10973     const dst_mcv = try self.allocRegOrMem(inst, false);
  10974     try self.genSetReg(param_regs[0], Type.usize, dst_mcv.address());
  10975 
            // Second integer parameter register: the enum value itself.
  10976     const operand = try self.resolveInst(un_op);
  10977     try self.genSetReg(param_regs[1], enum_ty, operand);
  10978 
            // NOTE(review): `.rax` is presumably a scratch register for materializing the
            // symbol address; confirm against `genLazySymbolRef`.
  10979     try self.genLazySymbolRef(
  10980         .call,
  10981         .rax,
  10982         link.File.LazySymbol.initDecl(.code, enum_ty.getOwnerDecl(mod), mod),
  10983     );
  10984 
  10985     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  10986 }
  10987 
  10988 fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers the AIR instruction that maps the error value `un_op` to a two-word result
            // read out of a lazily generated constant-data symbol: an address is stored at
            // offset 0 of the result memory and a length at offset 8.
  10989     const mod = self.bin_file.options.module.?;
  10990     const un_op = self.air.instructions.items(.data)[inst].un_op;
  10991 
            // The error value is copied into a temporary register because it is used as an
            // index register below.
  10992     const err_ty = self.typeOf(un_op);
  10993     const err_mcv = try self.resolveInst(un_op);
  10994     const err_reg = try self.copyToTmpRegister(err_ty, err_mcv);
  10995     const err_lock = self.register_manager.lockRegAssumeUnused(err_reg);
  10996     defer self.register_manager.unlockReg(err_lock);
  10997 
            // addr_reg = address of the `.const_data` lazy symbol that has no owner decl.
            // NOTE(review): presumably the global error name table; confirm in the linker.
  10998     const addr_reg = try self.register_manager.allocReg(null, gp);
  10999     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  11000     defer self.register_manager.unlockReg(addr_lock);
  11001     try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod));
  11002 
  11003     const start_reg = try self.register_manager.allocReg(null, gp);
  11004     const start_lock = self.register_manager.lockRegAssumeUnused(start_reg);
  11005     defer self.register_manager.unlockReg(start_lock);
  11006 
  11007     const end_reg = try self.register_manager.allocReg(null, gp);
  11008     const end_lock = self.register_manager.lockRegAssumeUnused(end_reg);
  11009     defer self.register_manager.unlockReg(end_lock);
  11010 
  11011     try self.truncateRegister(err_ty, err_reg.to32());
  11012 
            // start = 32-bit value at addr + err * 4 + 4
            // end   = 32-bit value at addr + err * 4 + 8 (the following 32-bit entry)
  11013     try self.asmRegisterMemory(
  11014         .{ ._, .mov },
  11015         start_reg.to32(),
  11016         Memory.sib(.dword, .{
  11017             .base = .{ .reg = addr_reg.to64() },
  11018             .scale_index = .{ .scale = 4, .index = err_reg.to64() },
  11019             .disp = 4,
  11020         }),
  11021     );
  11022     try self.asmRegisterMemory(
  11023         .{ ._, .mov },
  11024         end_reg.to32(),
  11025         Memory.sib(.dword, .{
  11026             .base = .{ .reg = addr_reg.to64() },
  11027             .scale_index = .{ .scale = 4, .index = err_reg.to64() },
  11028             .disp = 8,
  11029         }),
  11030     );
            // end = end - start
  11031     try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32());
            // start = addr + start, turning the loaded offset into an address.
  11032     try self.asmRegisterMemory(
  11033         .{ ._, .lea },
  11034         start_reg.to64(),
  11035         Memory.sib(.byte, .{
  11036             .base = .{ .reg = addr_reg.to64() },
  11037             .scale_index = .{ .scale = 1, .index = start_reg.to64() },
  11038             .disp = 0,
  11039         }),
  11040     );
            // end = end - 1, computed with `lea`.
            // NOTE(review): presumably drops a trailing terminator byte from the length; confirm.
  11041     try self.asmRegisterMemory(
  11042         .{ ._, .lea },
  11043         end_reg.to32(),
  11044         Memory.sib(.byte, .{
  11045             .base = .{ .reg = end_reg.to64() },
  11046             .disp = -1,
  11047         }),
  11048     );
  11049 
            // Store the address at offset 0 and the length at offset 8 of the result memory.
            // The `.load_frame` accesses below require `dst_mcv` to be a frame location.
  11050     const dst_mcv = try self.allocRegOrMem(inst, false);
  11051     try self.asmMemoryRegister(
  11052         .{ ._, .mov },
  11053         Memory.sib(.qword, .{
  11054             .base = .{ .frame = dst_mcv.load_frame.index },
  11055             .disp = dst_mcv.load_frame.off,
  11056         }),
  11057         start_reg.to64(),
  11058     );
  11059     try self.asmMemoryRegister(
  11060         .{ ._, .mov },
  11061         Memory.sib(.qword, .{
  11062             .base = .{ .frame = dst_mcv.load_frame.index },
  11063             .disp = dst_mcv.load_frame.off + 8,
  11064         }),
  11065         end_reg.to64(),
  11066     );
  11067 
  11068     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  11069 }
  11070 
  11071 fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers AIR splat: broadcasts the scalar operand into every lane of the result
            // vector. Only f32, f64 and f128 element types with the vector lengths handled
            // below are implemented; every other case fails with a TODO error.
  11072     const mod = self.bin_file.options.module.?;
  11073     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
  11074     const vector_ty = self.typeOfIndex(inst);
  11075     const dst_rc = regClassForType(vector_ty, mod);
  11076     const scalar_ty = vector_ty.scalarType(mod);
  11077 
  11078     const src_mcv = try self.resolveInst(ty_op.operand);
  11079     const result: MCValue = result: {
                // Every implemented case leaves via `break :result`; falling out of the switch
                // reaches the TODO failure at the bottom of this block.
  11080         switch (scalar_ty.zigTypeTag(mod)) {
  11081             else => {},
  11082             .Float => switch (scalar_ty.floatBits(self.target.*)) {
  11083                 32 => switch (vector_ty.vectorLen(mod)) {
  11084                     1 => {
                                // A one-element vector is just the scalar.
  11085                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  11086                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11087                         try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  11088                         break :result .{ .register = dst_reg };
  11089                     },
  11090                     2...4 => {
  11091                         if (self.hasFeature(.avx)) {
  11092                             const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11093                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
  11094                                 .{ .v_ss, .broadcast },
  11095                                 dst_reg.to128(),
  11096                                 src_mcv.mem(.dword),
  11097                             ) else {
  11098                                 const src_reg = if (src_mcv.isRegister())
  11099                                     src_mcv.getReg().?
  11100                                 else
  11101                                     try self.copyToTmpRegister(scalar_ty, src_mcv);
  11102                                 try self.asmRegisterRegisterRegisterImmediate(
  11103                                     .{ .v_ps, .shuf },
  11104                                     dst_reg.to128(),
  11105                                     src_reg.to128(),
  11106                                     src_reg.to128(),
  11107                                     Immediate.u(0),
  11108                                 );
  11109                             }
  11110                             break :result .{ .register = dst_reg };
  11111                         } else {
                                    // Without AVX the shuffle is destructive, so the scalar must
                                    // already live in the destination register.
  11112                             const dst_mcv = if (src_mcv.isRegister() and
  11113                                 self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  11114                                 src_mcv
  11115                             else
  11116                                 try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
  11117                             const dst_reg = dst_mcv.getReg().?;
  11118                             try self.asmRegisterRegisterImmediate(
  11119                                 .{ ._ps, .shuf },
  11120                                 dst_reg.to128(),
  11121                                 dst_reg.to128(),
  11122                                 Immediate.u(0),
  11123                             );
  11124                             break :result dst_mcv;
  11125                         }
  11126                     },
  11127                     5...8 => if (self.hasFeature(.avx)) {
  11128                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11129                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
  11130                             .{ .v_ss, .broadcast },
  11131                             dst_reg.to256(),
  11132                             src_mcv.mem(.dword),
  11133                         ) else {
  11134                             const src_reg = if (src_mcv.isRegister())
  11135                                 src_mcv.getReg().?
  11136                             else
  11137                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  11138                             if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
  11139                                 .{ .v_ss, .broadcast },
  11140                                 dst_reg.to256(),
  11141                                 src_reg.to128(),
  11142                             ) else {
                                        // Fill the low 128 bits, then copy them into the high half.
  11143                                 try self.asmRegisterRegisterRegisterImmediate(
  11144                                     .{ .v_ps, .shuf },
  11145                                     dst_reg.to128(),
  11146                                     src_reg.to128(),
  11147                                     src_reg.to128(),
  11148                                     Immediate.u(0),
  11149                                 );
  11150                                 try self.asmRegisterRegisterRegisterImmediate(
  11151                                     .{ .v_f128, .insert },
  11152                                     dst_reg.to256(),
  11153                                     dst_reg.to256(),
  11154                                     dst_reg.to128(),
  11155                                     Immediate.u(1),
  11156                                 );
  11157                             }
  11158                         }
  11159                         break :result .{ .register = dst_reg };
  11160                     },
  11161                     else => {},
  11162                 },
  11163                 64 => switch (vector_ty.vectorLen(mod)) {
  11164                     1 => {
  11165                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  11166                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11167                         try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  11168                         break :result .{ .register = dst_reg };
  11169                     },
  11170                     2 => {
  11171                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11172                         if (self.hasFeature(.sse3)) {
  11173                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
  11174                                 if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
  11175                                 dst_reg.to128(),
  11176                                 src_mcv.mem(.qword),
  11177                             ) else try self.asmRegisterRegister(
  11178                                 if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
  11179                                 dst_reg.to128(),
  11180                                 (if (src_mcv.isRegister())
  11181                                     src_mcv.getReg().?
  11182                                 else
  11183                                     try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
  11184                             );
  11185                             break :result .{ .register = dst_reg };
  11186                         } else {
  11187                             // No movddup without SSE3: place the scalar in the low quadword of
  11188                             // the destination, then duplicate it into the high quadword. movlhps
  11189                             // only writes the high half, so the low half must be set first.
  11190                             try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  11191                             try self.asmRegisterRegister(.{ ._ps, .movlh }, dst_reg.to128(), dst_reg.to128());
  11192                             break :result .{ .register = dst_reg };
  11193                         }
  11194                     },
  11195                     3...4 => if (self.hasFeature(.avx)) {
  11196                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11197                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
  11198                             .{ .v_sd, .broadcast },
  11199                             dst_reg.to256(),
  11200                             src_mcv.mem(.qword),
  11201                         ) else {
  11202                             const src_reg = if (src_mcv.isRegister())
  11203                                 src_mcv.getReg().?
  11204                             else
  11205                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  11206                             if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
  11207                                 .{ .v_sd, .broadcast },
  11208                                 dst_reg.to256(),
  11209                                 src_reg.to128(),
  11210                             ) else {
                                        // Fill the low 128 bits, then copy them into the high half.
  11211                                 try self.asmRegisterRegister(
  11212                                     .{ .v_, .movddup },
  11213                                     dst_reg.to128(),
  11214                                     src_reg.to128(),
  11215                                 );
  11216                                 try self.asmRegisterRegisterRegisterImmediate(
  11217                                     .{ .v_f128, .insert },
  11218                                     dst_reg.to256(),
  11219                                     dst_reg.to256(),
  11220                                     dst_reg.to128(),
  11221                                     Immediate.u(1),
  11222                                 );
  11223                             }
  11224                         }
  11225                         break :result .{ .register = dst_reg };
  11226                     },
  11227                     else => {},
  11228                 },
  11229                 128 => switch (vector_ty.vectorLen(mod)) {
  11230                     1 => {
  11231                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  11232                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11233                         try self.genSetReg(dst_reg, scalar_ty, src_mcv);
  11234                         break :result .{ .register = dst_reg };
  11235                     },
  11236                     2 => if (self.hasFeature(.avx)) {
  11237                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  11238                         if (src_mcv.isMemory()) try self.asmRegisterMemory(
  11239                             .{ .v_f128, .broadcast },
  11240                             dst_reg.to256(),
  11241                             src_mcv.mem(.xword),
  11242                         ) else {
  11243                             const src_reg = if (src_mcv.isRegister())
  11244                                 src_mcv.getReg().?
  11245                             else
  11246                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  11247                             try self.asmRegisterRegisterRegisterImmediate(
  11248                                 .{ .v_f128, .insert },
  11249                                 dst_reg.to256(),
  11250                                 src_reg.to256(),
  11251                                 src_reg.to128(),
  11252                                 Immediate.u(1),
  11253                             );
  11254                         }
  11255                         break :result .{ .register = dst_reg };
  11256                     },
  11257                     else => {},
  11258                 },
  11259                 16, 80 => {},
  11260                 else => unreachable,
  11261             },
  11262         }
  11263         return self.fail("TODO implement airSplat for {}", .{
  11264             vector_ty.fmt(self.bin_file.options.module.?),
  11265         });
  11266     };
  11267     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  11268 }
  11269 
  11270 fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
  11271     // Select has no x86_64 lowering yet: the `Air.Bin` payload is decoded and discarded,
  11272     // and the instruction is rejected with a TODO error.
  11273     const air_data = self.air.instructions.items(.data);
  11274     _ = self.air.extraData(Air.Bin, air_data[inst].pl_op.payload).data;
  11275     return self.fail("TODO implement airSelect for x86_64", .{});
  11276 }
  11277 
  11278 fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
  11279     // Shuffle has no x86_64 lowering yet: the `ty_pl` data is read and discarded, and the
  11280     // instruction is rejected with a TODO error.
  11281     _ = self.air.instructions.items(.data)[inst].ty_pl;
  11282     return self.fail("TODO implement airShuffle for x86_64", .{});
  11283 }
  11284 
  11285 fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
  11286     // Reduce has no x86_64 lowering yet: the `reduce` data is read and discarded, and the
  11287     // instruction is rejected with a TODO error.
  11288     _ = self.air.instructions.items(.data)[inst].reduce;
  11289     return self.fail("TODO implement airReduce for x86_64", .{});
  11290 }
  11291 
  11292 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers AIR aggregate_init: builds a struct or array value in a fresh frame
            // allocation from the element operands stored in `air.extra`. Vector results fail
            // with a TODO error; any other result type is unreachable.
  11293     const mod = self.bin_file.options.module.?;
  11294     const result_ty = self.typeOfIndex(inst);
  11295     const len: usize = @intCast(result_ty.arrayLen(mod));
  11296     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
  11297     const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra[ty_pl.payload..][0..len]);
  11298     const result: MCValue = result: {
  11299         switch (result_ty.zigTypeTag(mod)) {
  11300             .Struct => {
  11301                 const frame_index =
  11302                     try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod));
  11303                 if (result_ty.containerLayout(mod) == .Packed) {
  11304                     const struct_obj = mod.typeToStruct(result_ty).?;
                            // Zero the whole backing memory first; each runtime field is then
                            // ORed into place at its bit offset.
  11305                     try self.genInlineMemset(
  11306                         .{ .lea_frame = .{ .index = frame_index } },
  11307                         .{ .immediate = 0 },
  11308                         .{ .immediate = result_ty.abiSize(mod) },
  11309                     );
  11310                     for (elements, 0..) |elem, elem_i| {
                                // Fields with a comptime-known value are not stored.
  11311                         if ((try result_ty.structFieldValueComptime(mod, elem_i)) != null) continue;
  11312 
  11313                         const elem_ty = result_ty.structFieldType(elem_i, mod);
  11314                         const elem_bit_size: u32 = @intCast(elem_ty.bitSize(mod));
  11315                         if (elem_bit_size > 64) {
  11316                             return self.fail(
  11317                                 "TODO airAggregateInit implement packed structs with large fields",
  11318                                 .{},
  11319                             );
  11320                         }
                                // Split the field's bit offset into the byte offset of the
                                // ABI-sized chunk containing it and the bit position inside it.
  11321                         const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
  11322                         const elem_abi_bits = elem_abi_size * 8;
  11323                         const elem_off = struct_obj.packedFieldBitOffset(mod, elem_i);
  11324                         const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
  11325                         const elem_bit_off = elem_off % elem_abi_bits;
  11326                         const elem_mcv = try self.resolveInst(elem);
  11327                         const mat_elem_mcv = switch (elem_mcv) {
  11328                             .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  11329                             else => elem_mcv,
  11330                         };
  11331                         const elem_lock = switch (mat_elem_mcv) {
  11332                             .register => |reg| self.register_manager.lockReg(reg),
  11333                             .immediate => |imm| lock: {
                                        // ORing zero into zeroed memory is a no-op; skip the field.
  11334                                 if (imm == 0) continue;
  11335                                 break :lock null;
  11336                             },
  11337                             else => null,
  11338                         };
  11339                         defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
  11340                         const elem_reg = registerAlias(
  11341                             try self.copyToTmpRegister(elem_ty, mat_elem_mcv),
  11342                             elem_abi_size,
  11343                         );
                                // NOTE(review): presumably clears register bits beyond the field's
                                // bit width when the shift below would not push them all out; confirm.
  11344                         const elem_extra_bits = self.regExtraBits(elem_ty);
  11345                         if (elem_bit_off < elem_extra_bits) {
  11346                             try self.truncateRegister(elem_ty, elem_reg);
  11347                         }
                                // Shift the value to its bit position and OR it into the chunk.
  11348                         if (elem_bit_off > 0) try self.genShiftBinOpMir(
  11349                             .{ ._l, .sh },
  11350                             elem_ty,
  11351                             .{ .register = elem_reg },
  11352                             .{ .immediate = elem_bit_off },
  11353                         );
  11354                         try self.genBinOpMir(
  11355                             .{ ._, .@"or" },
  11356                             elem_ty,
  11357                             .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
  11358                             .{ .register = elem_reg },
  11359                         );
                                // The field spills into the following chunk: take a fresh copy,
                                // shift right by the number of bits already stored, and OR the
                                // remainder into the chunk at `elem_byte_off + elem_abi_size`.
  11360                         if (elem_bit_off > elem_extra_bits) {
  11361                             const reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv);
  11362                             if (elem_extra_bits > 0) {
  11363                                 try self.truncateRegister(elem_ty, registerAlias(reg, elem_abi_size));
  11364                             }
  11365                             try self.genShiftBinOpMir(
  11366                                 .{ ._r, .sh },
  11367                                 elem_ty,
  11368                                 .{ .register = reg },
  11369                                 .{ .immediate = elem_abi_bits - elem_bit_off },
  11370                             );
  11371                             try self.genBinOpMir(
  11372                                 .{ ._, .@"or" },
  11373                                 elem_ty,
  11374                                 .{ .load_frame = .{
  11375                                     .index = frame_index,
  11376                                     .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)),
  11377                                 } },
  11378                                 .{ .register = reg },
  11379                             );
  11380                         }
  11381                     }
  11382                 } else for (elements, 0..) |elem, elem_i| {
                            // Non-packed struct: store each runtime field at its byte offset.
  11383                     if ((try result_ty.structFieldValueComptime(mod, elem_i)) != null) continue;
  11384 
  11385                     const elem_ty = result_ty.structFieldType(elem_i, mod);
  11386                     const elem_off: i32 = @intCast(result_ty.structFieldOffset(elem_i, mod));
  11387                     const elem_mcv = try self.resolveInst(elem);
  11388                     const mat_elem_mcv = switch (elem_mcv) {
  11389                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  11390                         else => elem_mcv,
  11391                     };
  11392                     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
  11393                 }
  11394                 break :result .{ .load_frame = .{ .index = frame_index } };
  11395             },
  11396             .Array => {
  11397                 const frame_index =
  11398                     try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod));
  11399                 const elem_ty = result_ty.childType(mod);
  11400                 const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
  11401 
                        // Store element `i` at byte offset `elem_size * i`.
  11402                 for (elements, 0..) |elem, elem_i| {
  11403                     const elem_mcv = try self.resolveInst(elem);
  11404                     const mat_elem_mcv = switch (elem_mcv) {
  11405                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  11406                         else => elem_mcv,
  11407                     };
  11408                     const elem_off: i32 = @intCast(elem_size * elem_i);
  11409                     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
  11410                 }
                        // A sentinel, if the array type has one, goes right after the last element.
  11411                 if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem(
  11412                     .{ .frame = frame_index },
  11413                     @intCast(elem_size * elements.len),
  11414                     elem_ty,
  11415                     try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }),
  11416                 );
  11417                 break :result .{ .load_frame = .{ .index = frame_index } };
  11418             },
  11419             .Vector => return self.fail("TODO implement aggregate_init for vectors", .{}),
  11420             else => unreachable,
  11421         }
  11422     };
  11423 
            // Up to `Liveness.bpi - 1` operands go through `finishAir` in a fixed-size buffer
            // padded with `.none`; longer operand lists are fed one by one through the big-tomb
            // iterator instead.
  11424     if (elements.len <= Liveness.bpi - 1) {
  11425         var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
  11426         @memcpy(buf[0..elements.len], elements);
  11427         return self.finishAir(inst, result, buf);
  11428     }
  11429     var bt = self.liveness.iterateBigTomb(inst);
  11430     for (elements) |elem| self.feed(&bt, elem);
  11431     return self.finishAirResult(inst, result);
  11432 }
  11433 
  11434 fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers AIR union_init: produces a union value whose field `extra.field_index` is
            // initialized from the operand `extra.init`. When the layout has a tag, the tag's
            // integer value and the payload are both written to the result memory.
  11435     const mod = self.bin_file.options.module.?;
  11436     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
  11437     const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data;
  11438     const result: MCValue = result: {
  11439         const union_ty = self.typeOfIndex(inst);
  11440         const layout = union_ty.unionGetLayout(mod);
  11441 
  11442         const src_ty = self.typeOf(extra.init);
  11443         const src_mcv = try self.resolveInst(extra.init);
                // No tag to store: the result is the payload alone, so reuse the operand's
                // location when possible and copy it otherwise.
  11444         if (layout.tag_size == 0) {
  11445             if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
  11446 
  11447             const dst_mcv = try self.allocRegOrMem(inst, true);
  11448             try self.genCopy(union_ty, dst_mcv, src_mcv);
  11449             break :result dst_mcv;
  11450         }
  11451 
  11452         const dst_mcv = try self.allocRegOrMem(inst, false);
  11453 
                // Map the union field to the integer value of the tag enum field with the same name.
  11454         const union_obj = mod.typeToUnion(union_ty).?;
  11455         const field_name = union_obj.fields.keys()[extra.field_index];
  11456         const tag_ty = union_obj.tag_ty;
  11457         const field_index = tag_ty.enumFieldIndex(field_name, mod).?;
  11458         const tag_val = try mod.enumValueFieldIndex(tag_ty, field_index);
  11459         const tag_int_val = try tag_val.intFromEnum(tag_ty, mod);
  11460         const tag_int = tag_int_val.toUnsignedInt(mod);
                // The less-aligned part comes second: the tag sits after the payload when its
                // alignment is smaller than the payload's, and at offset 0 otherwise.
  11461         const tag_off: i32 = if (layout.tag_align < layout.payload_align)
  11462             @intCast(layout.payload_size)
  11463         else
  11464             0;
  11465         try self.genCopy(tag_ty, dst_mcv.address().offset(tag_off).deref(), .{ .immediate = tag_int });
  11466 
                // The payload takes the other position: offset 0, or right after the tag.
  11467         const pl_off: i32 = if (layout.tag_align < layout.payload_align)
  11468             0
  11469         else
  11470             @intCast(layout.tag_size);
  11471         try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv);
  11472 
  11473         break :result dst_mcv;
  11474     };
  11475     return self.finishAir(inst, result, .{ extra.init, .none, .none });
  11476 }
  11477 
  11478 fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
            // Emits no machine code here; only the pointer operand is handed to `finishAir`
            // together with an `.unreach` result.
  11479     const ptr_ref = self.air.instructions.items(.data)[inst].prefetch.ptr;
  11480     const operands = .{ ptr_ref, .none, .none };
  11481     return self.finishAir(inst, .unreach, operands);
  11482 }
  11482 
  /// Lowers the AIR `mul_add` instruction to one of the `fmadd132`, `fmadd213`
  /// or `fmadd231` MIR instructions.
  ///
  /// The AIR operands are `extra.lhs`, `extra.rhs` and `pl_op.operand`, in that
  /// order. Each one is assigned to one of the three instruction operand slots,
  /// and the instruction form is chosen to match that assignment.
  ///
  /// Fails with a "TODO" error when the target lacks the `fma` feature or when
  /// `mulAddFixedTag` has no instruction for the result type.
  fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.options.module.?;
      const pl_op = self.air.instructions.items(.data)[inst].pl_op;
      const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
      const ty = self.typeOfIndex(inst);

      if (!self.hasFeature(.fma)) return self.fail("TODO implement airMulAdd for {}", .{
          ty.fmt(mod),
      });

      const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand };
      var mcvs: [3]MCValue = undefined;
      var locks = [1]?RegisterManager.RegisterLock{null} ** 3;
      defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
      // `order[i]` is the 1-based instruction operand slot given to AIR operand
      // `i`; 0 means "not assigned yet". Bit `n` of `unused` is set while slot
      // `n + 1` is still free.
      var order = [1]u2{0} ** 3;
      var unused = std.StaticBitSet(3).initFull();
      // First pass: hand out the two slots with special requirements.
      // Slot 1 is also the result (see `finishAir` below), so it goes to an
      // operand whose register can be reused. Slot 3 is the only slot emitted as
      // a memory operand, so it goes to the first operand that lives in memory.
      for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| {
          const op_index: u2 = @intCast(op_i);
          mcv.* = try self.resolveInst(op);
          if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) {
              order[op_index] = 1;
              unused.unset(0);
          } else if (unused.isSet(2) and mcv.isMemory()) {
              order[op_index] = 3;
              unused.unset(2);
          }
          switch (mcv.*) {
              .register => |reg| lock.* = self.register_manager.lockReg(reg),
              else => {},
          }
      }
      // Second pass: the remaining operands take the lowest free slots. An
      // operand that lands in slot 1 is always copied to a fresh temporary
      // register; an operand in slot 2 or 3 is copied only when it is not
      // already in a register.
      for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| {
          if (mop_index.* != 0) continue;
          mop_index.* = 1 + @as(u2, @intCast(unused.toggleFirstSet().?));
          if (mop_index.* > 1 and mcv.isRegister()) continue;
          const reg = try self.copyToTmpRegister(ty, mcv.*);
          mcv.* = .{ .register = reg };
          if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock);
          lock.* = self.register_manager.lockRegAssumeUnused(reg);
      }

      // `order` is now a permutation of { 1, 2, 3 }, so the slot of the third AIR
      // operand alone selects the instruction form:
      //   { 2, 3, 1 } / { 3, 2, 1 } => fmadd231
      //   { 1, 3, 2 } / { 3, 1, 2 } => fmadd132
      //   { 1, 2, 3 } / { 2, 1, 3 } => fmadd213
      const fma_tag: Mir.Inst.Tag = switch (order[2]) {
          1 => .fmadd231,
          2 => .fmadd132,
          3 => .fmadd213,
          0 => unreachable, // every operand was assigned a slot above
      };
      const mir_tag = self.mulAddFixedTag(ty, fma_tag) orelse
          return self.fail("TODO implement airMulAdd for {}", .{ty.fmt(mod)});

      // Reorder the resolved operands from AIR order into instruction order.
      var mops: [3]MCValue = undefined;
      for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv;

      const abi_size: u32 = @intCast(ty.abiSize(mod));
      const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
      const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
      if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
          mir_tag,
          mop1_reg,
          mop2_reg,
          registerAlias(mops[2].getReg().?, abi_size),
      ) else try self.asmRegisterRegisterMemory(
          mir_tag,
          mop1_reg,
          mop2_reg,
          mops[2].mem(Memory.PtrSize.fromSize(abi_size)),
      );
      return self.finishAir(inst, mops[0], ops);
  }

  /// Pairs `fma_tag` with the `v_ss`/`v_sd`/`v_ps`/`v_pd` fixes matching `ty`.
  ///
  /// Returns null for types with no supported form: 16-, 80- and 128-bit floats,
  /// f32 vectors longer than 8 elements and f64 vectors longer than 4 elements.
  /// `ty` must be a float or a vector of floats.
  fn mulAddFixedTag(self: *Self, ty: Type, fma_tag: Mir.Inst.Tag) ?Mir.Inst.FixedTag {
      const mod = self.bin_file.options.module.?;
      return switch (ty.zigTypeTag(mod)) {
          .Float => switch (ty.floatBits(self.target.*)) {
              32 => .{ .v_ss, fma_tag },
              64 => .{ .v_sd, fma_tag },
              16, 80, 128 => null,
              else => unreachable,
          },
          .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
              .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
                  32 => switch (ty.vectorLen(mod)) {
                      1 => .{ .v_ss, fma_tag },
                      2...8 => .{ .v_ps, fma_tag },
                      else => null,
                  },
                  64 => switch (ty.vectorLen(mod)) {
                      1 => .{ .v_sd, fma_tag },
                      2...4 => .{ .v_pd, fma_tag },
                      else => null,
                  },
                  16, 80, 128 => null,
                  else => unreachable,
              },
              else => unreachable,
          },
          else => unreachable,
      };
  }
  11630 
  /// Returns the current short-term location of the value referred to by `ref`.
  ///
  /// Types without runtime bits resolve to `.none`. Instruction refs are looked
  /// up in `inst_tracking`. Interned constants are generated once through
  /// `genTypedValue` and cached in `const_tracking`.
  fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue {
      const mod = self.bin_file.options.module.?;
      const ref_ty = self.typeOf(ref);

      // If the type has no codegen bits, no need to store it.
      if (!ref_ty.hasRuntimeBitsIgnoreComptime(mod)) return .none;

      const resolved: MCValue = resolved: {
          if (Air.refToIndex(ref)) |inst| {
              break :resolved self.inst_tracking.getPtr(inst).?.short;
          }

          // Not an instruction, so it must be an interned constant.
          const interned = Air.refToInterned(ref).?;
          const entry = try self.const_tracking.getOrPut(self.gpa, interned);
          if (!entry.found_existing) {
              entry.value_ptr.* = InstTracking.init(try self.genTypedValue(.{
                  .ty = ref_ty,
                  .val = interned.toValue(),
              }));
          }
          break :resolved entry.value_ptr.short;
      };

      // A value with runtime bits must have a real location by now.
      return switch (resolved) {
          .none, .unreach, .dead => unreachable,
          else => resolved,
      };
  }
  11655 
  /// Returns the tracking entry for `inst`.
  ///
  /// The instruction must be tracked, and its short-term location must not be
  /// `.none`, `.unreach` or `.dead`.
  fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) *InstTracking {
      const tracked = self.inst_tracking.getPtr(inst).?;
      switch (tracked.short) {
          .none, .unreach, .dead => unreachable,
          else => return tracked,
      }
  }
  11663 
  /// Resolves `operand` and, if it is an immediate that does not fit within
  /// the integer type `T`, copies it into a temporary register instead.
  /// Every other kind of value is returned unchanged.
  ///
  /// Immediates are treated as unsigned, so for a signed `T` only the
  /// non-negative range (`T`'s bit count minus the sign bit) counts as fitting.
  ///
  /// A potential opportunity for future optimization here would be keeping track
  /// of the fact that the instruction is available both as an immediate
  /// and as a register.
  fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCValue {
      const mcv = try self.resolveInst(operand);
      const ti = @typeInfo(T).Int;
      switch (mcv) {
          .immediate => |imm| {
              // This immediate is unsigned.
              const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed));
              // `maxInt(U)` itself still fits, so only strictly larger values
              // have to be moved to a register.
              if (imm > math.maxInt(U)) {
                  return MCValue{ .register = try self.copyToTmpRegister(Type.usize, mcv) };
              }
          },
          else => {},
      }
      return mcv;
  }
  11684 
  /// Generates `arg_tv` through the shared `codegen.genTypedValue` and converts
  /// the result into this backend's `MCValue`.
  ///
  /// `load_got` and `load_tlv` results are mapped to `lea_got` and `lea_tlv`;
  /// all other variants keep their tag. On failure the error message is stored
  /// in `self.err_msg` and `error.CodegenFail` is returned.
  fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue {
      const mod = self.bin_file.options.module.?;
      const owner_decl = self.owner.getDecl(mod);
      const gen_result = try codegen.genTypedValue(self.bin_file, self.src_loc, arg_tv, owner_decl);
      switch (gen_result) {
          .fail => |msg| {
              self.err_msg = msg;
              return error.CodegenFail;
          },
          .mcv => |generic_mcv| return switch (generic_mcv) {
              .none => .none,
              .undef => .undef,
              .immediate => |imm| .{ .immediate = imm },
              .memory => |addr| .{ .memory = addr },
              .load_direct => |sym_index| .{ .load_direct = sym_index },
              .load_got => |sym_index| .{ .lea_got = sym_index },
              .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
          },
      }
  }
  11703 
  /// Locations of the arguments and return value of a call, as computed by
  /// `resolveCallingConventionValues`.
  const CallMCValues = struct {
      /// One location per parameter; freed by `deinit`.
      args: []MCValue,
      /// Where the return value is found.
      return_value: InstTracking,
      /// Number of stack bytes used by arguments.
      stack_byte_count: u31,
      /// Required stack alignment, in bytes.
      stack_align: u31,

      /// Frees `args` using `func.gpa` and invalidates `self`.
      fn deinit(self: *CallMCValues, func: *Self) void {
          const gpa = func.gpa;
          gpa.free(self.args);
          self.* = undefined;
      }
  };
  11715 
  /// Computes where each argument and the return value of a function with
  /// signature `fn_info` live, according to its calling convention.
  ///
  /// `var_args` are additional variadic arguments; their types are appended
  /// after the declared parameter types. Arguments passed on the stack are
  /// placed relative to `stack_frame_base`. Supported conventions are `.Naked`,
  /// `.C` and `.Unspecified`; anything else fails with a "TODO" error.
  ///
  /// Caller must call `CallMCValues.deinit`.
  fn resolveCallingConventionValues(
      self: *Self,
      fn_info: InternPool.Key.FuncType,
      var_args: []const Air.Inst.Ref,
      stack_frame_base: FrameIndex,
  ) !CallMCValues {
      const mod = self.bin_file.options.module.?;
      const ip = &mod.intern_pool;
      const cc = fn_info.cc;
      const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len);
      defer self.gpa.free(param_types);

      for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src| {
          dest.* = src.toType();
      }
      // TODO: promote var arg types
      for (param_types[fn_info.param_types.len..], var_args) |*param_ty, arg| {
          param_ty.* = self.typeOf(arg);
      }

      var result: CallMCValues = .{
          .args = try self.gpa.alloc(MCValue, param_types.len),
          // These undefined values must be populated before returning from this function.
          .return_value = undefined,
          .stack_byte_count = 0,
          .stack_align = undefined,
      };
      errdefer self.gpa.free(result.args);

      const ret_ty = fn_info.return_type.toType();

      switch (cc) {
          .Naked => {
              assert(result.args.len == 0);
              result.return_value = InstTracking.init(.unreach);
              result.stack_align = 8;
          },
          .C => {
              const int_param_regs = abi.getCAbiIntParamRegs(self.target.*);
              var param_reg_i: usize = 0;
              var param_sse_reg_i: usize = 0;
              result.stack_align = 16;

              switch (self.target.os.tag) {
                  .windows => {
                      // Reserve four pointer-sized slots of shadow stack space
                      // ahead of any stack-passed arguments.
                      result.stack_byte_count += @intCast(4 * Type.usize.abiSize(mod));
                  },
                  else => {},
              }

              // Return values
              if (ret_ty.zigTypeTag(mod) == .NoReturn) {
                  result.return_value = InstTracking.init(.unreach);
              } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) {
                  // TODO: is this even possible for C calling convention?
                  result.return_value = InstTracking.init(.none);
              } else {
                  const classes = switch (self.target.os.tag) {
                      .windows => &[1]abi.Class{abi.classifyWindows(ret_ty, mod)},
                      else => mem.sliceTo(&abi.classifySystemV(ret_ty, mod, .ret), .none),
                  };
                  if (classes.len > 1) {
                      return self.fail("TODO handle multiple classes per type", .{});
                  }
                  const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
                  result.return_value = switch (classes[0]) {
                      .integer => InstTracking.init(.{ .register = registerAlias(
                          ret_reg,
                          @intCast(ret_ty.abiSize(mod)),
                      ) }),
                      .float, .sse => InstTracking.init(.{ .register = .xmm0 }),
                      .memory => ret: {
                          // The result pointer takes up the next integer
                          // parameter register.
                          const ret_indirect_reg = int_param_regs[param_reg_i];
                          param_reg_i += 1;
                          break :ret .{
                              .short = .{ .indirect = .{ .reg = ret_reg } },
                              .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                          };
                      },
                      else => |class| return self.fail("TODO handle calling convention class {s}", .{
                          @tagName(class),
                      }),
                  };
              }

              // Input params
              for (param_types, result.args) |ty, *arg| {
                  assert(ty.hasRuntimeBitsIgnoreComptime(mod));

                  const classes = switch (self.target.os.tag) {
                      .windows => &[1]abi.Class{abi.classifyWindows(ty, mod)},
                      else => mem.sliceTo(&abi.classifySystemV(ty, mod, .arg), .none),
                  };
                  if (classes.len > 1) {
                      return self.fail("TODO handle multiple classes per type", .{});
                  }
                  switch (classes[0]) {
                      .integer => if (param_reg_i < int_param_regs.len) {
                          arg.* = .{ .register = int_param_regs[param_reg_i] };
                          param_reg_i += 1;
                          continue;
                      },
                      .float, .sse => switch (self.target.os.tag) {
                          // On Windows, float arguments share the position
                          // counter with integer arguments.
                          .windows => if (param_reg_i < 4) {
                              arg.* = .{
                                  .register = @enumFromInt(@intFromEnum(Register.xmm0) + param_reg_i),
                              };
                              param_reg_i += 1;
                              continue;
                          },
                          else => if (param_sse_reg_i < 8) {
                              arg.* = .{
                                  .register = @enumFromInt(@intFromEnum(Register.xmm0) + param_sse_reg_i),
                              };
                              param_sse_reg_i += 1;
                              continue;
                          },
                      },
                      .memory => {}, // fallthrough
                      else => |class| return self.fail("TODO handle calling convention class {s}", .{
                          @tagName(class),
                      }),
                  }

                  // No register was available (or the class is `.memory`):
                  // pass the argument on the stack.
                  arg.* = self.allocCallStackArg(&result, ty, stack_frame_base);
              }
          },
          .Unspecified => {
              result.stack_align = 16;

              // Return values
              if (ret_ty.zigTypeTag(mod) == .NoReturn) {
                  result.return_value = InstTracking.init(.unreach);
              } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) {
                  result.return_value = InstTracking.init(.none);
              } else {
                  const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
                  const ret_ty_size: u31 = @intCast(ret_ty.abiSize(mod));
                  if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) {
                      const aliased_reg = registerAlias(ret_reg, ret_ty_size);
                      result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
                  } else {
                      const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
                      result.return_value = .{
                          .short = .{ .indirect = .{ .reg = ret_reg } },
                          .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                      };
                  }
              }

              // Input params: every argument with runtime bits goes on the stack.
              for (param_types, result.args) |ty, *arg| {
                  if (!ty.hasRuntimeBitsIgnoreComptime(mod)) {
                      arg.* = .none;
                      continue;
                  }
                  arg.* = self.allocCallStackArg(&result, ty, stack_frame_base);
              }
          },
          else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}),
      }

      result.stack_byte_count = mem.alignForward(u31, result.stack_byte_count, result.stack_align);
      return result;
  }

  /// Reserves the next stack slot for an argument of type `ty`.
  ///
  /// Rounds `result.stack_byte_count` up to the ABI alignment of `ty`, returns
  /// a `.load_frame` location at that offset from `stack_frame_base`, and then
  /// advances `result.stack_byte_count` past the argument.
  fn allocCallStackArg(
      self: *Self,
      result: *CallMCValues,
      ty: Type,
      stack_frame_base: FrameIndex,
  ) MCValue {
      const mod = self.bin_file.options.module.?;
      const param_size: u31 = @intCast(ty.abiSize(mod));
      const param_align: u31 = @intCast(ty.abiAlignment(mod));
      result.stack_byte_count =
          mem.alignForward(u31, result.stack_byte_count, param_align);
      const slot: MCValue = .{ .load_frame = .{
          .index = stack_frame_base,
          .off = result.stack_byte_count,
      } };
      result.stack_byte_count += param_size;
      return slot;
  }
  11898 
  /// Reports whether safety checks are wanted: true for the `Debug` and
  /// `ReleaseSafe` optimize modes, false for `ReleaseFast` and `ReleaseSmall`.
  ///
  /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`.
  fn wantSafety(self: *Self) bool {
      const optimize_mode = self.bin_file.options.optimize_mode;
      return switch (optimize_mode) {
          .Debug, .ReleaseSafe => true,
          .ReleaseFast, .ReleaseSmall => false,
      };
  }
  11908 
  /// Records a formatted error message at `self.src_loc` in `self.err_msg` and
  /// returns `error.CodegenFail`.
  ///
  /// No error message may already be recorded. A failure to create the message
  /// is returned instead of `error.CodegenFail`.
  fn fail(self: *Self, comptime format: []const u8, args: anytype) InnerError {
      @setCold(true);
      assert(self.err_msg == null);
      const allocator = self.bin_file.allocator;
      const msg = try ErrorMsg.create(allocator, self.src_loc, format, args);
      self.err_msg = msg;
      return error.CodegenFail;
  }
  11915 
  /// Same contract as `fail`: records a formatted error message at
  /// `self.src_loc` in `self.err_msg` and returns `error.CodegenFail`.
  fn failSymbol(self: *Self, comptime format: []const u8, args: anytype) InnerError {
      @setCold(true);
      return self.fail(format, args);
  }
  11922 
  /// Maps a register name to a `Register`.
  ///
  /// Uses `Register.parseRegName` when `Register` declares it, and otherwise
  /// falls back to an exact enum field name match.
  fn parseRegName(name: []const u8) ?Register {
      return if (@hasDecl(Register, "parseRegName"))
          Register.parseRegName(name)
      else
          std.meta.stringToEnum(Register, name);
  }
  11929 
  /// Returns the alias of `reg` that is wide enough to hold `size_bytes` bytes.
  ///
  /// General purpose registers narrow to 8, 16, 32 or 64 bits and must be given
  /// a size of 1 to 8 bytes. SSE registers become their 128-bit or 256-bit form.
  /// Segment and MMX registers are returned unchanged when the size fits
  /// (at most 2 and 8 bytes respectively). x87 registers are not supported.
  fn registerAlias(reg: Register, size_bytes: u32) Register {
      return switch (reg.class()) {
          .general_purpose => switch (size_bytes) {
              0 => unreachable, // should be comptime-known
              1 => reg.to8(),
              2 => reg.to16(),
              3...4 => reg.to32(),
              5...8 => reg.to64(),
              else => unreachable,
          },
          .segment => switch (size_bytes) {
              0...2 => reg,
              else => unreachable,
          },
          .x87 => unreachable,
          .mmx => switch (size_bytes) {
              0...8 => reg,
              else => unreachable,
          },
          .sse => switch (size_bytes) {
              0...16 => reg.to128(),
              17...32 => reg.to256(),
              else => unreachable,
          },
      };
  }
  11962 
  /// Truncates the value held in `reg` to the bit width of `ty`, in place.
  /// Any bits above that width are overwritten.
  ///
  /// Signed integers are shifted left and then arithmetically right by the
  /// number of excess bits. Everything else is masked with an `and`.
  fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
      const mod = self.bin_file.options.module.?;
      // Non-integer types are handled as unsigned integers of their bit size.
      const int_info = if (ty.isAbiInt(mod)) ty.intInfo(mod) else std.builtin.Type.Int{
          .signedness = .unsigned,
          .bits = @intCast(ty.bitSize(mod)),
      };
      const max_reg_bit_width = Register.rax.bitSize();
      // Number of bits in the full register that lie above the value.
      const shift: u6 = @intCast(max_reg_bit_width - int_info.bits);
      const reg_mcv: MCValue = .{ .register = reg };
      switch (int_info.signedness) {
          .signed => {
              const shift_mcv: MCValue = .{ .immediate = shift };
              try self.genShiftBinOpMir(.{ ._l, .sa }, Type.isize, reg_mcv, shift_mcv);
              try self.genShiftBinOpMir(.{ ._r, .sa }, Type.isize, reg_mcv, shift_mcv);
          },
          .unsigned => {
              const mask = ~@as(u64, 0) >> shift;
              if (int_info.bits > 32) {
                  // Wider masks are loaded into a temporary register first.
                  const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask });
                  try self.genBinOpMir(
                      .{ ._, .@"and" },
                      Type.usize,
                      reg_mcv,
                      .{ .register = tmp_reg },
                  );
              } else {
                  try self.genBinOpMir(
                      .{ ._, .@"and" },
                      Type.u32,
                      reg_mcv,
                      .{ .immediate = mask },
                  );
              }
          },
      }
  }
  12010 
  /// Returns the bit width of the register used to hold a value of type `ty`,
  /// based on its ABI size.
  ///
  /// Floats map to 128 bits (up to 16 bytes) or 256 bits (up to 32 bytes).
  /// All other types map to 8, 16, 32 or 64 bits and must be 1 to 8 bytes.
  fn regBitSize(self: *Self, ty: Type) u64 {
      const mod = self.bin_file.options.module.?;
      const size_bytes = ty.abiSize(mod);
      if (ty.zigTypeTag(mod) == .Float) {
          return switch (size_bytes) {
              1...16 => 128,
              17...32 => 256,
              else => unreachable,
          };
      }
      return switch (size_bytes) {
          1 => 8,
          2 => 16,
          3...4 => 32,
          5...8 => 64,
          else => unreachable,
      };
  }
  12029 
  /// Returns how many bits of the register holding `ty` (see `regBitSize`) are
  /// not part of the value itself.
  fn regExtraBits(self: *Self, ty: Type) u64 {
      const mod = self.bin_file.options.module.?;
      const reg_bits = self.regBitSize(ty);
      const value_bits = ty.bitSize(mod);
      return reg_bits - value_bits;
  }
  12034 
  /// Reports whether the target CPU has the given x86 `feature`.
  fn hasFeature(self: *Self, feature: Target.x86.Feature) bool {
      const cpu_features = self.target.cpu.features;
      return Target.x86.featureSetHas(cpu_features, feature);
  }
  /// Reports whether the target CPU has at least one of the given x86 `features`.
  fn hasAnyFeatures(self: *Self, features: anytype) bool {
      const cpu_features = self.target.cpu.features;
      return Target.x86.featureSetHasAny(cpu_features, features);
  }
  /// Reports whether the target CPU has every one of the given x86 `features`.
  fn hasAllFeatures(self: *Self, features: anytype) bool {
      const cpu_features = self.target.cpu.features;
      return Target.x86.featureSetHasAll(cpu_features, features);
  }
  12044 
  /// Returns the type of the AIR reference `inst`.
  fn typeOf(self: *Self, inst: Air.Inst.Ref) Type {
      const mod = self.bin_file.options.module.?;
      const ip = &mod.intern_pool;
      return self.air.typeOf(inst, ip);
  }
  12049 
  /// Returns the result type of the AIR instruction at index `inst`.
  fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type {
      const mod = self.bin_file.options.module.?;
      const ip = &mod.intern_pool;
      return self.air.typeOfIndex(inst, ip);
  }