zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob 2b4932c5 (733970B) - Raw


      1 const std = @import("std");
      2 const build_options = @import("build_options");
      3 const builtin = @import("builtin");
      4 const assert = std.debug.assert;
      5 const codegen = @import("../../codegen.zig");
      6 const leb128 = std.leb;
      7 const link = @import("../../link.zig");
      8 const log = std.log.scoped(.codegen);
      9 const tracking_log = std.log.scoped(.tracking);
     10 const verbose_tracking_log = std.log.scoped(.verbose_tracking);
     11 const wip_mir_log = std.log.scoped(.wip_mir);
     12 const math = std.math;
     13 const mem = std.mem;
     14 const trace = @import("../../tracy.zig").trace;
     15 
     16 const Air = @import("../../Air.zig");
     17 const Allocator = mem.Allocator;
     18 const CodeGenError = codegen.CodeGenError;
     19 const Compilation = @import("../../Compilation.zig");
     20 const DebugInfoOutput = codegen.DebugInfoOutput;
     21 const DW = std.dwarf;
     22 const ErrorMsg = Module.ErrorMsg;
     23 const Result = codegen.Result;
     24 const Emit = @import("Emit.zig");
     25 const Liveness = @import("../../Liveness.zig");
     26 const Lower = @import("Lower.zig");
     27 const Mir = @import("Mir.zig");
     28 const Package = @import("../../Package.zig");
     29 const Module = @import("../../Module.zig");
     30 const Zcu = Module;
     31 const InternPool = @import("../../InternPool.zig");
     32 const Alignment = InternPool.Alignment;
     33 const Target = std.Target;
     34 const Type = @import("../../type.zig").Type;
     35 const TypedValue = @import("../../TypedValue.zig");
     36 const Value = @import("../../value.zig").Value;
     37 const Instruction = @import("encoder.zig").Instruction;
     38 
     39 const abi = @import("abi.zig");
     40 const bits = @import("bits.zig");
     41 const errUnionErrorOffset = codegen.errUnionErrorOffset;
     42 const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
     43 
     44 const Condition = bits.Condition;
     45 const Immediate = bits.Immediate;
     46 const Memory = bits.Memory;
     47 const Register = bits.Register;
     48 const RegisterManager = abi.RegisterManager;
     49 const RegisterLock = RegisterManager.RegisterLock;
     50 const FrameIndex = bits.FrameIndex;
     51 
     52 const InnerError = CodeGenError || error{OutOfRegisters};
     53 
     54 gpa: Allocator,
     55 air: Air,
     56 liveness: Liveness,
     57 bin_file: *link.File,
     58 debug_output: DebugInfoOutput,
     59 target: *const std.Target,
     60 owner: Owner,
     61 mod: *Package.Module,
     62 err_msg: ?*ErrorMsg,
     63 args: []MCValue,
     64 va_info: union {
     65     sysv: struct {
     66         gp_count: u32,
     67         fp_count: u32,
     68         overflow_arg_area: FrameAddr,
     69         reg_save_area: FrameAddr,
     70     },
     71     win64: struct {},
     72 },
     73 ret_mcv: InstTracking,
     74 fn_type: Type,
     75 arg_index: u32,
     76 src_loc: Module.SrcLoc,
     77 
     78 eflags_inst: ?Air.Inst.Index = null,
     79 
     80 /// MIR Instructions
     81 mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
     82 /// MIR extra data
     83 mir_extra: std.ArrayListUnmanaged(u32) = .{},
     84 
/// Line and column (not a byte offset) of the function's ending curly; `di` presumably stands for debug info.
     86 end_di_line: u32,
     87 end_di_column: u32,
     88 
     89 /// The value is an offset into the `Function` `code` from the beginning.
     90 /// To perform the reloc, write 32-bit signed little-endian integer
     91 /// which is a relative jump, based on the address following the reloc.
     92 exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
     93 
     94 const_tracking: ConstTrackingMap = .{},
     95 inst_tracking: InstTrackingMap = .{},
     96 
     97 // Key is the block instruction
     98 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{},
     99 
    100 register_manager: RegisterManager = .{},
    101 
    102 /// Generation of the current scope, increments by 1 for every entered scope.
    103 scope_generation: u32 = 0,
    104 
    105 frame_allocs: std.MultiArrayList(FrameAlloc) = .{},
    106 free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .{},
    107 frame_locs: std.MultiArrayList(Mir.FrameLoc) = .{},
    108 
    109 /// Debug field, used to find bugs in the compiler.
    110 air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init,
    111 
    112 /// For mir debug info, maps a mir index to a air index
    113 mir_to_air_map: @TypeOf(mir_to_air_map_init) = mir_to_air_map_init,
    114 
    115 const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {};
    116 
    117 const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged(Mir.Inst.Index, Air.Inst.Index){} else {};
    118 
    119 const FrameAddr = struct { index: FrameIndex, off: i32 = 0 };
    120 const RegisterOffset = struct { reg: Register, off: i32 = 0 };
    121 const SymbolOffset = struct { sym: u32, off: i32 = 0 };
    122 
    123 const Owner = union(enum) {
    124     func_index: InternPool.Index,
    125     lazy_sym: link.File.LazySymbol,
    126 
    /// Resolves the declaration index associated with this owner: the
    /// function's owner decl for `.func_index`, or the owner decl of the
    /// lazy symbol's type for `.lazy_sym`.
    fn getDecl(owner: Owner, mod: *Module) InternPool.DeclIndex {
        switch (owner) {
            .func_index => |func_ip_index| return mod.funcOwnerDeclIndex(func_ip_index),
            .lazy_sym => |lazy_symbol| return lazy_symbol.ty.getOwnerDecl(mod),
        }
    }
    133 
    /// Returns the linker symbol index for this owner by dispatching on the
    /// concrete linker backend behind `ctx.bin_file` (ELF, Mach-O, COFF or
    /// Plan 9, tried in that order). Any other backend is `unreachable`.
    ///
    /// For lazy symbols, a failure to create the symbol is reported through
    /// `ctx.fail` with the name of the underlying error.
    fn getSymbolIndex(owner: Owner, ctx: *Self) !u32 {
        const link_file = ctx.bin_file;
        switch (owner) {
            .func_index => |func_ip_index| {
                const zcu = link_file.comp.module.?;
                const owner_decl = zcu.funcOwnerDeclIndex(func_ip_index);

                if (link_file.cast(link.File.Elf)) |elf_link|
                    return elf_link.zigObjectPtr().?.getOrCreateMetadataForDecl(elf_link, owner_decl);

                if (link_file.cast(link.File.MachO)) |macho_link|
                    return macho_link.getZigObject().?.getOrCreateMetadataForDecl(macho_link, owner_decl);

                if (link_file.cast(link.File.Coff)) |coff_link| {
                    const decl_atom = try coff_link.getOrCreateAtomForDecl(owner_decl);
                    return coff_link.getAtom(decl_atom).getSymbolIndex().?;
                }

                if (link_file.cast(link.File.Plan9)) |plan9_link|
                    return plan9_link.seeDecl(owner_decl);

                unreachable;
            },
            .lazy_sym => |lazy_symbol| {
                if (link_file.cast(link.File.Elf)) |elf_link|
                    return elf_link.zigObjectPtr().?.getOrCreateMetadataForLazySymbol(elf_link, lazy_symbol) catch |err|
                        ctx.fail("{s} creating lazy symbol", .{@errorName(err)});

                if (link_file.cast(link.File.MachO)) |macho_link|
                    return macho_link.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_link, lazy_symbol) catch |err|
                        ctx.fail("{s} creating lazy symbol", .{@errorName(err)});

                if (link_file.cast(link.File.Coff)) |coff_link| {
                    const lazy_atom = coff_link.getOrCreateAtomForLazySymbol(lazy_symbol) catch |err|
                        return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                    return coff_link.getAtom(lazy_atom).getSymbolIndex().?;
                }

                if (link_file.cast(link.File.Plan9)) |plan9_link|
                    return plan9_link.getOrCreateAtomForLazySymbol(lazy_symbol) catch |err|
                        return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});

                unreachable;
            },
        }
    }
    168 };
    169 
    170 pub const MCValue = union(enum) {
    171     /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
    172     /// TODO Look into deleting this tag and using `dead` instead, since every use
    173     /// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
    174     none,
    175     /// Control flow will not allow this value to be observed.
    176     unreach,
    177     /// No more references to this value remain.
    178     /// The payload is the value of scope_generation at the point where the death occurred
    179     dead: u32,
    180     /// The value is undefined.
    181     undef,
    182     /// A pointer-sized integer that fits in a register.
    183     /// If the type is a pointer, this is the pointer address in virtual address space.
    184     immediate: u64,
    185     /// The value resides in the EFLAGS register.
    186     eflags: Condition,
    187     /// The value is in a register.
    188     register: Register,
    189     /// The value is split across two registers.
    190     register_pair: [2]Register,
    191     /// The value is a constant offset from the value in a register.
    192     register_offset: RegisterOffset,
    193     /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register.
    194     register_overflow: struct { reg: Register, eflags: Condition },
    195     /// The value is in memory at a hard-coded address.
    196     /// If the type is a pointer, it means the pointer address is stored at this memory location.
    197     memory: u64,
    198     /// The value is in memory at an address not-yet-allocated by the linker.
    199     /// This traditionally corresponds to a relocation emitted in a relocatable object file.
    200     load_symbol: SymbolOffset,
    201     /// The address of the memory location not-yet-allocated by the linker.
    202     lea_symbol: SymbolOffset,
    203     /// The value is in memory at a constant offset from the address in a register.
    204     indirect: RegisterOffset,
    205     /// The value is in memory.
    206     /// Payload is a symbol index.
    207     load_direct: u32,
    208     /// The value is a pointer to a value in memory.
    209     /// Payload is a symbol index.
    210     lea_direct: u32,
    211     /// The value is in memory referenced indirectly via GOT.
    212     /// Payload is a symbol index.
    213     load_got: u32,
    214     /// The value is a pointer to a value referenced indirectly via GOT.
    215     /// Payload is a symbol index.
    216     lea_got: u32,
    217     /// The value is a threadlocal variable.
    218     /// Payload is a symbol index.
    219     load_tlv: u32,
    220     /// The value is a pointer to a threadlocal variable.
    221     /// Payload is a symbol index.
    222     lea_tlv: u32,
    223     /// The value stored at an offset from a frame index
    224     /// Payload is a frame address.
    225     load_frame: FrameAddr,
    226     /// The address of an offset from a frame index
    227     /// Payload is a frame address.
    228     lea_frame: FrameAddr,
    229     /// This indicates that we have already allocated a frame index for this instruction,
    230     /// but it has not been spilled there yet in the current control flow.
    231     /// Payload is a frame index.
    232     reserved_frame: FrameIndex,
    233     air_ref: Air.Inst.Ref,
    234 
    /// Whether the value is tagged `.memory`, `.indirect` or `.load_frame`.
    /// Every other tag (including the remaining `load_*` tags) yields `false`.
    fn isMemory(mcv: MCValue) bool {
        return mcv == .memory or mcv == .indirect or mcv == .load_frame;
    }
    241 
    /// Whether the value is tagged `.immediate`.
    fn isImmediate(mcv: MCValue) bool {
        return mcv == .immediate;
    }
    248 
    /// Whether the value is exactly a register: either `.register`, or a
    /// `.register_offset` whose offset is zero.
    fn isRegister(mcv: MCValue) bool {
        switch (mcv) {
            .register => return true,
            .register_offset => |base| return base.off == 0,
            else => return false,
        }
    }
    256 
    /// Whether the value is tagged `.register` or `.register_offset`
    /// (with any offset).
    fn isRegisterOffset(mcv: MCValue) bool {
        return mcv == .register or mcv == .register_offset;
    }
    263 
    /// Returns the single register named by this value, or `null` when the
    /// tag carries none. Note that `.register_pair` yields `null` here; use
    /// `getRegs` to see both registers of a pair.
    fn getReg(mcv: MCValue) ?Register {
        switch (mcv) {
            .register => |single_reg| return single_reg,
            // These payloads have different types but all expose a `reg` field.
            inline .register_offset, .indirect, .register_overflow => |payload| return payload.reg,
            else => return null,
        }
    }
    272 
    /// Returns every register named by this value as a slice: one element for
    /// `.register`, `.register_offset`, `.indirect` and `.register_overflow`,
    /// two for `.register_pair`, and an empty slice otherwise.
    ///
    /// The slice points into `mcv` itself (the payload is captured by
    /// pointer), so it is only valid while `mcv` is alive and unmodified.
    fn getRegs(mcv: *const MCValue) []const Register {
        switch (mcv.*) {
            .register => |*single_reg| return @as(*const [1]Register, single_reg),
            .register_pair => |*reg_pair| return reg_pair,
            inline .register_offset, .indirect, .register_overflow => |*payload| {
                return @as(*const [1]Register, &payload.reg);
            },
            else => return &.{},
        }
    }
    282 
    /// Returns the condition code carried by `.eflags` or `.register_overflow`
    /// values, or `null` for every other tag.
    fn getCondition(mcv: MCValue) ?Condition {
        switch (mcv) {
            .eflags => |flags_cc| return flags_cc,
            .register_overflow => |overflow| return overflow.eflags,
            else => return null,
        }
    }
    290 
    /// Maps a value that lives in memory to the value describing its address
    /// (`load_*` becomes the matching `lea_*`, `.memory` becomes `.immediate`,
    /// `.indirect` becomes `.register` or `.register_offset`).
    /// Calling this on any tag that is not in memory is `unreachable`.
    fn address(mcv: MCValue) MCValue {
        switch (mcv) {
            .memory => |abs_addr| return .{ .immediate = abs_addr },
            .indirect => |base| {
                // A zero offset collapses to the plain register form.
                if (base.off == 0) return .{ .register = base.reg };
                return .{ .register_offset = base };
            },
            .load_symbol => |sym_off| return .{ .lea_symbol = sym_off },
            .load_direct => |sym_index| return .{ .lea_direct = sym_index },
            .load_got => |sym_index| return .{ .lea_got = sym_index },
            .load_tlv => |sym_index| return .{ .lea_tlv = sym_index },
            .load_frame => |frame_addr| return .{ .lea_frame = frame_addr },

            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register,
            .register_pair,
            .register_offset,
            .register_overflow,
            .lea_symbol,
            .lea_direct,
            .lea_got,
            .lea_tlv,
            .lea_frame,
            .reserved_frame,
            .air_ref,
            => unreachable, // not in memory
        }
    }
    323 
    /// Maps a value holding an address to the value stored at that address
    /// (`lea_*` becomes the matching `load_*`, `.immediate` becomes `.memory`,
    /// `.register` / `.register_offset` become `.indirect`).
    /// Calling this on any other tag is `unreachable`.
    fn deref(mcv: MCValue) MCValue {
        switch (mcv) {
            .immediate => |abs_addr| return .{ .memory = abs_addr },
            .register => |base_reg| return .{ .indirect = .{ .reg = base_reg } },
            .register_offset => |base| return .{ .indirect = base },
            .lea_symbol => |sym_off| return .{ .load_symbol = sym_off },
            .lea_direct => |sym_index| return .{ .load_direct = sym_index },
            .lea_got => |sym_index| return .{ .load_got = sym_index },
            .lea_tlv => |sym_index| return .{ .load_tlv = sym_index },
            .lea_frame => |frame_addr| return .{ .load_frame = frame_addr },

            .none,
            .unreach,
            .dead,
            .undef,
            .eflags,
            .register_pair,
            .register_overflow,
            .memory,
            .indirect,
            .load_direct,
            .load_got,
            .load_tlv,
            .load_frame,
            .load_symbol,
            .reserved_frame,
            .air_ref,
            => unreachable, // not dereferenceable
        }
    }
    353 
    /// Returns this value displaced by `off`.
    ///
    /// Only `.immediate`, `.register`, `.register_offset` and `.lea_frame`
    /// accept a non-zero `off`. The tags in the "not offsettable" group are
    /// returned unchanged when `off == 0` and are `unreachable` otherwise.
    /// `.none`, `.unreach`, `.dead`, `.undef`, `.reserved_frame` and
    /// `.air_ref` are always `unreachable`.
    fn offset(mcv: MCValue, off: i32) MCValue {
        switch (mcv) {
            .immediate => |imm| {
                // Wrapping add on the signed reinterpretation of the immediate.
                const signed_imm: i64 = @bitCast(imm);
                return .{ .immediate = @bitCast(signed_imm +% off) };
            },
            .register => |base_reg| return .{
                .register_offset = .{ .reg = base_reg, .off = off },
            },
            .register_offset => |base| return .{
                .register_offset = .{ .reg = base.reg, .off = base.off + off },
            },
            .lea_frame => |frame_addr| return .{
                .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
            },

            .eflags,
            .register_pair,
            .register_overflow,
            .memory,
            .indirect,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .load_symbol,
            .lea_symbol,
            => {
                if (off != 0) unreachable; // not offsettable
                return mcv;
            },

            .none,
            .unreach,
            .dead,
            .undef,
            .reserved_frame,
            .air_ref,
            => unreachable, // not valid
        }
    }
    391 
    /// Builds a `Memory` operand of the requested `size` for this value.
    ///
    /// Supported tags are `.memory`, `.indirect`, `.load_frame` and
    /// `.load_symbol`; every other tag is `unreachable`.
    /// For `.load_symbol` the symbol offset must be zero (asserted), and the
    /// owner's symbol index is looked up through `function`, which may fail.
    fn mem(mcv: MCValue, function: *Self, size: Memory.Size) !Memory {
        switch (mcv) {
            .memory => |abs_addr| {
                // Use a displacement when the address, reinterpreted as signed,
                // fits in 32 bits; otherwise fall back to the `.off` form.
                const signed_addr: i64 = @bitCast(abs_addr);
                if (math.cast(i32, signed_addr)) |small_disp| return .{
                    .base = .{ .reg = .ds },
                    .mod = .{ .rm = .{ .size = size, .disp = small_disp } },
                };
                return .{ .base = .{ .reg = .ds }, .mod = .{ .off = abs_addr } };
            },
            .indirect => |base| return .{
                .base = .{ .reg = base.reg },
                .mod = .{ .rm = .{ .size = size, .disp = base.off } },
            },
            .load_frame => |frame_addr| return .{
                .base = .{ .frame = frame_addr.index },
                .mod = .{ .rm = .{ .size = size, .disp = frame_addr.off } },
            },
            .load_symbol => |sym_off| {
                assert(sym_off.off == 0);
                const owner_sym_index = try function.owner.getSymbolIndex(function);
                return .{
                    .base = .{ .reloc = .{
                        .atom_index = owner_sym_index,
                        .sym_index = sym_off.sym,
                    } },
                    .mod = .{ .rm = .{ .size = size, .disp = sym_off.off } },
                };
            },

            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register,
            .register_pair,
            .register_offset,
            .register_overflow,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .lea_frame,
            .reserved_frame,
            .air_ref,
            .lea_symbol,
            => unreachable,
        }
    }
    451 
    /// `std.fmt` hook: writes a compact, human-readable rendering of the
    /// value. Memory-resident forms are wrapped in `[...]`; address forms
    /// are printed bare.
    pub fn format(
        mcv: MCValue,
        comptime _: []const u8,
        _: std.fmt.FormatOptions,
        writer: anytype,
    ) @TypeOf(writer).Error!void {
        switch (mcv) {
            // Payload-free (or payload-ignored) states print just the tag name.
            .none, .unreach, .dead, .undef => return writer.print("({s})", .{@tagName(mcv)}),

            // Absolute addresses and constants.
            .immediate => |imm| return writer.print("0x{x}", .{imm}),
            .memory => |abs_addr| return writer.print("[ds:0x{x}]", .{abs_addr}),

            // Register-based forms.
            inline .eflags, .register => |named| return writer.print("{s}", .{@tagName(named)}),
            .register_pair => |reg_pair| return writer.print("{s}:{s}", .{
                @tagName(reg_pair[1]), @tagName(reg_pair[0]),
            }),
            .register_offset => |base| return writer.print("{s} + 0x{x}", .{ @tagName(base.reg), base.off }),
            .register_overflow => |overflow| return writer.print("{s}:{s}", .{
                @tagName(overflow.eflags), @tagName(overflow.reg),
            }),
            .indirect => |base| return writer.print("[{s} + 0x{x}]", .{ @tagName(base.reg), base.off }),

            // Symbol-based forms.
            .load_symbol => |sym_off| return writer.print("[{} + 0x{x}]", .{ sym_off.sym, sym_off.off }),
            .lea_symbol => |sym_off| return writer.print("{} + 0x{x}", .{ sym_off.sym, sym_off.off }),
            .load_direct => |sym_index| return writer.print("[direct:{d}]", .{sym_index}),
            .lea_direct => |sym_index| return writer.print("direct:{d}", .{sym_index}),
            .load_got => |sym_index| return writer.print("[got:{d}]", .{sym_index}),
            .lea_got => |sym_index| return writer.print("got:{d}", .{sym_index}),
            .load_tlv => |sym_index| return writer.print("[tlv:{d}]", .{sym_index}),
            .lea_tlv => |sym_index| return writer.print("tlv:{d}", .{sym_index}),

            // Frame-based forms.
            .load_frame => |frame_addr| return writer.print("[{} + 0x{x}]", .{ frame_addr.index, frame_addr.off }),
            .lea_frame => |frame_addr| return writer.print("{} + 0x{x}", .{ frame_addr.index, frame_addr.off }),
            // NOTE(review): a reserved frame is rendered with a "dead:" prefix,
            // which can be mistaken for the `.dead` tag in logs.
            .reserved_frame => |frame_index| return writer.print("(dead:{})", .{frame_index}),

            .air_ref => |ref| return writer.print("(air:0x{x})", .{@intFromEnum(ref)}),
        }
    }
    483 };
    484 
    485 const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking);
    486 const ConstTrackingMap = std.AutoArrayHashMapUnmanaged(InternPool.Index, InstTracking);
    487 const InstTracking = struct {
    488     long: MCValue,
    489     short: MCValue,
    490 
    /// Creates the tracking state for a freshly produced `result`.
    ///
    /// `short` is always `result`. `long` is also `result` for the constant,
    /// symbol, frame and absolute-memory tags listed first, and `.none` for
    /// the flags-, register- and `.indirect`-based tags. Passing `.dead`,
    /// `.reserved_frame` or `.air_ref` is `unreachable`.
    fn init(result: MCValue) InstTracking {
        const long_home: MCValue = switch (result) {
            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .lea_frame,
            .load_symbol,
            .lea_symbol,
            => result,

            .eflags,
            .register,
            .register_pair,
            .register_offset,
            .register_overflow,
            .indirect,
            => .none,

            .dead,
            .reserved_frame,
            .air_ref,
            => unreachable,
        };
        return .{ .long = long_home, .short = result };
    }
    522 
    /// Register named by the `short` value, if any (see `MCValue.getReg`).
    fn getReg(self: InstTracking) ?Register {
        return MCValue.getReg(self.short);
    }
    526 
    /// Registers named by the `short` value (see `MCValue.getRegs`).
    /// The returned slice points into `self.short`.
    fn getRegs(self: *const InstTracking) []const Register {
        return MCValue.getRegs(&self.short);
    }
    530 
    /// Condition code carried by the `short` value, if any
    /// (see `MCValue.getCondition`).
    fn getCondition(self: InstTracking) ?Condition {
        return MCValue.getCondition(self.short);
    }
    534 
    /// Copies the `short` value into the `long` location, first establishing
    /// a `long` location if there is none yet.
    ///
    /// Does nothing when `long` and `short` already compare equal. `long` must
    /// be `.none`, `.load_frame` or `.reserved_frame` on entry; anything else
    /// is `unreachable`. `short` itself is left untouched.
    fn spill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void {
        const already_spilled = std.meta.eql(self.long, self.short);
        if (already_spilled) return;

        // Allocate or reuse frame index
        switch (self.long) {
            .load_frame => {},
            .reserved_frame => |frame_index| self.long = .{ .load_frame = .{ .index = frame_index } },
            .none => self.long = try function.allocRegOrMem(inst, false),
            else => unreachable,
        }

        tracking_log.debug("spill %{d} from {} to {}", .{ inst, self.short, self.long });
        const inst_ty = function.typeOfIndex(inst);
        try function.genCopy(inst_ty, self.long, self.short);
    }
    547 
    /// Makes `short` equal to `long`, first turning a `.reserved_frame` in
    /// `long` into a `.load_frame` of the same frame index.
    ///
    /// After that conversion, `long` must be one of the constant, symbol,
    /// frame or absolute-memory tags; flags-, register- and `.indirect`-based
    /// tags as well as `.dead` and `.air_ref` are `unreachable`.
    fn reuseFrame(self: *InstTracking) void {
        switch (self.long) {
            .reserved_frame => |frame_index| self.long = .{ .load_frame = .{ .index = frame_index } },
            else => {},
        }

        switch (self.long) {
            .dead,
            .eflags,
            .register,
            .register_pair,
            .register_offset,
            .register_overflow,
            .indirect,
            .reserved_frame,
            .air_ref,
            => unreachable,

            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .lea_frame,
            .load_symbol,
            .lea_symbol,
            => self.short = self.long,
        }
    }
    582 
    /// Updates the bookkeeping after a spill: releases whatever `short`
    /// occupied via `function.freeValue`, then points `short` at the `long`
    /// location. No copy is emitted here.
    fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void {
        const previous_short = self.short;
        try function.freeValue(previous_short);
        InstTracking.reuseFrame(self);
        tracking_log.debug("%{d} => {} (spilled)", .{ inst, self.* });
    }
    588 
    /// Sanity-checks that `self` may be materialized into `target`.
    ///
    /// - For constant, symbol, `.lea_frame` and absolute-memory `long` values,
    ///   asserts that `target.long` is identical.
    /// - For `.load_frame` / `.reserved_frame`, `target.long` must be `.none`,
    ///   `.load_frame` or `.reserved_frame`.
    /// - Any other `long` tag is `unreachable`.
    fn verifyMaterialize(self: InstTracking, target: InstTracking) void {
        switch (self.long) {
            .load_frame, .reserved_frame => {
                const frame_compatible = switch (target.long) {
                    .none, .load_frame, .reserved_frame => true,
                    else => false,
                };
                if (!frame_compatible) unreachable;
            },

            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .lea_frame,
            .load_symbol,
            .lea_symbol,
            => assert(std.meta.eql(self.long, target.long)),

            .dead,
            .eflags,
            .register,
            .register_pair,
            .register_offset,
            .register_overflow,
            .indirect,
            .air_ref,
            => unreachable,
        }
    }
    626 
    /// Checked variant of `materializeUnsafe`: runs `verifyMaterialize`
    /// against `target` first, then emits the copies.
    fn materialize(
        self: *InstTracking,
        function: *Self,
        inst: Air.Inst.Index,
        target: InstTracking,
    ) !void {
        self.verifyMaterialize(target);
        return self.materializeUnsafe(function, inst, target);
    }
    636 
    /// Emits the copies that bring this value into the locations described by
    /// `target`, without verifying compatibility.
    ///
    /// If `target` has a `.load_frame` long location while `self` has none
    /// (`.none` or `.reserved_frame`), the value is first copied there. It is
    /// then always copied from `self.short` to `target.short`.
    fn materializeUnsafe(
        self: InstTracking,
        function: *Self,
        inst: Air.Inst.Index,
        target: InstTracking,
    ) !void {
        const inst_ty = function.typeOfIndex(inst);

        const self_has_no_frame = self.long == .none or self.long == .reserved_frame;
        const needs_long_copy = self_has_no_frame and target.long == .load_frame;
        if (needs_long_copy) try function.genCopy(inst_ty, target.long, self.short);

        try function.genCopy(inst_ty, target.short, self.short);
    }
    648 
    /// Updates the bookkeeping so that `self` matches `target` (after
    /// verifying compatibility). No copy is emitted here.
    ///
    /// When `target.long` is `.none`, an existing frame slot is not dropped:
    /// a `.load_frame` is downgraded to `.reserved_frame` of the same index
    /// and an existing `.reserved_frame` is kept.
    fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void {
        self.verifyMaterialize(target);

        // Don't clobber reserved frame indices
        const next_long: MCValue = if (target.long != .none)
            target.long
        else switch (self.long) {
            .load_frame => |frame_addr| .{ .reserved_frame = frame_addr.index },
            .reserved_frame => self.long,
            else => target.long,
        };

        self.long = next_long;
        self.short = target.short;
        tracking_log.debug("%{d} => {} (materialize)", .{ inst, self.* });
    }
    660 
    /// Revives a dead value whose recorded death generation is at least
    /// `scope_generation`, by resetting `short` from `long` via `reuseFrame`.
    /// Values that are not dead, or that died in an earlier generation, are
    /// left untouched.
    fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
        const death_generation = switch (self.short) {
            .dead => |recorded_generation| recorded_generation,
            else => return,
        };
        if (death_generation < scope_generation) return;

        self.reuseFrame();
        tracking_log.debug("%{d} => {} (resurrect)", .{ inst, self.* });
    }
    670 
    /// Marks the value dead: frees whatever `short` occupied and records the
    /// current scope generation in the `.dead` payload. A value that is
    /// already dead is left as is.
    fn die(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void {
        switch (self.short) {
            .dead => return,
            else => try function.freeValue(self.short),
        }
        self.short = .{ .dead = function.scope_generation };
        tracking_log.debug("%{d} => {} (death)", .{ inst, self.* });
    }
    677 
    /// Marks `short` dead at the current scope generation *without* calling
    /// `freeValue`, and logs which instruction (or a temporary, when
    /// `new_inst` is null) takes over from `old_inst`.
    fn reuse(
        self: *InstTracking,
        function: *Self,
        new_inst: ?Air.Inst.Index,
        old_inst: Air.Inst.Index,
    ) void {
        self.short = .{ .dead = function.scope_generation };

        const reusing_inst = new_inst orelse {
            tracking_log.debug("tmp => {} (reuse %{d})", .{ self.*, old_inst });
            return;
        };
        tracking_log.debug("%{d} => {} (reuse %{d})", .{ reusing_inst, self.*, old_inst });
    }
    690 
    /// For every register named by this value, ensures the register manager
    /// considers it free, evicting the bookkeeping of whichever instruction
    /// currently holds it.
    ///
    /// The evicted instruction's `short` becomes `.dead` (at the current scope
    /// generation) or, if it held one other register, a `.register` of that
    /// remaining register. Asserts that the holder names the register exactly
    /// once and has at most one other register.
    fn liveOut(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
        for (self.getRegs()) |reg| {
            if (function.register_manager.isRegFree(reg)) {
                tracking_log.debug("%{d} => {} (live-out)", .{ inst, self.* });
                continue;
            }

            const tracked_index = RegisterManager.indexOfRegIntoTracked(reg).?;
            const holder_inst = function.register_manager.registers[tracked_index];
            const holder = function.getResolvedInstValue(holder_inst);

            // Disable death.
            var saw_reg = false;
            var leftover_reg: Register = .none;
            for (holder.getRegs()) |holder_reg| {
                if (holder_reg.id() == reg.id()) {
                    assert(!saw_reg);
                    saw_reg = true;
                } else {
                    assert(leftover_reg == .none);
                    leftover_reg = holder_reg;
                }
            }
            assert(saw_reg);

            if (leftover_reg == .none) {
                holder.short = .{ .dead = function.scope_generation };
            } else {
                holder.short = .{ .register = leftover_reg };
            }

            // Perform side-effects of freeValue manually.
            function.register_manager.freeReg(reg);

            tracking_log.debug("%{d} => {} (live-out %{d})", .{ inst, self.*, holder_inst });
        }
    }
    724 
    /// Formatter hook for `std.fmt`: prints the `short` value, preceded by the
    /// `long` value between `|` bars when the two differ.
    pub fn format(
        self: InstTracking,
        comptime _: []const u8,
        _: std.fmt.FormatOptions,
        writer: anytype,
    ) @TypeOf(writer).Error!void {
        const values_differ = !std.meta.eql(self.long, self.short);
        if (values_differ) {
            try writer.print("|{}| ", .{self.long});
        }
        try writer.print("{}", .{self.short});
    }
    734 };
    735 
    736 const FrameAlloc = struct {
    737     abi_size: u31,
    738     spill_pad: u3,
    739     abi_align: Alignment,
    740     ref_count: u16,
    741 
    742     fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: Alignment }) FrameAlloc {
    743         return .{
    744             .abi_size = @intCast(alloc_abi.size),
    745             .spill_pad = alloc_abi.pad,
    746             .abi_align = alloc_abi.alignment,
    747             .ref_count = 0,
    748         };
    749     }
    750     fn initType(ty: Type, mod: *Module) FrameAlloc {
    751         return init(.{
    752             .size = ty.abiSize(mod),
    753             .alignment = ty.abiAlignment(mod),
    754         });
    755     }
    756     fn initSpill(ty: Type, mod: *Module) FrameAlloc {
    757         const abi_size = ty.abiSize(mod);
    758         const spill_size = if (abi_size < 8)
    759             math.ceilPowerOfTwoAssert(u64, abi_size)
    760         else
    761             std.mem.alignForward(u64, abi_size, 8);
    762         return init(.{
    763             .size = spill_size,
    764             .pad = @intCast(spill_size - abi_size),
    765             .alignment = ty.abiAlignment(mod).maxStrict(
    766                 Alignment.fromNonzeroByteUnits(@min(spill_size, 8)),
    767             ),
    768         });
    769     }
    770 };
    771 
/// A stack allocation: the AIR instruction it is associated with (if any) and
/// its size.
const StackAllocation = struct {
    /// Associated AIR instruction, or `null` when there is none.
    inst: ?Air.Inst.Index,
    /// TODO do we need size? should be determined by inst.ty.abiSize(mod)
    size: u32,
};
    777 
/// Data kept per block while generating code: a list of MIR instruction indices
/// and a `State`.
/// NOTE(review): `relocs` presumably holds jumps that still need their target
/// patched, and `state` the state expected at the block's end; confirm with the
/// code that uses them.
const BlockData = struct {
    relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
    state: State,

    /// Frees `relocs` and invalidates `self`; the value must not be used afterwards.
    fn deinit(self: *BlockData, gpa: Allocator) void {
        self.relocs.deinit(gpa);
        self.* = undefined;
    }
};
    787 
/// The struct type formed by this file; the code generator functions below take
/// it as `self: *Self`.
const Self = @This();
    789 
    790 pub fn generate(
    791     bin_file: *link.File,
    792     src_loc: Module.SrcLoc,
    793     func_index: InternPool.Index,
    794     air: Air,
    795     liveness: Liveness,
    796     code: *std.ArrayList(u8),
    797     debug_output: DebugInfoOutput,
    798 ) CodeGenError!Result {
    799     const comp = bin_file.comp;
    800     const gpa = comp.gpa;
    801     const zcu = comp.module.?;
    802     const func = zcu.funcInfo(func_index);
    803     const fn_owner_decl = zcu.declPtr(func.owner_decl);
    804     assert(fn_owner_decl.has_tv);
    805     const fn_type = fn_owner_decl.ty;
    806     const namespace = zcu.namespacePtr(fn_owner_decl.src_namespace);
    807     const mod = namespace.file_scope.mod;
    808 
    809     var function = Self{
    810         .gpa = gpa,
    811         .air = air,
    812         .liveness = liveness,
    813         .target = &mod.resolved_target.result,
    814         .mod = mod,
    815         .bin_file = bin_file,
    816         .debug_output = debug_output,
    817         .owner = .{ .func_index = func_index },
    818         .err_msg = null,
    819         .args = undefined, // populated after `resolveCallingConventionValues`
    820         .va_info = undefined, // populated after `resolveCallingConventionValues`
    821         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
    822         .fn_type = fn_type,
    823         .arg_index = 0,
    824         .src_loc = src_loc,
    825         .end_di_line = func.rbrace_line,
    826         .end_di_column = func.rbrace_column,
    827     };
    828     defer {
    829         function.frame_allocs.deinit(gpa);
    830         function.free_frame_indices.deinit(gpa);
    831         function.frame_locs.deinit(gpa);
    832         var block_it = function.blocks.valueIterator();
    833         while (block_it.next()) |block| block.deinit(gpa);
    834         function.blocks.deinit(gpa);
    835         function.inst_tracking.deinit(gpa);
    836         function.const_tracking.deinit(gpa);
    837         function.exitlude_jump_relocs.deinit(gpa);
    838         function.mir_instructions.deinit(gpa);
    839         function.mir_extra.deinit(gpa);
    840         if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa);
    841     }
    842 
    843     wip_mir_log.debug("{}:", .{function.fmtDecl(func.owner_decl)});
    844 
    845     const ip = &zcu.intern_pool;
    846 
    847     try function.frame_allocs.resize(gpa, FrameIndex.named_count);
    848     function.frame_allocs.set(
    849         @intFromEnum(FrameIndex.stack_frame),
    850         FrameAlloc.init(.{
    851             .size = 0,
    852             .alignment = func.analysis(ip).stack_alignment.max(.@"1"),
    853         }),
    854     );
    855     function.frame_allocs.set(
    856         @intFromEnum(FrameIndex.call_frame),
    857         FrameAlloc.init(.{ .size = 0, .alignment = .@"1" }),
    858     );
    859 
    860     const fn_info = zcu.typeToFunc(fn_type).?;
    861     const cc = abi.resolveCallingConvention(fn_info.cc, function.target.*);
    862     var call_info = function.resolveCallingConventionValues(fn_info, &.{}, .args_frame) catch |err| switch (err) {
    863         error.CodegenFail => return Result{ .fail = function.err_msg.? },
    864         error.OutOfRegisters => return Result{
    865             .fail = try ErrorMsg.create(
    866                 gpa,
    867                 src_loc,
    868                 "CodeGen ran out of registers. This is a bug in the Zig compiler.",
    869                 .{},
    870             ),
    871         },
    872         else => |e| return e,
    873     };
    874     defer call_info.deinit(&function);
    875 
    876     function.args = call_info.args;
    877     function.ret_mcv = call_info.return_value;
    878     function.frame_allocs.set(@intFromEnum(FrameIndex.ret_addr), FrameAlloc.init(.{
    879         .size = Type.usize.abiSize(zcu),
    880         .alignment = Type.usize.abiAlignment(zcu).min(call_info.stack_align),
    881     }));
    882     function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), FrameAlloc.init(.{
    883         .size = Type.usize.abiSize(zcu),
    884         .alignment = Alignment.min(
    885             call_info.stack_align,
    886             Alignment.fromNonzeroByteUnits(function.target.stackAlignment()),
    887         ),
    888     }));
    889     function.frame_allocs.set(
    890         @intFromEnum(FrameIndex.args_frame),
    891         FrameAlloc.init(.{
    892             .size = call_info.stack_byte_count,
    893             .alignment = call_info.stack_align,
    894         }),
    895     );
    896     function.va_info = switch (cc) {
    897         .SysV => .{ .sysv = .{
    898             .gp_count = call_info.gp_count,
    899             .fp_count = call_info.fp_count,
    900             .overflow_arg_area = .{ .index = .args_frame, .off = call_info.stack_byte_count },
    901             .reg_save_area = undefined,
    902         } },
    903         .Win64 => .{ .win64 = .{} },
    904         else => undefined,
    905     };
    906 
    907     function.gen() catch |err| switch (err) {
    908         error.CodegenFail => return Result{ .fail = function.err_msg.? },
    909         error.OutOfRegisters => return Result{
    910             .fail = try ErrorMsg.create(gpa, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}),
    911         },
    912         else => |e| return e,
    913     };
    914 
    915     var mir = Mir{
    916         .instructions = function.mir_instructions.toOwnedSlice(),
    917         .extra = try function.mir_extra.toOwnedSlice(gpa),
    918         .frame_locs = function.frame_locs.toOwnedSlice(),
    919     };
    920     defer mir.deinit(gpa);
    921 
    922     var emit = Emit{
    923         .lower = .{
    924             .bin_file = bin_file,
    925             .allocator = gpa,
    926             .mir = mir,
    927             .cc = cc,
    928             .src_loc = src_loc,
    929             .output_mode = comp.config.output_mode,
    930             .link_mode = comp.config.link_mode,
    931             .pic = mod.pic,
    932         },
    933         .debug_output = debug_output,
    934         .code = code,
    935         .prev_di_pc = 0,
    936         .prev_di_line = func.lbrace_line,
    937         .prev_di_column = func.lbrace_column,
    938     };
    939     defer emit.deinit();
    940     emit.emitMir() catch |err| switch (err) {
    941         error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? },
    942         error.InvalidInstruction, error.CannotEncode => |e| {
    943             const msg = switch (e) {
    944                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
    945                 error.CannotEncode => "CodeGen failed to encode the instruction.",
    946             };
    947             return Result{
    948                 .fail = try ErrorMsg.create(
    949                     gpa,
    950                     src_loc,
    951                     "{s} This is a bug in the Zig compiler.",
    952                     .{msg},
    953                 ),
    954             };
    955         },
    956         else => |e| return e,
    957     };
    958 
    959     if (function.err_msg) |em| {
    960         return Result{ .fail = em };
    961     } else {
    962         return Result.ok;
    963     }
    964 }
    965 
    966 pub fn generateLazy(
    967     bin_file: *link.File,
    968     src_loc: Module.SrcLoc,
    969     lazy_sym: link.File.LazySymbol,
    970     code: *std.ArrayList(u8),
    971     debug_output: DebugInfoOutput,
    972 ) CodeGenError!Result {
    973     const comp = bin_file.comp;
    974     const gpa = comp.gpa;
    975     // This function is for generating global code, so we use the root module.
    976     const mod = comp.root_mod;
    977     var function = Self{
    978         .gpa = gpa,
    979         .air = undefined,
    980         .liveness = undefined,
    981         .target = &mod.resolved_target.result,
    982         .mod = mod,
    983         .bin_file = bin_file,
    984         .debug_output = debug_output,
    985         .owner = .{ .lazy_sym = lazy_sym },
    986         .err_msg = null,
    987         .args = undefined,
    988         .va_info = undefined,
    989         .ret_mcv = undefined,
    990         .fn_type = undefined,
    991         .arg_index = undefined,
    992         .src_loc = src_loc,
    993         .end_di_line = undefined, // no debug info yet
    994         .end_di_column = undefined, // no debug info yet
    995     };
    996     defer {
    997         function.mir_instructions.deinit(gpa);
    998         function.mir_extra.deinit(gpa);
    999     }
   1000 
   1001     function.genLazy(lazy_sym) catch |err| switch (err) {
   1002         error.CodegenFail => return Result{ .fail = function.err_msg.? },
   1003         error.OutOfRegisters => return Result{
   1004             .fail = try ErrorMsg.create(gpa, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}),
   1005         },
   1006         else => |e| return e,
   1007     };
   1008 
   1009     var mir = Mir{
   1010         .instructions = function.mir_instructions.toOwnedSlice(),
   1011         .extra = try function.mir_extra.toOwnedSlice(gpa),
   1012         .frame_locs = function.frame_locs.toOwnedSlice(),
   1013     };
   1014     defer mir.deinit(gpa);
   1015 
   1016     var emit = Emit{
   1017         .lower = .{
   1018             .bin_file = bin_file,
   1019             .allocator = gpa,
   1020             .mir = mir,
   1021             .cc = abi.resolveCallingConvention(.Unspecified, function.target.*),
   1022             .src_loc = src_loc,
   1023             .output_mode = comp.config.output_mode,
   1024             .link_mode = comp.config.link_mode,
   1025             .pic = mod.pic,
   1026         },
   1027         .debug_output = debug_output,
   1028         .code = code,
   1029         .prev_di_pc = undefined, // no debug info yet
   1030         .prev_di_line = undefined, // no debug info yet
   1031         .prev_di_column = undefined, // no debug info yet
   1032     };
   1033     defer emit.deinit();
   1034     emit.emitMir() catch |err| switch (err) {
   1035         error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? },
   1036         error.InvalidInstruction, error.CannotEncode => |e| {
   1037             const msg = switch (e) {
   1038                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
   1039                 error.CannotEncode => "CodeGen failed to encode the instruction.",
   1040             };
   1041             return Result{
   1042                 .fail = try ErrorMsg.create(
   1043                     gpa,
   1044                     src_loc,
   1045                     "{s} This is a bug in the Zig compiler.",
   1046                     .{msg},
   1047                 ),
   1048             };
   1049         },
   1050         else => |e| return e,
   1051     };
   1052 
   1053     if (function.err_msg) |em| {
   1054         return Result{ .fail = em };
   1055     } else {
   1056         return Result.ok;
   1057     }
   1058 }
   1059 
/// Payload of the formatter returned by `fmtDecl`.
const FormatDeclData = struct {
    /// Module used to look up `decl_index`.
    mod: *Module,
    /// The declaration whose name is printed.
    decl_index: InternPool.DeclIndex,
};
   1064 fn formatDecl(
   1065     data: FormatDeclData,
   1066     comptime _: []const u8,
   1067     _: std.fmt.FormatOptions,
   1068     writer: anytype,
   1069 ) @TypeOf(writer).Error!void {
   1070     try data.mod.declPtr(data.decl_index).renderFullyQualifiedName(data.mod, writer);
   1071 }
   1072 fn fmtDecl(self: *Self, decl_index: InternPool.DeclIndex) std.fmt.Formatter(formatDecl) {
   1073     return .{ .data = .{
   1074         .mod = self.bin_file.comp.module.?,
   1075         .decl_index = decl_index,
   1076     } };
   1077 }
   1078 
/// Payload of the formatter returned by `fmtAir`.
const FormatAirData = struct {
    /// Code generator that provides the module, AIR and liveness data.
    self: *Self,
    /// The AIR instruction to dump.
    inst: Air.Inst.Index,
};
   1083 fn formatAir(
   1084     data: FormatAirData,
   1085     comptime _: []const u8,
   1086     _: std.fmt.FormatOptions,
   1087     writer: anytype,
   1088 ) @TypeOf(writer).Error!void {
   1089     @import("../../print_air.zig").dumpInst(
   1090         data.inst,
   1091         data.self.bin_file.comp.module.?,
   1092         data.self.air,
   1093         data.self.liveness,
   1094     );
   1095 }
   1096 fn fmtAir(self: *Self, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) {
   1097     return .{ .data = .{ .self = self, .inst = inst } };
   1098 }
   1099 
/// Payload of the formatter returned by `fmtWipMir`.
const FormatWipMirData = struct {
    /// Code generator whose in-progress MIR is printed.
    self: *Self,
    /// Index of the MIR instruction to print.
    inst: Mir.Inst.Index,
};
   1104 fn formatWipMir(
   1105     data: FormatWipMirData,
   1106     comptime _: []const u8,
   1107     _: std.fmt.FormatOptions,
   1108     writer: anytype,
   1109 ) @TypeOf(writer).Error!void {
   1110     const comp = data.self.bin_file.comp;
   1111     const mod = comp.root_mod;
   1112     var lower = Lower{
   1113         .bin_file = data.self.bin_file,
   1114         .allocator = data.self.gpa,
   1115         .mir = .{
   1116             .instructions = data.self.mir_instructions.slice(),
   1117             .extra = data.self.mir_extra.items,
   1118             .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(),
   1119         },
   1120         .cc = .Unspecified,
   1121         .src_loc = data.self.src_loc,
   1122         .output_mode = comp.config.output_mode,
   1123         .link_mode = comp.config.link_mode,
   1124         .pic = mod.pic,
   1125     };
   1126     var first = true;
   1127     for ((lower.lowerMir(data.inst) catch |err| switch (err) {
   1128         error.LowerFail => {
   1129             defer {
   1130                 lower.err_msg.?.deinit(data.self.gpa);
   1131                 lower.err_msg = null;
   1132             }
   1133             try writer.writeAll(lower.err_msg.?.msg);
   1134             return;
   1135         },
   1136         error.OutOfMemory, error.InvalidInstruction, error.CannotEncode => |e| {
   1137             try writer.writeAll(switch (e) {
   1138                 error.OutOfMemory => "Out of memory",
   1139                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
   1140                 error.CannotEncode => "CodeGen failed to encode the instruction.",
   1141             });
   1142             return;
   1143         },
   1144         else => |e| return e,
   1145     }).insts) |lowered_inst| {
   1146         if (!first) try writer.writeAll("\ndebug(wip_mir): ");
   1147         try writer.print("  | {}", .{lowered_inst});
   1148         first = false;
   1149     }
   1150 }
   1151 fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) {
   1152     return .{ .data = .{ .self = self, .inst = inst } };
   1153 }
   1154 
/// Payload of the formatter returned by `fmtTracking`.
const FormatTrackingData = struct {
    /// Code generator whose `inst_tracking` entries are printed.
    self: *Self,
};
   1158 fn formatTracking(
   1159     data: FormatTrackingData,
   1160     comptime _: []const u8,
   1161     _: std.fmt.FormatOptions,
   1162     writer: anytype,
   1163 ) @TypeOf(writer).Error!void {
   1164     var it = data.self.inst_tracking.iterator();
   1165     while (it.next()) |entry| try writer.print("\n%{d} = {}", .{ entry.key_ptr.*, entry.value_ptr.* });
   1166 }
   1167 fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) {
   1168     return .{ .data = .{ .self = self } };
   1169 }
   1170 
   1171 fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
   1172     const gpa = self.gpa;
   1173     try self.mir_instructions.ensureUnusedCapacity(gpa, 1);
   1174     const result_index: Mir.Inst.Index = @intCast(self.mir_instructions.len);
   1175     self.mir_instructions.appendAssumeCapacity(inst);
   1176     if (inst.tag != .pseudo or switch (inst.ops) {
   1177         else => true,
   1178         .pseudo_dbg_prologue_end_none,
   1179         .pseudo_dbg_line_line_column,
   1180         .pseudo_dbg_epilogue_begin_none,
   1181         .pseudo_dead_none,
   1182         => false,
   1183     }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)});
   1184     return result_index;
   1185 }
   1186 
   1187 fn addExtra(self: *Self, extra: anytype) Allocator.Error!u32 {
   1188     const fields = std.meta.fields(@TypeOf(extra));
   1189     try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len);
   1190     return self.addExtraAssumeCapacity(extra);
   1191 }
   1192 
   1193 fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
   1194     const fields = std.meta.fields(@TypeOf(extra));
   1195     const result: u32 = @intCast(self.mir_extra.items.len);
   1196     inline for (fields) |field| {
   1197         self.mir_extra.appendAssumeCapacity(switch (field.type) {
   1198             u32 => @field(extra, field.name),
   1199             i32, Mir.Memory.Info => @bitCast(@field(extra, field.name)),
   1200             else => @compileError("bad field type: " ++ field.name ++ ": " ++ @typeName(field.type)),
   1201         });
   1202     }
   1203     return result;
   1204 }
   1205 
   1206 /// A `cc` of `.z_and_np` clobbers `reg2`!
   1207 fn asmCmovccRegisterRegister(self: *Self, cc: Condition, reg1: Register, reg2: Register) !void {
   1208     _ = try self.addInst(.{
   1209         .tag = switch (cc) {
   1210             else => .cmov,
   1211             .z_and_np, .nz_or_p => .pseudo,
   1212         },
   1213         .ops = switch (cc) {
   1214             else => .rr,
   1215             .z_and_np => .pseudo_cmov_z_and_np_rr,
   1216             .nz_or_p => .pseudo_cmov_nz_or_p_rr,
   1217         },
   1218         .data = .{ .rr = .{
   1219             .fixes = switch (cc) {
   1220                 else => Mir.Inst.Fixes.fromCondition(cc),
   1221                 .z_and_np, .nz_or_p => ._,
   1222             },
   1223             .r1 = reg1,
   1224             .r2 = reg2,
   1225         } },
   1226     });
   1227 }
   1228 
   1229 /// A `cc` of `.z_and_np` is not supported by this encoding!
   1230 fn asmCmovccRegisterMemory(self: *Self, cc: Condition, reg: Register, m: Memory) !void {
   1231     _ = try self.addInst(.{
   1232         .tag = switch (cc) {
   1233             else => .cmov,
   1234             .z_and_np => unreachable,
   1235             .nz_or_p => .pseudo,
   1236         },
   1237         .ops = switch (cc) {
   1238             else => .rm,
   1239             .z_and_np => unreachable,
   1240             .nz_or_p => .pseudo_cmov_nz_or_p_rm,
   1241         },
   1242         .data = .{ .rx = .{
   1243             .fixes = switch (cc) {
   1244                 else => Mir.Inst.Fixes.fromCondition(cc),
   1245                 .z_and_np => unreachable,
   1246                 .nz_or_p => ._,
   1247             },
   1248             .r1 = reg,
   1249             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1250         } },
   1251     });
   1252 }
   1253 
   1254 fn asmSetccRegister(self: *Self, cc: Condition, reg: Register) !void {
   1255     _ = try self.addInst(.{
   1256         .tag = switch (cc) {
   1257             else => .set,
   1258             .z_and_np, .nz_or_p => .pseudo,
   1259         },
   1260         .ops = switch (cc) {
   1261             else => .r,
   1262             .z_and_np => .pseudo_set_z_and_np_r,
   1263             .nz_or_p => .pseudo_set_nz_or_p_r,
   1264         },
   1265         .data = switch (cc) {
   1266             else => .{ .r = .{
   1267                 .fixes = Mir.Inst.Fixes.fromCondition(cc),
   1268                 .r1 = reg,
   1269             } },
   1270             .z_and_np, .nz_or_p => .{ .rr = .{
   1271                 .r1 = reg,
   1272                 .r2 = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to8(),
   1273             } },
   1274         },
   1275     });
   1276 }
   1277 
   1278 fn asmSetccMemory(self: *Self, cc: Condition, m: Memory) !void {
   1279     const payload = try self.addExtra(Mir.Memory.encode(m));
   1280     _ = try self.addInst(.{
   1281         .tag = switch (cc) {
   1282             else => .set,
   1283             .z_and_np, .nz_or_p => .pseudo,
   1284         },
   1285         .ops = switch (cc) {
   1286             else => .m,
   1287             .z_and_np => .pseudo_set_z_and_np_m,
   1288             .nz_or_p => .pseudo_set_nz_or_p_m,
   1289         },
   1290         .data = switch (cc) {
   1291             else => .{ .x = .{
   1292                 .fixes = Mir.Inst.Fixes.fromCondition(cc),
   1293                 .payload = payload,
   1294             } },
   1295             .z_and_np, .nz_or_p => .{ .rx = .{
   1296                 .r1 = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to8(),
   1297                 .payload = payload,
   1298             } },
   1299         },
   1300     });
   1301 }
   1302 
   1303 fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index {
   1304     return self.addInst(.{
   1305         .tag = .jmp,
   1306         .ops = .inst,
   1307         .data = .{ .inst = .{
   1308             .inst = target,
   1309         } },
   1310     });
   1311 }
   1312 
   1313 fn asmJccReloc(self: *Self, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Index {
   1314     return self.addInst(.{
   1315         .tag = switch (cc) {
   1316             else => .j,
   1317             .z_and_np, .nz_or_p => .pseudo,
   1318         },
   1319         .ops = switch (cc) {
   1320             else => .inst,
   1321             .z_and_np => .pseudo_j_z_and_np_inst,
   1322             .nz_or_p => .pseudo_j_nz_or_p_inst,
   1323         },
   1324         .data = .{ .inst = .{
   1325             .fixes = switch (cc) {
   1326                 else => Mir.Inst.Fixes.fromCondition(cc),
   1327                 .z_and_np, .nz_or_p => ._,
   1328             },
   1329             .inst = target,
   1330         } },
   1331     });
   1332 }
   1333 
   1334 fn asmReloc(self: *Self, tag: Mir.Inst.FixedTag, target: Mir.Inst.Index) !void {
   1335     _ = try self.addInst(.{
   1336         .tag = tag[1],
   1337         .ops = .inst,
   1338         .data = .{ .inst = .{
   1339             .fixes = tag[0],
   1340             .inst = target,
   1341         } },
   1342     });
   1343 }
   1344 
   1345 fn asmPlaceholder(self: *Self) !Mir.Inst.Index {
   1346     return self.addInst(.{
   1347         .tag = .pseudo,
   1348         .ops = .pseudo_dead_none,
   1349         .data = undefined,
   1350     });
   1351 }
   1352 
   1353 fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void {
   1354     _ = try self.addInst(.{
   1355         .tag = tag[1],
   1356         .ops = .none,
   1357         .data = .{ .none = .{
   1358             .fixes = tag[0],
   1359         } },
   1360     });
   1361 }
   1362 
   1363 fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void {
   1364     _ = try self.addInst(.{
   1365         .tag = .pseudo,
   1366         .ops = ops,
   1367         .data = undefined,
   1368     });
   1369 }
   1370 
   1371 fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void {
   1372     _ = try self.addInst(.{
   1373         .tag = tag[1],
   1374         .ops = .r,
   1375         .data = .{ .r = .{
   1376             .fixes = tag[0],
   1377             .r1 = reg,
   1378         } },
   1379     });
   1380 }
   1381 
   1382 fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void {
   1383     _ = try self.addInst(.{
   1384         .tag = tag[1],
   1385         .ops = switch (imm) {
   1386             .signed => .i_s,
   1387             .unsigned => .i_u,
   1388         },
   1389         .data = .{ .i = .{
   1390             .fixes = tag[0],
   1391             .i = switch (imm) {
   1392                 .signed => |s| @bitCast(s),
   1393                 .unsigned => |u| @intCast(u),
   1394             },
   1395         } },
   1396     });
   1397 }
   1398 
   1399 fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void {
   1400     _ = try self.addInst(.{
   1401         .tag = tag[1],
   1402         .ops = .rr,
   1403         .data = .{ .rr = .{
   1404             .fixes = tag[0],
   1405             .r1 = reg1,
   1406             .r2 = reg2,
   1407         } },
   1408     });
   1409 }
   1410 
   1411 fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void {
   1412     const ops: Mir.Inst.Ops = switch (imm) {
   1413         .signed => .ri_s,
   1414         .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64,
   1415     };
   1416     _ = try self.addInst(.{
   1417         .tag = tag[1],
   1418         .ops = ops,
   1419         .data = switch (ops) {
   1420             .ri_s, .ri_u => .{ .ri = .{
   1421                 .fixes = tag[0],
   1422                 .r1 = reg,
   1423                 .i = switch (imm) {
   1424                     .signed => |s| @bitCast(s),
   1425                     .unsigned => |u| @intCast(u),
   1426                 },
   1427             } },
   1428             .ri64 => .{ .rx = .{
   1429                 .fixes = tag[0],
   1430                 .r1 = reg,
   1431                 .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)),
   1432             } },
   1433             else => unreachable,
   1434         },
   1435     });
   1436 }
   1437 
   1438 fn asmRegisterRegisterRegister(
   1439     self: *Self,
   1440     tag: Mir.Inst.FixedTag,
   1441     reg1: Register,
   1442     reg2: Register,
   1443     reg3: Register,
   1444 ) !void {
   1445     _ = try self.addInst(.{
   1446         .tag = tag[1],
   1447         .ops = .rrr,
   1448         .data = .{ .rrr = .{
   1449             .fixes = tag[0],
   1450             .r1 = reg1,
   1451             .r2 = reg2,
   1452             .r3 = reg3,
   1453         } },
   1454     });
   1455 }
   1456 
   1457 fn asmRegisterRegisterRegisterRegister(
   1458     self: *Self,
   1459     tag: Mir.Inst.FixedTag,
   1460     reg1: Register,
   1461     reg2: Register,
   1462     reg3: Register,
   1463     reg4: Register,
   1464 ) !void {
   1465     _ = try self.addInst(.{
   1466         .tag = tag[1],
   1467         .ops = .rrrr,
   1468         .data = .{ .rrrr = .{
   1469             .fixes = tag[0],
   1470             .r1 = reg1,
   1471             .r2 = reg2,
   1472             .r3 = reg3,
   1473             .r4 = reg4,
   1474         } },
   1475     });
   1476 }
   1477 
   1478 fn asmRegisterRegisterRegisterImmediate(
   1479     self: *Self,
   1480     tag: Mir.Inst.FixedTag,
   1481     reg1: Register,
   1482     reg2: Register,
   1483     reg3: Register,
   1484     imm: Immediate,
   1485 ) !void {
   1486     _ = try self.addInst(.{
   1487         .tag = tag[1],
   1488         .ops = .rrri,
   1489         .data = .{ .rrri = .{
   1490             .fixes = tag[0],
   1491             .r1 = reg1,
   1492             .r2 = reg2,
   1493             .r3 = reg3,
   1494             .i = switch (imm) {
   1495                 .signed => |s| @bitCast(@as(i8, @intCast(s))),
   1496                 .unsigned => |u| @intCast(u),
   1497             },
   1498         } },
   1499     });
   1500 }
   1501 
   1502 fn asmRegisterRegisterImmediate(
   1503     self: *Self,
   1504     tag: Mir.Inst.FixedTag,
   1505     reg1: Register,
   1506     reg2: Register,
   1507     imm: Immediate,
   1508 ) !void {
   1509     _ = try self.addInst(.{
   1510         .tag = tag[1],
   1511         .ops = switch (imm) {
   1512             .signed => .rri_s,
   1513             .unsigned => .rri_u,
   1514         },
   1515         .data = .{ .rri = .{
   1516             .fixes = tag[0],
   1517             .r1 = reg1,
   1518             .r2 = reg2,
   1519             .i = switch (imm) {
   1520                 .signed => |s| @bitCast(s),
   1521                 .unsigned => |u| @intCast(u),
   1522             },
   1523         } },
   1524     });
   1525 }
   1526 
   1527 fn asmRegisterRegisterMemory(
   1528     self: *Self,
   1529     tag: Mir.Inst.FixedTag,
   1530     reg1: Register,
   1531     reg2: Register,
   1532     m: Memory,
   1533 ) !void {
   1534     _ = try self.addInst(.{
   1535         .tag = tag[1],
   1536         .ops = .rrm,
   1537         .data = .{ .rrx = .{
   1538             .fixes = tag[0],
   1539             .r1 = reg1,
   1540             .r2 = reg2,
   1541             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1542         } },
   1543     });
   1544 }
   1545 
   1546 fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void {
   1547     _ = try self.addInst(.{
   1548         .tag = tag[1],
   1549         .ops = .m,
   1550         .data = .{ .x = .{
   1551             .fixes = tag[0],
   1552             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1553         } },
   1554     });
   1555 }
   1556 
   1557 fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void {
            // Appends one MIR instruction with operands (register, memory), using
            // ops `.rm`. `tag[0]` becomes `fixes`, `tag[1]` the instruction tag.
   1558     _ = try self.addInst(.{
   1559         .tag = tag[1],
   1560         .ops = .rm,
   1561         .data = .{ .rx = .{
   1562             .fixes = tag[0],
   1563             .r1 = reg,
                    // Index returned by `addExtra` for the encoded memory operand.
   1564             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1565         } },
   1566     });
   1567 }
   1568 
   1569 fn asmRegisterMemoryImmediate(
   1570     self: *Self,
   1571     tag: Mir.Inst.FixedTag,
   1572     reg: Register,
   1573     m: Memory,
   1574     imm: Immediate,
   1575 ) !void {
            // Appends one MIR instruction with operands (register, memory, immediate).
            // Two encodings are used depending on the size of the immediate:
            //   * it fits in 16 bits: ops `.rmi`, immediate stored inline in `.rix.i`;
            //   * otherwise: ops `.rmi_s`/`.rmi_u`, immediate stored as a `Mir.Imm32`
            //     extra entry directly followed by the encoded memory operand.
            // A signed value qualifies for the small form when it fits in `i16` (it is
            // then bit-cast to `u16`); an unsigned value when it fits in `u16`.
   1576     if (switch (imm) {
   1577         .signed => |s| if (math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null,
   1578         .unsigned => |u| math.cast(u16, u),
   1579     }) |small_imm| {
   1580         _ = try self.addInst(.{
   1581             .tag = tag[1],
   1582             .ops = .rmi,
   1583             .data = .{ .rix = .{
   1584                 .fixes = tag[0],
   1585                 .r1 = reg,
   1586                 .i = small_imm,
   1587                 .payload = try self.addExtra(Mir.Memory.encode(m)),
   1588             } },
   1589         });
   1590     } else {
                // Wide form. Only signed immediates are accepted here: an unsigned
                // immediate that does not fit in `u16` reaches `unreachable`.
                // NOTE(review): this makes the `.rmi_u` arm below dead on this path;
                // confirm whether wide unsigned immediates are meant to be supported.
   1591         const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
   1592             .signed => |s| @bitCast(s),
   1593             .unsigned => unreachable,
   1594         } });
                // The memory operand must be placed at the extra index right after the
                // immediate, because the instruction stores only `payload`.
   1595         assert(payload + 1 == try self.addExtra(Mir.Memory.encode(m)));
   1596         _ = try self.addInst(.{
   1597             .tag = tag[1],
   1598             .ops = switch (imm) {
   1599                 .signed => .rmi_s,
   1600                 .unsigned => .rmi_u,
   1601             },
   1602             .data = .{ .rx = .{
   1603                 .fixes = tag[0],
   1604                 .r1 = reg,
   1605                 .payload = payload,
   1606             } },
   1607         });
   1608     }
   1609 }
   1610 
   1611 fn asmRegisterRegisterMemoryImmediate(
   1612     self: *Self,
   1613     tag: Mir.Inst.FixedTag,
   1614     reg1: Register,
   1615     reg2: Register,
   1616     m: Memory,
   1617     imm: Immediate,
   1618 ) !void {
            // Appends one MIR instruction with operands (register, register, memory,
            // immediate), using ops `.rrmi`. `tag[0]` becomes `fixes`, `tag[1]` the
            // instruction tag.
   1619     _ = try self.addInst(.{
   1620         .tag = tag[1],
   1621         .ops = .rrmi,
   1622         .data = .{ .rrix = .{
   1623             .fixes = tag[0],
   1624             .r1 = reg1,
   1625             .r2 = reg2,
                    // Reads the `unsigned` field directly, so `imm` must be an unsigned
                    // immediate, and its value must fit the `i` field (`@intCast`).
   1626             .i = @intCast(imm.unsigned),
   1627             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1628         } },
   1629     });
   1630 }
   1631 
   1632 fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void {
            // Appends one MIR instruction with operands (memory, register), using
            // ops `.mr`. `tag[0]` becomes `fixes`, `tag[1]` the instruction tag.
   1633     _ = try self.addInst(.{
   1634         .tag = tag[1],
   1635         .ops = .mr,
   1636         .data = .{ .rx = .{
   1637             .fixes = tag[0],
   1638             .r1 = reg,
                    // Index returned by `addExtra` for the encoded memory operand.
   1639             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1640         } },
   1641     });
   1642 }
   1643 
   1644 fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void {
            // Appends one MIR instruction with operands (memory, immediate).
            // The immediate is always stored as a `Mir.Imm32` extra entry: a signed
            // value as its raw bit pattern, an unsigned value narrowed with `@intCast`
            // (so it must fit the `imm` field).
   1645     const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
   1646         .signed => |s| @bitCast(s),
   1647         .unsigned => |u| @intCast(u),
   1648     } });
            // The encoded memory operand must land at the extra index right after the
            // immediate, because the instruction stores only `payload`.
   1649     assert(payload + 1 == try self.addExtra(Mir.Memory.encode(m)));
   1650     _ = try self.addInst(.{
   1651         .tag = tag[1],
                // The ops variant records the signedness of the immediate.
   1652         .ops = switch (imm) {
   1653             .signed => .mi_s,
   1654             .unsigned => .mi_u,
   1655         },
   1656         .data = .{ .x = .{
   1657             .fixes = tag[0],
   1658             .payload = payload,
   1659         } },
   1660     });
   1661 }
   1662 
   1663 fn asmMemoryRegisterRegister(
   1664     self: *Self,
   1665     tag: Mir.Inst.FixedTag,
   1666     m: Memory,
   1667     reg1: Register,
   1668     reg2: Register,
   1669 ) !void {
            // Appends one MIR instruction with operands (memory, register, register),
            // using ops `.mrr`. `tag[0]` becomes `fixes`, `tag[1]` the instruction tag.
   1670     _ = try self.addInst(.{
   1671         .tag = tag[1],
   1672         .ops = .mrr,
   1673         .data = .{ .rrx = .{
   1674             .fixes = tag[0],
   1675             .r1 = reg1,
   1676             .r2 = reg2,
                    // Index returned by `addExtra` for the encoded memory operand.
   1677             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1678         } },
   1679     });
   1680 }
   1681 
   1682 fn asmMemoryRegisterImmediate(
   1683     self: *Self,
   1684     tag: Mir.Inst.FixedTag,
   1685     m: Memory,
   1686     reg: Register,
   1687     imm: Immediate,
   1688 ) !void {
            // Appends one MIR instruction with operands (memory, register, immediate),
            // using ops `.mri`. `tag[0]` becomes `fixes`, `tag[1]` the instruction tag.
   1689     _ = try self.addInst(.{
   1690         .tag = tag[1],
   1691         .ops = .mri,
   1692         .data = .{ .rix = .{
   1693             .fixes = tag[0],
   1694             .r1 = reg,
                    // Reads the `unsigned` field directly, so `imm` must be an unsigned
                    // immediate, and its value must fit the `i` field (`@intCast`).
   1695             .i = @intCast(imm.unsigned),
   1696             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1697         } },
   1698     });
   1699 }
   1700 
   1701 fn gen(self: *Self) InnerError!void {
            // Emits the MIR for the whole function: prologue, lowered AIR body,
            // epilogue, and a trailing debug line/column record.
            //
            // For every calling convention except `.Naked`, the parts of the prologue
            // and epilogue that depend on the final frame layout are first emitted as
            // placeholders. They are overwritten with `mir_instructions.set` after the
            // body has been generated and `computeFrameLayout` has run.
   1702     const mod = self.bin_file.comp.module.?;
   1703     const fn_info = mod.typeToFunc(self.fn_type).?;
   1704     const cc = abi.resolveCallingConvention(fn_info.cc, self.target.*);
   1705     if (cc != .Naked) {
   1706         try self.asmRegister(.{ ._, .push }, .rbp);
   1707         try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
                // Reserve instruction slots to be backpatched below. A slot whose
                // condition does not hold stays as `asmPlaceholder` emitted it.
   1708         const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
   1709         const backpatch_frame_align = try self.asmPlaceholder();
   1710         const backpatch_frame_align_extra = try self.asmPlaceholder();
   1711         const backpatch_stack_alloc = try self.asmPlaceholder();
   1712         const backpatch_stack_alloc_extra = try self.asmPlaceholder();
   1713 
   1714         switch (self.ret_mcv.long) {
   1715             .none, .unreach => {},
   1716             .indirect => {
   1717                 // The address where to store the return value for the caller is in a
   1718                 // register which the callee is free to clobber. Therefore, we purposely
   1719                 // spill it to stack immediately.
   1720                 const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(Type.usize, mod));
   1721                 try self.genSetMem(
   1722                     .{ .frame = frame_index },
   1723                     0,
   1724                     Type.usize,
   1725                     self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off),
   1726                 );
                        // From here on the long-lived return location is the spill slot.
   1727                 self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } };
   1728                 tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index });
   1729             },
   1730             else => unreachable,
   1731         }
   1732 
   1733         if (fn_info.is_var_args) switch (cc) {
   1734             .SysV => {
                        // Allocate a 16-byte aligned register save area with 8 bytes per
                        // integer parameter register and 16 bytes per SSE parameter register.
   1735                 const info = &self.va_info.sysv;
   1736                 const reg_save_area_fi = try self.allocFrameIndex(FrameAlloc.init(.{
   1737                     .size = abi.SysV.c_abi_int_param_regs.len * 8 +
   1738                         abi.SysV.c_abi_sse_param_regs.len * 16,
   1739                     .alignment = .@"16",
   1740                 }));
   1741                 info.reg_save_area = .{ .index = reg_save_area_fi };
   1742 
                        // Store the integer parameter registers from index `info.gp_count`
                        // onward, each at offset `reg_i * 8`.
   1743                 for (abi.SysV.c_abi_int_param_regs[info.gp_count..], info.gp_count..) |reg, reg_i|
   1744                     try self.genSetMem(
   1745                         .{ .frame = reg_save_area_fi },
   1746                         @intCast(reg_i * 8),
   1747                         Type.usize,
   1748                         .{ .register = reg },
   1749                     );
   1750 
                        // Compare `al` with `info.fp_count` and jump over the SSE stores
                        // when the `.na` condition holds; the jump target is patched by
                        // `performReloc` below.
                        // NOTE(review): presumably relies on the SysV varargs convention
                        // that `al` carries the number of vector registers used by the
                        // caller -- confirm against the ABI document.
   1751                 try self.asmRegisterImmediate(.{ ._, .cmp }, .al, Immediate.u(info.fp_count));
   1752                 const skip_sse_reloc = try self.asmJccReloc(.na, undefined);
   1753 
                        // Store the SSE parameter registers from index `info.fp_count`
                        // onward, each as a 2 x f64 vector placed after the integer area.
   1754                 const vec_2_f64 = try mod.vectorType(.{ .len = 2, .child = .f64_type });
   1755                 for (abi.SysV.c_abi_sse_param_regs[info.fp_count..], info.fp_count..) |reg, reg_i|
   1756                     try self.genSetMem(
   1757                         .{ .frame = reg_save_area_fi },
   1758                         @intCast(abi.SysV.c_abi_int_param_regs.len * 8 + reg_i * 16),
   1759                         vec_2_f64,
   1760                         .{ .register = reg },
   1761                     );
   1762 
   1763                 try self.performReloc(skip_sse_reloc);
   1764             },
   1765             .Win64 => return self.fail("TODO implement gen var arg function for Win64", .{}),
   1766             else => unreachable,
   1767         };
   1768 
   1769         try self.asmPseudo(.pseudo_dbg_prologue_end_none);
   1770 
   1771         try self.genBody(self.air.getMainBody());
   1772 
   1773         // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
   1774         // Example:
   1775         // pub fn main() void {
   1776         //     maybeErr() catch return;
   1777         //     unreachable;
   1778         // }
   1779         // Eliding the reloc will cause a miscompilation in this case.
                // Point every recorded exitlude jump at the next instruction to be
                // emitted, which is the first instruction of the epilogue.
   1780         for (self.exitlude_jump_relocs.items) |jmp_reloc| {
   1781             self.mir_instructions.items(.data)[jmp_reloc].inst.inst =
   1782                 @intCast(self.mir_instructions.len);
   1783         }
   1784 
   1785         try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
   1786         const backpatch_stack_dealloc = try self.asmPlaceholder();
   1787         const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
   1788         try self.asmRegister(.{ ._, .pop }, .rbp);
   1789         try self.asmOpOnly(.{ ._, .ret });
   1790 
                // The frame layout is only computed now that the body has been generated;
                // use it to fill in the placeholders reserved above.
   1791         const frame_layout = try self.computeFrameLayout(cc);
                // A stack mask of all ones means no alignment instruction is required.
   1792         const need_frame_align = frame_layout.stack_mask != math.maxInt(u32);
   1793         const need_stack_adjust = frame_layout.stack_adjust > 0;
   1794         const need_save_reg = frame_layout.save_reg_list.count() > 0;
   1795         if (need_frame_align) {
                    // `page_align` is a mask with the low 12 bits cleared.
   1796             const page_align = @as(u32, math.maxInt(u32)) << 12;
                    // `and rsp, max(stack_mask, page_align)`: the plain `and` never clears
                    // more than the low 12 bits.
   1797             self.mir_instructions.set(backpatch_frame_align, .{
   1798                 .tag = .@"and",
   1799                 .ops = .ri_s,
   1800                 .data = .{ .ri = .{
   1801                     .r1 = .rsp,
   1802                     .i = @max(frame_layout.stack_mask, page_align),
   1803                 } },
   1804             });
                    // When the mask clears bits above the low 12, those remaining bits are
                    // handed to a separate probing alignment pseudo instruction.
   1805             if (frame_layout.stack_mask < page_align) {
   1806                 self.mir_instructions.set(backpatch_frame_align_extra, .{
   1807                     .tag = .pseudo,
   1808                     .ops = .pseudo_probe_align_ri_s,
   1809                     .data = .{ .ri = .{
   1810                         .r1 = .rsp,
   1811                         .i = ~frame_layout.stack_mask & page_align,
   1812                     } },
   1813                 });
   1814             }
   1815         }
   1816         if (need_stack_adjust) {
                    // Three strategies, chosen by the size of the adjustment:
                    //   * up to `page_size` bytes: a single `sub rsp, imm`;
                    //   * below `page_size * pseudo_probe_adjust_unrolled_max_insts`:
                    //     the unrolled probing pseudo instruction;
                    //   * otherwise: a setup + loop pair of probing pseudo instructions
                    //     that also uses `rax`.
   1817             const page_size: u32 = 1 << 12;
   1818             if (frame_layout.stack_adjust <= page_size) {
   1819                 self.mir_instructions.set(backpatch_stack_alloc, .{
   1820                     .tag = .sub,
   1821                     .ops = .ri_s,
   1822                     .data = .{ .ri = .{
   1823                         .r1 = .rsp,
   1824                         .i = frame_layout.stack_adjust,
   1825                     } },
   1826                 });
   1827             } else if (frame_layout.stack_adjust <
   1828                 page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
   1829             {
   1830                 self.mir_instructions.set(backpatch_stack_alloc, .{
   1831                     .tag = .pseudo,
   1832                     .ops = .pseudo_probe_adjust_unrolled_ri_s,
   1833                     .data = .{ .ri = .{
   1834                         .r1 = .rsp,
   1835                         .i = frame_layout.stack_adjust,
   1836                     } },
   1837                 });
   1838             } else {
   1839                 self.mir_instructions.set(backpatch_stack_alloc, .{
   1840                     .tag = .pseudo,
   1841                     .ops = .pseudo_probe_adjust_setup_rri_s,
   1842                     .data = .{ .rri = .{
   1843                         .r1 = .rsp,
   1844                         .r2 = .rax,
   1845                         .i = frame_layout.stack_adjust,
   1846                     } },
   1847                 });
   1848                 self.mir_instructions.set(backpatch_stack_alloc_extra, .{
   1849                     .tag = .pseudo,
   1850                     .ops = .pseudo_probe_adjust_loop_rr,
   1851                     .data = .{ .rr = .{
   1852                         .r1 = .rsp,
   1853                         .r2 = .rax,
   1854                     } },
   1855                 });
   1856             }
   1857         }
   1858         if (need_frame_align or need_stack_adjust) {
                    // Epilogue: `lea rsp, [rbp - save_reg_list.size()]`, i.e. `rsp` is
                    // recomputed from `rbp` instead of undoing the adjustments one by one.
   1859             self.mir_instructions.set(backpatch_stack_dealloc, .{
   1860                 .tag = .lea,
   1861                 .ops = .rm,
   1862                 .data = .{ .rx = .{
   1863                     .r1 = .rsp,
   1864                     .payload = try self.addExtra(Mir.Memory.encode(.{
   1865                         .base = .{ .reg = .rbp },
   1866                         .mod = .{ .rm = .{
   1867                             .size = .qword,
   1868                             .disp = -frame_layout.save_reg_list.size(),
   1869                         } },
   1870                     })),
   1871                 } },
   1872             });
   1873         }
   1874         if (need_save_reg) {
                    // The same register list is pushed in the prologue and popped in
                    // the epilogue.
   1875             self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
   1876                 .tag = .pseudo,
   1877                 .ops = .pseudo_push_reg_list,
   1878                 .data = .{ .reg_list = frame_layout.save_reg_list },
   1879             });
   1880             self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{
   1881                 .tag = .pseudo,
   1882                 .ops = .pseudo_pop_reg_list,
   1883                 .data = .{ .reg_list = frame_layout.save_reg_list },
   1884             });
   1885         }
   1886     } else {
                // Naked functions get no prologue or epilogue instructions, only the
                // debug markers around the lowered body.
   1887         try self.asmPseudo(.pseudo_dbg_prologue_end_none);
   1888         try self.genBody(self.air.getMainBody());
   1889         try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
   1890     }
   1891 
   1892     // Drop them off at the rbrace.
   1893     _ = try self.addInst(.{
   1894         .tag = .pseudo,
   1895         .ops = .pseudo_dbg_line_line_column,
   1896         .data = .{ .line_column = .{
   1897             .line = self.end_di_line,
   1898             .column = self.end_di_column,
   1899         } },
   1900     });
   1901 }
   1902 
   1903 fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            // Lowers each AIR instruction in `body`, in order, by dispatching on its tag
            // to the matching `air*` handler. After every instruction it asserts that no
            // registers are left locked and, when runtime safety is enabled, checks the
            // bookkeeping counter and the register tracking state.
   1904     const mod = self.bin_file.comp.module.?;
   1905     const ip = &mod.intern_pool;
   1906     const air_tags = self.air.instructions.items(.tag);
   1907 
   1908     for (body) |inst| {
                // Debug builds only: remember which AIR instruction the next MIR
                // instruction index belongs to.
   1909         if (builtin.mode == .Debug) {
   1910             const mir_inst: Mir.Inst.Index = @intCast(self.mir_instructions.len);
   1911             try self.mir_to_air_map.put(self.gpa, mir_inst, inst);
   1912         }
   1913 
                // Skip instructions that liveness reports as unused, unless the AIR says
                // they must be lowered anyway.
   1914         if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue;
   1915         wip_mir_log.debug("{}", .{self.fmtAir(inst)});
   1916         verbose_tracking_log.debug("{}", .{self.fmtTracking()});
   1917 
                // Snapshot the bookkeeping counter so the check after the switch can
                // tell whether the handler advanced it.
   1918         const old_air_bookkeeping = self.air_bookkeeping;
   1919         try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1);
   1920         switch (air_tags[@intFromEnum(inst)]) {
   1921             // zig fmt: off
   1922             .not,
   1923             => |tag| try self.airUnOp(inst, tag),
   1924 
   1925             .add,
   1926             .add_wrap,
   1927             .sub,
   1928             .sub_wrap,
   1929             .bool_and,
   1930             .bool_or,
   1931             .bit_and,
   1932             .bit_or,
   1933             .xor,
   1934             .min,
   1935             .max,
   1936             => |tag| try self.airBinOp(inst, tag),
   1937 
   1938             .ptr_add, .ptr_sub => |tag| try self.airPtrArithmetic(inst, tag),
   1939 
   1940             .shr, .shr_exact => try self.airShlShrBinOp(inst),
   1941             .shl, .shl_exact => try self.airShlShrBinOp(inst),
   1942 
   1943             .mul             => try self.airMulDivBinOp(inst),
   1944             .mul_wrap        => try self.airMulDivBinOp(inst),
   1945             .rem             => try self.airMulDivBinOp(inst),
   1946             .mod             => try self.airMulDivBinOp(inst),
   1947 
   1948             .add_sat         => try self.airAddSat(inst),
   1949             .sub_sat         => try self.airSubSat(inst),
   1950             .mul_sat         => try self.airMulSat(inst),
   1951             .shl_sat         => try self.airShlSat(inst),
   1952             .slice           => try self.airSlice(inst),
   1953 
   1954             .sin,
   1955             .cos,
   1956             .tan,
   1957             .exp,
   1958             .exp2,
   1959             .log,
   1960             .log2,
   1961             .log10,
   1962             .round,
   1963             => |tag| try self.airUnaryMath(inst, tag),
   1964 
   1965             .floor       => try self.airRound(inst, .{ .mode = .down, .precision = .inexact }),
   1966             .ceil        => try self.airRound(inst, .{ .mode = .up, .precision = .inexact }),
   1967             .trunc_float => try self.airRound(inst, .{ .mode = .zero, .precision = .inexact }),
   1968             .sqrt        => try self.airSqrt(inst),
   1969             .neg         => try self.airFloatSign(inst),
   1970 
   1971             .abs => try self.airAbs(inst),
   1972 
   1973             .add_with_overflow => try self.airAddSubWithOverflow(inst),
   1974             .sub_with_overflow => try self.airAddSubWithOverflow(inst),
   1975             .mul_with_overflow => try self.airMulWithOverflow(inst),
   1976             .shl_with_overflow => try self.airShlWithOverflow(inst),
   1977 
   1978             .div_float, .div_trunc, .div_floor, .div_exact => try self.airMulDivBinOp(inst),
   1979 
   1980             .cmp_lt  => try self.airCmp(inst, .lt),
   1981             .cmp_lte => try self.airCmp(inst, .lte),
   1982             .cmp_eq  => try self.airCmp(inst, .eq),
   1983             .cmp_gte => try self.airCmp(inst, .gte),
   1984             .cmp_gt  => try self.airCmp(inst, .gt),
   1985             .cmp_neq => try self.airCmp(inst, .neq),
   1986 
   1987             .cmp_vector => try self.airCmpVector(inst),
   1988             .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst),
   1989 
   1990             .alloc           => try self.airAlloc(inst),
   1991             .ret_ptr         => try self.airRetPtr(inst),
   1992             .arg             => try self.airArg(inst),
   1993             .assembly        => try self.airAsm(inst),
   1994             .bitcast         => try self.airBitCast(inst),
   1995             .block           => try self.airBlock(inst),
   1996             .br              => try self.airBr(inst),
   1997             .trap            => try self.airTrap(),
   1998             .breakpoint      => try self.airBreakpoint(),
   1999             .ret_addr        => try self.airRetAddr(inst),
   2000             .frame_addr      => try self.airFrameAddress(inst),
   2001             .fence           => try self.airFence(inst),
   2002             .cond_br         => try self.airCondBr(inst),
   2003             .dbg_stmt        => try self.airDbgStmt(inst),
   2004             .fptrunc         => try self.airFptrunc(inst),
   2005             .fpext           => try self.airFpext(inst),
   2006             .intcast         => try self.airIntCast(inst),
   2007             .trunc           => try self.airTrunc(inst),
   2008             .int_from_bool     => try self.airIntFromBool(inst),
   2009             .is_non_null     => try self.airIsNonNull(inst),
   2010             .is_non_null_ptr => try self.airIsNonNullPtr(inst),
   2011             .is_null         => try self.airIsNull(inst),
   2012             .is_null_ptr     => try self.airIsNullPtr(inst),
   2013             .is_non_err      => try self.airIsNonErr(inst),
   2014             .is_non_err_ptr  => try self.airIsNonErrPtr(inst),
   2015             .is_err          => try self.airIsErr(inst),
   2016             .is_err_ptr      => try self.airIsErrPtr(inst),
   2017             .load            => try self.airLoad(inst),
   2018             .loop            => try self.airLoop(inst),
   2019             .int_from_ptr        => try self.airIntFromPtr(inst),
   2020             .ret             => try self.airRet(inst),
   2021             .ret_load        => try self.airRetLoad(inst),
   2022             .store           => try self.airStore(inst, false),
   2023             .store_safe      => try self.airStore(inst, true),
   2024             .struct_field_ptr=> try self.airStructFieldPtr(inst),
   2025             .struct_field_val=> try self.airStructFieldVal(inst),
   2026             .array_to_slice  => try self.airArrayToSlice(inst),
   2027             .float_from_int    => try self.airFloatFromInt(inst),
   2028             .int_from_float    => try self.airIntFromFloat(inst),
   2029             .cmpxchg_strong  => try self.airCmpxchg(inst),
   2030             .cmpxchg_weak    => try self.airCmpxchg(inst),
   2031             .atomic_rmw      => try self.airAtomicRmw(inst),
   2032             .atomic_load     => try self.airAtomicLoad(inst),
   2033             .memcpy          => try self.airMemcpy(inst),
   2034             .memset          => try self.airMemset(inst, false),
   2035             .memset_safe     => try self.airMemset(inst, true),
   2036             .set_union_tag   => try self.airSetUnionTag(inst),
   2037             .get_union_tag   => try self.airGetUnionTag(inst),
   2038             .clz             => try self.airClz(inst),
   2039             .ctz             => try self.airCtz(inst),
   2040             .popcount        => try self.airPopCount(inst),
   2041             .byte_swap       => try self.airByteSwap(inst),
   2042             .bit_reverse     => try self.airBitReverse(inst),
   2043             .tag_name        => try self.airTagName(inst),
   2044             .error_name      => try self.airErrorName(inst),
   2045             .splat           => try self.airSplat(inst),
   2046             .select          => try self.airSelect(inst),
   2047             .shuffle         => try self.airShuffle(inst),
   2048             .reduce          => try self.airReduce(inst),
   2049             .aggregate_init  => try self.airAggregateInit(inst),
   2050             .union_init      => try self.airUnionInit(inst),
   2051             .prefetch        => try self.airPrefetch(inst),
   2052             .mul_add         => try self.airMulAdd(inst),
   2053             .addrspace_cast  => return self.fail("TODO implement addrspace_cast", .{}),
   2054 
   2055             .@"try"          => try self.airTry(inst),
   2056             .try_ptr         => try self.airTryPtr(inst),
   2057 
   2058             .dbg_var_ptr,
   2059             .dbg_var_val,
   2060             => try self.airDbgVar(inst),
   2061 
   2062             .dbg_inline_begin,
   2063             .dbg_inline_end,
   2064             => try self.airDbgInline(inst),
   2065 
   2066             .dbg_block_begin,
   2067             .dbg_block_end,
   2068             => try self.airDbgBlock(inst),
   2069 
   2070             .call              => try self.airCall(inst, .auto),
   2071             .call_always_tail  => try self.airCall(inst, .always_tail),
   2072             .call_never_tail   => try self.airCall(inst, .never_tail),
   2073             .call_never_inline => try self.airCall(inst, .never_inline),
   2074 
   2075             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
   2076             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
   2077             .atomic_store_release   => try self.airAtomicStore(inst, .Release),
   2078             .atomic_store_seq_cst   => try self.airAtomicStore(inst, .SeqCst),
   2079 
   2080             .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0),
   2081             .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1),
   2082             .struct_field_ptr_index_2 => try self.airStructFieldPtrIndex(inst, 2),
   2083             .struct_field_ptr_index_3 => try self.airStructFieldPtrIndex(inst, 3),
   2084 
   2085             .field_parent_ptr => try self.airFieldParentPtr(inst),
   2086 
   2087             .switch_br       => try self.airSwitchBr(inst),
   2088             .slice_ptr       => try self.airSlicePtr(inst),
   2089             .slice_len       => try self.airSliceLen(inst),
   2090 
   2091             .ptr_slice_len_ptr => try self.airPtrSliceLenPtr(inst),
   2092             .ptr_slice_ptr_ptr => try self.airPtrSlicePtrPtr(inst),
   2093 
   2094             .array_elem_val      => try self.airArrayElemVal(inst),
   2095             .slice_elem_val      => try self.airSliceElemVal(inst),
   2096             .slice_elem_ptr      => try self.airSliceElemPtr(inst),
   2097             .ptr_elem_val        => try self.airPtrElemVal(inst),
   2098             .ptr_elem_ptr        => try self.airPtrElemPtr(inst),
   2099 
   2100             .inferred_alloc, .inferred_alloc_comptime => unreachable,
   2101             .unreach  => self.finishAirBookkeeping(),
   2102 
   2103             .optional_payload           => try self.airOptionalPayload(inst),
   2104             .optional_payload_ptr       => try self.airOptionalPayloadPtr(inst),
   2105             .optional_payload_ptr_set   => try self.airOptionalPayloadPtrSet(inst),
   2106             .unwrap_errunion_err        => try self.airUnwrapErrUnionErr(inst),
   2107             .unwrap_errunion_payload    => try self.airUnwrapErrUnionPayload(inst),
   2108             .unwrap_errunion_err_ptr    => try self.airUnwrapErrUnionErrPtr(inst),
   2109             .unwrap_errunion_payload_ptr=> try self.airUnwrapErrUnionPayloadPtr(inst),
   2110             .errunion_payload_ptr_set   => try self.airErrUnionPayloadPtrSet(inst),
   2111             .err_return_trace           => try self.airErrReturnTrace(inst),
   2112             .set_err_return_trace       => try self.airSetErrReturnTrace(inst),
   2113             .save_err_return_trace_index=> try self.airSaveErrReturnTraceIndex(inst),
   2114 
   2115             .wrap_optional         => try self.airWrapOptional(inst),
   2116             .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
   2117             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
   2118 
                    // Tags below are not implemented by this backend yet; lowering them
                    // fails with a TODO error.
   2119             .add_optimized,
   2120             .sub_optimized,
   2121             .mul_optimized,
   2122             .div_float_optimized,
   2123             .div_trunc_optimized,
   2124             .div_floor_optimized,
   2125             .div_exact_optimized,
   2126             .rem_optimized,
   2127             .mod_optimized,
   2128             .neg_optimized,
   2129             .cmp_lt_optimized,
   2130             .cmp_lte_optimized,
   2131             .cmp_eq_optimized,
   2132             .cmp_gte_optimized,
   2133             .cmp_gt_optimized,
   2134             .cmp_neq_optimized,
   2135             .cmp_vector_optimized,
   2136             .reduce_optimized,
   2137             .int_from_float_optimized,
   2138             => return self.fail("TODO implement optimized float mode", .{}),
   2139 
   2140             .add_safe,
   2141             .sub_safe,
   2142             .mul_safe,
   2143             => return self.fail("TODO implement safety_checked_instructions", .{}),
   2144 
   2145             .is_named_enum_value => return self.fail("TODO implement is_named_enum_value", .{}),
   2146             .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}),
   2147             .vector_store_elem => return self.fail("TODO implement vector_store_elem", .{}),
   2148 
   2149             .c_va_arg => try self.airVaArg(inst),
   2150             .c_va_copy => try self.airVaCopy(inst),
   2151             .c_va_end => try self.airVaEnd(inst),
   2152             .c_va_start => try self.airVaStart(inst),
   2153 
                    // The remaining tags are treated as impossible in this backend.
   2154             .wasm_memory_size => unreachable,
   2155             .wasm_memory_grow => unreachable,
   2156 
   2157             .work_item_id => unreachable,
   2158             .work_group_size => unreachable,
   2159             .work_group_id => unreachable,
   2160             // zig fmt: on
   2161         }
   2162 
                // Every register lock taken by a handler must have been released.
   2163         assert(!self.register_manager.lockedRegsExist());
   2164 
   2165         if (std.debug.runtime_safety) {
                    // The handler must have advanced the bookkeeping counter at least once.
   2166             if (self.air_bookkeeping < old_air_bookkeeping + 1) {
   2167                 std.debug.panic("in codegen.zig, handling of AIR instruction %{d} ('{}') did not do proper bookkeeping. Look for a missing call to finishAir.", .{ inst, air_tags[@intFromEnum(inst)] });
   2168             }
   2169 
   2170             { // check consistency of tracked registers
                        // Walk the unset bits of `free_registers` (registers that are not
                        // free) and verify that the instruction recorded for each one has
                        // that register among the registers of its resolved value.
   2171                 var it = self.register_manager.free_registers.iterator(.{ .kind = .unset });
   2172                 while (it.next()) |index| {
   2173                     const tracked_inst = self.register_manager.registers[index];
   2174                     const tracking = self.getResolvedInstValue(tracked_inst);
   2175                     for (tracking.getRegs()) |reg| {
   2176                         if (RegisterManager.indexOfRegIntoTracked(reg).? == index) break;
   2177                     } else unreachable; // tracked register not in use
   2178                 }
   2179             }
   2180         }
   2181     }
   2182     verbose_tracking_log.debug("{}", .{self.fmtTracking()});
   2183 }
   2184 
   2185 fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void {
            // Generates the machine code for a lazy symbol. Only enum types are handled:
            // for them a routine is emitted that compares an enum value against every
            // field and writes two `usize` values (an address into the enum's lazy
            // const data and the matching tag name's length) through a result pointer.
            // Any other type fails with a TODO error.
   2186     const mod = self.bin_file.comp.module.?;
   2187     switch (lazy_sym.ty.zigTypeTag(mod)) {
   2188         .Enum => {
   2189             const enum_ty = lazy_sym.ty;
   2190             wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(mod)});
   2191 
                    // The first two integer parameter registers of the resolved
                    // `.Unspecified` calling convention are locked for the whole routine:
                    // the first is used as the result pointer, the second holds the
                    // enum value.
   2192             const resolved_cc = abi.resolveCallingConvention(.Unspecified, self.target.*);
   2193             const param_regs = abi.getCAbiIntParamRegs(resolved_cc);
   2194             const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*);
   2195             defer for (param_locks) |lock| self.register_manager.unlockReg(lock);
   2196 
   2197             const ret_reg = param_regs[0];
   2198             const enum_mcv = MCValue{ .register = param_regs[1] };
   2199 
                    // One pending jump-to-exit relocation per enum field.
   2200             const exitlude_jump_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(mod));
   2201             defer self.gpa.free(exitlude_jump_relocs);
   2202 
                    // `data_reg` is loaded with the address of the enum's lazy
                    // `const_data` symbol.
   2203             const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   2204             const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
   2205             defer self.register_manager.unlockReg(data_lock);
   2206             try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty });
   2207 
   2208             var data_off: i32 = 0;
   2209             for (exitlude_jump_relocs, 0..) |*exitlude_jump_reloc, index_usize| {
   2210                 const index: u32 = @intCast(index_usize);
   2211                 const tag_name = mod.intern_pool.stringToSlice(enum_ty.enumFields(mod)[index_usize]);
   2212                 const tag_val = try mod.enumValueFieldIndex(enum_ty, index);
   2213                 const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val });
                        // Compare the incoming value with this field's tag value and skip
                        // the stores below when they differ.
   2214                 try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
   2215                 const skip_reloc = try self.asmJccReloc(.ne, undefined);
   2216 
                        // Write `data_reg + data_off` at offset 0 and the name length at
                        // offset 8 through `ret_reg`.
                        // NOTE(review): looks like a slice (pointer, length) result --
                        // confirm against the callers of this lazy function.
   2217                 try self.genSetMem(
   2218                     .{ .reg = ret_reg },
   2219                     0,
   2220                     Type.usize,
   2221                     .{ .register_offset = .{ .reg = data_reg, .off = data_off } },
   2222                 );
   2223                 try self.genSetMem(.{ .reg = ret_reg }, 8, Type.usize, .{ .immediate = tag_name.len });
   2224 
   2225                 exitlude_jump_reloc.* = try self.asmJmpReloc(undefined);
   2226                 try self.performReloc(skip_reloc);
   2227 
                        // NOTE(review): the `+ 1` presumably accounts for a one-byte
                        // terminator after each name in the const data -- confirm against
                        // the code that emits the lazy const data symbol.
   2228                 data_off += @intCast(tag_name.len + 1);
   2229             }
   2230 
                    // Reached only when no field compared equal.
   2231             try self.airTrap();
   2232 
                    // All per-field exit jumps land here, just before the `ret`.
   2233             for (exitlude_jump_relocs) |reloc| try self.performReloc(reloc);
   2234             try self.asmOpOnly(.{ ._, .ret });
   2235         },
   2236         else => return self.fail(
   2237             "TODO implement {s} for {}",
   2238             .{ @tagName(lazy_sym.kind), lazy_sym.ty.fmt(mod) },
   2239         ),
   2240     }
   2241 }
   2242 
   2243 fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) !void {
   2244     for (value.getRegs()) |reg| try self.register_manager.getReg(reg, inst);
   2245 }
   2246 
   2247 fn getValueIfFree(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void {
   2248     for (value.getRegs()) |reg| if (self.register_manager.isRegFree(reg))
   2249         self.register_manager.getRegAssumeFree(reg, inst);
   2250 }
   2251 
   2252 fn freeValue(self: *Self, value: MCValue) !void {
   2253     switch (value) {
   2254         .register => |reg| {
   2255             self.register_manager.freeReg(reg);
   2256             if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg);
   2257         },
   2258         .register_pair => |regs| for (regs) |reg| self.register_manager.freeReg(reg),
   2259         .register_offset => |reg_off| self.register_manager.freeReg(reg_off.reg),
   2260         .register_overflow => |reg_ov| {
   2261             self.register_manager.freeReg(reg_ov.reg);
   2262             self.eflags_inst = null;
   2263         },
   2264         .eflags => self.eflags_inst = null,
   2265         else => {}, // TODO process stack allocation death
   2266     }
   2267 }
   2268 
   2269 fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) !void {
   2270     if (bt.feed()) if (operand.toIndex()) |inst| try self.processDeath(inst);
   2271 }
   2272 
   2273 /// Asserts there is already capacity to insert into top branch inst_table.
   2274 fn processDeath(self: *Self, inst: Air.Inst.Index) !void {
   2275     try self.inst_tracking.getPtr(inst).?.die(self, inst);
   2276 }
   2277 
   2278 /// Called when there are no operands, and the instruction is always unreferenced.
   2279 fn finishAirBookkeeping(self: *Self) void {
   2280     if (std.debug.runtime_safety) {
   2281         self.air_bookkeeping += 1;
   2282     }
   2283 }
   2284 
   2285 fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void {
   2286     if (self.liveness.isUnused(inst)) switch (result) {
   2287         .none, .dead, .unreach => {},
   2288         else => unreachable, // Why didn't the result die?
   2289     } else {
   2290         tracking_log.debug("%{d} => {} (birth)", .{ inst, result });
   2291         self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result));
   2292         // In some cases, an operand may be reused as the result.
   2293         // If that operand died and was a register, it was freed by
   2294         // processDeath, so we have to "re-allocate" the register.
   2295         self.getValueIfFree(result, inst);
   2296     }
   2297     self.finishAirBookkeeping();
   2298 }
   2299 
   2300 fn finishAir(
   2301     self: *Self,
   2302     inst: Air.Inst.Index,
   2303     result: MCValue,
   2304     operands: [Liveness.bpi - 1]Air.Inst.Ref,
   2305 ) !void {
   2306     var tomb_bits = self.liveness.getTombBits(inst);
   2307     for (operands) |op| {
   2308         const dies = @as(u1, @truncate(tomb_bits)) != 0;
   2309         tomb_bits >>= 1;
   2310         if (!dies) continue;
   2311         try self.processDeath(op.toIndexAllowNone() orelse continue);
   2312     }
   2313     self.finishAirResult(inst, result);
   2314 }
   2315 
/// Summary of the stack frame produced by `computeFrameLayout`.
const FrameLayout = struct {
    /// All-ones `u32` shifted left by `@intFromEnum` of the required alignment when the
    /// stack needs realigning, or not shifted at all otherwise.
    stack_mask: u32,
    /// Final rsp-relative offset minus the displacement assigned to `call_frame`.
    stack_adjust: u32,
    /// Registers to save in the prologue.
    save_reg_list: Mir.RegisterList,
};
   2321 
   2322 fn setFrameLoc(
   2323     self: *Self,
   2324     frame_index: FrameIndex,
   2325     base: Register,
   2326     offset: *i32,
   2327     comptime aligned: bool,
   2328 ) void {
   2329     const frame_i = @intFromEnum(frame_index);
   2330     if (aligned) {
   2331         const alignment = self.frame_allocs.items(.abi_align)[frame_i];
   2332         offset.* = @intCast(alignment.forward(@intCast(offset.*)));
   2333     }
   2334     self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* });
   2335     offset.* += self.frame_allocs.items(.abi_size)[frame_i];
   2336 }
   2337 
/// Assigns a base register and displacement to every frame allocation and returns the
/// resulting `FrameLayout`.
///
/// `cc` selects the set of callee-preserved registers that is recorded in the returned
/// `save_reg_list`. As a side effect, `frame_locs` is resized and filled in, and the ABI
/// size recorded for `FrameIndex.call_frame` is overwritten.
fn computeFrameLayout(self: *Self, cc: std.builtin.CallingConvention) !FrameLayout {
    const frame_allocs_len = self.frame_allocs.len;
    try self.frame_locs.resize(self.gpa, frame_allocs_len);
    // Scratch ordering of all frame indices that come after the named ones.
    const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count);
    defer self.gpa.free(stack_frame_order);

    const frame_size = self.frame_allocs.items(.abi_size);
    const frame_align = self.frame_allocs.items(.abi_align);
    const frame_offset = self.frame_locs.items(.disp);

    for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index|
        frame_order.* = @enumFromInt(frame_index);
    {
        // Order the unnamed allocations from greatest to least alignment.
        const SortContext = struct {
            frame_align: @TypeOf(frame_align),
            pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool {
                return context.frame_align[@intFromEnum(lhs)].compare(.gt, context.frame_align[@intFromEnum(rhs)]);
            }
        };
        const sort_context = SortContext{ .frame_align = frame_align };
        mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan);
    }

    const call_frame_align = frame_align[@intFromEnum(FrameIndex.call_frame)];
    const stack_frame_align = frame_align[@intFromEnum(FrameIndex.stack_frame)];
    const args_frame_align = frame_align[@intFromEnum(FrameIndex.args_frame)];
    // The stack only needs explicit realignment when the call/stack frames demand more
    // alignment than the args frame has.
    const needed_align = call_frame_align.max(stack_frame_align);
    const need_align_stack = needed_align.compare(.gt, args_frame_align);

    // Create list of registers to save in the prologue.
    // TODO handle register classes
    var save_reg_list = Mir.RegisterList{};
    const callee_preserved_regs =
        abi.getCalleePreservedRegs(abi.resolveCallingConvention(cc, self.target.*));
    for (callee_preserved_regs) |reg| {
        // NOTE(review): the `or true` makes this condition always hold, so every
        // callee-preserved register is pushed regardless of whether it is allocated.
        if (self.register_manager.isRegAllocated(reg) or true) {
            save_reg_list.push(callee_preserved_regs, reg);
        }
    }

    // rbp-relative allocations are laid out back to back without alignment padding.
    var rbp_offset: i32 = 0;
    self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
    self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
    self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
    // When the stack is not realigned, the saved registers and the args frame displacement
    // are accounted for while rounding the rsp-relative size below.
    const stack_frame_align_offset = if (need_align_stack)
        0
    else
        save_reg_list.size() + frame_offset[@intFromEnum(FrameIndex.args_frame)];

    // rsp-relative allocations: call frame, stack frame, then the sorted unnamed ones,
    // each rounded up to its own alignment.
    var rsp_offset: i32 = 0;
    self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
    self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true);
    for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true);
    // Round the total up to `needed_align`, measured with the extra offset included.
    rsp_offset += stack_frame_align_offset;
    rsp_offset = @intCast(needed_align.forward(@intCast(rsp_offset)));
    rsp_offset -= stack_frame_align_offset;
    // Overwrite the call frame's recorded size with the distance from the stack frame's
    // displacement to the end of the rsp-relative area.
    frame_size[@intFromEnum(FrameIndex.call_frame)] =
        @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.stack_frame)]);

    return .{
        .stack_mask = @as(u32, math.maxInt(u32)) << @intCast(if (need_align_stack) @intFromEnum(needed_align) else 0),
        .stack_adjust = @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.call_frame)]),
        .save_reg_list = save_reg_list,
    };
}
   2403 
   2404 fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) Alignment {
   2405     const alloc_align = self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_align;
   2406     return @enumFromInt(@min(@intFromEnum(alloc_align), @ctz(frame_addr.off)));
   2407 }
   2408 
   2409 fn getFrameAddrSize(self: *Self, frame_addr: FrameAddr) u32 {
   2410     return self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_size - @as(u31, @intCast(frame_addr.off));
   2411 }
   2412 
   2413 fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex {
   2414     const frame_allocs_slice = self.frame_allocs.slice();
   2415     const frame_size = frame_allocs_slice.items(.abi_size);
   2416     const frame_align = frame_allocs_slice.items(.abi_align);
   2417 
   2418     const stack_frame_align = &frame_align[@intFromEnum(FrameIndex.stack_frame)];
   2419     stack_frame_align.* = stack_frame_align.max(alloc.abi_align);
   2420 
   2421     for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| {
   2422         const abi_size = frame_size[@intFromEnum(frame_index)];
   2423         if (abi_size != alloc.abi_size) continue;
   2424         const abi_align = &frame_align[@intFromEnum(frame_index)];
   2425         abi_align.* = abi_align.max(alloc.abi_align);
   2426 
   2427         _ = self.free_frame_indices.swapRemoveAt(free_i);
   2428         return frame_index;
   2429     }
   2430     const frame_index: FrameIndex = @enumFromInt(self.frame_allocs.len);
   2431     try self.frame_allocs.append(self.gpa, alloc);
   2432     return frame_index;
   2433 }
   2434 
   2435 /// Use a pointer instruction as the basis for allocating stack memory.
   2436 fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex {
   2437     const mod = self.bin_file.comp.module.?;
   2438     const ptr_ty = self.typeOfIndex(inst);
   2439     const val_ty = ptr_ty.childType(mod);
   2440     return self.allocFrameIndex(FrameAlloc.init(.{
   2441         .size = math.cast(u32, val_ty.abiSize(mod)) orelse {
   2442             return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(mod)});
   2443         },
   2444         .alignment = ptr_ty.ptrAlignment(mod).max(.@"1"),
   2445     }));
   2446 }
   2447 
   2448 fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
   2449     return self.allocRegOrMemAdvanced(self.typeOfIndex(inst), inst, reg_ok);
   2450 }
   2451 
   2452 fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue {
   2453     return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok);
   2454 }
   2455 
   2456 fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
   2457     const mod = self.bin_file.comp.module.?;
   2458     const abi_size = math.cast(u32, ty.abiSize(mod)) orelse {
   2459         return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)});
   2460     };
   2461 
   2462     if (reg_ok) need_mem: {
   2463         if (abi_size <= @as(u32, switch (ty.zigTypeTag(mod)) {
   2464             .Float => switch (ty.floatBits(self.target.*)) {
   2465                 16, 32, 64, 128 => 16,
   2466                 80 => break :need_mem,
   2467                 else => unreachable,
   2468             },
   2469             .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   2470                 .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   2471                     16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16,
   2472                     80 => break :need_mem,
   2473                     else => unreachable,
   2474                 },
   2475                 else => if (self.hasFeature(.avx)) 32 else 16,
   2476             },
   2477             else => 8,
   2478         })) {
   2479             if (self.register_manager.tryAllocReg(inst, self.regClassForType(ty))) |reg| {
   2480                 return MCValue{ .register = registerAlias(reg, abi_size) };
   2481             }
   2482         }
   2483     }
   2484 
   2485     const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ty, mod));
   2486     return .{ .load_frame = .{ .index = frame_index } };
   2487 }
   2488 
   2489 fn regClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet {
   2490     const mod = self.bin_file.comp.module.?;
   2491     return switch (ty.zigTypeTag(mod)) {
   2492         .Float => switch (ty.floatBits(self.target.*)) {
   2493             80 => abi.RegisterClass.x87,
   2494             else => abi.RegisterClass.sse,
   2495         },
   2496         .Vector => switch (ty.childType(mod).toIntern()) {
   2497             .bool_type, .u1_type => abi.RegisterClass.gp,
   2498             else => if (ty.isAbiInt(mod) and ty.intInfo(mod).bits == 1)
   2499                 abi.RegisterClass.gp
   2500             else
   2501                 abi.RegisterClass.sse,
   2502         },
   2503         else => abi.RegisterClass.gp,
   2504     };
   2505 }
   2506 
/// Snapshot of register allocation and instruction tracking, produced by `saveState` /
/// `initRetroactiveState` + `saveRetroactiveState` and consumed by `restoreState`.
const State = struct {
    /// Owning instruction of each register; only entries for registers that were not
    /// free when the snapshot was taken are written.
    registers: RegisterManager.TrackedRegisters,
    /// Copy of the tracking entry of each owning instruction, indexed like `registers`;
    /// only entries for registers that were not free are written.
    reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
    /// Copy of the register manager's free register set.
    free_registers: RegisterManager.RegisterBitSet,
    /// Number of entries in `inst_tracking` when the state was initialised.
    inst_tracking_len: u32,
    /// Value of `scope_generation` when the state was initialised.
    scope_generation: u32,
};
   2514 
   2515 fn initRetroactiveState(self: *Self) State {
   2516     var state: State = undefined;
   2517     state.inst_tracking_len = @intCast(self.inst_tracking.count());
   2518     state.scope_generation = self.scope_generation;
   2519     return state;
   2520 }
   2521 
   2522 fn saveRetroactiveState(self: *Self, state: *State) !void {
   2523     try self.spillEflagsIfOccupied();
   2524     const free_registers = self.register_manager.free_registers;
   2525     var it = free_registers.iterator(.{ .kind = .unset });
   2526     while (it.next()) |index| {
   2527         const tracked_inst = self.register_manager.registers[index];
   2528         state.registers[index] = tracked_inst;
   2529         state.reg_tracking[index] = self.inst_tracking.get(tracked_inst).?;
   2530     }
   2531     state.free_registers = free_registers;
   2532 }
   2533 
   2534 fn saveState(self: *Self) !State {
   2535     var state = self.initRetroactiveState();
   2536     try self.saveRetroactiveState(&state);
   2537     return state;
   2538 }
   2539 
/// Reconciles the current register/instruction tracking with a previously saved `state`.
///
/// `deaths` are passed to `processDeath` before any register is examined.
/// The comptime `opts` select the work to do:
/// * `close_scope`: kill and drop every tracking entry added after `state` was initialised.
/// * `resurrect`: call `resurrect` on every tracking entry that already existed when
///   `state` was initialised.
/// * `emit_instructions`: spill current register owners and materialize the owners
///   expected by `state`; also spill the eflags owner, if any.
/// * `update_tracking`: update the register manager and tracking entries to match `state`.
///   When false, each register that `state` expects to be occupied is locked for the rest
///   of this call instead.
fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, comptime opts: struct {
    emit_instructions: bool,
    update_tracking: bool,
    resurrect: bool,
    close_scope: bool,
}) !void {
    if (opts.close_scope) {
        for (
            self.inst_tracking.keys()[state.inst_tracking_len..],
            self.inst_tracking.values()[state.inst_tracking_len..],
        ) |inst, *tracking| try tracking.die(self, inst);
        self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
    }

    if (opts.resurrect) for (
        self.inst_tracking.keys()[0..state.inst_tracking_len],
        self.inst_tracking.values()[0..state.inst_tracking_len],
    ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
    for (deaths) |death| try self.processDeath(death);

    // The lock list is only needed when tracking is not updated; in the other case both
    // `stack` and `reg_locks` are of type `void`.
    const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).Array.len]RegisterLock;
    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        if (opts.update_tracking) ({}) else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);

    var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity(
        stack.get(),
        @typeInfo(ExpectedContents).Array.len,
    );
    defer if (!opts.update_tracking) {
        for (reg_locks.items) |lock| self.register_manager.unlockReg(lock);
        reg_locks.deinit();
    };

    for (0..state.registers.len) |index| {
        // Owner of this register right now, and the owner `state` expects (null = free).
        const current_maybe_inst = if (self.register_manager.free_registers.isSet(index))
            null
        else
            self.register_manager.registers[index];
        const target_maybe_inst = if (state.free_registers.isSet(index))
            null
        else
            state.registers[index];
        // An expected owner must have been tracked before `state` was initialised.
        if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst|
            assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len);
        if (opts.emit_instructions) {
            if (current_maybe_inst) |current_inst| {
                try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst);
            }
            if (target_maybe_inst) |target_inst| {
                const target_tracking = self.inst_tracking.getPtr(target_inst).?;
                try target_tracking.materialize(self, target_inst, state.reg_tracking[index]);
            }
        }
        if (opts.update_tracking) {
            if (current_maybe_inst) |current_inst| {
                try self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst);
            }
            {
                // Hand the register over to the expected owner (or mark it free).
                const reg = RegisterManager.regAtTrackedIndex(@intCast(index));
                self.register_manager.freeReg(reg);
                self.register_manager.getRegAssumeFree(reg, target_maybe_inst);
            }
            if (target_maybe_inst) |target_inst| {
                self.inst_tracking.getPtr(target_inst).?.trackMaterialize(
                    target_inst,
                    state.reg_tracking[index],
                );
            }
        } else if (target_maybe_inst) |_|
            try reg_locks.append(self.register_manager.lockRegIndexAssumeUnused(@intCast(index)));
    }
    // Eflags are handled like a register whose expected owner is always "none".
    if (opts.emit_instructions) if (self.eflags_inst) |inst|
        try self.inst_tracking.getPtr(inst).?.spill(self, inst);
    if (opts.update_tracking) if (self.eflags_inst) |inst| {
        self.eflags_inst = null;
        try self.inst_tracking.getPtr(inst).?.trackSpill(self, inst);
    };

    // Safety check: after updating tracking, the register manager must agree with `state`.
    if (opts.update_tracking and std.debug.runtime_safety) {
        assert(self.eflags_inst == null);
        assert(self.register_manager.free_registers.eql(state.free_registers));
        var used_reg_it = state.free_registers.iterator(.{ .kind = .unset });
        while (used_reg_it.next()) |index|
            assert(self.register_manager.registers[index] == state.registers[index]);
    }
}
   2626 
   2627 pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void {
   2628     const tracking = self.inst_tracking.getPtr(inst) orelse return;
   2629     for (tracking.getRegs()) |tracked_reg| {
   2630         if (tracked_reg.id() == reg.id()) break;
   2631     } else unreachable; // spilled reg not tracked with spilled instruciton
   2632     try tracking.spill(self, inst);
   2633     try tracking.trackSpill(self, inst);
   2634 }
   2635 
   2636 pub fn spillEflagsIfOccupied(self: *Self) !void {
   2637     if (self.eflags_inst) |inst| {
   2638         self.eflags_inst = null;
   2639         const tracking = self.inst_tracking.getPtr(inst).?;
   2640         assert(tracking.getCondition() != null);
   2641         try tracking.spill(self, inst);
   2642         try tracking.trackSpill(self, inst);
   2643     }
   2644 }
   2645 
   2646 pub fn spillCallerPreservedRegs(self: *Self, cc: std.builtin.CallingConvention) !void {
   2647     switch (cc) {
   2648         inline .SysV, .Win64 => |known_cc| try self.spillRegisters(
   2649             comptime abi.getCallerPreservedRegs(known_cc),
   2650         ),
   2651         else => unreachable,
   2652     }
   2653 }
   2654 
   2655 pub fn spillRegisters(self: *Self, comptime registers: []const Register) !void {
   2656     inline for (registers) |reg| try self.register_manager.getKnownReg(reg, null);
   2657 }
   2658 
   2659 /// Copies a value to a register without tracking the register. The register is not considered
   2660 /// allocated. A second call to `copyToTmpRegister` may return the same register.
   2661 /// This can have a side effect of spilling instructions to the stack to free up a register.
   2662 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
   2663     const reg = try self.register_manager.allocReg(null, self.regClassForType(ty));
   2664     try self.genSetReg(reg, ty, mcv);
   2665     return reg;
   2666 }
   2667 
   2668 /// Allocates a new register and copies `mcv` into it.
   2669 /// `reg_owner` is the instruction that gets associated with the register in the register table.
   2670 /// This can have a side effect of spilling instructions to the stack to free up a register.
   2671 /// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
   2672 fn copyToRegisterWithInstTracking(
   2673     self: *Self,
   2674     reg_owner: Air.Inst.Index,
   2675     ty: Type,
   2676     mcv: MCValue,
   2677 ) !MCValue {
   2678     const reg: Register = try self.register_manager.allocReg(reg_owner, self.regClassForType(ty));
   2679     try self.genSetReg(reg, ty, mcv);
   2680     return MCValue{ .register = reg };
   2681 }
   2682 
   2683 fn airAlloc(self: *Self, inst: Air.Inst.Index) !void {
   2684     const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } };
   2685     return self.finishAir(inst, result, .{ .none, .none, .none });
   2686 }
   2687 
   2688 fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
   2689     const result: MCValue = switch (self.ret_mcv.long) {
   2690         else => unreachable,
   2691         .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
   2692         .load_frame => .{ .register_offset = .{
   2693             .reg = (try self.copyToRegisterWithInstTracking(
   2694                 inst,
   2695                 self.typeOfIndex(inst),
   2696                 self.ret_mcv.long,
   2697             )).register,
   2698             .off = self.ret_mcv.short.indirect.off,
   2699         } },
   2700     };
   2701     return self.finishAir(inst, result, .{ .none, .none, .none });
   2702 }
   2703 
/// Lowers a float truncation from the operand's float type to the result's float type.
///
/// Conversions that cannot be done inline are emitted as a call to a `__trunc?f?f2`
/// library routine. The inline paths are 32 -> 16 bits (requires the `f16c` feature) and
/// 64 -> 32 bits.
fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_ty = self.typeOfIndex(inst);
    const dst_bits = dst_ty.floatBits(self.target.*);
    const src_ty = self.typeOf(ty_op.operand);
    const src_bits = src_ty.floatBits(self.target.*);

    const result = result: {
        // `true` means this (dst, src) width pair needs a library call.
        if (switch (dst_bits) {
            16 => switch (src_bits) {
                32 => !self.hasFeature(.f16c),
                64, 80, 128 => true,
                else => unreachable,
            },
            32 => switch (src_bits) {
                64 => false,
                80, 128 => true,
                else => unreachable,
            },
            64 => switch (src_bits) {
                80, 128 => true,
                else => unreachable,
            },
            80 => switch (src_bits) {
                128 => true,
                else => unreachable,
            },
            else => unreachable,
        }) {
            // The callee name has one character per width, source first then destination;
            // the buffer is sized from a template of the same length.
            var callee_buf: ["__trunc?f?f2".len]u8 = undefined;
            break :result try self.genCall(.{ .lib = .{
                .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(),
                .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()},
                .callee = std.fmt.bufPrint(&callee_buf, "__trunc{c}f{c}f2", .{
                    floatCompilerRtAbiName(src_bits),
                    floatCompilerRtAbiName(dst_bits),
                }) catch unreachable,
            } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
        }

        // Inline path: reuse the operand's register for the result when possible,
        // otherwise copy the operand into a new register owned by `inst`.
        const src_mcv = try self.resolveInst(ty_op.operand);
        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
        const dst_reg = dst_mcv.getReg().?.to128();
        // Keep the destination locked while temporaries may be allocated below.
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        if (dst_bits == 16) {
            // Only 32 -> 16 reaches here; the switch above already required `f16c`.
            assert(self.hasFeature(.f16c));
            switch (src_bits) {
                32 => {
                    const mat_src_reg = if (src_mcv.isRegister())
                        src_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(src_ty, src_mcv);
                    // The immediate encodes a `RoundMode` whose mode is `.mxcsr`.
                    try self.asmRegisterRegisterImmediate(
                        .{ .v_, .cvtps2ph },
                        dst_reg,
                        mat_src_reg.to128(),
                        Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
                    );
                },
                else => unreachable,
            }
        } else {
            // 64 -> 32: with AVX the three-operand form passes `dst_reg` as both the
            // destination and the first source; without it the two-operand form is used.
            // A memory operand is used directly when the source is in memory.
            assert(src_bits == 64 and dst_bits == 32);
            if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
                .{ .v_ss, .cvtsd2 },
                dst_reg,
                dst_reg,
                try src_mcv.mem(self, .qword),
            ) else try self.asmRegisterRegisterRegister(
                .{ .v_ss, .cvtsd2 },
                dst_reg,
                dst_reg,
                (if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
            ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
                .{ ._ss, .cvtsd2 },
                dst_reg,
                try src_mcv.mem(self, .qword),
            ) else try self.asmRegisterRegister(
                .{ ._ss, .cvtsd2 },
                dst_reg,
                (if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
            );
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
   2802 
/// Lowers a float extension from the operand's float type to the result's float type.
///
/// Conversions that cannot be done inline are emitted as a call to an `__extend?f?f2`
/// library routine. The inline paths are 16 -> 32/64 bits (requires the `f16c` feature)
/// and 32 -> 64 bits.
fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_ty = self.typeOfIndex(inst);
    const dst_bits = dst_ty.floatBits(self.target.*);
    const src_ty = self.typeOf(ty_op.operand);
    const src_bits = src_ty.floatBits(self.target.*);

    const result = result: {
        // `true` means this (src, dst) width pair needs a library call.
        if (switch (src_bits) {
            16 => switch (dst_bits) {
                32, 64 => !self.hasFeature(.f16c),
                80, 128 => true,
                else => unreachable,
            },
            32 => switch (dst_bits) {
                64 => false,
                80, 128 => true,
                else => unreachable,
            },
            64 => switch (dst_bits) {
                80, 128 => true,
                else => unreachable,
            },
            80 => switch (dst_bits) {
                128 => true,
                else => unreachable,
            },
            else => unreachable,
        }) {
            // The callee name has one character per width, source first then destination;
            // the buffer is sized from a template of the same length.
            var callee_buf: ["__extend?f?f2".len]u8 = undefined;
            break :result try self.genCall(.{ .lib = .{
                .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(),
                .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()},
                .callee = std.fmt.bufPrint(&callee_buf, "__extend{c}f{c}f2", .{
                    floatCompilerRtAbiName(src_bits),
                    floatCompilerRtAbiName(dst_bits),
                }) catch unreachable,
            } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
        }

        // Inline path: reuse the operand's register for the result when possible,
        // otherwise copy the operand into a new register owned by `inst`.
        const src_mcv = try self.resolveInst(ty_op.operand);
        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
        const dst_reg = dst_mcv.getReg().?.to128();
        // Keep the destination locked while temporaries may be allocated below.
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        if (src_bits == 16) {
            // The switch above already required `f16c` for this path.
            assert(self.hasFeature(.f16c));
            const mat_src_reg = if (src_mcv.isRegister())
                src_mcv.getReg().?
            else
                try self.copyToTmpRegister(src_ty, src_mcv);
            // First convert the half into `dst_reg`; a 64-bit destination then needs a
            // second conversion applied to `dst_reg` in place.
            try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
            switch (dst_bits) {
                32 => {},
                64 => try self.asmRegisterRegisterRegister(
                    .{ .v_sd, .cvtss2 },
                    dst_reg,
                    dst_reg,
                    dst_reg,
                ),
                else => unreachable,
            }
        } else {
            // 32 -> 64: with AVX the three-operand form passes `dst_reg` as both the
            // destination and the first source; without it the two-operand form is used.
            // A memory operand is used directly when the source is in memory.
            assert(src_bits == 32 and dst_bits == 64);
            if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
                .{ .v_sd, .cvtss2 },
                dst_reg,
                dst_reg,
                try src_mcv.mem(self, .dword),
            ) else try self.asmRegisterRegisterRegister(
                .{ .v_sd, .cvtss2 },
                dst_reg,
                dst_reg,
                (if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
            ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
                .{ ._sd, .cvtss2 },
                dst_reg,
                try src_mcv.mem(self, .dword),
            ) else try self.asmRegisterRegister(
                .{ ._sd, .cvtss2 },
                dst_reg,
                (if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
            );
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
   2901 
   2902 fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an integer cast of `ty_op.operand` to the result type of `inst`.
            // When the destination is not wider than the source, the (possibly reused)
            // operand location is returned, re-aliased to the destination ABI size if it
            // is a register. When it is wider, the top source limb is passed through
            // `truncateRegister` and any additional 64-bit limbs are filled in.
   2903     const mod = self.bin_file.comp.module.?;
   2904     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   2905     const result: MCValue = result: {
   2906         const src_ty = self.typeOf(ty_op.operand);
   2907         const src_int_info = src_ty.intInfo(mod);
   2908 
   2909         const dst_ty = self.typeOfIndex(inst);
   2910         const dst_int_info = dst_ty.intInfo(mod);
   2911         const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   2912 
                // The narrower of the two types; used as the copy type when the operand's
                // location cannot be reused.
   2913         const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
                // Signedness used to fill limbs beyond the source: a signed source takes
                // the destination's signedness, an unsigned source stays unsigned.
   2914         const extend = switch (src_int_info.signedness) {
   2915             .signed => dst_int_info,
   2916             .unsigned => src_int_info,
   2917         }.signedness;
   2918 
   2919         const src_mcv = try self.resolveInst(ty_op.operand);
                // Number of bits of storage attributed to the operand's current location:
                // 64 for a single register, 128 for a register pair, the frame slot size
                // for a frame load, and the source's own bit width for anything else.
   2920         const src_storage_bits: u16 = switch (src_mcv) {
   2921             .register, .register_offset => 64,
   2922             .register_pair => 128,
   2923             .load_frame => |frame_addr| @intCast(self.getFrameAddrSize(frame_addr) * 8),
   2924             else => src_int_info.bits,
   2925         };
   2926 
                // Reuse the operand's location only if the destination fits in that storage,
                // both occupy the same number of 64-bit limbs, and `reuseOperand` agrees.
                // Otherwise allocate a fresh location and copy the operand into it.
   2927         const dst_mcv = if (dst_int_info.bits <= src_storage_bits and
   2928             math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
   2929             math.divCeil(u32, src_storage_bits, 64) catch unreachable and
   2930             self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
   2931             const dst_mcv = try self.allocRegOrMem(inst, true);
   2932             try self.genCopy(min_ty, dst_mcv, src_mcv);
   2933             break :dst dst_mcv;
   2934         };
   2935 
                // Destination is not wider than the source: nothing to extend.
   2936         if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
   2937             .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }
   2938         else
   2939             dst_mcv;
   2940 
                // Widening within a single register.
   2941         if (dst_mcv.isRegister()) {
   2942             try self.truncateRegister(src_ty, dst_mcv.getReg().?);
   2943             break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) };
   2944         }
   2945 
                // Widening a value held in memory or in a register pair, handled per
                // 64-bit limb.
   2946         const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
   2947         const dst_limbs_len = math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;
   2948 
                // Location of the source's most significant limb inside the destination:
                // byte offset `(src_limbs_len - 1) * 8` for memory, otherwise the second
                // register of the pair.
   2949         const high_mcv: MCValue = if (dst_mcv.isMemory())
   2950             dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
   2951         else
   2952             .{ .register = dst_mcv.register_pair[1] };
   2953         const high_reg = if (high_mcv.isRegister())
   2954             high_mcv.getReg().?
   2955         else
   2956             try self.copyToTmpRegister(switch (src_int_info.signedness) {
   2957                 .signed => Type.isize,
   2958                 .unsigned => Type.usize,
   2959             }, high_mcv);
   2960         const high_lock = self.register_manager.lockRegAssumeUnused(high_reg);
   2961         defer self.register_manager.unlockReg(high_lock);
   2962 
                // A source width that is not a multiple of 64 leaves a partial top limb:
                // run it through `truncateRegister` and store it back.
   2963         const high_bits = src_int_info.bits % 64;
   2964         if (high_bits > 0) {
   2965             try self.truncateRegister(src_ty, high_reg);
   2966             const high_ty = if (dst_int_info.bits >= 64) Type.usize else dst_ty;
   2967             try self.genCopy(high_ty, high_mcv, .{ .register = high_reg });
   2968         }
   2969 
                // Fill the destination limbs that lie past the source. For a signed
                // extension the fill value is `high_reg` shifted right by 63 with the
                // `.{ ._r, .sa }` shift (presumably an arithmetic shift - confirm against
                // Mir); for an unsigned extension it is zero.
   2970         if (dst_limbs_len > src_limbs_len) try self.genInlineMemset(
   2971             dst_mcv.address().offset(src_limbs_len * 8),
   2972             switch (extend) {
   2973                 .signed => extend: {
   2974                     const extend_mcv = MCValue{ .register = high_reg };
   2975                     try self.genShiftBinOpMir(
   2976                         .{ ._r, .sa },
   2977                         Type.isize,
   2978                         extend_mcv,
   2979                         .{ .immediate = 63 },
   2980                     );
   2981                     break :extend extend_mcv;
   2982                 },
   2983                 .unsigned => .{ .immediate = 0 },
   2984             },
   2985             .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 },
   2986         );
   2987 
   2988         break :result dst_mcv;
   2989     };
   2990     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   2991 }
   2992 
   2993 fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a truncation of `ty_op.operand` to the result type of `inst`, for
            // integers and for vectors of integers. A destination larger than 16 bytes
            // that cannot reuse a source register fails with a TODO error, as does any
            // vector shape without a matching instruction below.
   2994     const mod = self.bin_file.comp.module.?;
   2995     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   2996 
   2997     const dst_ty = self.typeOfIndex(inst);
   2998     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   2999     const src_ty = self.typeOf(ty_op.operand);
   3000     const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
   3001 
   3002     const result = result: {
   3003         const src_mcv = try self.resolveInst(ty_op.operand);
                // Keep the source register (if there is one) locked while a destination
                // is being selected.
   3004         const src_lock =
   3005             if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
   3006         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   3007 
                // Destination selection: reuse a source register when allowed, else one
                // register for up to 8 bytes, else a general-purpose register pair for up
                // to 16 bytes.
   3008         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   3009             src_mcv
   3010         else if (dst_abi_size <= 8)
   3011             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv)
   3012         else if (dst_abi_size <= 16) dst: {
   3013             const dst_regs =
   3014                 try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
   3015             const dst_mcv: MCValue = .{ .register_pair = dst_regs };
   3016             const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
   3017             defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
   3018 
   3019             try self.genCopy(dst_ty, dst_mcv, src_mcv);
   3020             break :dst dst_mcv;
   3021         } else return self.fail("TODO implement trunc from {} to {}", .{ src_ty.fmt(mod), dst_ty.fmt(mod) });
   3022 
   3023         if (dst_ty.zigTypeTag(mod) == .Vector) {
   3024             assert(src_ty.zigTypeTag(mod) == .Vector and dst_ty.vectorLen(mod) == src_ty.vectorLen(mod));
   3025             const dst_info = dst_ty.childType(mod).intInfo(mod);
   3026             const src_info = src_ty.childType(mod).intInfo(mod);
                    // Choose the narrowing instruction from the element widths, the vector
                    // length and the available CPU features. Only 16->8 and 32->16 bit
                    // elements are handled; anything else yields null and fails below.
                    // NOTE(review): the tags look like the packuswb / packusdw family -
                    // confirm against the Mir tag definitions.
   3027             const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_info.bits) {
   3028                 8 => switch (src_info.bits) {
   3029                     16 => switch (dst_ty.vectorLen(mod)) {
   3030                         1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
   3031                         9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null,
   3032                         else => null,
   3033                     },
   3034                     else => null,
   3035                 },
   3036                 16 => switch (src_info.bits) {
   3037                     32 => switch (dst_ty.vectorLen(mod)) {
   3038                         1...4 => if (self.hasFeature(.avx))
   3039                             .{ .vp_w, .ackusd }
   3040                         else if (self.hasFeature(.sse4_1))
   3041                             .{ .p_w, .ackusd }
   3042                         else
   3043                             null,
   3044                         5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null,
   3045                         else => null,
   3046                     },
   3047                     else => null,
   3048                 },
   3049                 else => null,
   3050             }) orelse return self.fail("TODO implement airTrunc for {}", .{dst_ty.fmt(mod)});
   3051 
                    // Mask constant of the source element type with only the low
                    // `dst_info.bits` bits set.
   3052             const elem_ty = src_ty.childType(mod);
   3053             const mask_val = try mod.intValue(elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits));
   3054 
                    // Vector type holding the mask repeated across 256 bits when the source
                    // is larger than 16 bytes, otherwise across 128 bits.
   3055             const splat_ty = try mod.vectorType(.{
   3056                 .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)),
   3057                 .child = elem_ty.ip_index,
   3058             });
   3059             const splat_abi_size: u32 = @intCast(splat_ty.abiSize(mod));
   3060 
   3061             const splat_val = try mod.intern(.{ .aggregate = .{
   3062                 .ty = splat_ty.ip_index,
   3063                 .storage = .{ .repeated_elem = mask_val.ip_index },
   3064             } });
   3065 
   3066             const splat_mcv = try self.genTypedValue(.{ .ty = splat_ty, .val = Value.fromInterned(splat_val) });
                    // Use the constant's address directly for memory-like locations;
                    // otherwise load the address into a temporary register first.
   3067             const splat_addr_mcv: MCValue = switch (splat_mcv) {
   3068                 .memory, .indirect, .load_frame => splat_mcv.address(),
   3069                 else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) },
   3070             };
   3071 
                    // AND the value with the mask, then apply the selected instruction with
                    // the destination register as every operand.
   3072             const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size);
   3073             if (self.hasFeature(.avx)) {
   3074                 try self.asmRegisterRegisterMemory(
   3075                     .{ .vp_, .@"and" },
   3076                     dst_reg,
   3077                     dst_reg,
   3078                     try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
   3079                 );
   3080                 try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg);
   3081             } else {
   3082                 try self.asmRegisterMemory(
   3083                     .{ .p_, .@"and" },
   3084                     dst_reg,
   3085                     try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
   3086                 );
   3087                 try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg);
   3088             }
   3089             break :result dst_mcv;
   3090         }
   3091 
   3092         // when truncating a `u16` to `u5`, for example, those top 3 bits in the result
   3093         // have to be removed. this only happens if the dst is not a power-of-two size.
   3094         if (dst_abi_size <= 8) {
   3095             if (self.regExtraBits(dst_ty) > 0) try self.truncateRegister(dst_ty, dst_mcv.register.to64());
   3096         } else if (dst_abi_size <= 16) {
                    // For a two-limb destination only the high limb is checked; it is
                    // modelled as an integer of `bits - 64` bits with the same signedness.
   3097             const dst_info = dst_ty.intInfo(mod);
   3098             const high_ty = try mod.intType(dst_info.signedness, dst_info.bits - 64);
   3099             if (self.regExtraBits(high_ty) > 0) try self.truncateRegister(high_ty, dst_mcv.register_pair[1].to64());
   3100         }
   3101 
   3102         break :result dst_mcv;
   3103     };
   3104     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   3105 }
   3106 
   3107 fn airIntFromBool(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a bool-to-integer conversion. No conversion of the value is done
            // here: the result is the operand's own location when `reuseOperand` allows
            // it, otherwise a copy of the operand in a register tracked for `inst`.
   3108     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
   3109     const ty = self.typeOfIndex(inst);
   3110 
   3111     const operand = try self.resolveInst(un_op);
   3112     const dst_mcv = if (self.reuseOperand(inst, un_op, 0, operand))
   3113         operand
   3114     else
   3115         try self.copyToRegisterWithInstTracking(inst, ty, operand);
   3116 
   3117     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
   3118 }
   3119 
   3120 fn airSlice(self: *Self, inst: Air.Inst.Index) !void {
            // Builds a slice value in a newly allocated frame slot from the two operands
            // stored in the `Air.Bin` payload: `lhs` is written at offset 0 and `rhs`
            // immediately after it, at the ABI size of the `lhs` type.
   3121     const mod = self.bin_file.comp.module.?;
   3122     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   3123     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3124 
   3125     const slice_ty = self.typeOfIndex(inst);
   3126     const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod));
   3127 
            // First field: the pointer operand.
   3128     const ptr_ty = self.typeOf(bin_op.lhs);
   3129     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = bin_op.lhs });
   3130 
            // Second field: the length operand, placed right after the pointer.
   3131     const len_ty = self.typeOf(bin_op.rhs);
   3132     try self.genSetMem(
   3133         .{ .frame = frame_index },
   3134         @intCast(ptr_ty.abiSize(mod)),
   3135         len_ty,
   3136         .{ .air_ref = bin_op.rhs },
   3137     );
   3138 
   3139     const result = MCValue{ .load_frame = .{ .index = frame_index } };
   3140     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3141 }
   3142 
   3143 fn airUnOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Thin wrapper for unary operations encoded as `ty_op`: the lowering itself is
            // delegated to `genUnOp`, and the result is recorded through `finishAir`.
   3144     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   3145     const dst_mcv = try self.genUnOp(inst, tag, ty_op.operand);
   3146     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   3147 }
   3148 
   3149 fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Lowers a binary operation through `genBinOp`. For ABI-integer result types
            // whose bit size is smaller than their ABI size in bits, the result is then
            // passed through `truncateRegister`: directly for a register result, or via
            // a temporary register for the top limb of a result held elsewhere.
   3150     const mod = self.bin_file.comp.module.?;
   3151     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   3152     const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
   3153 
   3154     const dst_ty = self.typeOfIndex(inst);
   3155     if (dst_ty.isAbiInt(mod)) {
   3156         const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   3157         const bit_size: u32 = @intCast(dst_ty.bitSize(mod));
   3158         if (abi_size * 8 > bit_size) {
                    // Only a plain `.register` result is locked here.
   3159             const dst_lock = switch (dst_mcv) {
   3160                 .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg),
   3161                 else => null,
   3162             };
   3163             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   3164 
   3165             if (dst_mcv.isRegister()) {
   3166                 try self.truncateRegister(dst_ty, dst_mcv.getReg().?);
   3167             } else {
   3168                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   3169                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   3170                 defer self.register_manager.unlockReg(tmp_lock);
   3171 
                        // `hi_ty` is an unsigned integer with as many bits as the top limb
                        // uses (1..64); `hi_mcv` is that limb, at byte offset
                        // `bit_size / 64 * 8` from the start of the result. The limb is
                        // loaded, truncated according to `dst_ty`, and stored back.
   3172                 const hi_ty = try mod.intType(.unsigned, @intCast((dst_ty.bitSize(mod) - 1) % 64 + 1));
   3173                 const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
   3174                 try self.genSetReg(tmp_reg, hi_ty, hi_mcv);
   3175                 try self.truncateRegister(dst_ty, tmp_reg);
   3176                 try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg });
   3177             }
   3178         }
   3179     }
   3180     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3181 }
   3182 
   3183 fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Thin wrapper for pointer arithmetic: the two operands are read from the
            // `Air.Bin` extra payload referenced by `ty_pl`, and the lowering is
            // delegated to `genBinOp`.
   3184     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   3185     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3186     const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
   3187     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3188 }
   3189 
   3190 fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 {
            // Returns a bit count for the integer value `dst_air` that can be smaller
            // than the width of its type:
            //  - result of an `.intcast` instruction: the cast's source width, adjusted
            //    by one when the signedness changes, capped at the destination width;
            //  - interned constant: its two's complement bit count, plus one when the
            //    value is non-negative and the type is signed;
            //  - anything else: the full width of the type.
   3191     const mod = self.bin_file.comp.module.?;
   3192     const air_tag = self.air.instructions.items(.tag);
   3193     const air_data = self.air.instructions.items(.data);
   3194 
   3195     const dst_ty = self.typeOf(dst_air);
   3196     const dst_info = dst_ty.intInfo(mod);
   3197     if (dst_air.toIndex()) |inst| {
   3198         switch (air_tag[@intFromEnum(inst)]) {
   3199             .intcast => {
   3200                 const src_ty = self.typeOf(air_data[@intFromEnum(inst)].ty_op.operand);
   3201                 const src_info = src_ty.intInfo(mod);
                        // signed -> unsigned drops one bit from the source width and
                        // unsigned -> signed adds one; same signedness keeps it unchanged.
   3202                 return @min(switch (src_info.signedness) {
   3203                     .signed => switch (dst_info.signedness) {
   3204                         .signed => src_info.bits,
   3205                         .unsigned => src_info.bits - 1,
   3206                     },
   3207                     .unsigned => switch (dst_info.signedness) {
   3208                         .signed => src_info.bits + 1,
   3209                         .unsigned => src_info.bits,
   3210                     },
   3211                 }, dst_info.bits);
   3212             },
   3213             else => {},
   3214         }
   3215     } else if (dst_air.toInterned()) |ip_index| {
   3216         var space: Value.BigIntSpace = undefined;
   3217         const src_int = Value.fromInterned(ip_index).toBigInt(&space, mod);
   3218         return @as(u16, @intCast(src_int.bitCountTwosComp())) +
   3219             @intFromBool(src_int.positive and dst_info.signedness == .signed);
   3220     }
   3221     return dst_info.bits;
   3222 }
   3223 
   3224 fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers `mul`, `mul_wrap`, `div_trunc`, `div_floor`, `div_exact`, `rem` and
            // `mod` (any other tag is unreachable here).
            //  - Float and vector results are delegated to `genBinOp`.
            //  - Division and remainder where both the result type and the computed
            //    source type are 16 bytes are lowered to library calls, with fix-up code
            //    for signed `div_floor` and signed `mod`.
            //  - Everything else goes to `genMulDivBinOp` after spilling eflags and
            //    the `rax` / `rdx` registers.
   3225     const mod = self.bin_file.comp.module.?;
   3226     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   3227     const result = result: {
   3228         const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
   3229         const dst_ty = self.typeOfIndex(inst);
   3230         switch (dst_ty.zigTypeTag(mod)) {
   3231             .Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
   3232             else => {},
   3233         }
   3234         const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   3235 
                // Operand type used for the operation. For multiplication it is sized by
                // the larger of both operands' `activeIntBits` and half the result
                // width; for division and remainder it is the full result width.
   3236         const dst_info = dst_ty.intInfo(mod);
   3237         const src_ty = try mod.intType(dst_info.signedness, switch (tag) {
   3238             else => unreachable,
   3239             .mul, .mul_wrap => @max(
   3240                 self.activeIntBits(bin_op.lhs),
   3241                 self.activeIntBits(bin_op.rhs),
   3242                 dst_info.bits / 2,
   3243             ),
   3244             .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits,
   3245         });
   3246         const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
   3247 
                // 16-byte case: multiplication falls through to the generic path below;
                // division and remainder are lowered to library calls and always leave
                // through `break :result`.
   3248         if (dst_abi_size == 16 and src_abi_size == 16) switch (tag) {
   3249             else => unreachable,
   3250             .mul, .mul_wrap => {},
   3251             .div_trunc, .div_floor, .div_exact, .rem, .mod => {
   3252                 const signed = dst_ty.isSignedInt(mod);
   3253                 var callee_buf: ["__udiv?i3".len]u8 = undefined;
                        // Extra state needed only for signed `div_floor`; left `undefined`
                        // (and never read) in every other case.
   3254                 const signed_div_floor_state: struct {
   3255                     frame_index: FrameIndex,
   3256                     state: State,
   3257                     reloc: Mir.Inst.Index,
   3258                 } = if (signed and tag == .div_floor) state: {
                            // 8-byte frame slot, zero-initialised; it later holds the
                            // amount subtracted from the quotient.
   3259                     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(Type.usize, mod));
   3260                     try self.asmMemoryImmediate(
   3261                         .{ ._, .mov },
   3262                         .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
   3263                         Immediate.u(0),
   3264                     );
   3265 
   3266                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   3267                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   3268                     defer self.register_manager.unlockReg(tmp_lock);
   3269 
                            // tmp_reg = high limb of lhs ...
   3270                     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3271                     const mat_lhs_mcv = switch (lhs_mcv) {
   3272                         .load_symbol => mat_lhs_mcv: {
   3273                             // TODO clean this up!
   3274                             const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address());
   3275                             break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   3276                         },
   3277                         else => lhs_mcv,
   3278                     };
   3279                     const mat_lhs_lock = switch (mat_lhs_mcv) {
   3280                         .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   3281                         else => null,
   3282                     };
   3283                     defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
   3284                     if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
   3285                         .{ ._, .mov },
   3286                         tmp_reg,
   3287                         try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
   3288                     ) else try self.asmRegisterRegister(
   3289                         .{ ._, .mov },
   3290                         tmp_reg,
   3291                         mat_lhs_mcv.register_pair[1],
   3292                     );
   3293 
                            // ... xor high limb of rhs, which sets the flags tested by the
                            // conditional jump below.
   3294                     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3295                     const mat_rhs_mcv = switch (rhs_mcv) {
   3296                         .load_symbol => mat_rhs_mcv: {
   3297                             // TODO clean this up!
   3298                             const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address());
   3299                             break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   3300                         },
   3301                         else => rhs_mcv,
   3302                     };
   3303                     const mat_rhs_lock = switch (mat_rhs_mcv) {
   3304                         .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   3305                         else => null,
   3306                     };
   3307                     defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3308                     if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
   3309                         .{ ._, .xor },
   3310                         tmp_reg,
   3311                         try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
   3312                     ) else try self.asmRegisterRegister(
   3313                         .{ ._, .xor },
   3314                         tmp_reg,
   3315                         mat_rhs_mcv.register_pair[1],
   3316                     );
                            // Snapshot the state and emit a conditional jump whose target
                            // is patched later by `performReloc`.
                            // NOTE(review): `.ns` is presumably "sign flag clear", i.e. the
                            // operands' signs agree and the remainder call is skipped -
                            // confirm against the condition-code definitions.
   3317                     const state = try self.saveState();
   3318                     const reloc = try self.asmJccReloc(.ns, undefined);
   3319 
   3320                     break :state .{ .frame_index = frame_index, .state = state, .reloc = reloc };
   3321                 } else undefined;
                        // Library call named "__" ++ ("" | "u") ++ ("div" | "mod") ++
                        // size letter ++ "i3". For signed `div_floor` this first call is
                        // the "mod" routine; the "div" call follows further down.
   3322                 const call_mcv = try self.genCall(
   3323                     .{ .lib = .{
   3324                         .return_type = dst_ty.toIntern(),
   3325                         .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
   3326                         .callee = std.fmt.bufPrint(&callee_buf, "__{s}{s}{c}i3", .{
   3327                             if (signed) "" else "u",
   3328                             switch (tag) {
   3329                                 .div_trunc, .div_exact => "div",
   3330                                 .div_floor => if (signed) "mod" else "div",
   3331                                 .rem, .mod => "mod",
   3332                                 else => unreachable,
   3333                             },
   3334                             intCompilerRtAbiName(@intCast(dst_ty.bitSize(mod))),
   3335                         }) catch unreachable,
   3336                     } },
   3337                     &.{ src_ty, src_ty },
   3338                     &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
   3339                 );
   3340                 break :result if (signed) switch (tag) {
   3341                     .div_floor => {
                                // `call_mcv` holds the remainder here. OR its two limbs
                                // together and store "non-zero" as a byte into the frame
                                // slot, so the slot becomes 1 when the remainder is not 0.
   3342                         try self.asmRegisterRegister(
   3343                             .{ ._, .@"or" },
   3344                             call_mcv.register_pair[0],
   3345                             call_mcv.register_pair[1],
   3346                         );
   3347                         try self.asmSetccMemory(.nz, .{
   3348                             .base = .{ .frame = signed_div_floor_state.frame_index },
   3349                             .mod = .{ .rm = .{ .size = .byte } },
   3350                         });
                                // Return to the state saved before the jump and make the
                                // jump land here, so both paths continue with the division.
   3351                         try self.restoreState(signed_div_floor_state.state, &.{}, .{
   3352                             .emit_instructions = true,
   3353                             .update_tracking = true,
   3354                             .resurrect = true,
   3355                             .close_scope = true,
   3356                         });
   3357                         try self.performReloc(signed_div_floor_state.reloc);
   3358                         const dst_mcv = try self.genCall(
   3359                             .{ .lib = .{
   3360                                 .return_type = dst_ty.toIntern(),
   3361                                 .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
   3362                                 .callee = std.fmt.bufPrint(&callee_buf, "__div{c}i3", .{
   3363                                     intCompilerRtAbiName(@intCast(dst_ty.bitSize(mod))),
   3364                                 }) catch unreachable,
   3365                             } },
   3366                             &.{ src_ty, src_ty },
   3367                             &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
   3368                         );
                                // Subtract the frame slot (0 or 1) from the two-limb
                                // quotient: `sub` on the low limb, `sbb 0` on the high limb.
   3369                         try self.asmRegisterMemory(
   3370                             .{ ._, .sub },
   3371                             dst_mcv.register_pair[0],
   3372                             .{
   3373                                 .base = .{ .frame = signed_div_floor_state.frame_index },
   3374                                 .mod = .{ .rm = .{ .size = .qword } },
   3375                             },
   3376                         );
   3377                         try self.asmRegisterImmediate(
   3378                             .{ ._, .sbb },
   3379                             dst_mcv.register_pair[1],
   3380                             Immediate.u(0),
   3381                         );
   3382                         try self.freeValue(
   3383                             .{ .load_frame = .{ .index = signed_div_floor_state.frame_index } },
   3384                         );
   3385                         break :result dst_mcv;
   3386                     },
   3387                     .mod => {
                                // Signed `mod`: the call returned the remainder in
                                // `dst_regs`. Compute remainder + rhs into temporaries and
                                // conditionally move that sum over the remainder.
   3388                         const dst_regs = call_mcv.register_pair;
   3389                         const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
   3390                         defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
   3391 
   3392                         const tmp_regs =
   3393                             try self.register_manager.allocRegs(2, .{null} ** 2, abi.RegisterClass.gp);
   3394                         const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
   3395                         defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
   3396 
   3397                         const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3398                         const mat_rhs_mcv = switch (rhs_mcv) {
   3399                             .load_symbol => mat_rhs_mcv: {
   3400                                 // TODO clean this up!
   3401                                 const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address());
   3402                                 break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   3403                             },
   3404                             else => rhs_mcv,
   3405                         };
   3406                         const mat_rhs_lock = switch (mat_rhs_mcv) {
   3407                             .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   3408                             else => null,
   3409                         };
   3410                         defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3411 
                                // tmp = remainder; tmp += rhs (add on the low limb, adc on
                                // the high limb).
   3412                         for (tmp_regs, dst_regs) |tmp_reg, dst_reg|
   3413                             try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg);
   3414                         if (mat_rhs_mcv.isMemory()) {
   3415                             try self.asmRegisterMemory(
   3416                                 .{ ._, .add },
   3417                                 tmp_regs[0],
   3418                                 try mat_rhs_mcv.mem(self, .qword),
   3419                             );
   3420                             try self.asmRegisterMemory(
   3421                                 .{ ._, .adc },
   3422                                 tmp_regs[1],
   3423                                 try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
   3424                             );
   3425                         } else for (
   3426                             [_]Mir.Inst.Tag{ .add, .adc },
   3427                             tmp_regs,
   3428                             mat_rhs_mcv.register_pair,
   3429                         ) |op, tmp_reg, rhs_reg|
   3430                             try self.asmRegisterRegister(.{ ._, op }, tmp_reg, rhs_reg);
                                // Test the remainder's high limb and conditionally replace
                                // both limbs with the sum.
                                // NOTE(review): `.s` is presumably "sign flag set", i.e. the
                                // remainder is negative - confirm.
   3431                         try self.asmRegisterRegister(.{ ._, .@"test" }, dst_regs[1], dst_regs[1]);
   3432                         for (dst_regs, tmp_regs) |dst_reg, tmp_reg|
   3433                             try self.asmCmovccRegisterRegister(.s, dst_reg, tmp_reg);
   3434                         break :result call_mcv;
   3435                     },
   3436                     else => call_mcv,
   3437                 } else call_mcv;
   3438             },
   3439         };
   3440 
                // Generic path: spill eflags if occupied and spill `rax` / `rdx` before
                // resolving the operands, then hand off to `genMulDivBinOp`.
   3441         try self.spillEflagsIfOccupied();
   3442         try self.spillRegisters(&.{ .rax, .rdx });
   3443         const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3444         const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3445         break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs_mcv, rhs_mcv);
   3446     };
   3447     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3448 }
   3449 
   3450 fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
   3451     const mod = self.bin_file.comp.module.?;
   3452     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   3453     const ty = self.typeOf(bin_op.lhs);
   3454     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
   3455         "TODO implement airAddSat for {}",
   3456         .{ty.fmt(mod)},
   3457     );
   3458 
   3459     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3460     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
   3461         lhs_mcv
   3462     else
   3463         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
   3464     const dst_reg = dst_mcv.register;
   3465     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   3466     defer self.register_manager.unlockReg(dst_lock);
   3467 
   3468     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3469     const rhs_lock = switch (rhs_mcv) {
   3470         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3471         else => null,
   3472     };
   3473     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3474 
   3475     const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   3476     const limit_mcv = MCValue{ .register = limit_reg };
   3477     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   3478     defer self.register_manager.unlockReg(limit_lock);
   3479 
   3480     const reg_bits = self.regBitSize(ty);
   3481     const reg_extra_bits = self.regExtraBits(ty);
   3482     const cc: Condition = if (ty.isSignedInt(mod)) cc: {
   3483         if (reg_extra_bits > 0) {
   3484             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3485         }
   3486         try self.genSetReg(limit_reg, ty, dst_mcv);
   3487         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   3488         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   3489             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
   3490         });
   3491         if (reg_extra_bits > 0) {
   3492             const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
   3493             const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
   3494             const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
   3495             defer self.register_manager.unlockReg(shifted_rhs_lock);
   3496 
   3497             try self.genShiftBinOpMir(
   3498                 .{ ._l, .sa },
   3499                 ty,
   3500                 shifted_rhs_mcv,
   3501                 .{ .immediate = reg_extra_bits },
   3502             );
   3503             try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, shifted_rhs_mcv);
   3504         } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
   3505         break :cc .o;
   3506     } else cc: {
   3507         try self.genSetReg(limit_reg, ty, .{
   3508             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - ty.bitSize(mod)),
   3509         });
   3510 
   3511         try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
   3512         if (reg_extra_bits > 0) {
   3513             try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, limit_mcv);
   3514             break :cc .a;
   3515         }
   3516         break :cc .c;
   3517     };
   3518 
   3519     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   3520     try self.asmCmovccRegisterRegister(
   3521         cc,
   3522         registerAlias(dst_reg, cmov_abi_size),
   3523         registerAlias(limit_reg, cmov_abi_size),
   3524     );
   3525 
   3526     if (reg_extra_bits > 0 and ty.isSignedInt(mod)) {
   3527         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3528     }
   3529 
   3530     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3531 }
   3532 
   3533 fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
   3534     const mod = self.bin_file.comp.module.?;
   3535     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   3536     const ty = self.typeOf(bin_op.lhs);
   3537     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
   3538         "TODO implement airSubSat for {}",
   3539         .{ty.fmt(mod)},
   3540     );
   3541 
   3542     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3543     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
   3544         lhs_mcv
   3545     else
   3546         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
   3547     const dst_reg = dst_mcv.register;
   3548     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   3549     defer self.register_manager.unlockReg(dst_lock);
   3550 
   3551     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3552     const rhs_lock = switch (rhs_mcv) {
   3553         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3554         else => null,
   3555     };
   3556     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3557 
   3558     const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   3559     const limit_mcv = MCValue{ .register = limit_reg };
   3560     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   3561     defer self.register_manager.unlockReg(limit_lock);
   3562 
   3563     const reg_bits = self.regBitSize(ty);
   3564     const reg_extra_bits = self.regExtraBits(ty);
   3565     const cc: Condition = if (ty.isSignedInt(mod)) cc: {
   3566         if (reg_extra_bits > 0) {
   3567             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3568         }
   3569         try self.genSetReg(limit_reg, ty, dst_mcv);
   3570         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   3571         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   3572             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
   3573         });
   3574         if (reg_extra_bits > 0) {
   3575             const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
   3576             const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
   3577             const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
   3578             defer self.register_manager.unlockReg(shifted_rhs_lock);
   3579 
   3580             try self.genShiftBinOpMir(
   3581                 .{ ._l, .sa },
   3582                 ty,
   3583                 shifted_rhs_mcv,
   3584                 .{ .immediate = reg_extra_bits },
   3585             );
   3586             try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv);
   3587         } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
   3588         break :cc .o;
   3589     } else cc: {
   3590         try self.genSetReg(limit_reg, ty, .{ .immediate = 0 });
   3591         try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
   3592         break :cc .c;
   3593     };
   3594 
   3595     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   3596     try self.asmCmovccRegisterRegister(
   3597         cc,
   3598         registerAlias(dst_reg, cmov_abi_size),
   3599         registerAlias(limit_reg, cmov_abi_size),
   3600     );
   3601 
   3602     if (reg_extra_bits > 0 and ty.isSignedInt(mod)) {
   3603         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits });
   3604     }
   3605 
   3606     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
   3607 }
   3608 
   3609 fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
   3610     const mod = self.bin_file.comp.module.?;
   3611     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   3612     const ty = self.typeOf(bin_op.lhs);
   3613 
   3614     const result = result: {
   3615         if (ty.toIntern() == .i128_type) {
   3616             const ptr_c_int = try mod.singleMutPtrType(Type.c_int);
   3617             const overflow = try self.allocTempRegOrMem(Type.c_int, false);
   3618 
   3619             const dst_mcv = try self.genCall(.{ .lib = .{
   3620                 .return_type = .i128_type,
   3621                 .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
   3622                 .callee = "__muloti4",
   3623             } }, &.{ Type.i128, Type.i128, ptr_c_int }, &.{
   3624                 .{ .air_ref = bin_op.lhs },
   3625                 .{ .air_ref = bin_op.rhs },
   3626                 overflow.address(),
   3627             });
   3628             const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_mcv.register_pair);
   3629             defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
   3630 
   3631             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   3632             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   3633             defer self.register_manager.unlockReg(tmp_lock);
   3634 
   3635             const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3636             const mat_lhs_mcv = switch (lhs_mcv) {
   3637                 .load_symbol => mat_lhs_mcv: {
   3638                     // TODO clean this up!
   3639                     const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address());
   3640                     break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   3641                 },
   3642                 else => lhs_mcv,
   3643             };
   3644             const mat_lhs_lock = switch (mat_lhs_mcv) {
   3645                 .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   3646                 else => null,
   3647             };
   3648             defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
   3649             if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
   3650                 .{ ._, .mov },
   3651                 tmp_reg,
   3652                 try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
   3653             ) else try self.asmRegisterRegister(
   3654                 .{ ._, .mov },
   3655                 tmp_reg,
   3656                 mat_lhs_mcv.register_pair[1],
   3657             );
   3658 
   3659             const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3660             const mat_rhs_mcv = switch (rhs_mcv) {
   3661                 .load_symbol => mat_rhs_mcv: {
   3662                     // TODO clean this up!
   3663                     const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address());
   3664                     break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   3665                 },
   3666                 else => rhs_mcv,
   3667             };
   3668             const mat_rhs_lock = switch (mat_rhs_mcv) {
   3669                 .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   3670                 else => null,
   3671             };
   3672             defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3673             if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
   3674                 .{ ._, .xor },
   3675                 tmp_reg,
   3676                 try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
   3677             ) else try self.asmRegisterRegister(
   3678                 .{ ._, .xor },
   3679                 tmp_reg,
   3680                 mat_rhs_mcv.register_pair[1],
   3681             );
   3682 
   3683             try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, Immediate.u(63));
   3684             try self.asmRegister(.{ ._, .not }, tmp_reg);
   3685             try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .dword), Immediate.s(0));
   3686             try self.freeValue(overflow);
   3687             try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg);
   3688             try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, Immediate.u(63));
   3689             try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[1], tmp_reg);
   3690             break :result dst_mcv;
   3691         }
   3692 
   3693         if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
   3694             "TODO implement airMulSat for {}",
   3695             .{ty.fmt(mod)},
   3696         );
   3697 
   3698         try self.spillRegisters(&.{ .rax, .rdx });
   3699         const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
   3700         defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
   3701 
   3702         const lhs_mcv = try self.resolveInst(bin_op.lhs);
   3703         const lhs_lock = switch (lhs_mcv) {
   3704             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3705             else => null,
   3706         };
   3707         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   3708 
   3709         const rhs_mcv = try self.resolveInst(bin_op.rhs);
   3710         const rhs_lock = switch (rhs_mcv) {
   3711             .register => |reg| self.register_manager.lockReg(reg),
   3712             else => null,
   3713         };
   3714         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   3715 
   3716         const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   3717         const limit_mcv = MCValue{ .register = limit_reg };
   3718         const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
   3719         defer self.register_manager.unlockReg(limit_lock);
   3720 
   3721         const reg_bits = self.regBitSize(ty);
   3722         const cc: Condition = if (ty.isSignedInt(mod)) cc: {
   3723             try self.genSetReg(limit_reg, ty, lhs_mcv);
   3724             try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv);
   3725             try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
   3726             try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
   3727                 .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
   3728             });
   3729             break :cc .o;
   3730         } else cc: {
   3731             try self.genSetReg(limit_reg, ty, .{
   3732                 .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - reg_bits),
   3733             });
   3734             break :cc .c;
   3735         };
   3736 
   3737         const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
   3738         const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   3739         try self.asmCmovccRegisterRegister(
   3740             cc,
   3741             registerAlias(dst_mcv.register, cmov_abi_size),
   3742             registerAlias(limit_reg, cmov_abi_size),
   3743         );
   3744         break :result dst_mcv;
   3745     };
   3746     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3747 }
   3748 
   3749 fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3750     const mod = self.bin_file.comp.module.?;
   3751     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   3752     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3753     const result: MCValue = result: {
   3754         const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
   3755         const ty = self.typeOf(bin_op.lhs);
   3756         switch (ty.zigTypeTag(mod)) {
   3757             .Vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}),
   3758             .Int => {
   3759                 try self.spillEflagsIfOccupied();
   3760 
   3761                 const partial_mcv = try self.genBinOp(null, switch (tag) {
   3762                     .add_with_overflow => .add,
   3763                     .sub_with_overflow => .sub,
   3764                     else => unreachable,
   3765                 }, bin_op.lhs, bin_op.rhs);
   3766                 const int_info = ty.intInfo(mod);
   3767                 const cc: Condition = switch (int_info.signedness) {
   3768                     .unsigned => .c,
   3769                     .signed => .o,
   3770                 };
   3771 
   3772                 const tuple_ty = self.typeOfIndex(inst);
   3773                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
   3774                     switch (partial_mcv) {
   3775                         .register => |reg| {
   3776                             self.eflags_inst = inst;
   3777                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3778                         },
   3779                         else => {},
   3780                     }
   3781 
   3782                     const frame_index =
   3783                         try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
   3784                     try self.genSetMem(
   3785                         .{ .frame = frame_index },
   3786                         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3787                         Type.u1,
   3788                         .{ .eflags = cc },
   3789                     );
   3790                     try self.genSetMem(
   3791                         .{ .frame = frame_index },
   3792                         @intCast(tuple_ty.structFieldOffset(0, mod)),
   3793                         ty,
   3794                         partial_mcv,
   3795                     );
   3796                     break :result .{ .load_frame = .{ .index = frame_index } };
   3797                 }
   3798 
   3799                 const frame_index =
   3800                     try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
   3801                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3802                 break :result .{ .load_frame = .{ .index = frame_index } };
   3803             },
   3804             else => unreachable,
   3805         }
   3806     };
   3807     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3808 }
   3809 
   3810 fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3811     const mod = self.bin_file.comp.module.?;
   3812     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   3813     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3814     const result: MCValue = result: {
   3815         const lhs_ty = self.typeOf(bin_op.lhs);
   3816         const rhs_ty = self.typeOf(bin_op.rhs);
   3817         switch (lhs_ty.zigTypeTag(mod)) {
   3818             .Vector => return self.fail("TODO implement shl with overflow for Vector type", .{}),
   3819             .Int => {
   3820                 try self.spillEflagsIfOccupied();
   3821 
   3822                 try self.register_manager.getReg(.rcx, null);
   3823                 const lhs = try self.resolveInst(bin_op.lhs);
   3824                 const rhs = try self.resolveInst(bin_op.rhs);
   3825 
   3826                 const int_info = lhs_ty.intInfo(mod);
   3827 
   3828                 const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty);
   3829                 const partial_lock = switch (partial_mcv) {
   3830                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3831                     else => null,
   3832                 };
   3833                 defer if (partial_lock) |lock| self.register_manager.unlockReg(lock);
   3834 
   3835                 const tmp_mcv = try self.genShiftBinOp(.shr, null, partial_mcv, rhs, lhs_ty, rhs_ty);
   3836                 const tmp_lock = switch (tmp_mcv) {
   3837                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   3838                     else => null,
   3839                 };
   3840                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   3841 
   3842                 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs);
   3843                 const cc = Condition.ne;
   3844 
   3845                 const tuple_ty = self.typeOfIndex(inst);
   3846                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
   3847                     switch (partial_mcv) {
   3848                         .register => |reg| {
   3849                             self.eflags_inst = inst;
   3850                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   3851                         },
   3852                         else => {},
   3853                     }
   3854 
   3855                     const frame_index =
   3856                         try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
   3857                     try self.genSetMem(
   3858                         .{ .frame = frame_index },
   3859                         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3860                         tuple_ty.structFieldType(1, mod),
   3861                         .{ .eflags = cc },
   3862                     );
   3863                     try self.genSetMem(
   3864                         .{ .frame = frame_index },
   3865                         @intCast(tuple_ty.structFieldOffset(0, mod)),
   3866                         tuple_ty.structFieldType(0, mod),
   3867                         partial_mcv,
   3868                     );
   3869                     break :result .{ .load_frame = .{ .index = frame_index } };
   3870                 }
   3871 
   3872                 const frame_index =
   3873                     try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
   3874                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   3875                 break :result .{ .load_frame = .{ .index = frame_index } };
   3876             },
   3877             else => unreachable,
   3878         }
   3879     };
   3880     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   3881 }
   3882 
   3883 fn genSetFrameTruncatedOverflowCompare(
   3884     self: *Self,
   3885     tuple_ty: Type,
   3886     frame_index: FrameIndex,
   3887     src_mcv: MCValue,
   3888     overflow_cc: ?Condition,
   3889 ) !void {
   3890     const mod = self.bin_file.comp.module.?;
   3891     const src_lock = switch (src_mcv) {
   3892         .register => |reg| self.register_manager.lockReg(reg),
   3893         else => null,
   3894     };
   3895     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   3896 
   3897     const ty = tuple_ty.structFieldType(0, mod);
   3898     const int_info = ty.intInfo(mod);
   3899 
   3900     const hi_bits = (int_info.bits - 1) % 64 + 1;
   3901     const hi_ty = try mod.intType(int_info.signedness, hi_bits);
   3902 
   3903     const limb_bits: u16 = @intCast(if (int_info.bits <= 64) self.regBitSize(ty) else 64);
   3904     const limb_ty = try mod.intType(int_info.signedness, limb_bits);
   3905 
   3906     const rest_ty = try mod.intType(.unsigned, int_info.bits - hi_bits);
   3907 
   3908     const temp_regs =
   3909         try self.register_manager.allocRegs(3, .{ null, null, null }, abi.RegisterClass.gp);
   3910     const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs);
   3911     defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
   3912 
   3913     const overflow_reg = temp_regs[0];
   3914     if (overflow_cc) |cc| try self.asmSetccRegister(cc, overflow_reg.to8());
   3915 
   3916     const scratch_reg = temp_regs[1];
   3917     const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
   3918     const hi_limb_mcv = if (hi_limb_off > 0)
   3919         src_mcv.address().offset(int_info.bits / 64 * 8).deref()
   3920     else
   3921         src_mcv;
   3922     try self.genSetReg(scratch_reg, limb_ty, hi_limb_mcv);
   3923     try self.truncateRegister(hi_ty, scratch_reg);
   3924     try self.genBinOpMir(.{ ._, .cmp }, limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);
   3925 
   3926     const eq_reg = temp_regs[2];
   3927     if (overflow_cc) |_| {
   3928         try self.asmSetccRegister(.ne, eq_reg.to8());
   3929         try self.genBinOpMir(
   3930             .{ ._, .@"or" },
   3931             Type.u8,
   3932             .{ .register = overflow_reg },
   3933             .{ .register = eq_reg },
   3934         );
   3935     }
   3936 
   3937     const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, mod));
   3938     if (hi_limb_off > 0) try self.genSetMem(.{ .frame = frame_index }, payload_off, rest_ty, src_mcv);
   3939     try self.genSetMem(
   3940         .{ .frame = frame_index },
   3941         payload_off + hi_limb_off,
   3942         limb_ty,
   3943         .{ .register = scratch_reg },
   3944     );
   3945     try self.genSetMem(
   3946         .{ .frame = frame_index },
   3947         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3948         tuple_ty.structFieldType(1, mod),
   3949         if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
   3950     );
   3951 }
   3952 
   3953 fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
   3954     const mod = self.bin_file.comp.module.?;
   3955     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   3956     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
   3957     const tuple_ty = self.typeOfIndex(inst);
   3958     const dst_ty = self.typeOf(bin_op.lhs);
   3959     const result: MCValue = switch (dst_ty.zigTypeTag(mod)) {
   3960         .Vector => return self.fail("TODO implement airMulWithOverflow for {}", .{dst_ty.fmt(mod)}),
   3961         .Int => result: {
   3962             const dst_info = dst_ty.intInfo(mod);
   3963             const lhs_active_bits = self.activeIntBits(bin_op.lhs);
   3964             const rhs_active_bits = self.activeIntBits(bin_op.rhs);
   3965             const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2);
   3966             const src_ty = try mod.intType(dst_info.signedness, src_bits);
   3967 
   3968             if (src_bits > 64 and src_bits <= 128 and
   3969                 dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) {
   3970                 .signed => {
   3971                     const ptr_c_int = try mod.singleMutPtrType(Type.c_int);
   3972                     const overflow = try self.allocTempRegOrMem(Type.c_int, false);
   3973                     const result = try self.genCall(.{ .lib = .{
   3974                         .return_type = .i128_type,
   3975                         .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
   3976                         .callee = "__muloti4",
   3977                     } }, &.{ Type.i128, Type.i128, ptr_c_int }, &.{
   3978                         .{ .air_ref = bin_op.lhs },
   3979                         .{ .air_ref = bin_op.rhs },
   3980                         overflow.address(),
   3981                     });
   3982 
   3983                     const dst_mcv = try self.allocRegOrMem(inst, false);
   3984                     try self.genSetMem(
   3985                         .{ .frame = dst_mcv.load_frame.index },
   3986                         @intCast(tuple_ty.structFieldOffset(0, mod)),
   3987                         tuple_ty.structFieldType(0, mod),
   3988                         result,
   3989                     );
   3990                     try self.asmMemoryImmediate(
   3991                         .{ ._, .cmp },
   3992                         try overflow.mem(self, self.memSize(Type.c_int)),
   3993                         Immediate.s(0),
   3994                     );
   3995                     try self.genSetMem(
   3996                         .{ .frame = dst_mcv.load_frame.index },
   3997                         @intCast(tuple_ty.structFieldOffset(1, mod)),
   3998                         tuple_ty.structFieldType(1, mod),
   3999                         .{ .eflags = .ne },
   4000                     );
   4001                     try self.freeValue(overflow);
   4002                     break :result dst_mcv;
   4003                 },
   4004                 .unsigned => {
   4005                     try self.spillEflagsIfOccupied();
   4006                     try self.spillRegisters(&.{ .rax, .rdx });
   4007                     const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx });
   4008                     defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
   4009 
   4010                     const tmp_regs =
   4011                         try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp);
   4012                     const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs);
   4013                     defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
   4014 
   4015                     const lhs_mcv = try self.resolveInst(bin_op.lhs);
   4016                     const rhs_mcv = try self.resolveInst(bin_op.rhs);
   4017                     const mat_lhs_mcv = switch (lhs_mcv) {
   4018                         .load_symbol => mat_lhs_mcv: {
   4019                             // TODO clean this up!
   4020                             const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address());
   4021                             break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   4022                         },
   4023                         else => lhs_mcv,
   4024                     };
   4025                     const mat_lhs_lock = switch (mat_lhs_mcv) {
   4026                         .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   4027                         else => null,
   4028                     };
   4029                     defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
   4030                     const mat_rhs_mcv = switch (rhs_mcv) {
   4031                         .load_symbol => mat_rhs_mcv: {
   4032                             // TODO clean this up!
   4033                             const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address());
   4034                             break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
   4035                         },
   4036                         else => rhs_mcv,
   4037                     };
   4038                     const mat_rhs_lock = switch (mat_rhs_mcv) {
   4039                         .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
   4040                         else => null,
   4041                     };
   4042                     defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
   4043 
   4044                     if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
   4045                         .{ ._, .mov },
   4046                         .rax,
   4047                         try mat_lhs_mcv.mem(self, .qword),
   4048                     ) else try self.asmRegisterRegister(
   4049                         .{ ._, .mov },
   4050                         .rax,
   4051                         mat_lhs_mcv.register_pair[0],
   4052                     );
   4053                     if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
   4054                         .{ ._, .mov },
   4055                         tmp_regs[0],
   4056                         try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
   4057                     ) else try self.asmRegisterRegister(
   4058                         .{ ._, .mov },
   4059                         tmp_regs[0],
   4060                         mat_rhs_mcv.register_pair[1],
   4061                     );
   4062                     try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
   4063                     try self.asmSetccRegister(.nz, tmp_regs[1].to8());
   4064                     try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax);
   4065                     try self.asmSetccRegister(.o, tmp_regs[2].to8());
   4066                     if (mat_rhs_mcv.isMemory())
   4067                         try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword))
   4068                     else
   4069                         try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
   4070                     try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
   4071                     try self.asmSetccRegister(.c, tmp_regs[3].to8());
   4072                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8());
   4073                     if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
   4074                         .{ ._, .mov },
   4075                         tmp_regs[0],
   4076                         try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
   4077                     ) else try self.asmRegisterRegister(
   4078                         .{ ._, .mov },
   4079                         tmp_regs[0],
   4080                         mat_lhs_mcv.register_pair[1],
   4081                     );
   4082                     try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
   4083                     try self.asmSetccRegister(.nz, tmp_regs[3].to8());
   4084                     try self.asmRegisterRegister(
   4085                         .{ ._, .@"and" },
   4086                         tmp_regs[1].to8(),
   4087                         tmp_regs[3].to8(),
   4088                     );
   4089                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
   4090                     if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
   4091                         .{ .i_, .mul },
   4092                         tmp_regs[0],
   4093                         try mat_rhs_mcv.mem(self, .qword),
   4094                     ) else try self.asmRegisterRegister(
   4095                         .{ .i_, .mul },
   4096                         tmp_regs[0],
   4097                         mat_rhs_mcv.register_pair[0],
   4098                     );
   4099                     try self.asmSetccRegister(.o, tmp_regs[2].to8());
   4100                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
   4101                     try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
   4102                     try self.asmSetccRegister(.c, tmp_regs[2].to8());
   4103                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
   4104 
   4105                     const dst_mcv = try self.allocRegOrMem(inst, false);
   4106                     try self.genSetMem(
   4107                         .{ .frame = dst_mcv.load_frame.index },
   4108                         @intCast(tuple_ty.structFieldOffset(0, mod)),
   4109                         tuple_ty.structFieldType(0, mod),
   4110                         .{ .register_pair = .{ .rax, .rdx } },
   4111                     );
   4112                     try self.genSetMem(
   4113                         .{ .frame = dst_mcv.load_frame.index },
   4114                         @intCast(tuple_ty.structFieldOffset(1, mod)),
   4115                         tuple_ty.structFieldType(1, mod),
   4116                         .{ .register = tmp_regs[1] },
   4117                     );
   4118                     break :result dst_mcv;
   4119                 },
   4120             };
   4121 
   4122             try self.spillEflagsIfOccupied();
   4123             try self.spillRegisters(&.{ .rax, .rdx });
   4124 
   4125             const cc: Condition = switch (dst_info.signedness) {
   4126                 .unsigned => .c,
   4127                 .signed => .o,
   4128             };
   4129 
   4130             const lhs = try self.resolveInst(bin_op.lhs);
   4131             const rhs = try self.resolveInst(bin_op.rhs);
   4132 
   4133             const extra_bits = if (dst_info.bits <= 64)
   4134                 self.regExtraBits(dst_ty)
   4135             else
   4136                 dst_info.bits % 64;
   4137             const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs);
   4138 
   4139             switch (partial_mcv) {
   4140                 .register => |reg| if (extra_bits == 0) {
   4141                     self.eflags_inst = inst;
   4142                     break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
   4143                 } else {
   4144                     const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
   4145                     try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
   4146                     break :result .{ .load_frame = .{ .index = frame_index } };
   4147                 },
   4148                 else => {
   4149                     // For now, this is the only supported multiply that doesn't fit in a register.
   4150                     if (dst_info.bits > 128 or src_bits != 64)
   4151                         return self.fail("TODO implement airWithOverflow from {} to {}", .{
   4152                             src_ty.fmt(mod), dst_ty.fmt(mod),
   4153                         });
   4154 
   4155                     const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
   4156                     if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
   4157                         try self.genSetMem(
   4158                             .{ .frame = frame_index },
   4159                             @intCast(tuple_ty.structFieldOffset(0, mod)),
   4160                             tuple_ty.structFieldType(0, mod),
   4161                             partial_mcv,
   4162                         );
   4163                         try self.genSetMem(
   4164                             .{ .frame = frame_index },
   4165                             @intCast(tuple_ty.structFieldOffset(1, mod)),
   4166                             tuple_ty.structFieldType(1, mod),
   4167                             .{ .immediate = 0 }, // cc being set is impossible
   4168                         );
   4169                     } else try self.genSetFrameTruncatedOverflowCompare(
   4170                         tuple_ty,
   4171                         frame_index,
   4172                         partial_mcv,
   4173                         null,
   4174                     );
   4175                     break :result .{ .load_frame = .{ .index = frame_index } };
   4176                 },
   4177             }
   4178         },
   4179         else => unreachable,
   4180     };
   4181     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   4182 }
   4183 
   4184 /// Generates signed or unsigned integer multiplication/division.
   4185 /// Clobbers .rax and .rdx registers.
   4186 /// Quotient is saved in .rax and remainder in .rdx.
   4187 fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
   4188     const mod = self.bin_file.comp.module.?;
   4189     const abi_size: u32 = @intCast(ty.abiSize(mod));
   4190     const bit_size: u32 = @intCast(self.regBitSize(ty));
   4191     if (abi_size > 8) {
   4192         return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
   4193     }
   4194 
   4195     try self.genSetReg(.rax, ty, lhs);
   4196     switch (tag[1]) {
   4197         else => unreachable,
   4198         .mul => {},
   4199         .div => switch (tag[0]) {
   4200             ._ => {
   4201                 const hi_reg: Register =
   4202                     switch (bit_size) {
   4203                     8 => .ah,
   4204                     16, 32, 64 => .edx,
   4205                     else => unreachable,
   4206                 };
   4207                 try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg);
   4208             },
   4209             .i_ => try self.asmOpOnly(.{ ._, switch (bit_size) {
   4210                 8 => .cbw,
   4211                 16 => .cwd,
   4212                 32 => .cdq,
   4213                 64 => .cqo,
   4214                 else => unreachable,
   4215             } }),
   4216             else => unreachable,
   4217         },
   4218     }
   4219 
   4220     const mat_rhs: MCValue = switch (rhs) {
   4221         .register, .indirect, .load_frame => rhs,
   4222         else => .{ .register = try self.copyToTmpRegister(ty, rhs) },
   4223     };
   4224     switch (mat_rhs) {
   4225         .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)),
   4226         .memory, .indirect, .load_frame => try self.asmMemory(
   4227             tag,
   4228             try mat_rhs.mem(self, Memory.Size.fromSize(abi_size)),
   4229         ),
   4230         else => unreachable,
   4231     }
   4232     if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah);
   4233 }
   4234 
   4235 /// Always returns a register.
   4236 /// Clobbers .rax and .rdx registers.
   4237 fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue {
   4238     const mod = self.bin_file.comp.module.?;
   4239     const abi_size: u32 = @intCast(ty.abiSize(mod));
   4240     const int_info = ty.intInfo(mod);
   4241     const dividend = switch (lhs) {
   4242         .register => |reg| reg,
   4243         else => try self.copyToTmpRegister(ty, lhs),
   4244     };
   4245     const dividend_lock = self.register_manager.lockReg(dividend);
   4246     defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock);
   4247 
   4248     const divisor = switch (rhs) {
   4249         .register => |reg| reg,
   4250         else => try self.copyToTmpRegister(ty, rhs),
   4251     };
   4252     const divisor_lock = self.register_manager.lockReg(divisor);
   4253     defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock);
   4254 
   4255     try self.genIntMulDivOpMir(
   4256         switch (int_info.signedness) {
   4257             .signed => .{ .i_, .div },
   4258             .unsigned => .{ ._, .div },
   4259         },
   4260         ty,
   4261         .{ .register = dividend },
   4262         .{ .register = divisor },
   4263     );
   4264 
   4265     try self.asmRegisterRegister(
   4266         .{ ._, .xor },
   4267         registerAlias(divisor, abi_size),
   4268         registerAlias(dividend, abi_size),
   4269     );
   4270     try self.asmRegisterImmediate(
   4271         .{ ._r, .sa },
   4272         registerAlias(divisor, abi_size),
   4273         Immediate.u(int_info.bits - 1),
   4274     );
   4275     try self.asmRegisterRegister(
   4276         .{ ._, .@"test" },
   4277         registerAlias(.rdx, abi_size),
   4278         registerAlias(.rdx, abi_size),
   4279     );
   4280     try self.asmCmovccRegisterRegister(
   4281         .z,
   4282         registerAlias(divisor, @max(abi_size, 2)),
   4283         registerAlias(.rdx, @max(abi_size, 2)),
   4284     );
   4285     try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax });
   4286     return MCValue{ .register = divisor };
   4287 }
   4288 
/// Lowers the AIR shift instructions `shl`, `shl_exact`, `shr` and `shr_exact`.
/// Integer operands are delegated to `genShiftBinOp`. Vectors of 16-, 32- or
/// 64-bit integers are handled only when a packed shift instruction is
/// available for the enabled CPU features and the shift amount is either a
/// comptime-known repeated element or the result of a `splat` instruction.
/// Every other case fails with a TODO error.
fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
    const mod = self.bin_file.comp.module.?;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

    const air_tags = self.air.instructions.items(.tag);
    const tag = air_tags[@intFromEnum(inst)];
    const lhs_ty = self.typeOf(bin_op.lhs);
    const rhs_ty = self.typeOf(bin_op.rhs);
    const result: MCValue = result: {
        switch (lhs_ty.zigTypeTag(mod)) {
            .Int => {
                // Spill and reserve .rcx before the operands are resolved
                // (presumably because variable shift counts are passed in .cl).
                try self.spillRegisters(&.{.rcx});
                try self.register_manager.getReg(.rcx, null);
                const lhs_mcv = try self.resolveInst(bin_op.lhs);
                const rhs_mcv = try self.resolveInst(bin_op.rhs);

                const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
                switch (tag) {
                    .shr, .shr_exact, .shl_exact => {},
                    // Only a plain `shl` truncates the result back to the type of `lhs`.
                    .shl => switch (dst_mcv) {
                        .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
                        .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
                        .load_frame => |frame_addr| {
                            // Result lives in memory: load the 8-byte limb that holds the
                            // most significant bit, truncate it, and store it back.
                            const tmp_reg =
                                try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                            defer self.register_manager.unlockReg(tmp_lock);

                            const lhs_bits: u31 = @intCast(lhs_ty.bitSize(mod));
                            const tmp_ty = if (lhs_bits > 64) Type.usize else lhs_ty;
                            const off = frame_addr.off + (lhs_bits - 1) / 64 * 8;
                            try self.genSetReg(
                                tmp_reg,
                                tmp_ty,
                                .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
                            );
                            try self.truncateRegister(lhs_ty, tmp_reg);
                            try self.genSetMem(
                                .{ .frame = frame_addr.index },
                                off,
                                tmp_ty,
                                .{ .register = tmp_reg },
                            );
                        },
                        else => {},
                    },
                    else => unreachable,
                }
                break :result dst_mcv;
            },
            .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
                // Pick the packed shift instruction from element width, vector length,
                // shift direction, signedness and CPU features. `null` means no
                // instruction was selected and the TODO failure below is reached.
                .Int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(mod).intInfo(mod).bits) {
                    else => null,
                    16 => switch (lhs_ty.vectorLen(mod)) {
                        else => null,
                        1...8 => switch (tag) {
                            else => unreachable,
                            .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
                                .signed => if (self.hasFeature(.avx))
                                    .{ .vp_w, .sra }
                                else
                                    .{ .p_w, .sra },
                                .unsigned => if (self.hasFeature(.avx))
                                    .{ .vp_w, .srl }
                                else
                                    .{ .p_w, .srl },
                            },
                            .shl, .shl_exact => if (self.hasFeature(.avx))
                                .{ .vp_w, .sll }
                            else
                                .{ .p_w, .sll },
                        },
                        9...16 => switch (tag) {
                            else => unreachable,
                            .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
                                .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null,
                                .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null,
                            },
                            .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null,
                        },
                    },
                    32 => switch (lhs_ty.vectorLen(mod)) {
                        else => null,
                        1...4 => switch (tag) {
                            else => unreachable,
                            .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
                                .signed => if (self.hasFeature(.avx))
                                    .{ .vp_d, .sra }
                                else
                                    .{ .p_d, .sra },
                                .unsigned => if (self.hasFeature(.avx))
                                    .{ .vp_d, .srl }
                                else
                                    .{ .p_d, .srl },
                            },
                            .shl, .shl_exact => if (self.hasFeature(.avx))
                                .{ .vp_d, .sll }
                            else
                                .{ .p_d, .sll },
                        },
                        5...8 => switch (tag) {
                            else => unreachable,
                            .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
                                .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null,
                                .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null,
                            },
                            .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null,
                        },
                    },
                    // NOTE(review): the signed cases below select a packed 64-bit
                    // arithmetic right shift (`.sra` with a `_q` prefix). As far as I
                    // know x86 only provides that instruction (VPSRAQ) with AVX-512;
                    // confirm how this tag is lowered on SSE/AVX/AVX2 targets.
                    64 => switch (lhs_ty.vectorLen(mod)) {
                        else => null,
                        1...2 => switch (tag) {
                            else => unreachable,
                            .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
                                .signed => if (self.hasFeature(.avx))
                                    .{ .vp_q, .sra }
                                else
                                    .{ .p_q, .sra },
                                .unsigned => if (self.hasFeature(.avx))
                                    .{ .vp_q, .srl }
                                else
                                    .{ .p_q, .srl },
                            },
                            .shl, .shl_exact => if (self.hasFeature(.avx))
                                .{ .vp_q, .sll }
                            else
                                .{ .p_q, .sll },
                        },
                        3...4 => switch (tag) {
                            else => unreachable,
                            .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
                                .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null,
                                .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null,
                            },
                            .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null,
                        },
                    },
                })) |mir_tag| if (try self.air.value(bin_op.rhs, mod)) |rhs_val| {
                    // The shift amount is comptime-known. Only an aggregate that repeats
                    // one element in every lane is handled, using the immediate form.
                    switch (mod.intern_pool.indexToKey(rhs_val.toIntern())) {
                        .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) {
                            .repeated_elem => |rhs_elem| {
                                const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));

                                // Choose destination and source registers:
                                //  - reuse the lhs register for both when the operand dies here,
                                //  - with AVX, allocate a separate destination register,
                                //  - otherwise copy lhs into a fresh register used for both.
                                const lhs_mcv = try self.resolveInst(bin_op.lhs);
                                const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
                                    self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
                                    .{lhs_mcv.getReg().?} ** 2
                                else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
                                    try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
                                    lhs_mcv.getReg().?,
                                } else .{(try self.copyToRegisterWithInstTracking(
                                    inst,
                                    lhs_ty,
                                    lhs_mcv,
                                )).register} ** 2;
                                const reg_locks =
                                    self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
                                defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
                                    self.register_manager.unlockReg(lock);

                                const shift_imm =
                                    Immediate.u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(mod)));
                                if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate(
                                    mir_tag,
                                    registerAlias(dst_reg, abi_size),
                                    registerAlias(lhs_reg, abi_size),
                                    shift_imm,
                                ) else {
                                    // The non-AVX form shifts in place, so source and
                                    // destination must be the same register.
                                    assert(dst_reg.id() == lhs_reg.id());
                                    try self.asmRegisterImmediate(
                                        mir_tag,
                                        registerAlias(dst_reg, abi_size),
                                        shift_imm,
                                    );
                                }
                                break :result .{ .register = dst_reg };
                            },
                            else => {},
                        },
                        else => {},
                    }
                } else if (bin_op.rhs.toIndex()) |rhs_inst| switch (air_tags[@intFromEnum(rhs_inst)]) {
                    // The shift amount is a runtime value produced by a `splat` instruction.
                    .splat => {
                        const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));

                        // Same destination/source register selection as for the
                        // comptime-known shift amount.
                        const lhs_mcv = try self.resolveInst(bin_op.lhs);
                        const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
                            self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
                            .{lhs_mcv.getReg().?} ** 2
                        else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
                            try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
                            lhs_mcv.getReg().?,
                        } else .{(try self.copyToRegisterWithInstTracking(
                            inst,
                            lhs_ty,
                            lhs_mcv,
                        )).register} ** 2;
                        const reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
                        defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
                            self.register_manager.unlockReg(lock);

                        const shift_reg =
                            try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs });
                        const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg);
                        defer self.register_manager.unlockReg(shift_lock);

                        // Build a 16-byte constant whose first byte is the maximum value of
                        // the shift-amount element type and whose other 15 bytes are zero.
                        // AND-ing it into the shift register keeps only that masked low byte.
                        const mask_ty = try mod.vectorType(.{ .len = 16, .child = .u8_type });
                        const mask_mcv = try self.genTypedValue(.{
                            .ty = mask_ty,
                            .val = Value.fromInterned((try mod.intern(.{ .aggregate = .{
                                .ty = mask_ty.toIntern(),
                                .storage = .{ .elems = &([1]InternPool.Index{
                                    (try rhs_ty.childType(mod).maxIntScalar(mod, Type.u8)).toIntern(),
                                } ++ [1]InternPool.Index{
                                    (try mod.intValue(Type.u8, 0)).toIntern(),
                                } ** 15) },
                            } }))),
                        });
                        const mask_addr_reg =
                            try self.copyToTmpRegister(Type.usize, mask_mcv.address());
                        const mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg);
                        defer self.register_manager.unlockReg(mask_addr_lock);

                        if (self.hasFeature(.avx)) {
                            try self.asmRegisterRegisterMemory(
                                .{ .vp_, .@"and" },
                                shift_reg.to128(),
                                shift_reg.to128(),
                                .{
                                    .base = .{ .reg = mask_addr_reg },
                                    .mod = .{ .rm = .{ .size = .xword } },
                                },
                            );
                            try self.asmRegisterRegisterRegister(
                                mir_tag,
                                registerAlias(dst_reg, abi_size),
                                registerAlias(lhs_reg, abi_size),
                                shift_reg.to128(),
                            );
                        } else {
                            try self.asmRegisterMemory(
                                .{ .p_, .@"and" },
                                shift_reg.to128(),
                                .{
                                    .base = .{ .reg = mask_addr_reg },
                                    .mod = .{ .rm = .{ .size = .xword } },
                                },
                            );
                            // The non-AVX form shifts in place, so source and
                            // destination must be the same register.
                            assert(dst_reg.id() == lhs_reg.id());
                            try self.asmRegisterRegister(
                                mir_tag,
                                registerAlias(dst_reg, abi_size),
                                shift_reg.to128(),
                            );
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                else => {},
            },
            else => {},
        }
        // Reached by every combination that did not `break :result` above.
        return self.fail("TODO implement airShlShrBinOp for {}", .{lhs_ty.fmt(mod)});
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
   4556 
   4557 fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
   4558     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   4559     _ = bin_op;
   4560     return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
   4561     //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   4562 }
   4563 
   4564 fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void {
   4565     const mod = self.bin_file.comp.module.?;
   4566     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4567     const result: MCValue = result: {
   4568         const pl_ty = self.typeOfIndex(inst);
   4569         if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   4570 
   4571         const opt_mcv = try self.resolveInst(ty_op.operand);
   4572         if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) {
   4573             const pl_mcv: MCValue = switch (opt_mcv) {
   4574                 .register_overflow => |ro| pl: {
   4575                     self.eflags_inst = null; // actually stop tracking the overflow part
   4576                     break :pl .{ .register = ro.reg };
   4577                 },
   4578                 else => opt_mcv,
   4579             };
   4580             switch (pl_mcv) {
   4581                 .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg),
   4582                 else => {},
   4583             }
   4584             break :result pl_mcv;
   4585         }
   4586 
   4587         const pl_mcv = try self.allocRegOrMem(inst, true);
   4588         try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) {
   4589             else => opt_mcv,
   4590             .register_overflow => |ro| .{ .register = ro.reg },
   4591         });
   4592         break :result pl_mcv;
   4593     };
   4594     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4595 }
   4596 
   4597 fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
   4598     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4599 
   4600     const dst_ty = self.typeOfIndex(inst);
   4601     const opt_mcv = try self.resolveInst(ty_op.operand);
   4602 
   4603     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
   4604         opt_mcv
   4605     else
   4606         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
   4607     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4608 }
   4609 
   4610 fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
   4611     const mod = self.bin_file.comp.module.?;
   4612     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4613     const result = result: {
   4614         const dst_ty = self.typeOfIndex(inst);
   4615         const src_ty = self.typeOf(ty_op.operand);
   4616         const opt_ty = src_ty.childType(mod);
   4617         const src_mcv = try self.resolveInst(ty_op.operand);
   4618 
   4619         if (opt_ty.optionalReprIsPayload(mod)) {
   4620             break :result if (self.liveness.isUnused(inst))
   4621                 .unreach
   4622             else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4623                 src_mcv
   4624             else
   4625                 try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   4626         }
   4627 
   4628         const dst_mcv: MCValue = if (src_mcv.isRegister() and
   4629             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4630             src_mcv
   4631         else if (self.liveness.isUnused(inst))
   4632             .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) }
   4633         else
   4634             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
   4635 
   4636         const pl_ty = dst_ty.childType(mod);
   4637         const pl_abi_size: i32 = @intCast(pl_ty.abiSize(mod));
   4638         try self.genSetMem(.{ .reg = dst_mcv.getReg().? }, pl_abi_size, Type.bool, .{ .immediate = 1 });
   4639         break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
   4640     };
   4641     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4642 }
   4643 
   4644 fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
   4645     const mod = self.bin_file.comp.module.?;
   4646     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4647     const err_union_ty = self.typeOf(ty_op.operand);
   4648     const err_ty = err_union_ty.errorUnionSet(mod);
   4649     const payload_ty = err_union_ty.errorUnionPayload(mod);
   4650     const operand = try self.resolveInst(ty_op.operand);
   4651 
   4652     const result: MCValue = result: {
   4653         if (err_ty.errorSetIsEmpty(mod)) {
   4654             break :result MCValue{ .immediate = 0 };
   4655         }
   4656 
   4657         if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) {
   4658             break :result operand;
   4659         }
   4660 
   4661         const err_off = errUnionErrorOffset(payload_ty, mod);
   4662         switch (operand) {
   4663             .register => |reg| {
   4664                 // TODO reuse operand
   4665                 const eu_lock = self.register_manager.lockReg(reg);
   4666                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   4667 
   4668                 const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
   4669                 if (err_off > 0) try self.genShiftBinOpMir(
   4670                     .{ ._r, .sh },
   4671                     err_union_ty,
   4672                     result,
   4673                     .{ .immediate = @as(u6, @intCast(err_off * 8)) },
   4674                 ) else try self.truncateRegister(Type.anyerror, result.register);
   4675                 break :result result;
   4676             },
   4677             .load_frame => |frame_addr| break :result .{ .load_frame = .{
   4678                 .index = frame_addr.index,
   4679                 .off = frame_addr.off + @as(i32, @intCast(err_off)),
   4680             } },
   4681             else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}),
   4682         }
   4683     };
   4684     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4685 }
   4686 
   4687 fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
   4688     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4689     const operand_ty = self.typeOf(ty_op.operand);
   4690     const operand = try self.resolveInst(ty_op.operand);
   4691     const result = try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand);
   4692     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4693 }
   4694 
   4695 // *(E!T) -> E
   4696 fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void {
   4697     const mod = self.bin_file.comp.module.?;
   4698     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4699 
   4700     const src_ty = self.typeOf(ty_op.operand);
   4701     const src_mcv = try self.resolveInst(ty_op.operand);
   4702     const src_reg = switch (src_mcv) {
   4703         .register => |reg| reg,
   4704         else => try self.copyToTmpRegister(src_ty, src_mcv),
   4705     };
   4706     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   4707     defer self.register_manager.unlockReg(src_lock);
   4708 
   4709     const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   4710     const dst_mcv = MCValue{ .register = dst_reg };
   4711     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   4712     defer self.register_manager.unlockReg(dst_lock);
   4713 
   4714     const eu_ty = src_ty.childType(mod);
   4715     const pl_ty = eu_ty.errorUnionPayload(mod);
   4716     const err_ty = eu_ty.errorUnionSet(mod);
   4717     const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   4718     const err_abi_size: u32 = @intCast(err_ty.abiSize(mod));
   4719     try self.asmRegisterMemory(
   4720         .{ ._, .mov },
   4721         registerAlias(dst_reg, err_abi_size),
   4722         .{
   4723             .base = .{ .reg = src_reg },
   4724             .mod = .{ .rm = .{
   4725                 .size = Memory.Size.fromSize(err_abi_size),
   4726                 .disp = err_off,
   4727             } },
   4728         },
   4729     );
   4730 
   4731     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   4732 }
   4733 
   4734 // *(E!T) -> *T
   4735 fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
   4736     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4737     const operand_ty = self.typeOf(ty_op.operand);
   4738     const operand = try self.resolveInst(ty_op.operand);
   4739     const result = try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand);
   4740     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4741 }
   4742 
   4743 fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
   4744     const mod = self.bin_file.comp.module.?;
   4745     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4746     const result: MCValue = result: {
   4747         const src_ty = self.typeOf(ty_op.operand);
   4748         const src_mcv = try self.resolveInst(ty_op.operand);
   4749         const src_reg = switch (src_mcv) {
   4750             .register => |reg| reg,
   4751             else => try self.copyToTmpRegister(src_ty, src_mcv),
   4752         };
   4753         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   4754         defer self.register_manager.unlockReg(src_lock);
   4755 
   4756         const eu_ty = src_ty.childType(mod);
   4757         const pl_ty = eu_ty.errorUnionPayload(mod);
   4758         const err_ty = eu_ty.errorUnionSet(mod);
   4759         const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   4760         const err_abi_size: u32 = @intCast(err_ty.abiSize(mod));
   4761         try self.asmMemoryImmediate(
   4762             .{ ._, .mov },
   4763             .{
   4764                 .base = .{ .reg = src_reg },
   4765                 .mod = .{ .rm = .{
   4766                     .size = Memory.Size.fromSize(err_abi_size),
   4767                     .disp = err_off,
   4768                 } },
   4769             },
   4770             Immediate.u(0),
   4771         );
   4772 
   4773         if (self.liveness.isUnused(inst)) break :result .unreach;
   4774 
   4775         const dst_ty = self.typeOfIndex(inst);
   4776         const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   4777             src_reg
   4778         else
   4779             try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   4780         const dst_lock = self.register_manager.lockReg(dst_reg);
   4781         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   4782 
   4783         const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   4784         const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   4785         try self.asmRegisterMemory(
   4786             .{ ._, .lea },
   4787             registerAlias(dst_reg, dst_abi_size),
   4788             .{
   4789                 .base = .{ .reg = src_reg },
   4790                 .mod = .{ .rm = .{ .size = .qword, .disp = pl_off } },
   4791             },
   4792         );
   4793         break :result .{ .register = dst_reg };
   4794     };
   4795     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4796 }
   4797 
   4798 fn genUnwrapErrUnionPayloadMir(
   4799     self: *Self,
   4800     maybe_inst: ?Air.Inst.Index,
   4801     err_union_ty: Type,
   4802     err_union: MCValue,
   4803 ) !MCValue {
   4804     const mod = self.bin_file.comp.module.?;
   4805     const payload_ty = err_union_ty.errorUnionPayload(mod);
   4806 
   4807     const result: MCValue = result: {
   4808         if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   4809 
   4810         const payload_off: u31 = @intCast(errUnionPayloadOffset(payload_ty, mod));
   4811         switch (err_union) {
   4812             .load_frame => |frame_addr| break :result .{ .load_frame = .{
   4813                 .index = frame_addr.index,
   4814                 .off = frame_addr.off + payload_off,
   4815             } },
   4816             .register => |reg| {
   4817                 // TODO reuse operand
   4818                 const eu_lock = self.register_manager.lockReg(reg);
   4819                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
   4820 
   4821                 const payload_in_gp = self.regClassForType(payload_ty).supersetOf(abi.RegisterClass.gp);
   4822                 const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null)
   4823                     try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union)
   4824                 else
   4825                     .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
   4826                 if (payload_off > 0) try self.genShiftBinOpMir(
   4827                     .{ ._r, .sh },
   4828                     err_union_ty,
   4829                     result_mcv,
   4830                     .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
   4831                 ) else try self.truncateRegister(payload_ty, result_mcv.register);
   4832                 break :result if (payload_in_gp)
   4833                     result_mcv
   4834                 else if (maybe_inst) |inst|
   4835                     try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv)
   4836                 else
   4837                     .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) };
   4838             },
   4839             else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {}", .{err_union}),
   4840         }
   4841     };
   4842 
   4843     return result;
   4844 }
   4845 
   4846 fn genUnwrapErrUnionPayloadPtrMir(
   4847     self: *Self,
   4848     maybe_inst: ?Air.Inst.Index,
   4849     ptr_ty: Type,
   4850     ptr_mcv: MCValue,
   4851 ) !MCValue {
   4852     const mod = self.bin_file.comp.module.?;
   4853     const err_union_ty = ptr_ty.childType(mod);
   4854     const payload_ty = err_union_ty.errorUnionPayload(mod);
   4855 
   4856     const result: MCValue = result: {
   4857         const payload_off = errUnionPayloadOffset(payload_ty, mod);
   4858         const result_mcv: MCValue = if (maybe_inst) |inst|
   4859             try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv)
   4860         else
   4861             .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) };
   4862         try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off });
   4863         break :result result_mcv;
   4864     };
   4865 
   4866     return result;
   4867 }
   4868 
   4869 fn airErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
   4870     _ = inst;
   4871     return self.fail("TODO implement airErrReturnTrace for {}", .{self.target.cpu.arch});
   4872     //return self.finishAir(inst, result, .{ .none, .none, .none });
   4873 }
   4874 
   4875 fn airSetErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
   4876     _ = inst;
   4877     return self.fail("TODO implement airSetErrReturnTrace for {}", .{self.target.cpu.arch});
   4878 }
   4879 
   4880 fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void {
   4881     _ = inst;
   4882     return self.fail("TODO implement airSaveErrReturnTraceIndex for {}", .{self.target.cpu.arch});
   4883 }
   4884 
   4885 fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void {
   4886     const mod = self.bin_file.comp.module.?;
   4887     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4888     const result: MCValue = result: {
   4889         const pl_ty = self.typeOf(ty_op.operand);
   4890         if (!pl_ty.hasRuntimeBits(mod)) break :result .{ .immediate = 1 };
   4891 
   4892         const opt_ty = self.typeOfIndex(inst);
   4893         const pl_mcv = try self.resolveInst(ty_op.operand);
   4894         const same_repr = opt_ty.optionalReprIsPayload(mod);
   4895         if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv;
   4896 
   4897         const pl_lock: ?RegisterLock = switch (pl_mcv) {
   4898             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   4899             else => null,
   4900         };
   4901         defer if (pl_lock) |lock| self.register_manager.unlockReg(lock);
   4902 
   4903         const opt_mcv = try self.allocRegOrMem(inst, true);
   4904         try self.genCopy(pl_ty, opt_mcv, pl_mcv);
   4905 
   4906         if (!same_repr) {
   4907             const pl_abi_size: i32 = @intCast(pl_ty.abiSize(mod));
   4908             switch (opt_mcv) {
   4909                 else => unreachable,
   4910 
   4911                 .register => |opt_reg| {
   4912                     try self.truncateRegister(pl_ty, opt_reg);
   4913                     try self.asmRegisterImmediate(
   4914                         .{ ._s, .bt },
   4915                         opt_reg,
   4916                         Immediate.u(@as(u6, @intCast(pl_abi_size * 8))),
   4917                     );
   4918                 },
   4919 
   4920                 .load_frame => |frame_addr| try self.asmMemoryImmediate(
   4921                     .{ ._, .mov },
   4922                     .{
   4923                         .base = .{ .frame = frame_addr.index },
   4924                         .mod = .{ .rm = .{
   4925                             .size = .byte,
   4926                             .disp = frame_addr.off + pl_abi_size,
   4927                         } },
   4928                     },
   4929                     Immediate.u(1),
   4930                 ),
   4931             }
   4932         }
   4933         break :result opt_mcv;
   4934     };
   4935     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4936 }
   4937 
   4938 /// T to E!T
   4939 fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
   4940     const mod = self.bin_file.comp.module.?;
   4941     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4942 
   4943     const eu_ty = ty_op.ty.toType();
   4944     const pl_ty = eu_ty.errorUnionPayload(mod);
   4945     const err_ty = eu_ty.errorUnionSet(mod);
   4946     const operand = try self.resolveInst(ty_op.operand);
   4947 
   4948     const result: MCValue = result: {
   4949         if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .{ .immediate = 0 };
   4950 
   4951         const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, mod));
   4952         const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   4953         const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   4954         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand);
   4955         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 });
   4956         break :result .{ .load_frame = .{ .index = frame_index } };
   4957     };
   4958     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4959 }
   4960 
   4961 /// E to E!T
   4962 fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
   4963     const mod = self.bin_file.comp.module.?;
   4964     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4965 
   4966     const eu_ty = ty_op.ty.toType();
   4967     const pl_ty = eu_ty.errorUnionPayload(mod);
   4968     const err_ty = eu_ty.errorUnionSet(mod);
   4969 
   4970     const result: MCValue = result: {
   4971         if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result try self.resolveInst(ty_op.operand);
   4972 
   4973         const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, mod));
   4974         const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
   4975         const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
   4976         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef);
   4977         const operand = try self.resolveInst(ty_op.operand);
   4978         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand);
   4979         break :result .{ .load_frame = .{ .index = frame_index } };
   4980     };
   4981     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4982 }
   4983 
   4984 fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
   4985     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   4986     const result = result: {
   4987         const src_mcv = try self.resolveInst(ty_op.operand);
   4988         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
   4989 
   4990         const dst_mcv = try self.allocRegOrMem(inst, true);
   4991         const dst_ty = self.typeOfIndex(inst);
   4992         try self.genCopy(dst_ty, dst_mcv, src_mcv);
   4993         break :result dst_mcv;
   4994     };
   4995     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   4996 }
   4997 
   4998 fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
   4999     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5000 
   5001     const result: MCValue = result: {
   5002         const src_mcv = try self.resolveInst(ty_op.operand);
   5003         switch (src_mcv) {
   5004             .load_frame => |frame_addr| {
   5005                 const len_mcv: MCValue = .{ .load_frame = .{
   5006                     .index = frame_addr.index,
   5007                     .off = frame_addr.off + 8,
   5008                 } };
   5009                 if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result len_mcv;
   5010 
   5011                 const dst_mcv = try self.allocRegOrMem(inst, true);
   5012                 try self.genCopy(Type.usize, dst_mcv, len_mcv);
   5013                 break :result dst_mcv;
   5014             },
   5015             else => return self.fail("TODO implement slice_len for {}", .{src_mcv}),
   5016         }
   5017     };
   5018     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   5019 }
   5020 
   5021 fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void {
   5022     const mod = self.bin_file.comp.module.?;
   5023     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5024 
   5025     const src_ty = self.typeOf(ty_op.operand);
   5026     const src_mcv = try self.resolveInst(ty_op.operand);
   5027     const src_reg = switch (src_mcv) {
   5028         .register => |reg| reg,
   5029         else => try self.copyToTmpRegister(src_ty, src_mcv),
   5030     };
   5031     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   5032     defer self.register_manager.unlockReg(src_lock);
   5033 
   5034     const dst_ty = self.typeOfIndex(inst);
   5035     const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   5036         src_reg
   5037     else
   5038         try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   5039     const dst_mcv = MCValue{ .register = dst_reg };
   5040     const dst_lock = self.register_manager.lockReg(dst_reg);
   5041     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5042 
   5043     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
   5044     try self.asmRegisterMemory(
   5045         .{ ._, .lea },
   5046         registerAlias(dst_reg, dst_abi_size),
   5047         .{
   5048             .base = .{ .reg = src_reg },
   5049             .mod = .{ .rm = .{ .size = .qword, .disp = 8 } },
   5050         },
   5051     );
   5052 
   5053     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   5054 }
   5055 
   5056 fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
   5057     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5058 
   5059     const dst_ty = self.typeOfIndex(inst);
   5060     const opt_mcv = try self.resolveInst(ty_op.operand);
   5061 
   5062     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
   5063         opt_mcv
   5064     else
   5065         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
   5066     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   5067 }
   5068 
   5069 fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register {
   5070     const reg: Register = blk: {
   5071         switch (index) {
   5072             .immediate => |imm| {
   5073                 // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
   5074                 // and set the register directly to the scaled offset as an immediate.
   5075                 const reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   5076                 try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size });
   5077                 break :blk reg;
   5078             },
   5079             else => {
   5080                 const reg = try self.copyToTmpRegister(index_ty, index);
   5081                 try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
   5082                 break :blk reg;
   5083             },
   5084         }
   5085     };
   5086     return reg;
   5087 }
   5088 
   5089 fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
   5090     const mod = self.bin_file.comp.module.?;
   5091     const slice_ty = self.typeOf(lhs);
   5092     const slice_mcv = try self.resolveInst(lhs);
   5093     const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) {
   5094         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5095         else => null,
   5096     };
   5097     defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   5098 
   5099     const elem_ty = slice_ty.childType(mod);
   5100     const elem_size = elem_ty.abiSize(mod);
   5101     const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod);
   5102 
   5103     const index_ty = self.typeOf(rhs);
   5104     const index_mcv = try self.resolveInst(rhs);
   5105     const index_mcv_lock: ?RegisterLock = switch (index_mcv) {
   5106         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5107         else => null,
   5108     };
   5109     defer if (index_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   5110 
   5111     const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
   5112     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   5113     defer self.register_manager.unlockReg(offset_reg_lock);
   5114 
   5115     const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   5116     try self.genSetReg(addr_reg, Type.usize, slice_mcv);
   5117     // TODO we could allocate register here, but need to expect addr register and potentially
   5118     // offset register.
   5119     try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{
   5120         .register = offset_reg,
   5121     });
   5122     return MCValue{ .register = addr_reg.to64() };
   5123 }
   5124 
   5125 fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
   5126     const mod = self.bin_file.comp.module.?;
   5127     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   5128 
   5129     const result: MCValue = result: {
   5130         const elem_ty = self.typeOfIndex(inst);
   5131         if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   5132 
   5133         const slice_ty = self.typeOf(bin_op.lhs);
   5134         const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod);
   5135         const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
   5136         const dst_mcv = try self.allocRegOrMem(inst, false);
   5137         try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
   5138         break :result dst_mcv;
   5139     };
   5140     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   5141 }
   5142 
   5143 fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
   5144     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   5145     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
   5146     const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs);
   5147     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
   5148 }
   5149 
   5150 fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
   5151     const mod = self.bin_file.comp.module.?;
   5152     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   5153 
   5154     const result: MCValue = result: {
   5155         const array_ty = self.typeOf(bin_op.lhs);
   5156         const elem_ty = array_ty.childType(mod);
   5157 
   5158         const array_mcv = try self.resolveInst(bin_op.lhs);
   5159         const array_lock: ?RegisterLock = switch (array_mcv) {
   5160             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5161             else => null,
   5162         };
   5163         defer if (array_lock) |lock| self.register_manager.unlockReg(lock);
   5164 
   5165         const index_ty = self.typeOf(bin_op.rhs);
   5166         const index_mcv = try self.resolveInst(bin_op.rhs);
   5167         const index_lock: ?RegisterLock = switch (index_mcv) {
   5168             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5169             else => null,
   5170         };
   5171         defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   5172 
   5173         try self.spillEflagsIfOccupied();
   5174         if (array_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) {
   5175             const index_reg = switch (index_mcv) {
   5176                 .register => |reg| reg,
   5177                 else => try self.copyToTmpRegister(index_ty, index_mcv),
   5178             };
   5179             switch (array_mcv) {
   5180                 .register => |array_reg| try self.asmRegisterRegister(
   5181                     .{ ._, .bt },
   5182                     array_reg.to64(),
   5183                     index_reg.to64(),
   5184                 ),
   5185                 .load_frame => try self.asmMemoryRegister(
   5186                     .{ ._, .bt },
   5187                     try array_mcv.mem(self, .qword),
   5188                     index_reg.to64(),
   5189                 ),
   5190                 .memory, .load_symbol, .load_direct, .load_got, .load_tlv => try self.asmMemoryRegister(
   5191                     .{ ._, .bt },
   5192                     .{
   5193                         .base = .{
   5194                             .reg = try self.copyToTmpRegister(Type.usize, array_mcv.address()),
   5195                         },
   5196                         .mod = .{ .rm = .{ .size = .qword } },
   5197                     },
   5198                     index_reg.to64(),
   5199                 ),
   5200                 else => return self.fail("TODO airArrayElemVal for {s} of {}", .{
   5201                     @tagName(array_mcv), array_ty.fmt(mod),
   5202                 }),
   5203             }
   5204 
   5205             const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   5206             try self.asmSetccRegister(.c, dst_reg.to8());
   5207             break :result .{ .register = dst_reg };
   5208         }
   5209 
   5210         const elem_abi_size = elem_ty.abiSize(mod);
   5211         const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   5212         const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   5213         defer self.register_manager.unlockReg(addr_lock);
   5214 
   5215         switch (array_mcv) {
   5216             .register => {
   5217                 const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod));
   5218                 try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv);
   5219                 try self.asmRegisterMemory(
   5220                     .{ ._, .lea },
   5221                     addr_reg,
   5222                     .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
   5223                 );
   5224             },
   5225             .load_frame => |frame_addr| try self.asmRegisterMemory(
   5226                 .{ ._, .lea },
   5227                 addr_reg,
   5228                 .{
   5229                     .base = .{ .frame = frame_addr.index },
   5230                     .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } },
   5231                 },
   5232             ),
   5233             .memory,
   5234             .load_symbol,
   5235             .load_direct,
   5236             .load_got,
   5237             .load_tlv,
   5238             => try self.genSetReg(addr_reg, Type.usize, array_mcv.address()),
   5239             .lea_symbol, .lea_direct, .lea_tlv => unreachable,
   5240             else => return self.fail("TODO airArrayElemVal_val for {s} of {}", .{
   5241                 @tagName(array_mcv), array_ty.fmt(mod),
   5242             }),
   5243         }
   5244 
   5245         const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
   5246         const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   5247         defer self.register_manager.unlockReg(offset_lock);
   5248 
   5249         // TODO we could allocate register here, but need to expect addr register and potentially
   5250         // offset register.
   5251         const dst_mcv = try self.allocRegOrMem(inst, false);
   5252         try self.genBinOpMir(
   5253             .{ ._, .add },
   5254             Type.usize,
   5255             .{ .register = addr_reg },
   5256             .{ .register = offset_reg },
   5257         );
   5258         try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
   5259         break :result dst_mcv;
   5260     };
   5261     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   5262 }
   5263 
   5264 fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void {
   5265     const mod = self.bin_file.comp.module.?;
   5266     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   5267     const ptr_ty = self.typeOf(bin_op.lhs);
   5268 
   5269     // this is identical to the `airPtrElemPtr` codegen expect here an
   5270     // additional `mov` is needed at the end to get the actual value
   5271 
   5272     const result = result: {
   5273         const elem_ty = ptr_ty.elemType2(mod);
   5274         if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   5275 
   5276         const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
   5277         const index_ty = self.typeOf(bin_op.rhs);
   5278         const index_mcv = try self.resolveInst(bin_op.rhs);
   5279         const index_lock = switch (index_mcv) {
   5280             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5281             else => null,
   5282         };
   5283         defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   5284 
   5285         const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
   5286         const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   5287         defer self.register_manager.unlockReg(offset_lock);
   5288 
   5289         const ptr_mcv = try self.resolveInst(bin_op.lhs);
   5290         const elem_ptr_reg = if (ptr_mcv.isRegister() and self.liveness.operandDies(inst, 0))
   5291             ptr_mcv.register
   5292         else
   5293             try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   5294         const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg);
   5295         defer self.register_manager.unlockReg(elem_ptr_lock);
   5296         try self.asmRegisterRegister(
   5297             .{ ._, .add },
   5298             elem_ptr_reg,
   5299             offset_reg,
   5300         );
   5301 
   5302         const dst_mcv = try self.allocRegOrMem(inst, true);
   5303         const dst_lock = switch (dst_mcv) {
   5304             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5305             else => null,
   5306         };
   5307         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5308         try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg });
   5309         break :result dst_mcv;
   5310     };
   5311     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
   5312 }
   5313 
   5314 fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void {
   5315     const mod = self.bin_file.comp.module.?;
   5316     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   5317     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
   5318 
   5319     const result = result: {
   5320         const elem_ptr_ty = self.typeOfIndex(inst);
   5321         const base_ptr_ty = self.typeOf(extra.lhs);
   5322 
   5323         const base_ptr_mcv = try self.resolveInst(extra.lhs);
   5324         const base_ptr_lock: ?RegisterLock = switch (base_ptr_mcv) {
   5325             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5326             else => null,
   5327         };
   5328         defer if (base_ptr_lock) |lock| self.register_manager.unlockReg(lock);
   5329 
   5330         if (elem_ptr_ty.ptrInfo(mod).flags.vector_index != .none) {
   5331             break :result if (self.reuseOperand(inst, extra.lhs, 0, base_ptr_mcv))
   5332                 base_ptr_mcv
   5333             else
   5334                 try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv);
   5335         }
   5336 
   5337         const elem_ty = base_ptr_ty.elemType2(mod);
   5338         const elem_abi_size = elem_ty.abiSize(mod);
   5339         const index_ty = self.typeOf(extra.rhs);
   5340         const index_mcv = try self.resolveInst(extra.rhs);
   5341         const index_lock: ?RegisterLock = switch (index_mcv) {
   5342             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5343             else => null,
   5344         };
   5345         defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
   5346 
   5347         const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
   5348         const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
   5349         defer self.register_manager.unlockReg(offset_reg_lock);
   5350 
   5351         const dst_mcv = try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv);
   5352         try self.genBinOpMir(.{ ._, .add }, elem_ptr_ty, dst_mcv, .{ .register = offset_reg });
   5353 
   5354         break :result dst_mcv;
   5355     };
   5356     return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none });
   5357 }
   5358 
   5359 fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
   5360     const mod = self.bin_file.comp.module.?;
   5361     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   5362     const ptr_union_ty = self.typeOf(bin_op.lhs);
   5363     const union_ty = ptr_union_ty.childType(mod);
   5364     const tag_ty = self.typeOf(bin_op.rhs);
   5365     const layout = union_ty.unionGetLayout(mod);
   5366 
   5367     if (layout.tag_size == 0) {
   5368         return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   5369     }
   5370 
   5371     const ptr = try self.resolveInst(bin_op.lhs);
   5372     const ptr_lock: ?RegisterLock = switch (ptr) {
   5373         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5374         else => null,
   5375     };
   5376     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
   5377 
   5378     const tag = try self.resolveInst(bin_op.rhs);
   5379     const tag_lock: ?RegisterLock = switch (tag) {
   5380         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5381         else => null,
   5382     };
   5383     defer if (tag_lock) |lock| self.register_manager.unlockReg(lock);
   5384 
   5385     const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align.compare(.lt, layout.payload_align)) blk: {
   5386         // TODO reusing the operand
   5387         const reg = try self.copyToTmpRegister(ptr_union_ty, ptr);
   5388         try self.genBinOpMir(
   5389             .{ ._, .add },
   5390             ptr_union_ty,
   5391             .{ .register = reg },
   5392             .{ .immediate = layout.payload_size },
   5393         );
   5394         break :blk MCValue{ .register = reg };
   5395     } else ptr;
   5396 
   5397     const ptr_tag_ty = try mod.adjustPtrTypeChild(ptr_union_ty, tag_ty);
   5398     try self.store(ptr_tag_ty, adjusted_ptr, tag);
   5399 
   5400     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
   5401 }
   5402 
   5403 fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
   5404     const mod = self.bin_file.comp.module.?;
   5405     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5406 
   5407     const tag_ty = self.typeOfIndex(inst);
   5408     const union_ty = self.typeOf(ty_op.operand);
   5409     const layout = union_ty.unionGetLayout(mod);
   5410 
   5411     if (layout.tag_size == 0) {
   5412         return self.finishAir(inst, .none, .{ ty_op.operand, .none, .none });
   5413     }
   5414 
   5415     // TODO reusing the operand
   5416     const operand = try self.resolveInst(ty_op.operand);
   5417     const operand_lock: ?RegisterLock = switch (operand) {
   5418         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5419         else => null,
   5420     };
   5421     defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
   5422 
   5423     const tag_abi_size = tag_ty.abiSize(mod);
   5424     const dst_mcv: MCValue = blk: {
   5425         switch (operand) {
   5426             .load_frame => |frame_addr| {
   5427                 if (tag_abi_size <= 8) {
   5428                     const off: i32 = if (layout.tag_align.compare(.lt, layout.payload_align))
   5429                         @intCast(layout.payload_size)
   5430                     else
   5431                         0;
   5432                     break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{
   5433                         .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
   5434                     });
   5435                 }
   5436 
   5437                 return self.fail("TODO implement get_union_tag for ABI larger than 8 bytes and operand {}", .{operand});
   5438             },
   5439             .register => {
   5440                 const shift: u6 = if (layout.tag_align.compare(.lt, layout.payload_align))
   5441                     @intCast(layout.payload_size * 8)
   5442                 else
   5443                     0;
   5444                 const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand);
   5445                 try self.genShiftBinOpMir(.{ ._r, .sh }, Type.usize, result, .{ .immediate = shift });
   5446                 break :blk MCValue{
   5447                     .register = registerAlias(result.register, @intCast(layout.tag_size)),
   5448                 };
   5449             },
   5450             else => return self.fail("TODO implement get_union_tag for {}", .{operand}),
   5451         }
   5452     };
   5453 
   5454     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   5455 }
   5456 
   5457 fn airClz(self: *Self, inst: Air.Inst.Index) !void {
   5458     const mod = self.bin_file.comp.module.?;
   5459     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5460     const result = result: {
   5461         const dst_ty = self.typeOfIndex(inst);
   5462         const src_ty = self.typeOf(ty_op.operand);
   5463         if (src_ty.zigTypeTag(mod) == .Vector) return self.fail("TODO implement airClz for {}", .{
   5464             src_ty.fmt(mod),
   5465         });
   5466         const src_bits: u32 = @intCast(src_ty.bitSize(mod));
   5467 
   5468         const has_lzcnt = self.hasFeature(.lzcnt);
   5469         if (src_bits > 64 and !has_lzcnt) {
   5470             var callee_buf: ["__clz?i2".len]u8 = undefined;
   5471             const result = try self.genCall(.{ .lib = .{
   5472                 .return_type = .i32_type,
   5473                 .param_types = &.{src_ty.toIntern()},
   5474                 .callee = std.fmt.bufPrint(&callee_buf, "__clz{c}i2", .{
   5475                     intCompilerRtAbiName(src_bits),
   5476                 }) catch unreachable,
   5477             } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
   5478             if (src_bits < 128) try self.asmRegisterImmediate(
   5479                 .{ ._, .sub },
   5480                 result.register,
   5481                 Immediate.u(128 - src_bits),
   5482             );
   5483             break :result result;
   5484         }
   5485 
   5486         const src_mcv = try self.resolveInst(ty_op.operand);
   5487         const mat_src_mcv = switch (src_mcv) {
   5488             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   5489             else => src_mcv,
   5490         };
   5491         const mat_src_lock = switch (mat_src_mcv) {
   5492             .register => |reg| self.register_manager.lockReg(reg),
   5493             else => null,
   5494         };
   5495         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   5496 
   5497         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   5498         const dst_mcv = MCValue{ .register = dst_reg };
   5499         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   5500         defer self.register_manager.unlockReg(dst_lock);
   5501 
   5502         if (has_lzcnt) {
   5503             if (src_bits <= 8) {
   5504                 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   5505                 try self.truncateRegister(src_ty, wide_reg);
   5506                 try self.genBinOpMir(.{ ._, .lzcnt }, Type.u32, dst_mcv, .{ .register = wide_reg });
   5507                 try self.genBinOpMir(
   5508                     .{ ._, .sub },
   5509                     dst_ty,
   5510                     dst_mcv,
   5511                     .{ .immediate = 32 - src_bits },
   5512                 );
   5513             } else if (src_bits <= 64) {
   5514                 try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
   5515                 const extra_bits = self.regExtraBits(src_ty);
   5516                 if (extra_bits > 0) {
   5517                     try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits });
   5518                 }
   5519             } else if (src_bits <= 128) {
   5520                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   5521                 const tmp_mcv = MCValue{ .register = tmp_reg };
   5522                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5523                 defer self.register_manager.unlockReg(tmp_lock);
   5524 
   5525                 try self.genBinOpMir(
   5526                     .{ ._, .lzcnt },
   5527                     Type.u64,
   5528                     dst_mcv,
   5529                     if (mat_src_mcv.isMemory())
   5530                         mat_src_mcv
   5531                     else
   5532                         .{ .register = mat_src_mcv.register_pair[0] },
   5533                 );
   5534                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
   5535                 try self.genBinOpMir(
   5536                     .{ ._, .lzcnt },
   5537                     Type.u64,
   5538                     tmp_mcv,
   5539                     if (mat_src_mcv.isMemory())
   5540                         mat_src_mcv.address().offset(8).deref()
   5541                     else
   5542                         .{ .register = mat_src_mcv.register_pair[1] },
   5543                 );
   5544                 try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
   5545 
   5546                 if (src_bits < 128) try self.genBinOpMir(
   5547                     .{ ._, .sub },
   5548                     dst_ty,
   5549                     dst_mcv,
   5550                     .{ .immediate = 128 - src_bits },
   5551                 );
   5552             } else return self.fail("TODO airClz of {}", .{src_ty.fmt(mod)});
   5553             break :result dst_mcv;
   5554         }
   5555 
   5556         if (src_bits > 64)
   5557             return self.fail("TODO airClz of {}", .{src_ty.fmt(mod)});
   5558         if (math.isPowerOfTwo(src_bits)) {
   5559             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
   5560                 .immediate = src_bits ^ (src_bits - 1),
   5561             });
   5562             const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
   5563             defer self.register_manager.unlockReg(imm_lock);
   5564 
   5565             if (src_bits <= 8) {
   5566                 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   5567                 const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
   5568                 defer self.register_manager.unlockReg(wide_lock);
   5569 
   5570                 try self.truncateRegister(src_ty, wide_reg);
   5571                 try self.genBinOpMir(.{ ._, .bsr }, Type.u16, dst_mcv, .{ .register = wide_reg });
   5572             } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
   5573 
   5574             const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
   5575             try self.asmCmovccRegisterRegister(
   5576                 .z,
   5577                 registerAlias(dst_reg, cmov_abi_size),
   5578                 registerAlias(imm_reg, cmov_abi_size),
   5579             );
   5580 
   5581             try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 });
   5582         } else {
   5583             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
   5584                 .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)),
   5585             });
   5586             const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
   5587             defer self.register_manager.unlockReg(imm_lock);
   5588 
   5589             const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   5590             const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
   5591             defer self.register_manager.unlockReg(wide_lock);
   5592 
   5593             try self.truncateRegister(src_ty, wide_reg);
   5594             try self.genBinOpMir(
   5595                 .{ ._, .bsr },
   5596                 if (src_bits <= 8) Type.u16 else src_ty,
   5597                 dst_mcv,
   5598                 .{ .register = wide_reg },
   5599             );
   5600 
   5601             const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
   5602             try self.asmCmovccRegisterRegister(
   5603                 .nz,
   5604                 registerAlias(imm_reg, cmov_abi_size),
   5605                 registerAlias(dst_reg, cmov_abi_size),
   5606             );
   5607 
   5608             try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 });
   5609             try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg });
   5610         }
   5611         break :result dst_mcv;
   5612     };
   5613     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   5614 }
   5615 
   5616 fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
   5617     const mod = self.bin_file.comp.module.?;
   5618     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5619     const result = result: {
   5620         const dst_ty = self.typeOfIndex(inst);
   5621         const src_ty = self.typeOf(ty_op.operand);
   5622         if (src_ty.zigTypeTag(mod) == .Vector) return self.fail("TODO implement airClz for {}", .{
   5623             src_ty.fmt(mod),
   5624         });
   5625         const src_bits: u32 = @intCast(src_ty.bitSize(mod));
   5626 
   5627         const has_bmi = self.hasFeature(.bmi);
   5628         if (src_bits > 64 and !has_bmi) {
   5629             var callee_buf: ["__ctz?i2".len]u8 = undefined;
   5630             break :result try self.genCall(.{ .lib = .{
   5631                 .return_type = .i32_type,
   5632                 .param_types = &.{src_ty.toIntern()},
   5633                 .callee = std.fmt.bufPrint(&callee_buf, "__ctz{c}i2", .{
   5634                     intCompilerRtAbiName(src_bits),
   5635                 }) catch unreachable,
   5636             } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
   5637         }
   5638 
   5639         const src_mcv = try self.resolveInst(ty_op.operand);
   5640         const mat_src_mcv = switch (src_mcv) {
   5641             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   5642             else => src_mcv,
   5643         };
   5644         const mat_src_lock = switch (mat_src_mcv) {
   5645             .register => |reg| self.register_manager.lockReg(reg),
   5646             else => null,
   5647         };
   5648         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   5649 
   5650         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   5651         const dst_mcv = MCValue{ .register = dst_reg };
   5652         const dst_lock = self.register_manager.lockReg(dst_reg);
   5653         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5654 
   5655         if (self.hasFeature(.bmi)) {
   5656             if (src_bits <= 64) {
   5657                 const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
   5658                 const wide_ty = if (src_bits <= 8) Type.u16 else src_ty;
   5659                 const masked_mcv = if (extra_bits > 0) masked: {
   5660                     const tmp_mcv = tmp: {
   5661                         if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
   5662                             break :tmp src_mcv;
   5663                         try self.genSetReg(dst_reg, wide_ty, src_mcv);
   5664                         break :tmp dst_mcv;
   5665                     };
   5666                     try self.genBinOpMir(
   5667                         .{ ._, .@"or" },
   5668                         wide_ty,
   5669                         tmp_mcv,
   5670                         .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(64 - extra_bits)) <<
   5671                             @intCast(src_bits) },
   5672                     );
   5673                     break :masked tmp_mcv;
   5674                 } else mat_src_mcv;
   5675                 try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv);
   5676             } else if (src_bits <= 128) {
   5677                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   5678                 const tmp_mcv = MCValue{ .register = tmp_reg };
   5679                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5680                 defer self.register_manager.unlockReg(tmp_lock);
   5681 
   5682                 const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isMemory())
   5683                     mat_src_mcv
   5684                 else
   5685                     .{ .register = mat_src_mcv.register_pair[0] };
   5686                 const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isMemory())
   5687                     mat_src_mcv.address().offset(8).deref()
   5688                 else
   5689                     .{ .register = mat_src_mcv.register_pair[1] };
   5690                 const masked_mcv = if (src_bits < 128) masked: {
   5691                     try self.genCopy(Type.u64, dst_mcv, hi_mat_src_mcv);
   5692                     try self.genBinOpMir(
   5693                         .{ ._, .@"or" },
   5694                         Type.u64,
   5695                         dst_mcv,
   5696                         .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(src_bits - 64) },
   5697                     );
   5698                     break :masked dst_mcv;
   5699                 } else hi_mat_src_mcv;
   5700                 try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv);
   5701                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
   5702                 try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, lo_mat_src_mcv);
   5703                 try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
   5704             } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(mod)});
   5705             break :result dst_mcv;
   5706         }
   5707 
   5708         if (src_bits > 64) return self.fail("TODO airCtz of {}", .{src_ty.fmt(mod)});
   5709 
   5710         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
   5711         const width_lock = self.register_manager.lockRegAssumeUnused(width_reg);
   5712         defer self.register_manager.unlockReg(width_lock);
   5713 
   5714         if (src_bits <= 8 or !math.isPowerOfTwo(src_bits)) {
   5715             const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
   5716             const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
   5717             defer self.register_manager.unlockReg(wide_lock);
   5718 
   5719             try self.truncateRegister(src_ty, wide_reg);
   5720             try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg });
   5721         } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
   5722 
   5723         const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
   5724         try self.asmCmovccRegisterRegister(
   5725             .z,
   5726             registerAlias(dst_reg, cmov_abi_size),
   5727             registerAlias(width_reg, cmov_abi_size),
   5728         );
   5729         break :result dst_mcv;
   5730     };
   5731     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   5732 }
   5733 
   5734 fn airPopCount(self: *Self, inst: Air.Inst.Index) !void {
   5735     const mod = self.bin_file.comp.module.?;
   5736     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5737     const result: MCValue = result: {
   5738         try self.spillEflagsIfOccupied();
   5739 
   5740         const src_ty = self.typeOf(ty_op.operand);
   5741         const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
   5742         if (src_ty.zigTypeTag(mod) == .Vector or src_abi_size > 16)
   5743             return self.fail("TODO implement airPopCount for {}", .{src_ty.fmt(mod)});
   5744         const src_mcv = try self.resolveInst(ty_op.operand);
   5745 
   5746         const mat_src_mcv = switch (src_mcv) {
   5747             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
   5748             else => src_mcv,
   5749         };
   5750         const mat_src_lock = switch (mat_src_mcv) {
   5751             .register => |reg| self.register_manager.lockReg(reg),
   5752             else => null,
   5753         };
   5754         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
   5755 
   5756         if (src_abi_size <= 8) {
   5757             const dst_contains_src =
   5758                 src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv);
   5759             const dst_reg = if (dst_contains_src)
   5760                 src_mcv.getReg().?
   5761             else
   5762                 try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   5763             const dst_lock = self.register_manager.lockReg(dst_reg);
   5764             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   5765 
   5766             try self.genPopCount(dst_reg, src_ty, mat_src_mcv, dst_contains_src);
   5767             break :result .{ .register = dst_reg };
   5768         }
   5769 
   5770         assert(src_abi_size > 8 and src_abi_size <= 16);
   5771         const tmp_regs = try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
   5772         const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
   5773         defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
   5774 
   5775         try self.genPopCount(tmp_regs[0], Type.usize, if (mat_src_mcv.isMemory())
   5776             mat_src_mcv
   5777         else
   5778             .{ .register = mat_src_mcv.register_pair[0] }, false);
   5779         const src_info = src_ty.intInfo(mod);
   5780         const hi_ty = try mod.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1);
   5781         try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isMemory())
   5782             mat_src_mcv.address().offset(8).deref()
   5783         else
   5784             .{ .register = mat_src_mcv.register_pair[1] }, false);
   5785         try self.asmRegisterRegister(.{ ._, .add }, tmp_regs[0].to8(), tmp_regs[1].to8());
   5786         break :result .{ .register = tmp_regs[0] };
   5787     };
   5788     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   5789 }
   5790 
   5791 fn genPopCount(
   5792     self: *Self,
   5793     dst_reg: Register,
   5794     src_ty: Type,
   5795     src_mcv: MCValue,
   5796     dst_contains_src: bool,
   5797 ) !void {
   5798     const mod = self.bin_file.comp.module.?;
   5799 
   5800     const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
   5801     if (self.hasFeature(.popcnt)) return self.genBinOpMir(
   5802         .{ ._, .popcnt },
   5803         if (src_abi_size > 1) src_ty else Type.u32,
   5804         .{ .register = dst_reg },
   5805         if (src_abi_size > 1) src_mcv else src: {
   5806             if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv);
   5807             try self.truncateRegister(try src_ty.toUnsigned(mod), dst_reg);
   5808             break :src .{ .register = dst_reg };
   5809         },
   5810     );
   5811 
   5812     const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8);
   5813     const imm_0_1 = Immediate.u(mask / 0b1_1);
   5814     const imm_00_11 = Immediate.u(mask / 0b01_01);
   5815     const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
   5816     const imm_0000_0001 = Immediate.u(mask / 0b1111_1111);
   5817 
   5818     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   5819     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   5820     defer self.register_manager.unlockReg(tmp_lock);
   5821 
   5822     const dst = registerAlias(dst_reg, src_abi_size);
   5823     const tmp = registerAlias(tmp_reg, src_abi_size);
   5824     const imm = if (src_abi_size > 4)
   5825         try self.register_manager.allocReg(null, abi.RegisterClass.gp)
   5826     else
   5827         undefined;
   5828 
   5829     if (!dst_contains_src) try self.genSetReg(dst, src_ty, src_mcv);
   5830     // dst = operand
   5831     try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   5832     // tmp = operand
   5833     try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1));
   5834     // tmp = operand >> 1
   5835     if (src_abi_size > 4) {
   5836         try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
   5837         try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   5838     } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
   5839     // tmp = (operand >> 1) & 0x55...55
   5840     try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp);
   5841     // dst = temp1 = operand - ((operand >> 1) & 0x55...55)
   5842     try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
   5843     // tmp = temp1
   5844     try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2));
   5845     // dst = temp1 >> 2
   5846     if (src_abi_size > 4) {
   5847         try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
   5848         try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
   5849         try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   5850     } else {
   5851         try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
   5852         try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
   5853     }
   5854     // tmp = temp1 & 0x33...33
   5855     // dst = (temp1 >> 2) & 0x33...33
   5856     try self.asmRegisterRegister(.{ ._, .add }, tmp, dst);
   5857     // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33)
   5858     try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
   5859     // dst = temp2
   5860     try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4));
   5861     // tmp = temp2 >> 4
   5862     try self.asmRegisterRegister(.{ ._, .add }, dst, tmp);
   5863     // dst = temp2 + (temp2 >> 4)
   5864     if (src_abi_size > 4) {
   5865         try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
   5866         try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001);
   5867         try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
   5868         try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp);
   5869     } else {
   5870         try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
   5871         if (src_abi_size > 1) {
   5872             try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001);
   5873         }
   5874     }
   5875     // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
   5876     // dst = temp3 * 0x01...01
   5877     if (src_abi_size > 1) {
   5878         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8));
   5879     }
   5880     // dst = (temp3 * 0x01...01) >> (bits - 8)
   5881 }
   5882 
   5883 fn genByteSwap(
   5884     self: *Self,
   5885     inst: Air.Inst.Index,
   5886     src_ty: Type,
   5887     src_mcv: MCValue,
   5888     mem_ok: bool,
   5889 ) !MCValue {
   5890     const mod = self.bin_file.comp.module.?;
   5891     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5892 
   5893     if (src_ty.zigTypeTag(mod) == .Vector) return self.fail(
   5894         "TODO implement genByteSwap for {}",
   5895         .{src_ty.fmt(mod)},
   5896     );
   5897     const abi_size: u32 = @intCast(src_ty.abiSize(mod));
   5898     const src_lock = switch (src_mcv) {
   5899         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   5900         else => null,
   5901     };
   5902     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   5903 
   5904     switch (abi_size) {
   5905         else => return self.fail("TODO implement genByteSwap for {}", .{
   5906             src_ty.fmt(mod),
   5907         }),
   5908         1 => return if ((mem_ok or src_mcv.isRegister()) and
   5909             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   5910             src_mcv
   5911         else
   5912             try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv),
   5913         2 => if ((mem_ok or src_mcv.isRegister()) and
   5914             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   5915         {
   5916             try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 });
   5917             return src_mcv;
   5918         },
   5919         3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
   5920             try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv);
   5921             return src_mcv;
   5922         },
   5923         9...16 => {
   5924             switch (src_mcv) {
   5925                 .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
   5926                     for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64());
   5927                     return .{ .register_pair = .{ src_regs[1], src_regs[0] } };
   5928                 },
   5929                 else => {},
   5930             }
   5931 
   5932             const dst_regs =
   5933                 try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
   5934             const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
   5935             defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
   5936 
   5937             if (src_mcv.isMemory()) {
   5938                 try self.asmRegisterMemory(
   5939                     .{ ._, .movbe },
   5940                     dst_regs[0],
   5941                     try src_mcv.address().offset(8).deref().mem(self, .qword),
   5942                 );
   5943                 try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], try src_mcv.mem(self, .qword));
   5944             } else for (dst_regs, src_mcv.register_pair) |dst_reg, src_reg| {
   5945                 try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to64(), src_reg.to64());
   5946                 try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
   5947             }
   5948             return .{ .register_pair = dst_regs };
   5949         },
   5950     }
   5951 
   5952     if (src_mcv.isRegister()) {
   5953         const dst_mcv: MCValue = if (mem_ok)
   5954             try self.allocRegOrMem(inst, true)
   5955         else
   5956             .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) };
   5957         if (dst_mcv.isRegister()) {
   5958             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
   5959             defer self.register_manager.unlockReg(dst_lock);
   5960 
   5961             try self.genSetReg(dst_mcv.register, src_ty, src_mcv);
   5962             switch (abi_size) {
   5963                 else => unreachable,
   5964                 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
   5965                 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
   5966             }
   5967         } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
   5968         return dst_mcv;
   5969     }
   5970 
   5971     const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   5972     const dst_mcv = MCValue{ .register = dst_reg };
   5973     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   5974     defer self.register_manager.unlockReg(dst_lock);
   5975 
   5976     try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
   5977     return dst_mcv;
   5978 }
   5979 
   5980 fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
   5981     const mod = self.bin_file.comp.module.?;
   5982     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   5983 
   5984     const src_ty = self.typeOf(ty_op.operand);
   5985     const abi_size: u32 = @intCast(src_ty.abiSize(mod));
   5986     const bit_size: u32 = @intCast(src_ty.bitSize(mod));
   5987     const src_mcv = try self.resolveInst(ty_op.operand);
   5988 
   5989     const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, true);
   5990 
   5991     const extra_bits = abi_size * 8 - bit_size;
   5992     const signedness: std.builtin.Signedness =
   5993         if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
   5994     if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) {
   5995         .signed => .{ ._r, .sa },
   5996         .unsigned => .{ ._r, .sh },
   5997     }, src_ty, dst_mcv, .{ .immediate = extra_bits });
   5998 
   5999     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
   6000 }
   6001 
/// Lowers the bit-reverse AIR instruction `inst`.
///
/// The operand is first byte-swapped into a register or register pair by
/// `genByteSwap` (called with `mem_ok == false`). Each result register then has
/// the bits inside every byte reversed in three stages: the two nibbles of each
/// byte are exchanged, then adjacent bit pairs, then adjacent single bits.
/// Finally, when the type has fewer bits than its ABI size, the result is
/// shifted right by the difference (arithmetically for signed ABI integers).
fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
    const mod = self.bin_file.comp.module.?;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const src_ty = self.typeOf(ty_op.operand);
    const abi_size: u32 = @intCast(src_ty.abiSize(mod));
    const bit_size: u32 = @intCast(src_ty.bitSize(mod));
    const src_mcv = try self.resolveInst(ty_op.operand);

    const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, false);
    // Keep the result registers from being handed out as temporaries below.
    const dst_locks: [2]?RegisterLock = switch (dst_mcv) {
        .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
        .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs),
        else => unreachable,
    };
    defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);

    const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
    const tmp_lock = self.register_manager.lockReg(tmp_reg);
    defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);

    // Each register of the result is processed as one limb of at most 8 bytes.
    const limb_abi_size: u32 = @min(abi_size, 8);
    const tmp = registerAlias(tmp_reg, limb_abi_size);
    // Limbs wider than 4 bytes load each mask into this register before the
    // `and`; narrower limbs use immediate operands and leave `imm` unused.
    const imm = if (limb_abi_size > 4)
        try self.register_manager.allocReg(null, abi.RegisterClass.gp)
    else
        undefined;

    // `mask` has the low `limb_abi_size` bytes set; dividing it by a repeating
    // bit pattern yields the 0x0F...0F, 0x33...33 and 0x55...55 masks named in
    // the comments below.
    const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - limb_abi_size * 8);
    const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
    const imm_00_11 = Immediate.u(mask / 0b01_01);
    const imm_0_1 = Immediate.u(mask / 0b1_1);

    for (dst_mcv.getRegs()) |dst_reg| {
        const dst = registerAlias(dst_reg, limb_abi_size);

        // dst = temp1 = bswap(operand)
        try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
        // tmp = temp1
        try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4));
        // dst = temp1 >> 4
        if (limb_abi_size > 4) {
            try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
            try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
            try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
        } else {
            try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111);
            try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
        }
        // tmp = temp1 & 0x0F...0F
        // dst = (temp1 >> 4) & 0x0F...0F
        try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4));
        // tmp = (temp1 & 0x0F...0F) << 4
        try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp);
        // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4)
        try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
        // tmp = temp2
        try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2));
        // dst = temp2 >> 2
        if (limb_abi_size > 4) {
            try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
            try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
            try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
        } else {
            try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
            try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
        }
        // tmp = temp2 & 0x33...33
        // dst = (temp2 >> 2) & 0x33...33
        // A single `lea` computes dst + tmp * 4, i.e. the shift and the
        // combining add in one instruction.
        try self.asmRegisterMemory(
            .{ ._, .lea },
            if (limb_abi_size > 4) tmp.to64() else tmp.to32(),
            .{
                .base = .{ .reg = dst.to64() },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .index = tmp.to64(),
                    .scale = .@"4",
                } },
            },
        );
        // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2)
        try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
        // dst = temp3
        try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1));
        // tmp = temp3 >> 1
        if (limb_abi_size > 4) {
            try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
            try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
            try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
        } else {
            try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1);
            try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
        }
        // dst = temp3 & 0x55...55
        // tmp = (temp3 >> 1) & 0x55...55
        // As above, `lea` computes tmp + dst * 2 directly into dst.
        try self.asmRegisterMemory(
            .{ ._, .lea },
            if (limb_abi_size > 4) dst.to64() else dst.to32(),
            .{
                .base = .{ .reg = tmp.to64() },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .index = dst.to64(),
                    .scale = .@"2",
                } },
            },
        );
        // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1)
    }

    // The reversal covered the whole ABI size, so shift the result right by
    // the number of bits the type does not use.
    const extra_bits = abi_size * 8 - bit_size;
    const signedness: std.builtin.Signedness =
        if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
    if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) {
        .signed => .{ ._r, .sa },
        .unsigned => .{ ._r, .sh },
    }, src_ty, dst_mcv, .{ .immediate = extra_bits });

    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
   6123 
   6124 fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) !void {
   6125     const mod = self.bin_file.comp.module.?;
   6126     const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
   6127 
   6128     const result = result: {
   6129         const scalar_bits = ty.scalarType(mod).floatBits(self.target.*);
   6130         if (scalar_bits == 80) {
   6131             if (ty.zigTypeTag(mod) != .Float) return self.fail("TODO implement floatSign for {}", .{
   6132                 ty.fmt(mod),
   6133             });
   6134 
   6135             const src_mcv = try self.resolveInst(operand);
   6136             const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   6137             defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   6138 
   6139             const dst_mcv: MCValue = .{ .register = .st0 };
   6140             if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv))
   6141                 try self.register_manager.getReg(.st0, inst);
   6142 
   6143             try self.genCopy(ty, dst_mcv, src_mcv);
   6144             switch (tag) {
   6145                 .neg => try self.asmOpOnly(.{ .f_, .chs }),
   6146                 .abs => try self.asmOpOnly(.{ .f_, .abs }),
   6147                 else => unreachable,
   6148             }
   6149             break :result dst_mcv;
   6150         }
   6151 
   6152         const abi_size: u32 = switch (ty.abiSize(mod)) {
   6153             1...16 => 16,
   6154             17...32 => 32,
   6155             else => return self.fail("TODO implement floatSign for {}", .{
   6156                 ty.fmt(mod),
   6157             }),
   6158         };
   6159 
   6160         const src_mcv = try self.resolveInst(operand);
   6161         const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
   6162         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   6163 
   6164         const dst_mcv: MCValue = if (src_mcv.isRegister() and
   6165             self.reuseOperand(inst, operand, 0, src_mcv))
   6166             src_mcv
   6167         else if (self.hasFeature(.avx))
   6168             .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
   6169         else
   6170             try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   6171         const dst_reg = dst_mcv.getReg().?;
   6172         const dst_lock = self.register_manager.lockReg(dst_reg);
   6173         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   6174 
   6175         const vec_ty = try mod.vectorType(.{
   6176             .len = @divExact(abi_size * 8, scalar_bits),
   6177             .child = (try mod.intType(.signed, scalar_bits)).ip_index,
   6178         });
   6179 
   6180         const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = switch (tag) {
   6181             .neg => try vec_ty.minInt(mod, vec_ty),
   6182             .abs => try vec_ty.maxInt(mod, vec_ty),
   6183             else => unreachable,
   6184         } });
   6185         const sign_mem: Memory = if (sign_mcv.isMemory())
   6186             try sign_mcv.mem(self, Memory.Size.fromSize(abi_size))
   6187         else
   6188             .{
   6189                 .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
   6190                 .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } },
   6191             };
   6192 
   6193         if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
   6194             switch (scalar_bits) {
   6195                 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
   6196                     .neg => .{ .vp_, .xor },
   6197                     .abs => .{ .vp_, .@"and" },
   6198                     else => unreachable,
   6199                 } else switch (tag) {
   6200                     .neg => .{ .v_ps, .xor },
   6201                     .abs => .{ .v_ps, .@"and" },
   6202                     else => unreachable,
   6203                 },
   6204                 32 => switch (tag) {
   6205                     .neg => .{ .v_ps, .xor },
   6206                     .abs => .{ .v_ps, .@"and" },
   6207                     else => unreachable,
   6208                 },
   6209                 64 => switch (tag) {
   6210                     .neg => .{ .v_pd, .xor },
   6211                     .abs => .{ .v_pd, .@"and" },
   6212                     else => unreachable,
   6213                 },
   6214                 80 => return self.fail("TODO implement floatSign for {}", .{ty.fmt(mod)}),
   6215                 else => unreachable,
   6216             },
   6217             registerAlias(dst_reg, abi_size),
   6218             registerAlias(if (src_mcv.isRegister())
   6219                 src_mcv.getReg().?
   6220             else
   6221                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   6222             sign_mem,
   6223         ) else try self.asmRegisterMemory(
   6224             switch (scalar_bits) {
   6225                 16, 128 => switch (tag) {
   6226                     .neg => .{ .p_, .xor },
   6227                     .abs => .{ .p_, .@"and" },
   6228                     else => unreachable,
   6229                 },
   6230                 32 => switch (tag) {
   6231                     .neg => .{ ._ps, .xor },
   6232                     .abs => .{ ._ps, .@"and" },
   6233                     else => unreachable,
   6234                 },
   6235                 64 => switch (tag) {
   6236                     .neg => .{ ._pd, .xor },
   6237                     .abs => .{ ._pd, .@"and" },
   6238                     else => unreachable,
   6239                 },
   6240                 80 => return self.fail("TODO implement floatSign for {}", .{ty.fmt(mod)}),
   6241                 else => unreachable,
   6242             },
   6243             registerAlias(dst_reg, abi_size),
   6244             sign_mem,
   6245         );
   6246         break :result dst_mcv;
   6247     };
   6248     return self.finishAir(inst, result, .{ operand, .none, .none });
   6249 }
   6250 
   6251 fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
   6252     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
   6253     const ty = self.typeOf(un_op);
   6254     return self.floatSign(inst, un_op, ty);
   6255 }
   6256 
   6257 const RoundMode = packed struct(u5) {
   6258     mode: enum(u4) {
   6259         /// Round to nearest (even)
   6260         nearest = 0b0_00,
   6261         /// Round down (toward -∞)
   6262         down = 0b0_01,
   6263         /// Round up (toward +∞)
   6264         up = 0b0_10,
   6265         /// Round toward zero (truncate)
   6266         zero = 0b0_11,
   6267         /// Use current rounding mode of MXCSR.RC
   6268         mxcsr = 0b1_00,
   6269     },
   6270     precision: enum(u1) {
   6271         normal = 0b0,
   6272         inexact = 0b1,
   6273     } = .normal,
   6274 };
   6275 
   6276 fn airRound(self: *Self, inst: Air.Inst.Index, mode: RoundMode) !void {
   6277     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
   6278     const ty = self.typeOf(un_op);
   6279 
   6280     const result = result: {
   6281         switch (try self.genRoundLibcall(ty, .{ .air_ref = un_op }, mode)) {
   6282             .none => {},
   6283             else => |dst_mcv| break :result dst_mcv,
   6284         }
   6285 
   6286         const src_mcv = try self.resolveInst(un_op);
   6287         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   6288             src_mcv
   6289         else
   6290             try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   6291         const dst_reg = dst_mcv.getReg().?;
   6292         const dst_lock = self.register_manager.lockReg(dst_reg);
   6293         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   6294         try self.genRound(ty, dst_reg, src_mcv, mode);
   6295         break :result dst_mcv;
   6296     };
   6297     return self.finishAir(inst, result, .{ un_op, .none, .none });
   6298 }
   6299 
   6300 fn getRoundTag(self: *Self, ty: Type) ?Mir.Inst.FixedTag {
   6301     const mod = self.bin_file.comp.module.?;
   6302     return if (self.hasFeature(.sse4_1)) switch (ty.zigTypeTag(mod)) {
   6303         .Float => switch (ty.floatBits(self.target.*)) {
   6304             32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
   6305             64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
   6306             16, 80, 128 => null,
   6307             else => unreachable,
   6308         },
   6309         .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   6310             .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   6311                 32 => switch (ty.vectorLen(mod)) {
   6312                     1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
   6313                     2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
   6314                     5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
   6315                     else => null,
   6316                 },
   6317                 64 => switch (ty.vectorLen(mod)) {
   6318                     1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
   6319                     2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
   6320                     3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
   6321                     else => null,
   6322                 },
   6323                 16, 80, 128 => null,
   6324                 else => unreachable,
   6325             },
   6326             else => null,
   6327         },
   6328         else => unreachable,
   6329     } else null;
   6330 }
   6331 
   6332 fn genRoundLibcall(self: *Self, ty: Type, src_mcv: MCValue, mode: RoundMode) !MCValue {
   6333     const mod = self.bin_file.comp.module.?;
   6334     if (self.getRoundTag(ty)) |_| return .none;
   6335 
   6336     if (ty.zigTypeTag(mod) != .Float)
   6337         return self.fail("TODO implement genRound for {}", .{ty.fmt(mod)});
   6338 
   6339     var callee_buf: ["__trunc?".len]u8 = undefined;
   6340     return try self.genCall(.{ .lib = .{
   6341         .return_type = ty.toIntern(),
   6342         .param_types = &.{ty.toIntern()},
   6343         .callee = std.fmt.bufPrint(&callee_buf, "{s}{s}{s}", .{
   6344             floatLibcAbiPrefix(ty),
   6345             switch (mode.mode) {
   6346                 .down => "floor",
   6347                 .up => "ceil",
   6348                 .zero => "trunc",
   6349                 else => unreachable,
   6350             },
   6351             floatLibcAbiSuffix(ty),
   6352         }) catch unreachable,
   6353     } }, &.{ty}, &.{src_mcv});
   6354 }
   6355 
   6356 fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: RoundMode) !void {
   6357     const mod = self.bin_file.comp.module.?;
   6358     const mir_tag = self.getRoundTag(ty) orelse {
   6359         const result = try self.genRoundLibcall(ty, src_mcv, mode);
   6360         return self.genSetReg(dst_reg, ty, result);
   6361     };
   6362     const abi_size: u32 = @intCast(ty.abiSize(mod));
   6363     const dst_alias = registerAlias(dst_reg, abi_size);
   6364     switch (mir_tag[0]) {
   6365         .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   6366             mir_tag,
   6367             dst_alias,
   6368             dst_alias,
   6369             try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
   6370             Immediate.u(@as(u5, @bitCast(mode))),
   6371         ) else try self.asmRegisterRegisterRegisterImmediate(
   6372             mir_tag,
   6373             dst_alias,
   6374             dst_alias,
   6375             registerAlias(if (src_mcv.isRegister())
   6376                 src_mcv.getReg().?
   6377             else
   6378                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   6379             Immediate.u(@as(u5, @bitCast(mode))),
   6380         ),
   6381         else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   6382             mir_tag,
   6383             dst_alias,
   6384             try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
   6385             Immediate.u(@as(u5, @bitCast(mode))),
   6386         ) else try self.asmRegisterRegisterImmediate(
   6387             mir_tag,
   6388             dst_alias,
   6389             registerAlias(if (src_mcv.isRegister())
   6390                 src_mcv.getReg().?
   6391             else
   6392                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   6393             Immediate.u(@as(u5, @bitCast(mode))),
   6394         ),
   6395     }
   6396 }
   6397 
   6398 fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
   6399     const mod = self.bin_file.comp.module.?;
   6400     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   6401     const ty = self.typeOf(ty_op.operand);
   6402 
   6403     const result: MCValue = result: {
   6404         const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
   6405             else => null,
   6406             .Int => switch (ty.abiSize(mod)) {
   6407                 1...8 => {
   6408                     try self.spillEflagsIfOccupied();
   6409                     const src_mcv = try self.resolveInst(ty_op.operand);
   6410                     const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   6411 
   6412                     try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
   6413 
   6414                     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
   6415                     switch (src_mcv) {
   6416                         .register => |val_reg| try self.asmCmovccRegisterRegister(
   6417                             .l,
   6418                             registerAlias(dst_mcv.register, cmov_abi_size),
   6419                             registerAlias(val_reg, cmov_abi_size),
   6420                         ),
   6421                         .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
   6422                             .l,
   6423                             registerAlias(dst_mcv.register, cmov_abi_size),
   6424                             try src_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)),
   6425                         ),
   6426                         else => {
   6427                             const val_reg = try self.copyToTmpRegister(ty, src_mcv);
   6428                             try self.asmCmovccRegisterRegister(
   6429                                 .l,
   6430                                 registerAlias(dst_mcv.register, cmov_abi_size),
   6431                                 registerAlias(val_reg, cmov_abi_size),
   6432                             );
   6433                         },
   6434                     }
   6435                     break :result dst_mcv;
   6436                 },
   6437                 9...16 => {
   6438                     try self.spillEflagsIfOccupied();
   6439                     const src_mcv = try self.resolveInst(ty_op.operand);
   6440                     const dst_mcv = if (src_mcv == .register_pair and
   6441                         self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
   6442                         const dst_regs = try self.register_manager.allocRegs(
   6443                             2,
   6444                             .{ inst, inst },
   6445                             abi.RegisterClass.gp,
   6446                         );
   6447                         const dst_mcv: MCValue = .{ .register_pair = dst_regs };
   6448                         const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
   6449                         defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
   6450 
   6451                         try self.genCopy(ty, dst_mcv, src_mcv);
   6452                         break :dst dst_mcv;
   6453                     };
   6454                     const dst_regs = dst_mcv.register_pair;
   6455                     const dst_locks = self.register_manager.lockRegs(2, dst_regs);
   6456                     defer for (dst_locks) |dst_lock| if (dst_lock) |lock|
   6457                         self.register_manager.unlockReg(lock);
   6458 
   6459                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   6460                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6461                     defer self.register_manager.unlockReg(tmp_lock);
   6462 
   6463                     try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
   6464                     try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, Immediate.u(63));
   6465                     try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[0], tmp_reg);
   6466                     try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[1], tmp_reg);
   6467                     try self.asmRegisterRegister(.{ ._, .sub }, dst_regs[0], tmp_reg);
   6468                     try self.asmRegisterRegister(.{ ._, .sbb }, dst_regs[1], tmp_reg);
   6469 
   6470                     break :result dst_mcv;
   6471                 },
   6472                 else => return self.fail("TODO implement abs for {}", .{ty.fmt(mod)}),
   6473             },
   6474             .Float => return self.floatSign(inst, ty_op.operand, ty),
   6475             .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   6476                 else => null,
   6477                 .Int => switch (ty.childType(mod).intInfo(mod).bits) {
   6478                     else => null,
   6479                     8 => switch (ty.vectorLen(mod)) {
   6480                         else => null,
   6481                         1...16 => if (self.hasFeature(.avx))
   6482                             .{ .vp_b, .abs }
   6483                         else if (self.hasFeature(.ssse3))
   6484                             .{ .p_b, .abs }
   6485                         else
   6486                             null,
   6487                         17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
   6488                     },
   6489                     16 => switch (ty.vectorLen(mod)) {
   6490                         else => null,
   6491                         1...8 => if (self.hasFeature(.avx))
   6492                             .{ .vp_w, .abs }
   6493                         else if (self.hasFeature(.ssse3))
   6494                             .{ .p_w, .abs }
   6495                         else
   6496                             null,
   6497                         9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
   6498                     },
   6499                     32 => switch (ty.vectorLen(mod)) {
   6500                         else => null,
   6501                         1...4 => if (self.hasFeature(.avx))
   6502                             .{ .vp_d, .abs }
   6503                         else if (self.hasFeature(.ssse3))
   6504                             .{ .p_d, .abs }
   6505                         else
   6506                             null,
   6507                         5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
   6508                     },
   6509                 },
   6510                 .Float => return self.floatSign(inst, ty_op.operand, ty),
   6511             },
   6512         }) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
   6513 
   6514         const abi_size: u32 = @intCast(ty.abiSize(mod));
   6515         const src_mcv = try self.resolveInst(ty_op.operand);
   6516         const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
   6517             src_mcv.getReg().?
   6518         else
   6519             try self.register_manager.allocReg(inst, self.regClassForType(ty));
   6520         const dst_alias = registerAlias(dst_reg, abi_size);
   6521         if (src_mcv.isMemory()) try self.asmRegisterMemory(
   6522             mir_tag,
   6523             dst_alias,
   6524             try src_mcv.mem(self, self.memSize(ty)),
   6525         ) else try self.asmRegisterRegister(
   6526             mir_tag,
   6527             dst_alias,
   6528             registerAlias(if (src_mcv.isRegister())
   6529                 src_mcv.getReg().?
   6530             else
   6531                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
   6532         );
   6533         break :result .{ .register = dst_reg };
   6534     };
   6535     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   6536 }
   6537 
   6538 fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a square-root AIR instruction whose operand is `un_op`.
            //
            // Scalar f80/f128, and scalar f16 when the `f16c` feature is missing, are
            // lowered to a library call named `{prefix}sqrt{suffix}`. All other supported
            // types are computed in a register with the `sqrt` MIR tags selected below;
            // unsupported vector shapes fail with a "TODO implement airSqrt" error.
   6539     const mod = self.bin_file.comp.module.?;
   6540     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
   6541     const ty = self.typeOf(un_op);
   6542     const abi_size: u32 = @intCast(ty.abiSize(mod));
   6543 
   6544     const result: MCValue = result: {
   6545         switch (ty.zigTypeTag(mod)) {
   6546             .Float => {
   6547                 const float_bits = ty.floatBits(self.target.*);
                        // `true` selects the library-call path for this float width.
   6548                 if (switch (float_bits) {
   6549                     16 => !self.hasFeature(.f16c),
   6550                     32, 64 => false,
   6551                     80, 128 => true,
   6552                     else => unreachable,
   6553                 }) {
                            // Sized as `"__sqrt?"`; the `catch unreachable` on `bufPrint` below
                            // asserts that the formatted callee name fits.
   6554                     var callee_buf: ["__sqrt?".len]u8 = undefined;
   6555                     break :result try self.genCall(.{ .lib = .{
   6556                         .return_type = ty.toIntern(),
   6557                         .param_types = &.{ty.toIntern()},
   6558                         .callee = std.fmt.bufPrint(&callee_buf, "{s}sqrt{s}", .{
   6559                             floatLibcAbiPrefix(ty),
   6560                             floatLibcAbiSuffix(ty),
   6561                         }) catch unreachable,
   6562                     } }, &.{ty}, &.{.{ .air_ref = un_op }});
   6563                 }
   6564             },
   6565             else => {},
   6566         }
   6567 
                // Use the operand's register as the destination when the operand is in a
                // register and `reuseOperand` succeeds; otherwise copy it into a register
                // via `copyToRegisterWithInstTracking`.
   6568         const src_mcv = try self.resolveInst(un_op);
   6569         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
   6570             src_mcv
   6571         else
   6572             try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
   6573         const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
                // Presumably locked so that temporaries allocated below (e.g. by
                // `copyToTmpRegister`) cannot be assigned the destination — TODO confirm.
   6574         const dst_lock = self.register_manager.lockReg(dst_reg);
   6575         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   6576 
                // Select the MIR tag. The f16 cases emit their instructions right here and
                // leave through `break :result`, bypassing the generic emission after this
                // switch. A `null` tag ends in the TODO failure of the `orelse` below.
   6577         const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
   6578             .Float => switch (ty.floatBits(self.target.*)) {
   6579                 16 => {
                            // Convert to single precision (`cvtph2`), take the scalar sqrt, then
                            // convert back (`cvtps2ph`) with rounding mode `.mxcsr`.
   6580                     assert(self.hasFeature(.f16c));
   6581                     const mat_src_reg = if (src_mcv.isRegister())
   6582                         src_mcv.getReg().?
   6583                     else
   6584                         try self.copyToTmpRegister(ty, src_mcv);
   6585                     try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
   6586                     try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
   6587                     try self.asmRegisterRegisterImmediate(
   6588                         .{ .v_, .cvtps2ph },
   6589                         dst_reg,
   6590                         dst_reg,
   6591                         Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   6592                     );
   6593                     break :result dst_mcv;
   6594                 },
   6595                 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
   6596                 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
   6597                 else => unreachable,
   6598             },
   6599             .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
   6600                 .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
   6601                     16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(mod)) {
   6602                         1 => {
                                    // Same sequence as the scalar f16 case above.
   6603                             try self.asmRegisterRegister(
   6604                                 .{ .v_ps, .cvtph2 },
   6605                                 dst_reg,
   6606                                 (if (src_mcv.isRegister())
   6607                                     src_mcv.getReg().?
   6608                                 else
   6609                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
   6610                             );
   6611                             try self.asmRegisterRegisterRegister(
   6612                                 .{ .v_ss, .sqrt },
   6613                                 dst_reg,
   6614                                 dst_reg,
   6615                                 dst_reg,
   6616                             );
   6617                             try self.asmRegisterRegisterImmediate(
   6618                                 .{ .v_, .cvtps2ph },
   6619                                 dst_reg,
   6620                                 dst_reg,
   6621                                 Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   6622                             );
   6623                             break :result dst_mcv;
   6624                         },
   6625                         2...8 => {
                                    // Convert/sqrt/convert with the intermediate held in an alias of
                                    // `dst_reg` sized `abi_size * 2`; the source may be read
                                    // straight from memory.
   6626                             const wide_reg = registerAlias(dst_reg, abi_size * 2);
   6627                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   6628                                 .{ .v_ps, .cvtph2 },
   6629                                 wide_reg,
   6630                                 try src_mcv.mem(self, Memory.Size.fromSize(
   6631                                     @intCast(@divExact(wide_reg.bitSize(), 16)),
   6632                                 )),
   6633                             ) else try self.asmRegisterRegister(
   6634                                 .{ .v_ps, .cvtph2 },
   6635                                 wide_reg,
   6636                                 (if (src_mcv.isRegister())
   6637                                     src_mcv.getReg().?
   6638                                 else
   6639                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
   6640                             );
   6641                             try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
   6642                             try self.asmRegisterRegisterImmediate(
   6643                                 .{ .v_, .cvtps2ph },
   6644                                 dst_reg,
   6645                                 wide_reg,
   6646                                 Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   6647                             );
   6648                             break :result dst_mcv;
   6649                         },
   6650                         else => null,
   6651                     } else null,
   6652                     32 => switch (ty.vectorLen(mod)) {
   6653                         1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
   6654                         2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
   6655                         5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
   6656                         else => null,
   6657                     },
   6658                     64 => switch (ty.vectorLen(mod)) {
   6659                         1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
   6660                         2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
   6661                         3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
   6662                         else => null,
   6663                     },
   6664                     80, 128 => null,
   6665                     else => unreachable,
   6666                 },
   6667                 else => unreachable,
   6668             },
   6669             else => unreachable,
   6670         }) orelse return self.fail("TODO implement airSqrt for {}", .{
   6671             ty.fmt(mod),
   6672         });
                // `v_ss`/`v_sd` are emitted in three-operand form with `dst_reg` as both the
                // destination and the first source; every other tag is emitted in
                // two-operand form. In both forms the operand is read from memory directly
                // when possible, otherwise from its register or a temporary copy.
   6673         switch (mir_tag[0]) {
   6674             .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   6675                 mir_tag,
   6676                 dst_reg,
   6677                 dst_reg,
   6678                 try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
   6679             ) else try self.asmRegisterRegisterRegister(
   6680                 mir_tag,
   6681                 dst_reg,
   6682                 dst_reg,
   6683                 registerAlias(if (src_mcv.isRegister())
   6684                     src_mcv.getReg().?
   6685                 else
   6686                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
   6687             ),
   6688             else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
   6689                 mir_tag,
   6690                 dst_reg,
   6691                 try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
   6692             ) else try self.asmRegisterRegister(
   6693                 mir_tag,
   6694                 dst_reg,
   6695                 registerAlias(if (src_mcv.isRegister())
   6696                     src_mcv.getReg().?
   6697                 else
   6698                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
   6699             ),
   6700         }
   6701         break :result dst_mcv;
   6702     };
   6703     return self.finishAir(inst, result, .{ un_op, .none, .none });
   6704 }
   6705 
   6706 fn airUnaryMath(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Lowers a unary math AIR instruction to a library call.
            //
            // The callee name is `floatLibcAbiPrefix(ty)` + the AIR tag name +
            // `floatLibcAbiSuffix(ty)`, where `ty` is the operand type; the call takes and
            // returns `ty`. Only the tags listed in the switch below are accepted; any
            // other `tag` is `unreachable`.
   6707     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
   6708     const ty = self.typeOf(un_op);
            // Sized as `"__round?"`; the `catch unreachable` on `bufPrint` below asserts
            // that every formatted callee name fits.
   6709     var callee_buf: ["__round?".len]u8 = undefined;
   6710     const result = try self.genCall(.{ .lib = .{
   6711         .return_type = ty.toIntern(),
   6712         .param_types = &.{ty.toIntern()},
   6713         .callee = std.fmt.bufPrint(&callee_buf, "{s}{s}{s}", .{
   6714             floatLibcAbiPrefix(ty),
   6715             switch (tag) {
   6716                 .sin,
   6717                 .cos,
   6718                 .tan,
   6719                 .exp,
   6720                 .exp2,
   6721                 .log,
   6722                 .log2,
   6723                 .log10,
   6724                 .round,
   6725                 => @tagName(tag),
   6726                 else => unreachable,
   6727             },
   6728             floatLibcAbiSuffix(ty),
   6729         }) catch unreachable,
   6730     } }, &.{ty}, &.{.{ .air_ref = un_op }});
   6731     return self.finishAir(inst, result, .{ un_op, .none, .none });
   6732 }
   6733 
   6734 fn reuseOperand(
   6735     self: *Self,
   6736     inst: Air.Inst.Index,
   6737     operand: Air.Inst.Ref,
   6738     op_index: Liveness.OperandInt,
   6739     mcv: MCValue,
   6740 ) bool {
            // Shorthand for `reuseOperandAdvanced` with `inst` itself passed as the
            // instruction that takes over the operand's storage. Returns that call's
            // result unchanged.
   6741     return self.reuseOperandAdvanced(inst, operand, op_index, mcv, inst);
   6742 }
   6743 
   6744 fn reuseOperandAdvanced(
   6745     self: *Self,
   6746     inst: Air.Inst.Index,
   6747     operand: Air.Inst.Ref,
   6748     op_index: Liveness.OperandInt,
   6749     mcv: MCValue,
   6750     maybe_tracked_inst: ?Air.Inst.Index,
   6751 ) bool {
            // Tries to take over the storage `mcv` of `operand`, which is operand number
            // `op_index` of `inst`.
            //
            // Returns false without modifying any state when the operand does not die at
            // `inst`, when `mcv` is a `load_frame` whose frame index `isNamed()`, or when
            // `mcv` has any tag other than `register`, `register_pair`,
            // `register_overflow` or `load_frame`.
            //
            // Otherwise returns true after: re-associating each register of `mcv` with
            // `maybe_tracked_inst` (or freeing it when that is null), updating
            // `eflags_inst` for `register_overflow`, clearing the operand's death in the
            // liveness data, and calling `reuse` on the operand's resolved value.
   6752     if (!self.liveness.operandDies(inst, op_index))
   6753         return false;
   6754 
   6755     switch (mcv) {
   6756         .register, .register_pair, .register_overflow => for (mcv.getRegs()) |reg| {
   6757             // If it's in the registers table, need to associate the register(s) with the
   6758             // new instruction.
   6759             if (maybe_tracked_inst) |tracked_inst| {
   6760                 if (!self.register_manager.isRegFree(reg)) {
   6761                     if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
   6762                         self.register_manager.registers[index] = tracked_inst;
   6763                     }
   6764                 }
   6765             } else self.register_manager.freeReg(reg);
   6766         },
   6767         .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
   6768         else => return false,
   6769     }
            // Note: `.eflags` cannot reach this switch as written, because the switch
            // above returns false for it; only `.register_overflow` takes the first prong.
   6770     switch (mcv) {
   6771         .eflags, .register_overflow => self.eflags_inst = maybe_tracked_inst,
   6772         else => {},
   6773     }
   6774 
   6775     // Prevent the operand deaths processing code from deallocating it.
   6776     self.liveness.clearOperandDeath(inst, op_index);
            // The `.?` requires `operand` to convert to an instruction index.
   6777     const op_inst = operand.toIndex().?;
   6778     self.getResolvedInstValue(op_inst).reuse(self, maybe_tracked_inst, op_inst);
   6779 
   6780     return true;
   6781 }
   6782 
   6783 fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
            // Loads the value addressed by a packed pointer into `dst_mcv`. `ptr_ty`
            // supplies the bit offset (`packed_offset.bit_offset`) and an optional
            // non-runtime vector index; `ptr_mcv` holds the pointer itself.
            //
            // Byte-aligned bit offsets are handled by a plain `load` at the byte offset,
            // followed by truncation when the ABI size has bits above the value's bit
            // size. Unaligned offsets are supported only for values whose ABI size is at
            // most 8 bytes; larger ones fail with a "TODO implement packed load" error.
            // Does nothing when the pointee type has no runtime bits.
   6784     const mod = self.bin_file.comp.module.?;
   6785 
   6786     const ptr_info = ptr_ty.ptrInfo(mod);
   6787     const val_ty = Type.fromInterned(ptr_info.child);
   6788     if (!val_ty.hasRuntimeBitsIgnoreComptime(mod)) return;
   6789     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
   6790 
   6791     const val_bit_size: u32 = @intCast(val_ty.bitSize(mod));
   6792     const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
   6793         .none => 0,
   6794         .runtime => unreachable,
   6795         else => |vector_index| @intFromEnum(vector_index) * val_bit_size,
   6796     };
   6797     if (ptr_bit_off % 8 == 0) {
                // Byte-aligned: ordinary load from the pointer advanced by whole bytes.
   6798         try self.load(dst_mcv, ptr_ty, ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8))));
   6799         if (val_abi_size * 8 > val_bit_size) {
   6800             if (dst_mcv.isRegister()) {
   6801                 try self.truncateRegister(val_ty, dst_mcv.getReg().?);
   6802             } else {
                        // Destination is not a register: truncate the 8-byte chunk at byte
                        // offset `val_bit_size / 64 * 8` in a temporary and write it back.
   6803                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   6804                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6805                 defer self.register_manager.unlockReg(tmp_lock);
   6806 
   6807                 const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
   6808                 try self.genSetReg(tmp_reg, Type.usize, hi_mcv);
   6809                 try self.truncateRegister(val_ty, tmp_reg);
   6810                 try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg });
   6811             }
   6812         }
   6813         return;
   6814     }
   6815 
   6816     if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{val_ty.fmt(mod)});
   6817 
            // Split the bit offset into a limb-aligned byte displacement and a bit offset
            // within that limb. A limb is `val_abi_size` bytes here (at most 8).
   6818     const limb_abi_size: u32 = @min(val_abi_size, 8);
   6819     const limb_abi_bits = limb_abi_size * 8;
   6820     const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
   6821     const val_bit_off = ptr_bit_off % limb_abi_bits;
   6822     const val_extra_bits = self.regExtraBits(val_ty);
   6823 
   6824     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   6825     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   6826     defer self.register_manager.unlockReg(ptr_lock);
   6827 
            // Assemble the value in the destination register when there is one,
            // otherwise in a scratch register that is copied out at the end.
   6828     const dst_reg = switch (dst_mcv) {
   6829         .register => |reg| reg,
   6830         else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
   6831     };
   6832     const dst_lock = self.register_manager.lockReg(dst_reg);
   6833     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   6834 
            // One `val_abi_size`-byte load is used when the bit offset is below
            // `regExtraBits(val_ty)` (presumably the count of unused high bits — TODO
            // confirm); otherwise twice as many bytes are read.
   6835     const load_abi_size =
   6836         if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
   6837     if (load_abi_size <= 8) {
                // Everything fits in one register: load, then shift the value down to bit 0.
   6838         const load_reg = registerAlias(dst_reg, load_abi_size);
   6839         try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
   6840             .base = .{ .reg = ptr_reg },
   6841             .mod = .{ .rm = .{
   6842                 .size = Memory.Size.fromSize(load_abi_size),
   6843                 .disp = val_byte_off,
   6844             } },
   6845         });
   6846         try self.spillEflagsIfOccupied();
   6847         try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off));
   6848     } else {
                // The value straddles two limbs: load the low limb into `dst_alias` and the
                // following limb into `tmp_reg`, then shift right across the pair so the
                // value starts at bit 0 of `dst_alias`.
   6849         const tmp_reg =
   6850             registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size);
   6851         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   6852         defer self.register_manager.unlockReg(tmp_lock);
   6853 
   6854         const dst_alias = registerAlias(dst_reg, val_abi_size);
   6855         try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
   6856             .base = .{ .reg = ptr_reg },
   6857             .mod = .{ .rm = .{
   6858                 .size = Memory.Size.fromSize(val_abi_size),
   6859                 .disp = val_byte_off,
   6860             } },
   6861         });
   6862         try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
   6863             .base = .{ .reg = ptr_reg },
   6864             .mod = .{ .rm = .{
   6865                 .size = Memory.Size.fromSize(val_abi_size),
                        // The high limb starts one whole limb after the low limb, not one
                        // byte after it: the shift below fills the vacated high bits of
                        // `dst_alias` from the low bits of `tmp_reg`.
   6866                 .disp = val_byte_off + @as(i32, @intCast(limb_abi_size)),
   6867             } },
   6868         });
   6869         try self.spillEflagsIfOccupied();
   6870         try self.asmRegisterRegisterImmediate(
   6871             .{ ._rd, .sh },
   6872             dst_alias,
   6873             tmp_reg,
   6874             Immediate.u(val_bit_off),
   6875         );
   6876     }
   6877 
   6878     if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
   6879     try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg });
   6880 }
   6881 
   6882 fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
            // Copies the value that the pointer `ptr_mcv` (of type `ptr_ty`) points to into
            // `dst_mcv`. Does nothing when the pointee type has no runtime bits.
   6883     const mod = self.bin_file.comp.module.?;
   6884     const dst_ty = ptr_ty.childType(mod);
   6885     if (!dst_ty.hasRuntimeBitsIgnoreComptime(mod)) return;
   6886     switch (ptr_mcv) {
   6887         .none,
   6888         .unreach,
   6889         .dead,
   6890         .undef,
   6891         .eflags,
   6892         .register_pair,
   6893         .register_overflow,
   6894         .reserved_frame,
   6895         => unreachable, // not a valid pointer
                // For these tags the pointee location is obtained with `deref()` and copied
                // from directly.
   6896         .immediate,
   6897         .register,
   6898         .register_offset,
   6899         .lea_symbol,
   6900         .lea_direct,
   6901         .lea_got,
   6902         .lea_tlv,
   6903         .lea_frame,
   6904         => try self.genCopy(dst_ty, dst_mcv, ptr_mcv.deref()),
                // For these tags the pointer is first copied into a temporary register,
                // which stays locked while the value is copied through it.
   6905         .memory,
   6906         .indirect,
   6907         .load_symbol,
   6908         .load_direct,
   6909         .load_got,
   6910         .load_tlv,
   6911         .load_frame,
   6912         => {
   6913             const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   6914             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   6915             defer self.register_manager.unlockReg(addr_lock);
   6916 
   6917             try self.genCopy(dst_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
   6918         },
                // Resolve the referenced instruction and retry with its value.
   6919         .air_ref => |ptr_ref| try self.load(dst_mcv, ptr_ty, try self.resolveInst(ptr_ref)),
   6920     }
   6921 }
   6922 
   6923 fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an AIR load: reads the value pointed to by `ty_op.operand` into the
            // result location of `inst`. The result is `.none` when the loaded type has
            // no runtime bits.
   6924     const mod = self.bin_file.comp.module.?;
   6925     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
   6926     const elem_ty = self.typeOfIndex(inst);
   6927     const result: MCValue = result: {
   6928         if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   6929 
                // NOTE(review): rdi/rsi/rcx are spilled and kept locked for the rest of this
                // block; presumably the copies below may need them (e.g. for a string-move
                // based copy) — confirm.
   6930         try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
   6931         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
   6932         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
   6933 
   6934         const ptr_ty = self.typeOf(ty_op.operand);
   6935         const elem_size = elem_ty.abiSize(mod);
   6936 
   6937         const elem_rc = self.regClassForType(elem_ty);
   6938         const ptr_rc = self.regClassForType(ptr_ty);
   6939 
                // The pointer's own location is reused for the result only when the value is
                // at most 8 bytes, its register class covers the pointer's, and
                // `reuseOperand` succeeds; otherwise a fresh register or memory slot is used.
   6940         const ptr_mcv = try self.resolveInst(ty_op.operand);
   6941         const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and
   6942             self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
   6943             // The MCValue that holds the pointer can be re-used as the value.
   6944             ptr_mcv
   6945         else
   6946             try self.allocRegOrMem(inst, true);
   6947 
                // Pointers with a vector index or a non-zero packed host size take the
                // packed path.
   6948         const ptr_info = ptr_ty.ptrInfo(mod);
   6949         if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
   6950             try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
   6951         } else {
   6952             try self.load(dst_mcv, ptr_ty, ptr_mcv);
   6953         }
   6954 
                // For ABI integers whose bit size is smaller than their ABI size, run
                // `truncateRegister` over the most significant part: the register itself,
                // the second register of a pair, or the last 8-byte chunk in memory (which
                // is loaded into a temporary and written back afterwards).
   6955         if (elem_ty.isAbiInt(mod) and elem_size * 8 > elem_ty.bitSize(mod)) {
   6956             const high_mcv: MCValue = switch (dst_mcv) {
   6957                 .register => |dst_reg| .{ .register = dst_reg },
   6958                 .register_pair => |dst_regs| .{ .register = dst_regs[1] },
   6959                 else => dst_mcv.address().offset(@intCast((elem_size - 1) / 8 * 8)).deref(),
   6960             };
   6961             const high_reg = if (high_mcv.isRegister())
   6962                 high_mcv.getReg().?
   6963             else
   6964                 try self.copyToTmpRegister(Type.usize, high_mcv);
   6965             const high_lock = self.register_manager.lockReg(high_reg);
   6966             defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
   6967 
   6968             try self.truncateRegister(elem_ty, high_reg);
   6969             if (!high_mcv.isRegister()) try self.genCopy(
   6970                 if (elem_size <= 8) elem_ty else Type.usize,
   6971                 high_mcv,
   6972                 .{ .register = high_reg },
   6973             );
   6974         }
   6975         break :result dst_mcv;
   6976     };
   6977     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
   6978 }
   6979 
   6980 fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
            // Stores `src_mcv` through a packed pointer. `ptr_ty` supplies the host size,
            // the bit offset and an optional non-runtime vector index; `ptr_mcv` holds the
            // pointer itself.
            //
            // The destination is processed in limbs of `min(host_size, 8)` bytes. For each
            // limb the bits that belong to the value are cleared with an AND, then the
            // shifted and masked source bits are ORed in. Sources wider than 64 bits are
            // handled only up to 128 bits and only at bit offset 0 within the limb; other
            // cases fail with a "TODO: implement packed store" error.
            // Does nothing when the stored type has no runtime bits.
            //
            // NOTE(review): the AND/OR instructions emitted below write EFLAGS, and no
            // `spillEflagsIfOccupied` call is visible in this function (compare
            // `packedLoad`) — confirm that this is accounted for elsewhere.
   6981     const mod = self.bin_file.comp.module.?;
   6982     const ptr_info = ptr_ty.ptrInfo(mod);
   6983     const src_ty = Type.fromInterned(ptr_info.child);
   6984     if (!src_ty.hasRuntimeBitsIgnoreComptime(mod)) return;
   6985 
   6986     const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
   6987     const limb_abi_bits = limb_abi_size * 8;
   6988     const limb_ty = try mod.intType(.unsigned, limb_abi_bits);
   6989 
   6990     const src_bit_size = src_ty.bitSize(mod);
   6991     const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
   6992         .none => 0,
   6993         .runtime => unreachable,
   6994         else => |vector_index| @intFromEnum(vector_index) * src_bit_size,
   6995     };
            // Limb-aligned byte displacement of the first limb touched, and the bit
            // offset of the value inside that limb.
   6996     const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
   6997     const src_bit_off = ptr_bit_off % limb_abi_bits;
   6998 
   6999     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   7000     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   7001     defer self.register_manager.unlockReg(ptr_lock);
   7002 
            // Visit every limb that overlaps the bit range
            // [src_bit_off, src_bit_off + src_bit_size).
   7003     var limb_i: u16 = 0;
   7004     while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
                // Bit range of the value inside this limb: only the first limb starts at a
                // non-zero bit offset.
   7005         const part_bit_off = if (limb_i == 0) src_bit_off else 0;
   7006         const part_bit_size =
   7007             @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
   7008         const limb_mem: Memory = .{
   7009             .base = .{ .reg = ptr_reg },
   7010             .mod = .{ .rm = .{
   7011                 .size = Memory.Size.fromSize(limb_abi_size),
   7012                 .disp = src_byte_off + limb_i * limb_abi_size,
   7013             } },
   7014         };
   7015 
                // `part_mask` has ones exactly at the value's bits within this limb;
                // `part_mask_not` is its complement restricted to the limb width.
   7016         const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
   7017             @intCast(part_bit_off);
   7018         const part_mask_not = part_mask ^ (@as(u64, math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
                // Clear the target bits in memory. The mask is passed as an unsigned
                // immediate for limbs of at most 4 bytes, as a signed immediate when it
                // fits in an i32, and through a register otherwise.
   7019         if (limb_abi_size <= 4) {
   7020             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not));
   7021         } else if (math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
   7022             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small));
   7023         } else {
   7024             const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7025             try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not));
   7026             try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
   7027         }
   7028 
   7029         if (src_bit_size <= 64) {
                    // The source fits in one limb-sized register. Limb 0 receives the source
                    // shifted left by the bit offset; limb 1 receives the bits that did not
                    // fit, obtained by shifting right by `limb_abi_bits - src_bit_off`.
   7030             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7031             const tmp_mcv = MCValue{ .register = tmp_reg };
   7032             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7033             defer self.register_manager.unlockReg(tmp_lock);
   7034 
   7035             try self.genSetReg(tmp_reg, limb_ty, src_mcv);
   7036             switch (limb_i) {
   7037                 0 => try self.genShiftBinOpMir(
   7038                     .{ ._l, .sh },
   7039                     limb_ty,
   7040                     tmp_mcv,
   7041                     .{ .immediate = src_bit_off },
   7042                 ),
   7043                 1 => try self.genShiftBinOpMir(
   7044                     .{ ._r, .sh },
   7045                     limb_ty,
   7046                     tmp_mcv,
   7047                     .{ .immediate = limb_abi_bits - src_bit_off },
   7048                 ),
   7049                 else => unreachable,
   7050             }
   7051             try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
   7052             try self.asmMemoryRegister(
   7053                 .{ ._, .@"or" },
   7054                 limb_mem,
   7055                 registerAlias(tmp_reg, limb_abi_size),
   7056             );
   7057         } else if (src_bit_size <= 128 and src_bit_off == 0) {
                    // Wider source at bit offset 0: limb `limb_i` of the destination takes
                    // the source bytes at offset `limb_i * limb_abi_size`, with no shifting.
   7058             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7059             const tmp_mcv = MCValue{ .register = tmp_reg };
   7060             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7061             defer self.register_manager.unlockReg(tmp_lock);
   7062 
   7063             try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
   7064                 0 => src_mcv,
   7065                 else => src_mcv.address().offset(limb_i * limb_abi_size).deref(),
   7066             });
   7067             try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
   7068             try self.asmMemoryRegister(
   7069                 .{ ._, .@"or" },
   7070                 limb_mem,
   7071                 registerAlias(tmp_reg, limb_abi_size),
   7072             );
   7073         } else return self.fail("TODO: implement packed store of {}", .{src_ty.fmt(mod)});
   7074     }
   7075 }
   7076 
   7077 fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
            // Copies `src_mcv` to the location that the pointer `ptr_mcv` (of type
            // `ptr_ty`) points to. Does nothing when the pointee type has no runtime bits.
   7078     const mod = self.bin_file.comp.module.?;
   7079     const src_ty = ptr_ty.childType(mod);
   7080     if (!src_ty.hasRuntimeBitsIgnoreComptime(mod)) return;
   7081     switch (ptr_mcv) {
   7082         .none,
   7083         .unreach,
   7084         .dead,
   7085         .undef,
   7086         .eflags,
   7087         .register_pair,
   7088         .register_overflow,
   7089         .reserved_frame,
   7090         => unreachable, // not a valid pointer
                // For these tags the pointee location is obtained with `deref()` and copied
                // to directly.
   7091         .immediate,
   7092         .register,
   7093         .register_offset,
   7094         .lea_symbol,
   7095         .lea_direct,
   7096         .lea_got,
   7097         .lea_tlv,
   7098         .lea_frame,
   7099         => try self.genCopy(src_ty, ptr_mcv.deref(), src_mcv),
                // For these tags the pointer is first copied into a temporary register,
                // which stays locked while the value is copied through it.
   7100         .memory,
   7101         .indirect,
   7102         .load_symbol,
   7103         .load_direct,
   7104         .load_got,
   7105         .load_tlv,
   7106         .load_frame,
   7107         => {
   7108             const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
   7109             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
   7110             defer self.register_manager.unlockReg(addr_lock);
   7111 
   7112             try self.genCopy(src_ty, .{ .indirect = .{ .reg = addr_reg } }, src_mcv);
   7113         },
                // Resolve the referenced instruction and retry with its value.
   7114         .air_ref => |ptr_ref| try self.store(ptr_ty, try self.resolveInst(ptr_ref), src_mcv),
   7115     }
   7116 }
   7117 
   7118 fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
            // Lowers a store: `lhs` of the bin_op is the destination pointer and `rhs` is the
            // value being written. The instruction itself produces no result.
   7119     const zcu = self.bin_file.comp.module.?;
   7120     const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   7121
   7122     emit: {
                // Without `safety`, storing an undefined value emits nothing at all.
   7123         if (!safety and (try self.resolveInst(operands.rhs)) == .undef) break :emit;
   7124
                // rdi/rsi/rcx are spilled and held locked before the operands are resolved,
                // so neither operand is resolved into one of them.
   7125         try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
   7126         const scratch_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
   7127         defer for (scratch_locks) |lock| self.register_manager.unlockReg(lock);
   7128
   7129         const val_mcv = try self.resolveInst(operands.rhs);
   7130         const dst_ptr_mcv = try self.resolveInst(operands.lhs);
   7131         const dst_ptr_ty = self.typeOf(operands.lhs);
   7132
                // Pointers with neither a vector index nor a packed host size use the plain
                // path; everything else goes through packedStore.
   7133         const info = dst_ptr_ty.ptrInfo(zcu);
   7134         if (info.flags.vector_index == .none and info.packed_offset.host_size == 0) {
   7135             try self.store(dst_ptr_ty, dst_ptr_mcv, val_mcv);
   7136         } else {
   7137             try self.packedStore(dst_ptr_ty, dst_ptr_mcv, val_mcv);
   7138         }
   7139     }
   7140     return self.finishAir(inst, .none, .{ operands.lhs, operands.rhs, .none });
   7141 }
   7142 
   7143 fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void {
            // The container pointer operand and the field index are both read from the
            // instruction's `Air.StructField` extra data.
   7144     const payload = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
   7145     const field = self.air.extraData(Air.StructField, payload).data;
   7146     const field_ptr_mcv = try self.fieldPtr(inst, field.struct_operand, field.field_index);
   7147     return self.finishAir(inst, field_ptr_mcv, .{ field.struct_operand, .none, .none });
   7148 }
   7149 
   7150 fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void {
            // Same as airStructFieldPtr, except the field index is supplied by the caller
            // and the container pointer is the instruction's `ty_op` operand.
   7151     const container_ptr = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op.operand;
   7152     const field_ptr_mcv = try self.fieldPtr(inst, container_ptr, index);
   7153     return self.finishAir(inst, field_ptr_mcv, .{ container_ptr, .none, .none });
   7154 }
   7155 
   7156 fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue {
            // Computes a pointer to field `index` of the container that `operand` points to,
            // returned as the container pointer value displaced by the field's byte offset.
   7157     const zcu = self.bin_file.comp.module.?;
   7158     const result_ptr_ty = self.typeOfIndex(inst);
   7159     const base_ptr_ty = self.typeOf(operand);
   7160     const base_ptr_info = base_ptr_ty.ptrInfo(zcu);
   7161     const agg_ty = base_ptr_ty.childType(zcu);
   7162
            // Packed struct: when the resulting pointer still carries a host size, the address
            // is left unchanged; otherwise the combined bit offset must be a whole number of
            // bytes. Any other container uses its regular field byte offset.
   7163     const byte_offset: i32 = if (zcu.typeToPackedStruct(agg_ty)) |packed_struct|
   7164         if (result_ptr_ty.ptrInfo(zcu).packed_offset.host_size != 0)
   7165             0
   7166         else
   7167             @divExact(zcu.structPackedFieldBitOffset(packed_struct, index) +
   7168                 base_ptr_info.packed_offset.bit_offset, 8)
   7169     else
   7170         @intCast(agg_ty.structFieldOffset(index, zcu));
   7171
   7172     const base_mcv = try self.resolveInst(operand);
            // Immediates and frame addresses are offset in place; register-based values only
            // when the operand's register can be taken over by this instruction.
   7173     const usable_in_place = switch (base_mcv) {
   7174         .immediate, .lea_frame => true,
   7175         .register, .register_offset => self.reuseOperand(inst, operand, 0, base_mcv),
   7176         else => false,
   7177     };
            const field_base_mcv = if (usable_in_place)
                base_mcv
            else
                try self.copyToRegisterWithInstTracking(inst, result_ptr_ty, base_mcv);
   7178     return field_base_mcv.offset(byte_offset);
   7179 }
   7180 
   7181 fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
            // Produces the value of field `extra.field_index` of the by-value aggregate
            // `extra.struct_operand`. How the field is extracted depends on where the
            // aggregate currently lives: a single register, a register pair, an overflow
            // result (register + eflags) or a stack frame slot. Any other location fails
            // with a TODO error.
   7182     const mod = self.bin_file.comp.module.?;
   7183     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
   7184     const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
   7185     const result: MCValue = result: {
   7186         const operand = extra.struct_operand;
   7187         const index = extra.field_index;
   7188
   7189         const container_ty = self.typeOf(operand);
   7190         const container_rc = self.regClassForType(container_ty);
   7191         const field_ty = container_ty.structFieldType(index, mod);
                // A field without runtime bits has no machine value.
   7192         if (!field_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
   7193         const field_rc = self.regClassForType(field_ty);
   7194         const field_is_gp = field_rc.supersetOf(abi.RegisterClass.gp);
   7195
   7196         const src_mcv = try self.resolveInst(operand);
                // Offset of the field from the start of the container, in bits: the byte
                // offset times 8 for auto/extern layouts, the packed bit offset for packed
                // structs, and 0 for a packed container that is not a struct.
   7197         const field_off: u32 = switch (container_ty.containerLayout(mod)) {
   7198             .Auto, .Extern => @intCast(container_ty.structFieldOffset(index, mod) * 8),
   7199             .Packed => if (mod.typeToStruct(container_ty)) |struct_type|
   7200                 mod.structPackedFieldBitOffset(struct_type, index)
   7201             else
   7202                 0,
   7203         };
   7204
   7205         switch (src_mcv) {
                    // Whole container in one register: shift the field down to bit 0 and
                    // truncate away whatever lies above it.
   7206             .register => |src_reg| {
   7207                 const src_reg_lock = self.register_manager.lockRegAssumeUnused(src_reg);
   7208                 defer self.register_manager.unlockReg(src_reg_lock);
   7209
                        // Take over the source register when it already belongs to the field's
                        // register class and the operand dies here. Otherwise a field at bit 0
                        // is copied into a register tracked for `inst`, and a field at a
                        // non-zero offset is copied into an untracked temporary for shifting.
   7210                 const src_in_field_rc =
   7211                     field_rc.isSet(RegisterManager.indexOfRegIntoTracked(src_reg).?);
   7212                 const dst_reg = if (src_in_field_rc and self.reuseOperand(inst, operand, 0, src_mcv))
   7213                     src_reg
   7214                 else if (field_off == 0)
   7215                     (try self.copyToRegisterWithInstTracking(inst, field_ty, src_mcv)).register
   7216                 else
   7217                     try self.copyToTmpRegister(Type.usize, .{ .register = src_reg });
   7218                 const dst_mcv: MCValue = .{ .register = dst_reg };
   7219                 const dst_lock = self.register_manager.lockReg(dst_reg);
   7220                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7221
   7222                 if (field_off > 0) {
                            // The shift is preceded by spilling any live eflags value.
   7223                     try self.spillEflagsIfOccupied();
   7224                     try self.genShiftBinOpMir(
   7225                         .{ ._r, .sh },
   7226                         Type.usize,
   7227                         dst_mcv,
   7228                         .{ .immediate = field_off },
   7229                     );
   7230                 }
                        // Truncate only for general purpose registers, and only when the
                        // container is wider than the field.
   7231                 if (abi.RegisterClass.gp.isSet(RegisterManager.indexOfRegIntoTracked(dst_reg).?) and
   7232                     container_ty.abiSize(mod) * 8 > field_ty.bitSize(mod))
   7233                     try self.truncateRegister(field_ty, dst_reg);
   7234
                        // A shifted value whose field type does not accept general purpose
                        // registers is moved into a register tracked for `inst`.
   7235                 break :result if (field_off == 0 or field_rc.supersetOf(abi.RegisterClass.gp))
   7236                     dst_mcv
   7237                 else
   7238                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   7239             },
                    // Container split over two registers, 64 bits each.
   7240             .register_pair => |src_regs| {
   7241                 const src_regs_lock = self.register_manager.lockRegsAssumeUnused(2, src_regs);
   7242                 defer for (src_regs_lock) |lock| self.register_manager.unlockReg(lock);
   7243
   7244                 const field_bit_size: u32 = @intCast(field_ty.bitSize(mod));
                        // Pick the single register that holds the whole field. When the field
                        // straddles both registers, the `else` block below computes the result
                        // itself and always leaves through `break :result`.
   7245                 const src_reg = if (field_off + field_bit_size <= 64)
   7246                     src_regs[0]
   7247                 else if (field_off >= 64)
   7248                     src_regs[1]
   7249                 else {
                            // Work on the source pair directly when it can be reused,
                            // otherwise on a freshly allocated copy of it.
   7250                     const dst_regs: [2]Register = if (field_rc.supersetOf(container_rc) and
   7251                         self.reuseOperand(inst, operand, 0, src_mcv)) src_regs else dst: {
   7252                         const dst_regs =
   7253                             try self.register_manager.allocRegs(2, .{ null, null }, field_rc);
   7254                         const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
   7255                         defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
   7256
   7257                         try self.genCopy(container_ty, .{ .register_pair = dst_regs }, src_mcv);
   7258                         break :dst dst_regs;
   7259                     };
   7260                     const dst_mcv = MCValue{ .register_pair = dst_regs };
   7261                     const dst_locks = self.register_manager.lockRegs(2, dst_regs);
   7262                     defer for (dst_locks) |dst_lock| if (dst_lock) |lock|
   7263                         self.register_manager.unlockReg(lock);
   7264
                            // Shift the pair right as one 128-bit value so the field starts
                            // at bit 0 of dst_regs[0].
   7265                     if (field_off > 0) {
   7266                         try self.spillEflagsIfOccupied();
   7267                         try self.genShiftBinOpMir(
   7268                             .{ ._r, .sh },
   7269                             Type.u128,
   7270                             dst_mcv,
   7271                             .{ .immediate = field_off },
   7272                         );
   7273                     }
   7274
                            // A field of at most 64 bits now sits entirely in dst_regs[0].
   7275                     if (field_bit_size <= 64) {
   7276                         if (self.regExtraBits(field_ty) > 0)
   7277                             try self.truncateRegister(field_ty, dst_regs[0]);
   7278                         break :result if (field_rc.supersetOf(abi.RegisterClass.gp))
   7279                             .{ .register = dst_regs[0] }
   7280                         else
   7281                             try self.copyToRegisterWithInstTracking(inst, field_ty, .{
   7282                                 .register = dst_regs[0],
   7283                             });
   7284                     }
   7285
                            // Wider field: the low register is already complete, only the bits
                            // of the high register above the field are cleared.
   7286                     if (field_bit_size < 128) try self.truncateRegister(
   7287                         try mod.intType(.unsigned, @intCast(field_bit_size - 64)),
   7288                         dst_regs[1],
   7289                     );
   7290                     break :result if (field_rc.supersetOf(abi.RegisterClass.gp))
   7291                         dst_mcv
   7292                     else
   7293                         try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   7294                 };
   7295
                        // Field contained in one register of the pair: copy that register to a
                        // temporary, shift by the offset within it, then truncate.
   7296                 const dst_reg = try self.copyToTmpRegister(Type.usize, .{ .register = src_reg });
   7297                 const dst_mcv = MCValue{ .register = dst_reg };
   7298                 const dst_lock = self.register_manager.lockReg(dst_reg);
   7299                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7300
   7301                 if (field_off % 64 > 0) {
   7302                     try self.spillEflagsIfOccupied();
   7303                     try self.genShiftBinOpMir(
   7304                         .{ ._r, .sh },
   7305                         Type.usize,
   7306                         dst_mcv,
   7307                         .{ .immediate = field_off % 64 },
   7308                     );
   7309                 }
   7310                 if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(field_ty, dst_reg);
   7311
   7312                 break :result if (field_rc.supersetOf(abi.RegisterClass.gp))
   7313                     dst_mcv
   7314                 else
   7315                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   7316             },
                    // Result of an overflow operation: field 0 is the value held in `ro.reg`,
                    // field 1 is the overflow condition held in eflags.
   7317             .register_overflow => |ro| {
   7318                 switch (index) {
   7319                     // Get wrapped value for overflow operation.
   7320                     0 => if (self.reuseOperand(inst, extra.struct_operand, 0, src_mcv)) {
   7321                         self.eflags_inst = null; // actually stop tracking the overflow part
   7322                         break :result .{ .register = ro.reg };
   7323                     } else break :result try self.copyToRegisterWithInstTracking(
   7324                         inst,
   7325                         Type.usize,
   7326                         .{ .register = ro.reg },
   7327                     ),
   7328                     // Get overflow bit.
   7329                     1 => if (self.reuseOperandAdvanced(inst, extra.struct_operand, 0, src_mcv, null)) {
   7330                         self.eflags_inst = inst; // actually keep tracking the overflow part
   7331                         break :result .{ .eflags = ro.eflags };
   7332                     } else {
                                // Operand stays live: materialize the condition into the low
                                // byte of a new register with setcc.
   7333                         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
   7334                         try self.asmSetccRegister(ro.eflags, dst_reg.to8());
   7335                         break :result .{ .register = dst_reg.to8() };
   7336                     },
   7337                     else => unreachable,
   7338                 }
   7339             },
                    // Container stored in a stack frame slot.
   7340             .load_frame => |frame_addr| {
   7341                 const field_abi_size: u32 = @intCast(field_ty.abiSize(mod));
                        // Byte-aligned field: it can be addressed directly in memory.
   7342                 if (field_off % 8 == 0) {
   7343                     const field_byte_off = @divExact(field_off, 8);
   7344                     const off_mcv = src_mcv.address().offset(@intCast(field_byte_off)).deref();
   7345                     const field_bit_size = field_ty.bitSize(mod);
   7346
                            // Up to 8 bytes: load into a general purpose register as an integer
                            // of exactly the field's bit width. Non-integer fields are loaded
                            // as unsigned.
   7347                     if (field_abi_size <= 8) {
   7348                         const int_ty = try mod.intType(
   7349                             if (field_ty.isAbiInt(mod)) field_ty.intInfo(mod).signedness else .unsigned,
   7350                             @intCast(field_bit_size),
   7351                         );
   7352
                                // The register is tracked for `inst` only when it is also the
                                // final result, i.e. when the field type accepts gp registers.
   7353                         const dst_reg = try self.register_manager.allocReg(
   7354                             if (field_is_gp) inst else null,
   7355                             abi.RegisterClass.gp,
   7356                         );
   7357                         const dst_mcv = MCValue{ .register = dst_reg };
   7358                         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
   7359                         defer self.register_manager.unlockReg(dst_lock);
   7360
   7361                         try self.genCopy(int_ty, dst_mcv, off_mcv);
   7362                         if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(int_ty, dst_reg);
   7363                         break :result if (field_is_gp)
   7364                             dst_mcv
   7365                         else
   7366                             try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   7367                     }
   7368
                            // Larger field: alias the operand's frame slot at the field's offset
                            // when the field's ABI size stays inside the container and the
                            // operand can be reused; otherwise copy it to a new location.
   7369                     const container_abi_size: u32 = @intCast(container_ty.abiSize(mod));
   7370                     const dst_mcv = if (field_byte_off + field_abi_size <= container_abi_size and
   7371                         self.reuseOperand(inst, operand, 0, src_mcv))
   7372                         off_mcv
   7373                     else dst: {
   7374                         const dst_mcv = try self.allocRegOrMem(inst, true);
   7375                         try self.genCopy(field_ty, dst_mcv, off_mcv);
   7376                         break :dst dst_mcv;
   7377                     };
                            // The ABI size has bits beyond the field's bit size: reload the
                            // 8-byte limb containing the top of the field, truncate it and
                            // write it back.
   7378                     if (field_abi_size * 8 > field_bit_size and dst_mcv.isMemory()) {
   7379                         const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7380                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7381                         defer self.register_manager.unlockReg(tmp_lock);
   7382
   7383                         const hi_mcv =
   7384                             dst_mcv.address().offset(@intCast(field_bit_size / 64 * 8)).deref();
   7385                         try self.genSetReg(tmp_reg, Type.usize, hi_mcv);
   7386                         try self.truncateRegister(field_ty, tmp_reg);
   7387                         try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg });
   7388                     }
   7389                     break :result dst_mcv;
   7390                 }
   7391
                        // Field does not start on a byte boundary: read it in limbs of the
                        // field's ABI size (at most 8 bytes). `field_byte_off` is the byte
                        // offset of the limb holding the field's first bit, `field_bit_off`
                        // the position of that bit within the limb.
   7392                 const limb_abi_size: u31 = @min(field_abi_size, 8);
   7393                 const limb_abi_bits = limb_abi_size * 8;
   7394                 const field_byte_off: i32 = @intCast(field_off / limb_abi_bits * limb_abi_size);
   7395                 const field_bit_off = field_off % limb_abi_bits;
   7396
   7397                 if (field_abi_size > 8) {
   7398                     return self.fail("TODO implement struct_field_val with large packed field", .{});
   7399                 }
   7400
   7401                 const dst_reg = try self.register_manager.allocReg(
   7402                     if (field_is_gp) inst else null,
   7403                     abi.RegisterClass.gp,
   7404                 );
                        // One limb is enough when the bit offset is smaller than the field's
                        // spare register bits; otherwise twice the field's ABI size is needed.
   7405                 const field_extra_bits = self.regExtraBits(field_ty);
   7406                 const load_abi_size =
   7407                     if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2;
   7408                 if (load_abi_size <= 8) {
                            // Everything fits in one register: a single load followed by a
                            // right shift.
   7409                     const load_reg = registerAlias(dst_reg, load_abi_size);
   7410                     try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
   7411                         .base = .{ .frame = frame_addr.index },
   7412                         .mod = .{ .rm = .{
   7413                             .size = Memory.Size.fromSize(load_abi_size),
   7414                             .disp = frame_addr.off + field_byte_off,
   7415                         } },
   7416                     });
   7417                     try self.spillEflagsIfOccupied();
   7418                     try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off));
   7419                 } else {
                            // Load two adjacent limbs (low into dst, the following one into a
                            // temporary) and combine them with a double-register right shift.
   7420                     const tmp_reg = registerAlias(
   7421                         try self.register_manager.allocReg(null, abi.RegisterClass.gp),
   7422                         field_abi_size,
   7423                     );
   7424                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7425                     defer self.register_manager.unlockReg(tmp_lock);
   7426
   7427                     const dst_alias = registerAlias(dst_reg, field_abi_size);
   7428                     try self.asmRegisterMemory(
   7429                         .{ ._, .mov },
   7430                         dst_alias,
   7431                         .{
   7432                             .base = .{ .frame = frame_addr.index },
   7433                             .mod = .{ .rm = .{
   7434                                 .size = Memory.Size.fromSize(field_abi_size),
   7435                                 .disp = frame_addr.off + field_byte_off,
   7436                             } },
   7437                         },
   7438                     );
   7439                     try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
   7440                         .base = .{ .frame = frame_addr.index },
   7441                         .mod = .{ .rm = .{
   7442                             .size = Memory.Size.fromSize(field_abi_size),
   7443                             .disp = frame_addr.off + field_byte_off + limb_abi_size,
   7444                         } },
   7445                     });
   7446                     try self.spillEflagsIfOccupied();
   7447                     try self.asmRegisterRegisterImmediate(
   7448                         .{ ._rd, .sh },
   7449                         dst_alias,
   7450                         tmp_reg,
   7451                         Immediate.u(field_bit_off),
   7452                     );
   7453                 }
   7454
   7455                 if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg);
   7456
   7457                 const dst_mcv = MCValue{ .register = dst_reg };
   7458                 break :result if (field_is_gp)
   7459                     dst_mcv
   7460                 else
   7461                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
   7462             },
   7463             else => return self.fail("TODO implement airStructFieldVal for {}", .{src_mcv}),
   7464         }
   7465     };
   7466     return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
   7467 }
   7468 
   7469 fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void {
            // Recovers a pointer to the parent container from a pointer to one of its fields
            // by subtracting that field's byte offset within the parent type.
   7470     const zcu = self.bin_file.comp.module.?;
   7471     const payload = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
   7472     const data = self.air.extraData(Air.FieldParentPtr, payload).data;
   7473
   7474     const parent_ptr_ty = self.typeOfIndex(inst);
   7475     const parent_ty = parent_ptr_ty.childType(zcu);
   7476     const field_byte_off: i32 = @intCast(parent_ty.structFieldOffset(data.field_index, zcu));
   7477
   7478     const field_ptr_mcv = try self.resolveInst(data.field_ptr);
            // Adjust the operand in place when it is register-based and can be reused;
            // otherwise work on a copy held in a register tracked for this instruction.
   7479     const adjust_in_place = field_ptr_mcv.isRegisterOffset() and
   7480         self.reuseOperand(inst, data.field_ptr, 0, field_ptr_mcv);
   7481     const base_mcv = if (adjust_in_place)
   7482         field_ptr_mcv
   7483     else
   7484         try self.copyToRegisterWithInstTracking(inst, parent_ptr_ty, field_ptr_mcv);
   7485     return self.finishAir(inst, base_mcv.offset(-field_byte_off), .{ data.field_ptr, .none, .none });
   7486 }
   7487 
   7488 fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
            // Emits the unary operation `tag` on `src_air` and returns where the result
            // lives. Only `.not` and `.neg` are handled (any other tag is unreachable), and
            // vector operands fail with a TODO error. When `maybe_inst` is given, the
            // operand's location may be reused for the result.
   7489     const mod = self.bin_file.comp.module.?;
   7490     const src_ty = self.typeOf(src_air);
   7491     if (src_ty.zigTypeTag(mod) == .Vector)
   7492         return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(mod)});
   7493
   7494     var src_mcv = try self.resolveInst(src_air);
   7495     switch (src_mcv) {
                // `.not` of a value held in eflags: if the operand can be reused, negating the
                // condition code is enough and nothing is emitted. Otherwise the flags are
                // spilled and the operand is resolved again to pick up its new location.
   7496         .eflags => |cc| switch (tag) {
   7497             .not => {
   7498                 if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv))
   7499                     return .{ .eflags = cc.negate() };
   7500                 try self.spillEflagsIfOccupied();
   7501                 src_mcv = try self.resolveInst(src_air);
   7502             },
   7503             else => {},
   7504         },
   7505         else => {},
   7506     }
   7507
            // A source register stays locked until this function returns.
   7508     const src_lock = switch (src_mcv) {
   7509         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
   7510         else => null,
   7511     };
   7512     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
   7513
            // The operation is performed in place on `dst_mcv`: either the reused operand or
            // a new register/memory location initialized with a copy of the source.
   7514     const dst_mcv: MCValue = dst: {
   7515         if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv;
   7516
   7517         const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true);
   7518         try self.genCopy(src_ty, dst_mcv, src_mcv);
   7519         break :dst dst_mcv;
   7520     };
   7521     const dst_lock = switch (dst_mcv) {
   7522         .register => |reg| self.register_manager.lockReg(reg),
   7523         else => null,
   7524     };
   7525     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   7526
   7527     const abi_size: u16 = @intCast(src_ty.abiSize(mod));
   7528     switch (tag) {
   7529         .not => {
                    // Processed in limbs of at most 8 bytes. A bool is treated as a 1-bit
                    // unsigned integer.
   7530             const limb_abi_size: u16 = @min(abi_size, 8);
   7531             const int_info = if (src_ty.ip_index == .bool_type)
   7532                 std.builtin.Type.Int{ .signedness = .unsigned, .bits = 1 }
   7533             else
   7534                 src_ty.intInfo(mod);
   7535             var byte_off: i32 = 0;
   7536             while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) {
                        // Bits covered by this limb: signed values count up to the full ABI
                        // size, unsigned values only up to their declared bit width; either
                        // way capped at the limb size.
   7537                 const limb_bits: u16 = @intCast(@min(switch (int_info.signedness) {
   7538                     .signed => abi_size * 8,
   7539                     .unsigned => int_info.bits,
   7540                 } - byte_off * 8, limb_abi_size * 8));
   7541                 const limb_ty = try mod.intType(int_info.signedness, limb_bits);
   7542                 const limb_mcv = switch (byte_off) {
   7543                     0 => dst_mcv,
   7544                     else => dst_mcv.address().offset(byte_off).deref(),
   7545                 };
   7546
                        // An unsigned limb that does not fill its register is inverted with
                        // an XOR against a mask of `limb_bits` ones, which leaves the bits
                        // above the value untouched. Every other limb uses a plain NOT.
   7547                 if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) {
   7548                     const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - limb_bits);
   7549                     try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask });
   7550                 } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv);
   7551             }
   7552         },
   7553         .neg => {
   7554             try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv);
   7555             const bit_size = src_ty.intInfo(mod).bits;
                    // When the ABI size has more bits than the integer type, the result is
                    // truncated back to the type's width after negation.
   7556             if (abi_size * 8 > bit_size) {
   7557                 if (dst_mcv.isRegister()) {
   7558                     try self.truncateRegister(src_ty, dst_mcv.getReg().?);
   7559                 } else {
                            // Result not in a register: load the 8-byte limb holding the top
                            // of the value into a temporary, truncate it and store it back.
   7560                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7561                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7562                     defer self.register_manager.unlockReg(tmp_lock);
   7563
   7564                     const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
   7565                     try self.genSetReg(tmp_reg, Type.usize, hi_mcv);
   7566                     try self.truncateRegister(src_ty, tmp_reg);
   7567                     try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg });
   7568                 }
   7569             }
   7570         },
   7571         else => unreachable,
   7572     }
   7573     return dst_mcv;
   7574 }
   7575 
   7576 fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void {
            // Emits the single-operand instruction `mir_tag` acting in place on `dst_mcv`.
            // Types with an ABI size above 8 bytes fail with a TODO error.
   7577     const zcu = self.bin_file.comp.module.?;
   7578     const dst_size: u32 = @intCast(dst_ty.abiSize(zcu));
   7579     if (dst_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, dst_ty.fmt(zcu) });
   7580     switch (dst_mcv) {
   7581         .none,
   7582         .unreach,
   7583         .dead,
   7584         .undef,
   7585         .immediate,
   7586         .register_offset,
   7587         .eflags,
   7588         .register_overflow,
   7589         .lea_direct,
   7590         .lea_got,
   7591         .lea_tlv,
   7592         .lea_frame,
   7593         .reserved_frame,
   7594         .air_ref,
   7595         .lea_symbol,
   7596         => unreachable, // unmodifiable destination
                // Register operand: use the alias of the register matching the operand size.
   7597         .register => |reg| return self.asmRegister(mir_tag, registerAlias(reg, dst_size)),
   7598         .register_pair => unreachable, // unimplemented
                // The address is first materialized in a locked temporary register, and the
                // instruction is then applied through that register.
   7599         .memory, .load_symbol, .load_got, .load_direct, .load_tlv => {
   7600             const ptr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7601             const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
   7602             defer self.register_manager.unlockReg(ptr_lock);
   7603
   7604             try self.genSetReg(ptr_reg, Type.usize, dst_mcv.address());
   7605             return self.asmMemory(mir_tag, .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{
   7606                 .size = Memory.Size.fromSize(dst_size),
   7607             } } });
   7608         },
                // Already expressible as a memory operand.
   7609         .indirect, .load_frame => {
   7610             const dst_mem = try dst_mcv.mem(self, Memory.Size.fromSize(dst_size));
   7611             return self.asmMemory(mir_tag, dst_mem);
   7612         },
   7613     }
   7614 }
   7615 
   7616 /// Clobbers .rcx for non-immediate shift value.
   7617 fn genShiftBinOpMir(
   7618     self: *Self,
   7619     tag: Mir.Inst.FixedTag,
   7620     ty: Type,
   7621     lhs_mcv: MCValue,
   7622     shift_mcv: MCValue,
   7623 ) !void {
   7624     const mod = self.bin_file.comp.module.?;
   7625     const rhs_mcv: MCValue = rhs: {
   7626         switch (shift_mcv) {
   7627             .immediate => |imm| switch (imm) {
   7628                 0 => return,
   7629                 else => break :rhs shift_mcv,
   7630             },
   7631             .register => |shift_reg| if (shift_reg == .rcx) break :rhs shift_mcv,
   7632             else => {},
   7633         }
   7634         self.register_manager.getRegAssumeFree(.rcx, null);
   7635         try self.genSetReg(.cl, Type.u8, shift_mcv);
   7636         break :rhs .{ .register = .rcx };
   7637     };
   7638 
   7639     const abi_size: u32 = @intCast(ty.abiSize(mod));
   7640     if (abi_size <= 8) {
   7641         switch (lhs_mcv) {
   7642             .register => |lhs_reg| switch (rhs_mcv) {
   7643                 .immediate => |rhs_imm| try self.asmRegisterImmediate(
   7644                     tag,
   7645                     registerAlias(lhs_reg, abi_size),
   7646                     Immediate.u(rhs_imm),
   7647                 ),
   7648                 .register => |rhs_reg| try self.asmRegisterRegister(
   7649                     tag,
   7650                     registerAlias(lhs_reg, abi_size),
   7651                     registerAlias(rhs_reg, 1),
   7652                 ),
   7653                 else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7654                     @tagName(lhs_mcv),
   7655                     @tagName(rhs_mcv),
   7656                 }),
   7657             },
   7658             .memory, .indirect, .load_frame => {
   7659                 const lhs_mem: Memory = switch (lhs_mcv) {
   7660                     .memory => |addr| .{
   7661                         .base = .{ .reg = .ds },
   7662                         .mod = .{ .rm = .{
   7663                             .size = Memory.Size.fromSize(abi_size),
   7664                             .disp = math.cast(i32, @as(i64, @bitCast(addr))) orelse
   7665                                 return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7666                                 @tagName(lhs_mcv),
   7667                                 @tagName(rhs_mcv),
   7668                             }),
   7669                         } },
   7670                     },
   7671                     .indirect => |reg_off| .{
   7672                         .base = .{ .reg = reg_off.reg },
   7673                         .mod = .{ .rm = .{
   7674                             .size = Memory.Size.fromSize(abi_size),
   7675                             .disp = reg_off.off,
   7676                         } },
   7677                     },
   7678                     .load_frame => |frame_addr| .{
   7679                         .base = .{ .frame = frame_addr.index },
   7680                         .mod = .{ .rm = .{
   7681                             .size = Memory.Size.fromSize(abi_size),
   7682                             .disp = frame_addr.off,
   7683                         } },
   7684                     },
   7685                     else => unreachable,
   7686                 };
   7687                 switch (rhs_mcv) {
   7688                     .immediate => |rhs_imm| try self.asmMemoryImmediate(
   7689                         tag,
   7690                         lhs_mem,
   7691                         Immediate.u(rhs_imm),
   7692                     ),
   7693                     .register => |rhs_reg| try self.asmMemoryRegister(
   7694                         tag,
   7695                         lhs_mem,
   7696                         registerAlias(rhs_reg, 1),
   7697                     ),
   7698                     else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7699                         @tagName(lhs_mcv),
   7700                         @tagName(rhs_mcv),
   7701                     }),
   7702                 }
   7703             },
   7704             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7705                 @tagName(lhs_mcv),
   7706                 @tagName(rhs_mcv),
   7707             }),
   7708         }
   7709     } else if (abi_size <= 16) {
   7710         const info: struct { indices: [2]u31, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) {
   7711             ._l => .{ .indices = .{ 0, 1 }, .double_tag = .{ ._ld, .sh } },
   7712             ._r => .{ .indices = .{ 1, 0 }, .double_tag = .{ ._rd, .sh } },
   7713             else => unreachable,
   7714         };
   7715         switch (lhs_mcv) {
   7716             .register_pair => |lhs_regs| switch (rhs_mcv) {
   7717                 .immediate => |rhs_imm| if (rhs_imm > 0 and rhs_imm < 64) {
   7718                     try self.asmRegisterRegisterImmediate(
   7719                         info.double_tag,
   7720                         lhs_regs[info.indices[1]],
   7721                         lhs_regs[info.indices[0]],
   7722                         Immediate.u(rhs_imm),
   7723                     );
   7724                     try self.asmRegisterImmediate(
   7725                         tag,
   7726                         lhs_regs[info.indices[0]],
   7727                         Immediate.u(rhs_imm),
   7728                     );
   7729                 } else {
   7730                     assert(rhs_imm < 128);
   7731                     try self.asmRegisterRegister(
   7732                         .{ ._, .mov },
   7733                         lhs_regs[info.indices[1]],
   7734                         lhs_regs[info.indices[0]],
   7735                     );
   7736                     if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate(
   7737                         tag,
   7738                         lhs_regs[info.indices[0]],
   7739                         Immediate.u(63),
   7740                     ) else try self.asmRegisterRegister(
   7741                         .{ ._, .xor },
   7742                         lhs_regs[info.indices[0]],
   7743                         lhs_regs[info.indices[0]],
   7744                     );
   7745                     if (rhs_imm > 64) try self.asmRegisterImmediate(
   7746                         tag,
   7747                         lhs_regs[info.indices[1]],
   7748                         Immediate.u(rhs_imm - 64),
   7749                     );
   7750                 },
   7751                 .register => |rhs_reg| {
   7752                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7753                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7754                     defer self.register_manager.unlockReg(tmp_lock);
   7755 
   7756                     if (tag[0] == ._r and tag[1] == .sa) {
   7757                         try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, lhs_regs[info.indices[0]]);
   7758                         try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63));
   7759                     } else try self.asmRegisterRegister(
   7760                         .{ ._, .xor },
   7761                         tmp_reg.to32(),
   7762                         tmp_reg.to32(),
   7763                     );
   7764                     try self.asmRegisterRegisterRegister(
   7765                         info.double_tag,
   7766                         lhs_regs[info.indices[1]],
   7767                         lhs_regs[info.indices[0]],
   7768                         registerAlias(rhs_reg, 1),
   7769                     );
   7770                     try self.asmRegisterRegister(
   7771                         tag,
   7772                         lhs_regs[info.indices[0]],
   7773                         registerAlias(rhs_reg, 1),
   7774                     );
   7775                     try self.asmRegisterImmediate(
   7776                         .{ ._, .cmp },
   7777                         registerAlias(rhs_reg, 1),
   7778                         Immediate.u(64),
   7779                     );
   7780                     try self.asmCmovccRegisterRegister(
   7781                         .ae,
   7782                         lhs_regs[info.indices[1]],
   7783                         lhs_regs[info.indices[0]],
   7784                     );
   7785                     try self.asmCmovccRegisterRegister(.ae, lhs_regs[info.indices[0]], tmp_reg);
   7786                 },
   7787                 else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7788                     @tagName(lhs_mcv),
   7789                     @tagName(rhs_mcv),
   7790                 }),
   7791             },
   7792             .load_frame => |dst_frame_addr| {
   7793                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7794                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   7795                 defer self.register_manager.unlockReg(tmp_lock);
   7796 
   7797                 switch (rhs_mcv) {
   7798                     .immediate => |rhs_imm| if (rhs_imm > 0 and rhs_imm < 64) {
   7799                         try self.asmRegisterMemory(
   7800                             .{ ._, .mov },
   7801                             tmp_reg,
   7802                             .{
   7803                                 .base = .{ .frame = dst_frame_addr.index },
   7804                                 .mod = .{ .rm = .{
   7805                                     .size = .qword,
   7806                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
   7807                                 } },
   7808                             },
   7809                         );
   7810                         try self.asmMemoryRegisterImmediate(
   7811                             info.double_tag,
   7812                             .{
   7813                                 .base = .{ .frame = dst_frame_addr.index },
   7814                                 .mod = .{ .rm = .{
   7815                                     .size = .qword,
   7816                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
   7817                                 } },
   7818                             },
   7819                             tmp_reg,
   7820                             Immediate.u(rhs_imm),
   7821                         );
   7822                         try self.asmMemoryImmediate(
   7823                             tag,
   7824                             .{
   7825                                 .base = .{ .frame = dst_frame_addr.index },
   7826                                 .mod = .{ .rm = .{
   7827                                     .size = .qword,
   7828                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
   7829                                 } },
   7830                             },
   7831                             Immediate.u(rhs_imm),
   7832                         );
   7833                     } else {
   7834                         assert(rhs_imm < 128);
   7835                         try self.asmRegisterMemory(
   7836                             .{ ._, .mov },
   7837                             tmp_reg,
   7838                             .{
   7839                                 .base = .{ .frame = dst_frame_addr.index },
   7840                                 .mod = .{ .rm = .{
   7841                                     .size = .qword,
   7842                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
   7843                                 } },
   7844                             },
   7845                         );
   7846                         if (rhs_imm > 64) try self.asmRegisterImmediate(
   7847                             tag,
   7848                             tmp_reg,
   7849                             Immediate.u(rhs_imm - 64),
   7850                         );
   7851                         try self.asmMemoryRegister(
   7852                             .{ ._, .mov },
   7853                             .{
   7854                                 .base = .{ .frame = dst_frame_addr.index },
   7855                                 .mod = .{ .rm = .{
   7856                                     .size = .qword,
   7857                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
   7858                                 } },
   7859                             },
   7860                             tmp_reg,
   7861                         );
   7862                         if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate(
   7863                             tag,
   7864                             .{
   7865                                 .base = .{ .frame = dst_frame_addr.index },
   7866                                 .mod = .{ .rm = .{
   7867                                     .size = .qword,
   7868                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
   7869                                 } },
   7870                             },
   7871                             Immediate.u(63),
   7872                         ) else {
   7873                             try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32());
   7874                             try self.asmMemoryRegister(
   7875                                 .{ ._, .mov },
   7876                                 .{
   7877                                     .base = .{ .frame = dst_frame_addr.index },
   7878                                     .mod = .{ .rm = .{
   7879                                         .size = .qword,
   7880                                         .disp = dst_frame_addr.off + info.indices[0] * 8,
   7881                                     } },
   7882                                 },
   7883                                 tmp_reg,
   7884                             );
   7885                         }
   7886                     },
   7887                     .register => |rhs_reg| {
   7888                         const first_reg =
   7889                             try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7890                         const first_lock = self.register_manager.lockRegAssumeUnused(first_reg);
   7891                         defer self.register_manager.unlockReg(first_lock);
   7892 
   7893                         const second_reg =
   7894                             try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   7895                         const second_lock = self.register_manager.lockRegAssumeUnused(second_reg);
   7896                         defer self.register_manager.unlockReg(second_lock);
   7897 
   7898                         try self.asmRegisterMemory(
   7899                             .{ ._, .mov },
   7900                             first_reg,
   7901                             .{
   7902                                 .base = .{ .frame = dst_frame_addr.index },
   7903                                 .mod = .{ .rm = .{
   7904                                     .size = .qword,
   7905                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
   7906                                 } },
   7907                             },
   7908                         );
   7909                         try self.asmRegisterMemory(
   7910                             .{ ._, .mov },
   7911                             second_reg,
   7912                             .{
   7913                                 .base = .{ .frame = dst_frame_addr.index },
   7914                                 .mod = .{ .rm = .{
   7915                                     .size = .qword,
   7916                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
   7917                                 } },
   7918                             },
   7919                         );
   7920                         if (tag[0] == ._r and tag[1] == .sa) {
   7921                             try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg);
   7922                             try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63));
   7923                         } else try self.asmRegisterRegister(
   7924                             .{ ._, .xor },
   7925                             tmp_reg.to32(),
   7926                             tmp_reg.to32(),
   7927                         );
   7928                         try self.asmRegisterRegisterRegister(
   7929                             info.double_tag,
   7930                             second_reg,
   7931                             first_reg,
   7932                             registerAlias(rhs_reg, 1),
   7933                         );
   7934                         try self.asmRegisterRegister(tag, first_reg, registerAlias(rhs_reg, 1));
   7935                         try self.asmRegisterImmediate(
   7936                             .{ ._, .cmp },
   7937                             registerAlias(rhs_reg, 1),
   7938                             Immediate.u(64),
   7939                         );
   7940                         try self.asmCmovccRegisterRegister(.ae, second_reg, first_reg);
   7941                         try self.asmCmovccRegisterRegister(.ae, first_reg, tmp_reg);
   7942                         try self.asmMemoryRegister(
   7943                             .{ ._, .mov },
   7944                             .{
   7945                                 .base = .{ .frame = dst_frame_addr.index },
   7946                                 .mod = .{ .rm = .{
   7947                                     .size = .qword,
   7948                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
   7949                                 } },
   7950                             },
   7951                             second_reg,
   7952                         );
   7953                         try self.asmMemoryRegister(
   7954                             .{ ._, .mov },
   7955                             .{
   7956                                 .base = .{ .frame = dst_frame_addr.index },
   7957                                 .mod = .{ .rm = .{
   7958                                     .size = .qword,
   7959                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
   7960                                 } },
   7961                             },
   7962                             first_reg,
   7963                         );
   7964                     },
   7965                     else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7966                         @tagName(lhs_mcv),
   7967                         @tagName(rhs_mcv),
   7968                     }),
   7969                 }
   7970             },
   7971             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7972                 @tagName(lhs_mcv),
   7973                 @tagName(rhs_mcv),
   7974             }),
   7975         }
   7976     } else return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
   7977         @tagName(lhs_mcv),
   7978         @tagName(rhs_mcv),
   7979     });
   7980 }
   7981 
   7982 /// Result is always a register.
   7983 /// Clobbers .rcx for non-immediate rhs, therefore care is needed to spill .rcx upfront.
   7984 /// Asserts .rcx is free.
   7985 fn genShiftBinOp(
   7986     self: *Self,
   7987     air_tag: Air.Inst.Tag,
   7988     maybe_inst: ?Air.Inst.Index,
   7989     lhs_mcv: MCValue,
   7990     rhs_mcv: MCValue,
   7991     lhs_ty: Type,
   7992     rhs_ty: Type,
   7993 ) !MCValue {
   7994     const mod = self.bin_file.comp.module.?;
   7995     if (lhs_ty.zigTypeTag(mod) == .Vector) return self.fail("TODO implement genShiftBinOp for {}", .{
   7996         lhs_ty.fmt(mod),
   7997     });
   7998 
   7999     assert(rhs_ty.abiSize(mod) == 1);
   8000     try self.spillEflagsIfOccupied();
   8001 
   8002     const lhs_abi_size = lhs_ty.abiSize(mod);
   8003     if (lhs_abi_size > 16) return self.fail("TODO implement genShiftBinOp for {}", .{
   8004         lhs_ty.fmt(mod),
   8005     });
   8006 
   8007     try self.register_manager.getReg(.rcx, null);
   8008     const rcx_lock = self.register_manager.lockRegAssumeUnused(.rcx);
   8009     defer self.register_manager.unlockReg(rcx_lock);
   8010 
   8011     const lhs_lock = switch (lhs_mcv) {
   8012         .register => |reg| self.register_manager.lockReg(reg),
   8013         else => null,
   8014     };
   8015     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   8016 
   8017     const rhs_lock = switch (rhs_mcv) {
   8018         .register => |reg| self.register_manager.lockReg(reg),
   8019         else => null,
   8020     };
   8021     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   8022 
   8023     const dst_mcv: MCValue = dst: {
   8024         if (maybe_inst) |inst| {
   8025             const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
   8026             if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv;
   8027         }
   8028         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
   8029         try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
   8030         break :dst dst_mcv;
   8031     };
   8032 
   8033     const signedness = lhs_ty.intInfo(mod).signedness;
   8034     try self.genShiftBinOpMir(switch (air_tag) {
   8035         .shl, .shl_exact => switch (signedness) {
   8036             .signed => .{ ._l, .sa },
   8037             .unsigned => .{ ._l, .sh },
   8038         },
   8039         .shr, .shr_exact => switch (signedness) {
   8040             .signed => .{ ._r, .sa },
   8041             .unsigned => .{ ._r, .sh },
   8042         },
   8043         else => unreachable,
   8044     }, lhs_ty, dst_mcv, rhs_mcv);
   8045     return dst_mcv;
   8046 }
   8047 
/// Lowers an integer multiply, division, remainder or modulo (`tag`) of `lhs_mcv` and
/// `rhs_mcv` and returns the location of the result.
/// Depending on the path taken the result is the `.rax`/`.rdx` register pair (16-byte
/// multiply), an alias of .rax or .rdx (results of at most 8 bytes), a frame allocation
/// (multiply whose result is wider than 8 bytes), or whatever the signed `.mod` /
/// `.div_floor` helpers produce.
/// Fails with a TODO for vector and float destinations and for unsupported size combinations.
/// Clobbers .rax and .rdx therefore care is needed to spill .rax and .rdx upfront.
/// Asserts .rax and .rdx are free and that no instruction currently owns eflags.
fn genMulDivBinOp(
    self: *Self,
    tag: Air.Inst.Tag,
    maybe_inst: ?Air.Inst.Index,
    dst_ty: Type,
    src_ty: Type,
    lhs_mcv: MCValue,
    rhs_mcv: MCValue,
) !MCValue {
    const mod = self.bin_file.comp.module.?;
    if (dst_ty.zigTypeTag(mod) == .Vector or dst_ty.zigTypeTag(mod) == .Float) return self.fail(
        "TODO implement genMulDivBinOp for {s} from {} to {}",
        .{ @tagName(tag), src_ty.fmt(mod), dst_ty.fmt(mod) },
    );
    const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
    const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));

    assert(self.register_manager.isRegFree(.rax));
    assert(self.register_manager.isRegFree(.rdx));
    assert(self.eflags_inst == null);

    // 16-byte by 16-byte multiply, built from 8-byte pieces:
    //   rdx:rax = lhs[0] * rhs[0]
    //   rdx    += lhs[0] * rhs[1] + lhs[1] * rhs[0]
    // where [0] is the first qword / first register of the pair and [1] the second.
    if (dst_abi_size == 16 and src_abi_size == 16) {
        assert(tag == .mul or tag == .mul_wrap);
        const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx });
        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

        // Operands that live behind a symbol are turned into an indirect access through a
        // temporary address register, which is then locked for the rest of this path.
        const mat_lhs_mcv = switch (lhs_mcv) {
            .load_symbol => mat_lhs_mcv: {
                // TODO clean this up!
                const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address());
                break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
            },
            else => lhs_mcv,
        };
        const mat_lhs_lock = switch (mat_lhs_mcv) {
            .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
            else => null,
        };
        defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
        const mat_rhs_mcv = switch (rhs_mcv) {
            .load_symbol => mat_rhs_mcv: {
                // TODO clean this up!
                const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address());
                break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
            },
            else => rhs_mcv,
        };
        const mat_rhs_lock = switch (mat_rhs_mcv) {
            .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
            else => null,
        };
        defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);

        // Scratch register for the two cross products.
        const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
        defer self.register_manager.unlockReg(tmp_lock);

        // rax = lhs[0]
        if (mat_lhs_mcv.isMemory())
            try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .qword))
        else
            try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]);
        // tmp = rhs[1] * lhs[0]
        if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
            .{ ._, .mov },
            tmp_reg,
            try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
        ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]);
        try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax);
        // rdx:rax = rax * rhs[0] (one-operand mul), then fold the first cross product into rdx
        if (mat_rhs_mcv.isMemory())
            try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword))
        else
            try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
        try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
        // tmp = lhs[1] * rhs[0], folded into rdx as the second cross product
        if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
            .{ ._, .mov },
            tmp_reg,
            try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
        ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]);
        if (mat_rhs_mcv.isMemory())
            try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .qword))
        else
            try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]);
        try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
        return .{ .register_pair = .{ .rax, .rdx } };
    }

    // Remaining paths only handle sources of at most 8 bytes. A multiply may produce a
    // result of the same size or twice the size of its source; every other operation
    // requires the destination and source sizes to match.
    if (switch (tag) {
        else => unreachable,
        .mul, .mul_wrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2,
        .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size,
    } or src_abi_size > 8) return self.fail(
        "TODO implement genMulDivBinOp for {s} from {} to {}",
        .{ @tagName(tag), src_ty.fmt(mod), dst_ty.fmt(mod) },
    );
    // Operate on the destination type unless it is wider than 8 bytes, in which case the
    // source type is used instead.
    const ty = if (dst_abi_size <= 8) dst_ty else src_ty;
    const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size;

    const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
    defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);

    const signedness = ty.intInfo(mod).signedness;
    switch (tag) {
        .mul,
        .mul_wrap,
        .rem,
        .div_trunc,
        .div_exact,
        => {
            // Associate `maybe_inst` with whichever of .rax / .rdx will be returned as the
            // result register: .rax for quotients and for products of at most 8 bytes,
            // .rdx for remainders. Wider products are stored to a frame allocation below,
            // so neither register is tracked for them.
            const track_inst_rax = switch (tag) {
                .mul, .mul_wrap => if (dst_abi_size <= 8) maybe_inst else null,
                .div_exact, .div_trunc => maybe_inst,
                else => null,
            };
            const track_inst_rdx = switch (tag) {
                .rem => maybe_inst,
                else => null,
            };
            try self.register_manager.getReg(.rax, track_inst_rax);
            try self.register_manager.getReg(.rdx, track_inst_rdx);

            try self.genIntMulDivOpMir(switch (signedness) {
                .signed => switch (tag) {
                    .mul, .mul_wrap => .{ .i_, .mul },
                    .div_trunc, .div_exact, .rem => .{ .i_, .div },
                    else => unreachable,
                },
                .unsigned => switch (tag) {
                    .mul, .mul_wrap => .{ ._, .mul },
                    .div_trunc, .div_exact, .rem => .{ ._, .div },
                    else => unreachable,
                },
            }, ty, lhs_mcv, rhs_mcv);

            if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
                .mul, .mul_wrap, .div_trunc, .div_exact => .rax,
                .rem => .rdx,
                else => unreachable,
            }, dst_abi_size) };

            // Result wider than 8 bytes (only reachable for a multiply, given the size check
            // above): store .rax at the start of a fresh allocation and .rdx 8 bytes after it.
            const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
            try self.asmMemoryRegister(.{ ._, .mov }, .{
                .base = .{ .frame = dst_mcv.load_frame.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .disp = dst_mcv.load_frame.off,
                } },
            }, .rax);
            try self.asmMemoryRegister(.{ ._, .mov }, .{
                .base = .{ .frame = dst_mcv.load_frame.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .disp = dst_mcv.load_frame.off + 8,
                } },
            }, .rdx);
            return dst_mcv;
        },

        .mod => {
            // Only the unsigned case returns .rdx as the result, so only then is it tracked.
            try self.register_manager.getReg(.rax, null);
            try self.register_manager.getReg(.rdx, if (signedness == .unsigned) maybe_inst else null);

            switch (signedness) {
                .signed => {
                    // Signed modulo is computed as lhs - div_floor(lhs, rhs) * rhs.
                    const lhs_lock = switch (lhs_mcv) {
                        .register => |reg| self.register_manager.lockReg(reg),
                        else => null,
                    };
                    defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
                    const rhs_lock = switch (rhs_mcv) {
                        .register => |reg| self.register_manager.lockReg(reg),
                        else => null,
                    };
                    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

                    // hack around hazard between rhs and div_floor by copying rhs to another register
                    const rhs_copy = try self.copyToTmpRegister(ty, rhs_mcv);
                    const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy);
                    defer self.register_manager.unlockReg(rhs_copy_lock);

                    // NOTE(review): genIntMulComplexOpMir presumably multiplies `div_floor`
                    // in place by the rhs copy - confirm against its definition.
                    const div_floor = try self.genInlineIntDivFloor(ty, lhs_mcv, rhs_mcv);
                    try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy });
                    const div_floor_lock = self.register_manager.lockReg(div_floor.register);
                    defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock);

                    // The result starts as a copy of lhs (tracked to the instruction when
                    // there is one) and then has the product subtracted from it.
                    const result: MCValue = if (maybe_inst) |inst|
                        try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv)
                    else
                        .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) };
                    try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor);

                    return result;
                },
                .unsigned => {
                    // Unsigned modulo: emit a plain div and return the .rdx alias.
                    try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, rhs_mcv);
                    return .{ .register = registerAlias(.rdx, abi_size) };
                },
            }
        },

        .div_floor => {
            // Only the unsigned case returns .rax as the result, so only then is it tracked.
            try self.register_manager.getReg(.rax, if (signedness == .unsigned) maybe_inst else null);
            try self.register_manager.getReg(.rdx, null);

            const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
                .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                else => null,
            };
            defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

            // For signed operands the rhs is first copied into a register (tracked to the
            // instruction when there is one); unsigned operands use the rhs as given.
            const actual_rhs_mcv: MCValue = blk: {
                switch (signedness) {
                    .signed => {
                        const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
                            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                            else => null,
                        };
                        defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

                        if (maybe_inst) |inst| {
                            break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs_mcv);
                        }
                        break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs_mcv) };
                    },
                    .unsigned => break :blk rhs_mcv,
                }
            };
            const rhs_lock: ?RegisterLock = switch (actual_rhs_mcv) {
                .register => |reg| self.register_manager.lockReg(reg),
                else => null,
            };
            defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

            switch (signedness) {
                .signed => return try self.genInlineIntDivFloor(ty, lhs_mcv, actual_rhs_mcv),
                .unsigned => {
                    // Unsigned floor division: emit a plain div and return the .rax alias.
                    try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, actual_rhs_mcv);
                    return .{ .register = registerAlias(.rax, abi_size) };
                },
            }
        },

        else => unreachable,
    }
}
   8294 
   8295 fn genBinOp(
   8296     self: *Self,
   8297     maybe_inst: ?Air.Inst.Index,
   8298     air_tag: Air.Inst.Tag,
   8299     lhs_air: Air.Inst.Ref,
   8300     rhs_air: Air.Inst.Ref,
   8301 ) !MCValue {
   8302     const mod = self.bin_file.comp.module.?;
   8303     const lhs_ty = self.typeOf(lhs_air);
   8304     const rhs_ty = self.typeOf(rhs_air);
   8305     const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
   8306 
   8307     if (lhs_ty.isRuntimeFloat()) libcall: {
   8308         const float_bits = lhs_ty.floatBits(self.target.*);
   8309         const type_needs_libcall = switch (float_bits) {
   8310             16 => !self.hasFeature(.f16c),
   8311             32, 64 => false,
   8312             80, 128 => true,
   8313             else => unreachable,
   8314         };
   8315         switch (air_tag) {
   8316             .rem, .mod => {},
   8317             else => if (!type_needs_libcall) break :libcall,
   8318         }
   8319         var callee_buf: ["__mod?f3".len]u8 = undefined;
   8320         const callee = switch (air_tag) {
   8321             .add,
   8322             .sub,
   8323             .mul,
   8324             .div_float,
   8325             .div_trunc,
   8326             .div_floor,
   8327             => std.fmt.bufPrint(&callee_buf, "__{s}{c}f3", .{
   8328                 @tagName(air_tag)[0..3],
   8329                 floatCompilerRtAbiName(float_bits),
   8330             }),
   8331             .rem, .mod, .min, .max => std.fmt.bufPrint(&callee_buf, "{s}f{s}{s}", .{
   8332                 floatLibcAbiPrefix(lhs_ty),
   8333                 switch (air_tag) {
   8334                     .rem, .mod => "mod",
   8335                     .min => "min",
   8336                     .max => "max",
   8337                     else => unreachable,
   8338                 },
   8339                 floatLibcAbiSuffix(lhs_ty),
   8340             }),
   8341             else => return self.fail("TODO implement genBinOp for {s} {}", .{
   8342                 @tagName(air_tag), lhs_ty.fmt(mod),
   8343             }),
   8344         } catch unreachable;
   8345         const result = try self.genCall(.{ .lib = .{
   8346             .return_type = lhs_ty.toIntern(),
   8347             .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
   8348             .callee = callee,
   8349         } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } });
   8350         return switch (air_tag) {
   8351             .mod => result: {
   8352                 const adjusted: MCValue = if (type_needs_libcall) adjusted: {
   8353                     var add_callee_buf: ["__add?f3".len]u8 = undefined;
   8354                     break :adjusted try self.genCall(.{ .lib = .{
   8355                         .return_type = lhs_ty.toIntern(),
   8356                         .param_types = &.{
   8357                             lhs_ty.toIntern(),
   8358                             rhs_ty.toIntern(),
   8359                         },
   8360                         .callee = std.fmt.bufPrint(&add_callee_buf, "__add{c}f3", .{
   8361                             floatCompilerRtAbiName(float_bits),
   8362                         }) catch unreachable,
   8363                     } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } });
   8364                 } else switch (float_bits) {
   8365                     16, 32, 64 => adjusted: {
   8366                         const dst_reg = switch (result) {
   8367                             .register => |reg| reg,
   8368                             else => if (maybe_inst) |inst|
   8369                                 (try self.copyToRegisterWithInstTracking(inst, lhs_ty, result)).register
   8370                             else
   8371                                 try self.copyToTmpRegister(lhs_ty, result),
   8372                         };
   8373                         const dst_lock = self.register_manager.lockReg(dst_reg);
   8374                         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   8375 
   8376                         const rhs_mcv = try self.resolveInst(rhs_air);
   8377                         const src_mcv: MCValue = if (float_bits == 16) src: {
   8378                             assert(self.hasFeature(.f16c));
   8379                             const tmp_reg = (try self.register_manager.allocReg(
   8380                                 null,
   8381                                 abi.RegisterClass.sse,
   8382                             )).to128();
   8383                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   8384                             defer self.register_manager.unlockReg(tmp_lock);
   8385 
   8386                             if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   8387                                 .{ .vp_w, .insr },
   8388                                 dst_reg,
   8389                                 dst_reg,
   8390                                 try rhs_mcv.mem(self, .word),
   8391                                 Immediate.u(1),
   8392                             ) else try self.asmRegisterRegisterRegister(
   8393                                 .{ .vp_, .unpcklwd },
   8394                                 dst_reg,
   8395                                 dst_reg,
   8396                                 (if (rhs_mcv.isRegister())
   8397                                     rhs_mcv.getReg().?
   8398                                 else
   8399                                     try self.copyToTmpRegister(rhs_ty, rhs_mcv)).to128(),
   8400                             );
   8401                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   8402                             break :src .{ .register = tmp_reg };
   8403                         } else rhs_mcv;
   8404 
   8405                         if (self.hasFeature(.avx)) {
   8406                             const mir_tag: Mir.Inst.FixedTag = switch (float_bits) {
   8407                                 16, 32 => .{ .v_ss, .add },
   8408                                 64 => .{ .v_sd, .add },
   8409                                 else => unreachable,
   8410                             };
   8411                             if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   8412                                 mir_tag,
   8413                                 dst_reg,
   8414                                 dst_reg,
   8415                                 try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)),
   8416                             ) else try self.asmRegisterRegisterRegister(
   8417                                 mir_tag,
   8418                                 dst_reg,
   8419                                 dst_reg,
   8420                                 (if (src_mcv.isRegister())
   8421                                     src_mcv.getReg().?
   8422                                 else
   8423                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   8424                             );
   8425                         } else {
   8426                             const mir_tag: Mir.Inst.FixedTag = switch (float_bits) {
   8427                                 32 => .{ ._ss, .add },
   8428                                 64 => .{ ._sd, .add },
   8429                                 else => unreachable,
   8430                             };
   8431                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   8432                                 mir_tag,
   8433                                 dst_reg,
   8434                                 try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)),
   8435                             ) else try self.asmRegisterRegister(
   8436                                 mir_tag,
   8437                                 dst_reg,
   8438                                 (if (src_mcv.isRegister())
   8439                                     src_mcv.getReg().?
   8440                                 else
   8441                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   8442                             );
   8443                         }
   8444 
   8445                         if (float_bits == 16) try self.asmRegisterRegisterImmediate(
   8446                             .{ .v_, .cvtps2ph },
   8447                             dst_reg,
   8448                             dst_reg,
   8449                             Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   8450                         );
   8451                         break :adjusted .{ .register = dst_reg };
   8452                     },
   8453                     80, 128 => return self.fail("TODO implement genBinOp for {s} of {}", .{
   8454                         @tagName(air_tag), lhs_ty.fmt(mod),
   8455                     }),
   8456                     else => unreachable,
   8457                 };
   8458                 break :result try self.genCall(.{ .lib = .{
   8459                     .return_type = lhs_ty.toIntern(),
   8460                     .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
   8461                     .callee = callee,
   8462                 } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } });
   8463             },
   8464             .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{
   8465                 .mode = switch (air_tag) {
   8466                     .div_trunc => .zero,
   8467                     .div_floor => .down,
   8468                     else => unreachable,
   8469                 },
   8470                 .precision = .inexact,
   8471             }),
   8472             else => result,
   8473         };
   8474     }
   8475 
   8476     const sse_op = switch (lhs_ty.zigTypeTag(mod)) {
   8477         else => false,
   8478         .Float => true,
   8479         .Vector => switch (lhs_ty.childType(mod).toIntern()) {
   8480             .bool_type => false,
   8481             else => true,
   8482         },
   8483     };
   8484     if (sse_op and ((lhs_ty.scalarType(mod).isRuntimeFloat() and
   8485         lhs_ty.scalarType(mod).floatBits(self.target.*) == 80) or
   8486         lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16)))
   8487         return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(mod) });
   8488 
   8489     const maybe_mask_reg = switch (air_tag) {
   8490         else => null,
   8491         .rem, .mod => unreachable,
   8492         .max, .min => if (lhs_ty.scalarType(mod).isRuntimeFloat()) registerAlias(
   8493             if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
   8494                 try self.register_manager.getReg(.xmm0, null);
   8495                 break :mask .xmm0;
   8496             } else try self.register_manager.allocReg(null, abi.RegisterClass.sse),
   8497             abi_size,
   8498         ) else null,
   8499     };
   8500     const mask_lock =
   8501         if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
   8502     defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
   8503 
   8504     const ordered_air: [2]Air.Inst.Ref = if (lhs_ty.isVector(mod) and
   8505         switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   8506         .Int => switch (air_tag) {
   8507             .cmp_lt, .cmp_gte => true,
   8508             else => false,
   8509         },
   8510         .Float => switch (air_tag) {
   8511             .cmp_gte, .cmp_gt => true,
   8512             else => false,
   8513         },
   8514         else => unreachable,
   8515     }) .{ rhs_air, lhs_air } else .{ lhs_air, rhs_air };
   8516 
   8517     if (lhs_ty.isAbiInt(mod)) for (ordered_air) |op_air| {
   8518         switch (try self.resolveInst(op_air)) {
   8519             .register => |op_reg| switch (op_reg.class()) {
   8520                 .sse => try self.register_manager.getReg(op_reg, null),
   8521                 else => {},
   8522             },
   8523             else => {},
   8524         }
   8525     };
   8526 
   8527     const lhs_mcv = try self.resolveInst(ordered_air[0]);
   8528     var rhs_mcv = try self.resolveInst(ordered_air[1]);
   8529     switch (lhs_mcv) {
   8530         .immediate => |imm| switch (imm) {
   8531             0 => switch (air_tag) {
   8532                 .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air[1]),
   8533                 else => {},
   8534             },
   8535             else => {},
   8536         },
   8537         else => {},
   8538     }
   8539 
   8540     const is_commutative = switch (air_tag) {
   8541         .add,
   8542         .add_wrap,
   8543         .mul,
   8544         .bool_or,
   8545         .bit_or,
   8546         .bool_and,
   8547         .bit_and,
   8548         .xor,
   8549         .min,
   8550         .max,
   8551         .cmp_eq,
   8552         .cmp_neq,
   8553         => true,
   8554 
   8555         else => false,
   8556     };
   8557 
   8558     const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
   8559         .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
   8560         .register_pair => |lhs_regs| locks: {
   8561             const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
   8562             break :locks .{ locks[0], locks[1] };
   8563         },
   8564         else => .{ null, null },
   8565     };
   8566     defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
   8567 
   8568     const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
   8569         .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
   8570         .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
   8571         else => .{ null, null },
   8572     };
   8573     defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
   8574 
   8575     var flipped = false;
   8576     var copied_to_dst = true;
   8577     const dst_mcv: MCValue = dst: {
   8578         const tracked_inst = switch (air_tag) {
   8579             else => maybe_inst,
   8580             .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null,
   8581         };
   8582         if (maybe_inst) |inst| {
   8583             if ((!sse_op or lhs_mcv.isRegister()) and
   8584                 self.reuseOperandAdvanced(inst, ordered_air[0], 0, lhs_mcv, tracked_inst))
   8585                 break :dst lhs_mcv;
   8586             if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and
   8587                 self.reuseOperandAdvanced(inst, ordered_air[1], 1, rhs_mcv, tracked_inst))
   8588             {
   8589                 flipped = true;
   8590                 break :dst rhs_mcv;
   8591             }
   8592         }
   8593         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true);
   8594         if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
   8595             copied_to_dst = false
   8596         else
   8597             try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
   8598         rhs_mcv = try self.resolveInst(ordered_air[1]);
   8599         break :dst dst_mcv;
   8600     };
   8601     const dst_locks: [2]?RegisterLock = switch (dst_mcv) {
   8602         .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
   8603         .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs),
   8604         else => .{ null, null },
   8605     };
   8606     defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
   8607 
   8608     const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
   8609     const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
   8610         if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
   8611             self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
   8612             try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv);
   8613             break :src .{ .register = mask_reg };
   8614         }
   8615     else
   8616         unmat_src_mcv;
   8617     const src_locks: [2]?RegisterLock = switch (src_mcv) {
   8618         .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null },
   8619         .register_pair => |src_regs| self.register_manager.lockRegs(2, src_regs),
   8620         else => .{ null, null },
   8621     };
   8622     defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
   8623 
   8624     if (!sse_op) {
   8625         switch (air_tag) {
   8626             .add,
   8627             .add_wrap,
   8628             => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv),
   8629 
   8630             .sub,
   8631             .sub_wrap,
   8632             => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv),
   8633 
   8634             .ptr_add,
   8635             .ptr_sub,
   8636             => {
   8637                 const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
   8638                 const tmp_mcv = MCValue{ .register = tmp_reg };
   8639                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   8640                 defer self.register_manager.unlockReg(tmp_lock);
   8641 
   8642                 const elem_size = lhs_ty.elemType2(mod).abiSize(mod);
   8643                 try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
   8644                 try self.genBinOpMir(
   8645                     switch (air_tag) {
   8646                         .ptr_add => .{ ._, .add },
   8647                         .ptr_sub => .{ ._, .sub },
   8648                         else => unreachable,
   8649                     },
   8650                     lhs_ty,
   8651                     dst_mcv,
   8652                     tmp_mcv,
   8653                 );
   8654             },
   8655 
   8656             .bool_or,
   8657             .bit_or,
   8658             => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv),
   8659 
   8660             .bool_and,
   8661             .bit_and,
   8662             => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv),
   8663 
   8664             .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv),
   8665 
   8666             .min,
   8667             .max,
   8668             => {
   8669                 const resolved_src_mcv = switch (src_mcv) {
   8670                     else => src_mcv,
   8671                     .air_ref => |src_ref| try self.resolveInst(src_ref),
   8672                 };
   8673 
   8674                 if (abi_size > 8) {
   8675                     const dst_regs = switch (dst_mcv) {
   8676                         .register_pair => |dst_regs| dst_regs,
   8677                         else => dst: {
   8678                             const dst_regs = try self.register_manager.allocRegs(
   8679                                 2,
   8680                                 .{ null, null },
   8681                                 abi.RegisterClass.gp,
   8682                             );
   8683                             const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs);
   8684                             defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
   8685                                 self.register_manager.unlockReg(lock);
   8686 
   8687                             try self.genCopy(lhs_ty, .{ .register_pair = dst_regs }, dst_mcv);
   8688                             break :dst dst_regs;
   8689                         },
   8690                     };
   8691                     const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs);
   8692                     defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
   8693                         self.register_manager.unlockReg(lock);
   8694 
   8695                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
   8696                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   8697                     defer self.register_manager.unlockReg(tmp_lock);
   8698 
   8699                     const signed = lhs_ty.isSignedInt(mod);
   8700                     const cc: Condition = switch (air_tag) {
   8701                         .min => if (signed) .nl else .nb,
   8702                         .max => if (signed) .nge else .nae,
   8703                         else => unreachable,
   8704                     };
   8705 
   8706                     try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
   8707                     if (src_mcv.isMemory()) {
   8708                         try self.asmRegisterMemory(
   8709                             .{ ._, .cmp },
   8710                             dst_regs[0],
   8711                             try src_mcv.mem(self, .qword),
   8712                         );
   8713                         try self.asmRegisterMemory(
   8714                             .{ ._, .sbb },
   8715                             tmp_reg,
   8716                             try src_mcv.address().offset(8).deref().mem(self, .qword),
   8717                         );
   8718                         try self.asmCmovccRegisterMemory(
   8719                             cc,
   8720                             dst_regs[0],
   8721                             try src_mcv.mem(self, .qword),
   8722                         );
   8723                         try self.asmCmovccRegisterMemory(
   8724                             cc,
   8725                             dst_regs[1],
   8726                             try src_mcv.address().offset(8).deref().mem(self, .qword),
   8727                         );
   8728                     } else {
   8729                         try self.asmRegisterRegister(
   8730                             .{ ._, .cmp },
   8731                             dst_regs[0],
   8732                             src_mcv.register_pair[0],
   8733                         );
   8734                         try self.asmRegisterRegister(
   8735                             .{ ._, .sbb },
   8736                             tmp_reg,
   8737                             src_mcv.register_pair[1],
   8738                         );
   8739                         try self.asmCmovccRegisterRegister(cc, dst_regs[0], src_mcv.register_pair[0]);
   8740                         try self.asmCmovccRegisterRegister(cc, dst_regs[1], src_mcv.register_pair[1]);
   8741                     }
   8742                     try self.genCopy(lhs_ty, dst_mcv, .{ .register_pair = dst_regs });
   8743                 } else {
   8744                     const mat_src_mcv: MCValue = if (switch (resolved_src_mcv) {
   8745                         .immediate,
   8746                         .eflags,
   8747                         .register_offset,
   8748                         .load_symbol,
   8749                         .lea_symbol,
   8750                         .load_direct,
   8751                         .lea_direct,
   8752                         .load_got,
   8753                         .lea_got,
   8754                         .load_tlv,
   8755                         .lea_tlv,
   8756                         .lea_frame,
   8757                         => true,
   8758                         .memory => |addr| math.cast(i32, @as(i64, @bitCast(addr))) == null,
   8759                         else => false,
   8760                         .register_pair,
   8761                         .register_overflow,
   8762                         => unreachable,
   8763                     })
   8764                         .{ .register = try self.copyToTmpRegister(rhs_ty, resolved_src_mcv) }
   8765                     else
   8766                         resolved_src_mcv;
   8767                     const mat_mcv_lock = switch (mat_src_mcv) {
   8768                         .register => |reg| self.register_manager.lockReg(reg),
   8769                         else => null,
   8770                     };
   8771                     defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock);
   8772 
   8773                     try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv);
   8774 
   8775                     const int_info = lhs_ty.intInfo(mod);
   8776                     const cc: Condition = switch (int_info.signedness) {
   8777                         .unsigned => switch (air_tag) {
   8778                             .min => .a,
   8779                             .max => .b,
   8780                             else => unreachable,
   8781                         },
   8782                         .signed => switch (air_tag) {
   8783                             .min => .g,
   8784                             .max => .l,
   8785                             else => unreachable,
   8786                         },
   8787                     };
   8788 
   8789                     const cmov_abi_size = @max(@as(u32, @intCast(lhs_ty.abiSize(mod))), 2);
   8790                     const tmp_reg = switch (dst_mcv) {
   8791                         .register => |reg| reg,
   8792                         else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
   8793                     };
   8794                     const tmp_lock = self.register_manager.lockReg(tmp_reg);
   8795                     defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
   8796                     switch (mat_src_mcv) {
   8797                         .none,
   8798                         .unreach,
   8799                         .dead,
   8800                         .undef,
   8801                         .immediate,
   8802                         .eflags,
   8803                         .register_pair,
   8804                         .register_offset,
   8805                         .register_overflow,
   8806                         .load_symbol,
   8807                         .lea_symbol,
   8808                         .load_direct,
   8809                         .lea_direct,
   8810                         .load_got,
   8811                         .lea_got,
   8812                         .load_tlv,
   8813                         .lea_tlv,
   8814                         .lea_frame,
   8815                         .reserved_frame,
   8816                         .air_ref,
   8817                         => unreachable,
   8818                         .register => |src_reg| try self.asmCmovccRegisterRegister(
   8819                             cc,
   8820                             registerAlias(tmp_reg, cmov_abi_size),
   8821                             registerAlias(src_reg, cmov_abi_size),
   8822                         ),
   8823                         .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
   8824                             cc,
   8825                             registerAlias(tmp_reg, cmov_abi_size),
   8826                             switch (mat_src_mcv) {
   8827                                 .memory => |addr| .{
   8828                                     .base = .{ .reg = .ds },
   8829                                     .mod = .{ .rm = .{
   8830                                         .size = Memory.Size.fromSize(cmov_abi_size),
   8831                                         .disp = @intCast(@as(i64, @bitCast(addr))),
   8832                                     } },
   8833                                 },
   8834                                 .indirect => |reg_off| .{
   8835                                     .base = .{ .reg = reg_off.reg },
   8836                                     .mod = .{ .rm = .{
   8837                                         .size = Memory.Size.fromSize(cmov_abi_size),
   8838                                         .disp = reg_off.off,
   8839                                     } },
   8840                                 },
   8841                                 .load_frame => |frame_addr| .{
   8842                                     .base = .{ .frame = frame_addr.index },
   8843                                     .mod = .{ .rm = .{
   8844                                         .size = Memory.Size.fromSize(cmov_abi_size),
   8845                                         .disp = frame_addr.off,
   8846                                     } },
   8847                                 },
   8848                                 else => unreachable,
   8849                             },
   8850                         ),
   8851                     }
   8852                     try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg });
   8853                 }
   8854             },
   8855 
   8856             else => return self.fail("TODO implement genBinOp for {s} {}", .{
   8857                 @tagName(air_tag), lhs_ty.fmt(mod),
   8858             }),
   8859         }
   8860         return dst_mcv;
   8861     }
   8862 
   8863     const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
   8864     const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   8865         else => unreachable,
   8866         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   8867             16 => {
   8868                 assert(self.hasFeature(.f16c));
   8869                 const tmp_reg =
   8870                     (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
   8871                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   8872                 defer self.register_manager.unlockReg(tmp_lock);
   8873 
   8874                 if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   8875                     .{ .vp_w, .insr },
   8876                     dst_reg,
   8877                     dst_reg,
   8878                     try src_mcv.mem(self, .word),
   8879                     Immediate.u(1),
   8880                 ) else try self.asmRegisterRegisterRegister(
   8881                     .{ .vp_, .unpcklwd },
   8882                     dst_reg,
   8883                     dst_reg,
   8884                     (if (src_mcv.isRegister())
   8885                         src_mcv.getReg().?
   8886                     else
   8887                         try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   8888                 );
   8889                 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   8890                 try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
   8891                 try self.asmRegisterRegisterRegister(
   8892                     switch (air_tag) {
   8893                         .add => .{ .v_ss, .add },
   8894                         .sub => .{ .v_ss, .sub },
   8895                         .mul => .{ .v_ss, .mul },
   8896                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
   8897                         .max => .{ .v_ss, .max },
   8898                         .min => .{ .v_ss, .max },
   8899                         else => unreachable,
   8900                     },
   8901                     dst_reg,
   8902                     dst_reg,
   8903                     tmp_reg,
   8904                 );
   8905                 switch (air_tag) {
   8906                     .div_trunc, .div_floor => try self.asmRegisterRegisterRegisterImmediate(
   8907                         .{ .v_ss, .round },
   8908                         dst_reg,
   8909                         dst_reg,
   8910                         dst_reg,
   8911                         Immediate.u(@as(u5, @bitCast(RoundMode{
   8912                             .mode = switch (air_tag) {
   8913                                 .div_trunc => .zero,
   8914                                 .div_floor => .down,
   8915                                 else => unreachable,
   8916                             },
   8917                             .precision = .inexact,
   8918                         }))),
   8919                     ),
   8920                     else => {},
   8921                 }
   8922                 try self.asmRegisterRegisterImmediate(
   8923                     .{ .v_, .cvtps2ph },
   8924                     dst_reg,
   8925                     dst_reg,
   8926                     Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   8927                 );
   8928                 return dst_mcv;
   8929             },
   8930             32 => switch (air_tag) {
   8931                 .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
   8932                 .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
   8933                 .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
   8934                 .div_float,
   8935                 .div_trunc,
   8936                 .div_floor,
   8937                 .div_exact,
   8938                 => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
   8939                 .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
   8940                 .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
   8941                 else => unreachable,
   8942             },
   8943             64 => switch (air_tag) {
   8944                 .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
   8945                 .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
   8946                 .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
   8947                 .div_float,
   8948                 .div_trunc,
   8949                 .div_floor,
   8950                 .div_exact,
   8951                 => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
   8952                 .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
   8953                 .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
   8954                 else => unreachable,
   8955             },
   8956             80, 128 => null,
   8957             else => unreachable,
   8958         },
   8959         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   8960             else => null,
   8961             .Int => switch (lhs_ty.childType(mod).intInfo(mod).bits) {
   8962                 8 => switch (lhs_ty.vectorLen(mod)) {
   8963                     1...16 => switch (air_tag) {
   8964                         .add,
   8965                         .add_wrap,
   8966                         => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add },
   8967                         .sub,
   8968                         .sub_wrap,
   8969                         => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
   8970                         .bit_and => if (self.hasFeature(.avx))
   8971                             .{ .vp_, .@"and" }
   8972                         else
   8973                             .{ .p_, .@"and" },
   8974                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   8975                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   8976                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   8977                             .signed => if (self.hasFeature(.avx))
   8978                                 .{ .vp_b, .mins }
   8979                             else if (self.hasFeature(.sse4_1))
   8980                                 .{ .p_b, .mins }
   8981                             else
   8982                                 null,
   8983                             .unsigned => if (self.hasFeature(.avx))
   8984                                 .{ .vp_b, .minu }
   8985                             else if (self.hasFeature(.sse4_1))
   8986                                 .{ .p_b, .minu }
   8987                             else
   8988                                 null,
   8989                         },
   8990                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   8991                             .signed => if (self.hasFeature(.avx))
   8992                                 .{ .vp_b, .maxs }
   8993                             else if (self.hasFeature(.sse4_1))
   8994                                 .{ .p_b, .maxs }
   8995                             else
   8996                                 null,
   8997                             .unsigned => if (self.hasFeature(.avx))
   8998                                 .{ .vp_b, .maxu }
   8999                             else if (self.hasFeature(.sse4_1))
   9000                                 .{ .p_b, .maxu }
   9001                             else
   9002                                 null,
   9003                         },
   9004                         .cmp_lt,
   9005                         .cmp_lte,
   9006                         .cmp_gte,
   9007                         .cmp_gt,
   9008                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9009                             .signed => if (self.hasFeature(.avx))
   9010                                 .{ .vp_b, .cmpgt }
   9011                             else
   9012                                 .{ .p_b, .cmpgt },
   9013                             .unsigned => null,
   9014                         },
   9015                         .cmp_eq,
   9016                         .cmp_neq,
   9017                         => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq },
   9018                         else => null,
   9019                     },
   9020                     17...32 => switch (air_tag) {
   9021                         .add,
   9022                         .add_wrap,
   9023                         => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
   9024                         .sub,
   9025                         .sub_wrap,
   9026                         => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
   9027                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   9028                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   9029                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   9030                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9031                             .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
   9032                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
   9033                         },
   9034                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9035                             .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
   9036                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
   9037                         },
   9038                         .cmp_lt,
   9039                         .cmp_lte,
   9040                         .cmp_gte,
   9041                         .cmp_gt,
   9042                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9043                             .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null,
   9044                             .unsigned => null,
   9045                         },
   9046                         .cmp_eq,
   9047                         .cmp_neq,
   9048                         => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null,
   9049                         else => null,
   9050                     },
   9051                     else => null,
   9052                 },
   9053                 16 => switch (lhs_ty.vectorLen(mod)) {
   9054                     1...8 => switch (air_tag) {
   9055                         .add,
   9056                         .add_wrap,
   9057                         => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add },
   9058                         .sub,
   9059                         .sub_wrap,
   9060                         => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
   9061                         .mul,
   9062                         .mul_wrap,
   9063                         => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull },
   9064                         .bit_and => if (self.hasFeature(.avx))
   9065                             .{ .vp_, .@"and" }
   9066                         else
   9067                             .{ .p_, .@"and" },
   9068                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   9069                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   9070                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9071                             .signed => if (self.hasFeature(.avx))
   9072                                 .{ .vp_w, .mins }
   9073                             else
   9074                                 .{ .p_w, .mins },
   9075                             .unsigned => if (self.hasFeature(.avx))
   9076                                 .{ .vp_w, .minu }
   9077                             else
   9078                                 .{ .p_w, .minu },
   9079                         },
   9080                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9081                             .signed => if (self.hasFeature(.avx))
   9082                                 .{ .vp_w, .maxs }
   9083                             else
   9084                                 .{ .p_w, .maxs },
   9085                             .unsigned => if (self.hasFeature(.avx))
   9086                                 .{ .vp_w, .maxu }
   9087                             else
   9088                                 .{ .p_w, .maxu },
   9089                         },
   9090                         .cmp_lt,
   9091                         .cmp_lte,
   9092                         .cmp_gte,
   9093                         .cmp_gt,
   9094                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9095                             .signed => if (self.hasFeature(.avx))
   9096                                 .{ .vp_w, .cmpgt }
   9097                             else
   9098                                 .{ .p_w, .cmpgt },
   9099                             .unsigned => null,
   9100                         },
   9101                         .cmp_eq,
   9102                         .cmp_neq,
   9103                         => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq },
   9104                         else => null,
   9105                     },
   9106                     9...16 => switch (air_tag) {
   9107                         .add,
   9108                         .add_wrap,
   9109                         => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
   9110                         .sub,
   9111                         .sub_wrap,
   9112                         => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
   9113                         .mul,
   9114                         .mul_wrap,
   9115                         => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
   9116                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   9117                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   9118                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   9119                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9120                             .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
   9121                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
   9122                         },
   9123                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9124                             .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
   9125                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
   9126                         },
   9127                         .cmp_lt,
   9128                         .cmp_lte,
   9129                         .cmp_gte,
   9130                         .cmp_gt,
   9131                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9132                             .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null,
   9133                             .unsigned => null,
   9134                         },
   9135                         .cmp_eq,
   9136                         .cmp_neq,
   9137                         => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null,
   9138                         else => null,
   9139                     },
   9140                     else => null,
   9141                 },
   9142                 32 => switch (lhs_ty.vectorLen(mod)) {
   9143                     1...4 => switch (air_tag) {
   9144                         .add,
   9145                         .add_wrap,
   9146                         => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add },
   9147                         .sub,
   9148                         .sub_wrap,
   9149                         => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
   9150                         .mul,
   9151                         .mul_wrap,
   9152                         => if (self.hasFeature(.avx))
   9153                             .{ .vp_d, .mull }
   9154                         else if (self.hasFeature(.sse4_1))
   9155                             .{ .p_d, .mull }
   9156                         else
   9157                             null,
   9158                         .bit_and => if (self.hasFeature(.avx))
   9159                             .{ .vp_, .@"and" }
   9160                         else
   9161                             .{ .p_, .@"and" },
   9162                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   9163                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   9164                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9165                             .signed => if (self.hasFeature(.avx))
   9166                                 .{ .vp_d, .mins }
   9167                             else if (self.hasFeature(.sse4_1))
   9168                                 .{ .p_d, .mins }
   9169                             else
   9170                                 null,
   9171                             .unsigned => if (self.hasFeature(.avx))
   9172                                 .{ .vp_d, .minu }
   9173                             else if (self.hasFeature(.sse4_1))
   9174                                 .{ .p_d, .minu }
   9175                             else
   9176                                 null,
   9177                         },
   9178                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9179                             .signed => if (self.hasFeature(.avx))
   9180                                 .{ .vp_d, .maxs }
   9181                             else if (self.hasFeature(.sse4_1))
   9182                                 .{ .p_d, .maxs }
   9183                             else
   9184                                 null,
   9185                             .unsigned => if (self.hasFeature(.avx))
   9186                                 .{ .vp_d, .maxu }
   9187                             else if (self.hasFeature(.sse4_1))
   9188                                 .{ .p_d, .maxu }
   9189                             else
   9190                                 null,
   9191                         },
   9192                         .cmp_lt,
   9193                         .cmp_lte,
   9194                         .cmp_gte,
   9195                         .cmp_gt,
   9196                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9197                             .signed => if (self.hasFeature(.avx))
   9198                                 .{ .vp_d, .cmpgt }
   9199                             else
   9200                                 .{ .p_d, .cmpgt },
   9201                             .unsigned => null,
   9202                         },
   9203                         .cmp_eq,
   9204                         .cmp_neq,
   9205                         => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq },
   9206                         else => null,
   9207                     },
   9208                     5...8 => switch (air_tag) {
   9209                         .add,
   9210                         .add_wrap,
   9211                         => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
   9212                         .sub,
   9213                         .sub_wrap,
   9214                         => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
   9215                         .mul,
   9216                         .mul_wrap,
   9217                         => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
   9218                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   9219                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   9220                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   9221                         .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9222                             .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
   9223                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
   9224                         },
   9225                         .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9226                             .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
   9227                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
   9228                         },
   9229                         .cmp_lt,
   9230                         .cmp_lte,
   9231                         .cmp_gte,
   9232                         .cmp_gt,
   9233                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9234                             .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
   9235                             .unsigned => null,
   9236                         },
   9237                         .cmp_eq,
   9238                         .cmp_neq,
   9239                         => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
   9240                         else => null,
   9241                     },
   9242                     else => null,
   9243                 },
   9244                 64 => switch (lhs_ty.vectorLen(mod)) {
   9245                     1...2 => switch (air_tag) {
   9246                         .add,
   9247                         .add_wrap,
   9248                         => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add },
   9249                         .sub,
   9250                         .sub_wrap,
   9251                         => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
   9252                         .bit_and => if (self.hasFeature(.avx))
   9253                             .{ .vp_, .@"and" }
   9254                         else
   9255                             .{ .p_, .@"and" },
   9256                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
   9257                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
   9258                         .cmp_lt,
   9259                         .cmp_lte,
   9260                         .cmp_gte,
   9261                         .cmp_gt,
   9262                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9263                             .signed => if (self.hasFeature(.avx))
   9264                                 .{ .vp_q, .cmpgt }
   9265                             else if (self.hasFeature(.sse4_2))
   9266                                 .{ .p_q, .cmpgt }
   9267                             else
   9268                                 null,
   9269                             .unsigned => null,
   9270                         },
   9271                         .cmp_eq,
   9272                         .cmp_neq,
   9273                         => if (self.hasFeature(.avx))
   9274                             .{ .vp_q, .cmpeq }
   9275                         else if (self.hasFeature(.sse4_1))
   9276                             .{ .p_q, .cmpeq }
   9277                         else
   9278                             null,
   9279                         else => null,
   9280                     },
   9281                     3...4 => switch (air_tag) {
   9282                         .add,
   9283                         .add_wrap,
   9284                         => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
   9285                         .sub,
   9286                         .sub_wrap,
   9287                         => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
   9288                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
   9289                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
   9290                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
   9291                         .cmp_eq,
   9292                         .cmp_neq,
   9293                         => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
   9294                         .cmp_lt,
   9295                         .cmp_lte,
   9296                         .cmp_gt,
   9297                         .cmp_gte,
   9298                         => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
   9299                             .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
   9300                             .unsigned => null,
   9301                         },
   9302                         else => null,
   9303                     },
   9304                     else => null,
   9305                 },
   9306                 else => null,
   9307             },
   9308             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9309                 16 => tag: {
   9310                     assert(self.hasFeature(.f16c));
   9311                     switch (lhs_ty.vectorLen(mod)) {
   9312                         1 => {
   9313                             const tmp_reg = (try self.register_manager.allocReg(
   9314                                 null,
   9315                                 abi.RegisterClass.sse,
   9316                             )).to128();
   9317                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   9318                             defer self.register_manager.unlockReg(tmp_lock);
   9319 
   9320                             if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   9321                                 .{ .vp_w, .insr },
   9322                                 dst_reg,
   9323                                 dst_reg,
   9324                                 try src_mcv.mem(self, .word),
   9325                                 Immediate.u(1),
   9326                             ) else try self.asmRegisterRegisterRegister(
   9327                                 .{ .vp_, .unpcklwd },
   9328                                 dst_reg,
   9329                                 dst_reg,
   9330                                 (if (src_mcv.isRegister())
   9331                                     src_mcv.getReg().?
   9332                                 else
   9333                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   9334                             );
   9335                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   9336                             try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
   9337                             try self.asmRegisterRegisterRegister(
   9338                                 switch (air_tag) {
   9339                                     .add => .{ .v_ss, .add },
   9340                                     .sub => .{ .v_ss, .sub },
   9341                                     .mul => .{ .v_ss, .mul },
   9342                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
   9343                                     .max => .{ .v_ss, .max },
   9344                                     .min => .{ .v_ss, .max },
   9345                                     else => unreachable,
   9346                                 },
   9347                                 dst_reg,
   9348                                 dst_reg,
   9349                                 tmp_reg,
   9350                             );
   9351                             try self.asmRegisterRegisterImmediate(
   9352                                 .{ .v_, .cvtps2ph },
   9353                                 dst_reg,
   9354                                 dst_reg,
   9355                                 Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   9356                             );
   9357                             return dst_mcv;
   9358                         },
   9359                         2 => {
   9360                             const tmp_reg = (try self.register_manager.allocReg(
   9361                                 null,
   9362                                 abi.RegisterClass.sse,
   9363                             )).to128();
   9364                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   9365                             defer self.register_manager.unlockReg(tmp_lock);
   9366 
   9367                             if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   9368                                 .{ .vp_d, .insr },
   9369                                 dst_reg,
   9370                                 try src_mcv.mem(self, .dword),
   9371                                 Immediate.u(1),
   9372                             ) else try self.asmRegisterRegisterRegister(
   9373                                 .{ .v_ps, .unpckl },
   9374                                 dst_reg,
   9375                                 dst_reg,
   9376                                 (if (src_mcv.isRegister())
   9377                                     src_mcv.getReg().?
   9378                                 else
   9379                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   9380                             );
   9381                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   9382                             try self.asmRegisterRegisterRegister(
   9383                                 .{ .v_ps, .movhl },
   9384                                 tmp_reg,
   9385                                 dst_reg,
   9386                                 dst_reg,
   9387                             );
   9388                             try self.asmRegisterRegisterRegister(
   9389                                 switch (air_tag) {
   9390                                     .add => .{ .v_ps, .add },
   9391                                     .sub => .{ .v_ps, .sub },
   9392                                     .mul => .{ .v_ps, .mul },
   9393                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   9394                                     .max => .{ .v_ps, .max },
   9395                                     .min => .{ .v_ps, .max },
   9396                                     else => unreachable,
   9397                                 },
   9398                                 dst_reg,
   9399                                 dst_reg,
   9400                                 tmp_reg,
   9401                             );
   9402                             try self.asmRegisterRegisterImmediate(
   9403                                 .{ .v_, .cvtps2ph },
   9404                                 dst_reg,
   9405                                 dst_reg,
   9406                                 Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   9407                             );
   9408                             return dst_mcv;
   9409                         },
   9410                         3...4 => {
   9411                             const tmp_reg = (try self.register_manager.allocReg(
   9412                                 null,
   9413                                 abi.RegisterClass.sse,
   9414                             )).to128();
   9415                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   9416                             defer self.register_manager.unlockReg(tmp_lock);
   9417 
   9418                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
   9419                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   9420                                 .{ .v_ps, .cvtph2 },
   9421                                 tmp_reg,
   9422                                 try src_mcv.mem(self, .qword),
   9423                             ) else try self.asmRegisterRegister(
   9424                                 .{ .v_ps, .cvtph2 },
   9425                                 tmp_reg,
   9426                                 (if (src_mcv.isRegister())
   9427                                     src_mcv.getReg().?
   9428                                 else
   9429                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   9430                             );
   9431                             try self.asmRegisterRegisterRegister(
   9432                                 switch (air_tag) {
   9433                                     .add => .{ .v_ps, .add },
   9434                                     .sub => .{ .v_ps, .sub },
   9435                                     .mul => .{ .v_ps, .mul },
   9436                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   9437                                     .max => .{ .v_ps, .max },
   9438                                     .min => .{ .v_ps, .max },
   9439                                     else => unreachable,
   9440                                 },
   9441                                 dst_reg,
   9442                                 dst_reg,
   9443                                 tmp_reg,
   9444                             );
   9445                             try self.asmRegisterRegisterImmediate(
   9446                                 .{ .v_, .cvtps2ph },
   9447                                 dst_reg,
   9448                                 dst_reg,
   9449                                 Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   9450                             );
   9451                             return dst_mcv;
   9452                         },
   9453                         5...8 => {
   9454                             const tmp_reg = (try self.register_manager.allocReg(
   9455                                 null,
   9456                                 abi.RegisterClass.sse,
   9457                             )).to256();
   9458                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
   9459                             defer self.register_manager.unlockReg(tmp_lock);
   9460 
   9461                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
   9462                             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   9463                                 .{ .v_ps, .cvtph2 },
   9464                                 tmp_reg,
   9465                                 try src_mcv.mem(self, .xword),
   9466                             ) else try self.asmRegisterRegister(
   9467                                 .{ .v_ps, .cvtph2 },
   9468                                 tmp_reg,
   9469                                 (if (src_mcv.isRegister())
   9470                                     src_mcv.getReg().?
   9471                                 else
   9472                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
   9473                             );
   9474                             try self.asmRegisterRegisterRegister(
   9475                                 switch (air_tag) {
   9476                                     .add => .{ .v_ps, .add },
   9477                                     .sub => .{ .v_ps, .sub },
   9478                                     .mul => .{ .v_ps, .mul },
   9479                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   9480                                     .max => .{ .v_ps, .max },
   9481                                     .min => .{ .v_ps, .max },
   9482                                     else => unreachable,
   9483                                 },
   9484                                 dst_reg.to256(),
   9485                                 dst_reg.to256(),
   9486                                 tmp_reg,
   9487                             );
   9488                             try self.asmRegisterRegisterImmediate(
   9489                                 .{ .v_, .cvtps2ph },
   9490                                 dst_reg,
   9491                                 dst_reg.to256(),
   9492                                 Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
   9493                             );
   9494                             return dst_mcv;
   9495                         },
   9496                         else => break :tag null,
   9497                     }
   9498                 },
   9499                 32 => switch (lhs_ty.vectorLen(mod)) {
   9500                     1 => switch (air_tag) {
   9501                         .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
   9502                         .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
   9503                         .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
   9504                         .div_float,
   9505                         .div_trunc,
   9506                         .div_floor,
   9507                         .div_exact,
   9508                         => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
   9509                         .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
   9510                         .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
   9511                         .cmp_lt,
   9512                         .cmp_lte,
   9513                         .cmp_eq,
   9514                         .cmp_gte,
   9515                         .cmp_gt,
   9516                         .cmp_neq,
   9517                         => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp },
   9518                         else => unreachable,
   9519                     },
   9520                     2...4 => switch (air_tag) {
   9521                         .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
   9522                         .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
   9523                         .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
   9524                         .div_float,
   9525                         .div_trunc,
   9526                         .div_floor,
   9527                         .div_exact,
   9528                         => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
   9529                         .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
   9530                         .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
   9531                         .cmp_lt,
   9532                         .cmp_lte,
   9533                         .cmp_eq,
   9534                         .cmp_gte,
   9535                         .cmp_gt,
   9536                         .cmp_neq,
   9537                         => if (self.hasFeature(.avx)) .{ .v_ps, .cmp } else .{ ._ps, .cmp },
   9538                         else => unreachable,
   9539                     },
   9540                     5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
   9541                         .add => .{ .v_ps, .add },
   9542                         .sub => .{ .v_ps, .sub },
   9543                         .mul => .{ .v_ps, .mul },
   9544                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
   9545                         .max => .{ .v_ps, .max },
   9546                         .min => .{ .v_ps, .min },
   9547                         .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp },
   9548                         else => unreachable,
   9549                     } else null,
   9550                     else => null,
   9551                 },
   9552                 64 => switch (lhs_ty.vectorLen(mod)) {
   9553                     1 => switch (air_tag) {
   9554                         .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
   9555                         .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
   9556                         .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
   9557                         .div_float,
   9558                         .div_trunc,
   9559                         .div_floor,
   9560                         .div_exact,
   9561                         => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
   9562                         .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
   9563                         .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
   9564                         .cmp_lt,
   9565                         .cmp_lte,
   9566                         .cmp_eq,
   9567                         .cmp_gte,
   9568                         .cmp_gt,
   9569                         .cmp_neq,
   9570                         => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp },
   9571                         else => unreachable,
   9572                     },
   9573                     2 => switch (air_tag) {
   9574                         .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
   9575                         .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
   9576                         .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
   9577                         .div_float,
   9578                         .div_trunc,
   9579                         .div_floor,
   9580                         .div_exact,
   9581                         => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
   9582                         .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
   9583                         .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
   9584                         .cmp_lt,
   9585                         .cmp_lte,
   9586                         .cmp_eq,
   9587                         .cmp_gte,
   9588                         .cmp_gt,
   9589                         .cmp_neq,
   9590                         => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp },
   9591                         else => unreachable,
   9592                     },
   9593                     3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
   9594                         .add => .{ .v_pd, .add },
   9595                         .sub => .{ .v_pd, .sub },
   9596                         .mul => .{ .v_pd, .mul },
   9597                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
   9598                         .max => .{ .v_pd, .max },
   9599                         .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp },
   9600                         .min => .{ .v_pd, .min },
   9601                         else => unreachable,
   9602                     } else null,
   9603                     else => null,
   9604                 },
   9605                 80, 128 => null,
   9606                 else => unreachable,
   9607             },
   9608         },
   9609     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9610         @tagName(air_tag), lhs_ty.fmt(mod),
   9611     });
   9612 
   9613     const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
   9614         if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
   9615         abi_size,
   9616     ) else null;
   9617     const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
   9618     defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
   9619 
   9620     switch (mir_tag[1]) {
   9621         else => if (self.hasFeature(.avx)) {
   9622             const lhs_reg =
   9623                 if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
   9624             if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
   9625                 mir_tag,
   9626                 dst_reg,
   9627                 lhs_reg,
   9628                 try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
   9629                     else => Memory.Size.fromSize(abi_size),
   9630                     .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
   9631                 }),
   9632             ) else try self.asmRegisterRegisterRegister(
   9633                 mir_tag,
   9634                 dst_reg,
   9635                 lhs_reg,
   9636                 registerAlias(if (src_mcv.isRegister())
   9637                     src_mcv.getReg().?
   9638                 else
   9639                     try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   9640             );
   9641         } else {
   9642             assert(copied_to_dst);
   9643             if (src_mcv.isMemory()) try self.asmRegisterMemory(
   9644                 mir_tag,
   9645                 dst_reg,
   9646                 try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
   9647                     else => Memory.Size.fromSize(abi_size),
   9648                     .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
   9649                 }),
   9650             ) else try self.asmRegisterRegister(
   9651                 mir_tag,
   9652                 dst_reg,
   9653                 registerAlias(if (src_mcv.isRegister())
   9654                     src_mcv.getReg().?
   9655                 else
   9656                     try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   9657             );
   9658         },
   9659         .cmp => {
   9660             const imm = Immediate.u(switch (air_tag) {
   9661                 .cmp_eq => 0,
   9662                 .cmp_lt, .cmp_gt => 1,
   9663                 .cmp_lte, .cmp_gte => 2,
   9664                 .cmp_neq => 4,
   9665                 else => unreachable,
   9666             });
   9667             if (self.hasFeature(.avx)) {
   9668                 const lhs_reg =
   9669                     if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
   9670                 if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
   9671                     mir_tag,
   9672                     dst_reg,
   9673                     lhs_reg,
   9674                     try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
   9675                         else => Memory.Size.fromSize(abi_size),
   9676                         .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
   9677                     }),
   9678                     imm,
   9679                 ) else try self.asmRegisterRegisterRegisterImmediate(
   9680                     mir_tag,
   9681                     dst_reg,
   9682                     lhs_reg,
   9683                     registerAlias(if (src_mcv.isRegister())
   9684                         src_mcv.getReg().?
   9685                     else
   9686                         try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   9687                     imm,
   9688                 );
   9689             } else {
   9690                 assert(copied_to_dst);
   9691                 if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
   9692                     mir_tag,
   9693                     dst_reg,
   9694                     try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
   9695                         else => Memory.Size.fromSize(abi_size),
   9696                         .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
   9697                     }),
   9698                     imm,
   9699                 ) else try self.asmRegisterRegisterImmediate(
   9700                     mir_tag,
   9701                     dst_reg,
   9702                     registerAlias(if (src_mcv.isRegister())
   9703                         src_mcv.getReg().?
   9704                     else
   9705                         try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
   9706                     imm,
   9707                 );
   9708             }
   9709         },
   9710     }
   9711 
   9712     switch (air_tag) {
   9713         .add, .add_wrap, .sub, .sub_wrap, .mul, .mul_wrap, .div_float, .div_exact => {},
   9714         .div_trunc, .div_floor => try self.genRound(lhs_ty, dst_reg, .{ .register = dst_reg }, .{
   9715             .mode = switch (air_tag) {
   9716                 .div_trunc => .zero,
   9717                 .div_floor => .down,
   9718                 else => unreachable,
   9719             },
   9720             .precision = .inexact,
   9721         }),
   9722         .bit_and, .bit_or, .xor => {},
   9723         .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
   9724             const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
   9725 
   9726             try self.asmRegisterRegisterRegisterImmediate(
   9727                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9728                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9729                         32 => .{ .v_ss, .cmp },
   9730                         64 => .{ .v_sd, .cmp },
   9731                         16, 80, 128 => null,
   9732                         else => unreachable,
   9733                     },
   9734                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9735                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9736                             32 => switch (lhs_ty.vectorLen(mod)) {
   9737                                 1 => .{ .v_ss, .cmp },
   9738                                 2...8 => .{ .v_ps, .cmp },
   9739                                 else => null,
   9740                             },
   9741                             64 => switch (lhs_ty.vectorLen(mod)) {
   9742                                 1 => .{ .v_sd, .cmp },
   9743                                 2...4 => .{ .v_pd, .cmp },
   9744                                 else => null,
   9745                             },
   9746                             16, 80, 128 => null,
   9747                             else => unreachable,
   9748                         },
   9749                         else => unreachable,
   9750                     },
   9751                     else => unreachable,
   9752                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9753                     @tagName(air_tag), lhs_ty.fmt(mod),
   9754                 }),
   9755                 mask_reg,
   9756                 rhs_copy_reg,
   9757                 rhs_copy_reg,
   9758                 Immediate.u(3), // unord
   9759             );
   9760             try self.asmRegisterRegisterRegisterRegister(
   9761                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9762                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9763                         32 => .{ .v_ps, .blendv },
   9764                         64 => .{ .v_pd, .blendv },
   9765                         16, 80, 128 => null,
   9766                         else => unreachable,
   9767                     },
   9768                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9769                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9770                             32 => switch (lhs_ty.vectorLen(mod)) {
   9771                                 1...8 => .{ .v_ps, .blendv },
   9772                                 else => null,
   9773                             },
   9774                             64 => switch (lhs_ty.vectorLen(mod)) {
   9775                                 1...4 => .{ .v_pd, .blendv },
   9776                                 else => null,
   9777                             },
   9778                             16, 80, 128 => null,
   9779                             else => unreachable,
   9780                         },
   9781                         else => unreachable,
   9782                     },
   9783                     else => unreachable,
   9784                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9785                     @tagName(air_tag), lhs_ty.fmt(mod),
   9786                 }),
   9787                 dst_reg,
   9788                 dst_reg,
   9789                 lhs_copy_reg.?,
   9790                 mask_reg,
   9791             );
   9792         } else {
   9793             const has_blend = self.hasFeature(.sse4_1);
   9794             try self.asmRegisterRegisterImmediate(
   9795                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9796                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9797                         32 => .{ ._ss, .cmp },
   9798                         64 => .{ ._sd, .cmp },
   9799                         16, 80, 128 => null,
   9800                         else => unreachable,
   9801                     },
   9802                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9803                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9804                             32 => switch (lhs_ty.vectorLen(mod)) {
   9805                                 1 => .{ ._ss, .cmp },
   9806                                 2...4 => .{ ._ps, .cmp },
   9807                                 else => null,
   9808                             },
   9809                             64 => switch (lhs_ty.vectorLen(mod)) {
   9810                                 1 => .{ ._sd, .cmp },
   9811                                 2 => .{ ._pd, .cmp },
   9812                                 else => null,
   9813                             },
   9814                             16, 80, 128 => null,
   9815                             else => unreachable,
   9816                         },
   9817                         else => unreachable,
   9818                     },
   9819                     else => unreachable,
   9820                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9821                     @tagName(air_tag), lhs_ty.fmt(mod),
   9822                 }),
   9823                 mask_reg,
   9824                 mask_reg,
   9825                 Immediate.u(if (has_blend) 3 else 7), // unord, ord
   9826             );
   9827             if (has_blend) try self.asmRegisterRegisterRegister(
   9828                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9829                     .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9830                         32 => .{ ._ps, .blendv },
   9831                         64 => .{ ._pd, .blendv },
   9832                         16, 80, 128 => null,
   9833                         else => unreachable,
   9834                     },
   9835                     .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9836                         .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9837                             32 => switch (lhs_ty.vectorLen(mod)) {
   9838                                 1...4 => .{ ._ps, .blendv },
   9839                                 else => null,
   9840                             },
   9841                             64 => switch (lhs_ty.vectorLen(mod)) {
   9842                                 1...2 => .{ ._pd, .blendv },
   9843                                 else => null,
   9844                             },
   9845                             16, 80, 128 => null,
   9846                             else => unreachable,
   9847                         },
   9848                         else => unreachable,
   9849                     },
   9850                     else => unreachable,
   9851                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9852                     @tagName(air_tag), lhs_ty.fmt(mod),
   9853                 }),
   9854                 dst_reg,
   9855                 lhs_copy_reg.?,
   9856                 mask_reg,
   9857             ) else {
   9858                 try self.asmRegisterRegister(
   9859                     @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9860                         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9861                             32 => .{ ._ps, .@"and" },
   9862                             64 => .{ ._pd, .@"and" },
   9863                             16, 80, 128 => null,
   9864                             else => unreachable,
   9865                         },
   9866                         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9867                             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9868                                 32 => switch (lhs_ty.vectorLen(mod)) {
   9869                                     1...4 => .{ ._ps, .@"and" },
   9870                                     else => null,
   9871                                 },
   9872                                 64 => switch (lhs_ty.vectorLen(mod)) {
   9873                                     1...2 => .{ ._pd, .@"and" },
   9874                                     else => null,
   9875                                 },
   9876                                 16, 80, 128 => null,
   9877                                 else => unreachable,
   9878                             },
   9879                             else => unreachable,
   9880                         },
   9881                         else => unreachable,
   9882                     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9883                         @tagName(air_tag), lhs_ty.fmt(mod),
   9884                     }),
   9885                     dst_reg,
   9886                     mask_reg,
   9887                 );
   9888                 try self.asmRegisterRegister(
   9889                     @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9890                         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9891                             32 => .{ ._ps, .andn },
   9892                             64 => .{ ._pd, .andn },
   9893                             16, 80, 128 => null,
   9894                             else => unreachable,
   9895                         },
   9896                         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9897                             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9898                                 32 => switch (lhs_ty.vectorLen(mod)) {
   9899                                     1...4 => .{ ._ps, .andn },
   9900                                     else => null,
   9901                                 },
   9902                                 64 => switch (lhs_ty.vectorLen(mod)) {
   9903                                     1...2 => .{ ._pd, .andn },
   9904                                     else => null,
   9905                                 },
   9906                                 16, 80, 128 => null,
   9907                                 else => unreachable,
   9908                             },
   9909                             else => unreachable,
   9910                         },
   9911                         else => unreachable,
   9912                     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9913                         @tagName(air_tag), lhs_ty.fmt(mod),
   9914                     }),
   9915                     mask_reg,
   9916                     lhs_copy_reg.?,
   9917                 );
   9918                 try self.asmRegisterRegister(
   9919                     @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
   9920                         .Float => switch (lhs_ty.floatBits(self.target.*)) {
   9921                             32 => .{ ._ps, .@"or" },
   9922                             64 => .{ ._pd, .@"or" },
   9923                             16, 80, 128 => null,
   9924                             else => unreachable,
   9925                         },
   9926                         .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9927                             .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
   9928                                 32 => switch (lhs_ty.vectorLen(mod)) {
   9929                                     1...4 => .{ ._ps, .@"or" },
   9930                                     else => null,
   9931                                 },
   9932                                 64 => switch (lhs_ty.vectorLen(mod)) {
   9933                                     1...2 => .{ ._pd, .@"or" },
   9934                                     else => null,
   9935                                 },
   9936                                 16, 80, 128 => null,
   9937                                 else => unreachable,
   9938                             },
   9939                             else => unreachable,
   9940                         },
   9941                         else => unreachable,
   9942                     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
   9943                         @tagName(air_tag), lhs_ty.fmt(mod),
   9944                     }),
   9945                     dst_reg,
   9946                     mask_reg,
   9947                 );
   9948             }
   9949         },
   9950         .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => {
   9951             switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
   9952                 .Int => switch (air_tag) {
   9953                     .cmp_lt,
   9954                     .cmp_eq,
   9955                     .cmp_gt,
   9956                     => {},
   9957                     .cmp_lte,
   9958                     .cmp_gte,
   9959                     .cmp_neq,
   9960                     => {
   9961                         const unsigned_ty = try lhs_ty.toUnsigned(mod);
   9962                         const not_mcv = try self.genTypedValue(.{
   9963                             .ty = lhs_ty,
   9964                             .val = try unsigned_ty.maxInt(mod, unsigned_ty),
   9965                         });
   9966                         const not_mem: Memory = if (not_mcv.isMemory())
   9967                             try not_mcv.mem(self, Memory.Size.fromSize(abi_size))
   9968                         else
   9969                             .{ .base = .{
   9970                                 .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()),
   9971                             }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } } };
   9972                         switch (mir_tag[0]) {
   9973                             .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory(
   9974                                 .{ .vp_, .xor },
   9975                                 dst_reg,
   9976                                 dst_reg,
   9977                                 not_mem,
   9978                             ),
   9979                             .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory(
   9980                                 .{ .p_, .xor },
   9981                                 dst_reg,
   9982                                 not_mem,
   9983                             ),
   9984                             else => unreachable,
   9985                         }
   9986                     },
   9987                     else => unreachable,
   9988                 },
   9989                 .Float => {},
   9990                 else => unreachable,
   9991             }
   9992 
   9993             const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp);
   9994             const gp_lock = self.register_manager.lockRegAssumeUnused(gp_reg);
   9995             defer self.register_manager.unlockReg(gp_lock);
   9996 
   9997             try self.asmRegisterRegister(switch (mir_tag[0]) {
   9998                 ._pd, ._sd, .p_q => .{ ._pd, .movmsk },
   9999                 ._ps, ._ss, .p_d => .{ ._ps, .movmsk },
  10000                 .p_b => .{ .p_b, .movmsk },
  10001                 .p_w => movmsk: {
  10002                     try self.asmRegisterRegister(.{ .p_b, .ackssw }, dst_reg, dst_reg);
  10003                     break :movmsk .{ .p_b, .movmsk };
  10004                 },
  10005                 .v_pd, .v_sd, .vp_q => .{ .v_pd, .movmsk },
  10006                 .v_ps, .v_ss, .vp_d => .{ .v_ps, .movmsk },
  10007                 .vp_b => .{ .vp_b, .movmsk },
  10008                 .vp_w => movmsk: {
  10009                     try self.asmRegisterRegisterRegister(
  10010                         .{ .vp_b, .ackssw },
  10011                         dst_reg,
  10012                         dst_reg,
  10013                         dst_reg,
  10014                     );
  10015                     break :movmsk .{ .vp_b, .movmsk };
  10016                 },
  10017                 else => unreachable,
  10018             }, gp_reg.to32(), dst_reg);
  10019             return .{ .register = gp_reg };
  10020         },
  10021         else => unreachable,
  10022     }
  10023 
  10024     return dst_mcv;
  10025 }
  10026 
  10027 fn genBinOpMir(
  10028     self: *Self,
  10029     mir_tag: Mir.Inst.FixedTag,
  10030     ty: Type,
  10031     dst_mcv: MCValue,
  10032     src_mcv: MCValue,
  10033 ) !void {
  10034     const mod = self.bin_file.comp.module.?;
  10035     const abi_size: u32 = @intCast(ty.abiSize(mod));
  10036     try self.spillEflagsIfOccupied();
  10037     switch (dst_mcv) {
  10038         .none,
  10039         .unreach,
  10040         .dead,
  10041         .undef,
  10042         .immediate,
  10043         .eflags,
  10044         .register_overflow,
  10045         .lea_direct,
  10046         .lea_got,
  10047         .lea_tlv,
  10048         .lea_frame,
  10049         .lea_symbol,
  10050         .reserved_frame,
  10051         .air_ref,
  10052         => unreachable, // unmodifiable destination
  10053         .register, .register_pair, .register_offset => {
  10054             switch (dst_mcv) {
  10055                 .register, .register_pair => {},
  10056                 .register_offset => |ro| assert(ro.off == 0),
  10057                 else => unreachable,
  10058             }
  10059             for (dst_mcv.getRegs(), 0..) |dst_reg, dst_reg_i| {
  10060                 const dst_reg_lock = self.register_manager.lockReg(dst_reg);
  10061                 defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
  10062 
  10063                 const mir_limb_tag: Mir.Inst.FixedTag = switch (dst_reg_i) {
  10064                     0 => mir_tag,
  10065                     1 => switch (mir_tag[1]) {
  10066                         .add => .{ ._, .adc },
  10067                         .sub, .cmp => .{ ._, .sbb },
  10068                         .@"or", .@"and", .xor => mir_tag,
  10069                         else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
  10070                             @tagName(mir_tag[1]),
  10071                         }),
  10072                     },
  10073                     else => unreachable,
  10074                 };
  10075                 const off: u4 = @intCast(dst_reg_i * 8);
  10076                 const limb_abi_size = @min(abi_size - off, 8);
  10077                 const dst_alias = registerAlias(dst_reg, limb_abi_size);
  10078                 switch (src_mcv) {
  10079                     .none,
  10080                     .unreach,
  10081                     .dead,
  10082                     .undef,
  10083                     .register_overflow,
  10084                     .reserved_frame,
  10085                     => unreachable,
  10086                     .register, .register_pair => try self.asmRegisterRegister(
  10087                         mir_limb_tag,
  10088                         dst_alias,
  10089                         registerAlias(src_mcv.getRegs()[dst_reg_i], limb_abi_size),
  10090                     ),
  10091                     .immediate => |imm| {
  10092                         assert(off == 0);
  10093                         switch (self.regBitSize(ty)) {
  10094                             8 => try self.asmRegisterImmediate(
  10095                                 mir_limb_tag,
  10096                                 dst_alias,
  10097                                 if (math.cast(i8, @as(i64, @bitCast(imm)))) |small|
  10098                                     Immediate.s(small)
  10099                                 else
  10100                                     Immediate.u(@as(u8, @intCast(imm))),
  10101                             ),
  10102                             16 => try self.asmRegisterImmediate(
  10103                                 mir_limb_tag,
  10104                                 dst_alias,
  10105                                 if (math.cast(i16, @as(i64, @bitCast(imm)))) |small|
  10106                                     Immediate.s(small)
  10107                                 else
  10108                                     Immediate.u(@as(u16, @intCast(imm))),
  10109                             ),
  10110                             32 => try self.asmRegisterImmediate(
  10111                                 mir_limb_tag,
  10112                                 dst_alias,
  10113                                 if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  10114                                     Immediate.s(small)
  10115                                 else
  10116                                     Immediate.u(@as(u32, @intCast(imm))),
  10117                             ),
  10118                             64 => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  10119                                 try self.asmRegisterImmediate(mir_limb_tag, dst_alias, Immediate.s(small))
  10120                             else
  10121                                 try self.asmRegisterRegister(mir_limb_tag, dst_alias, registerAlias(
  10122                                     try self.copyToTmpRegister(ty, src_mcv),
  10123                                     limb_abi_size,
  10124                                 )),
  10125                             else => unreachable,
  10126                         }
  10127                     },
  10128                     .eflags,
  10129                     .register_offset,
  10130                     .memory,
  10131                     .indirect,
  10132                     .load_symbol,
  10133                     .lea_symbol,
  10134                     .load_direct,
  10135                     .lea_direct,
  10136                     .load_got,
  10137                     .lea_got,
  10138                     .load_tlv,
  10139                     .lea_tlv,
  10140                     .load_frame,
  10141                     .lea_frame,
  10142                     => {
  10143                         direct: {
  10144                             try self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) {
  10145                                 .memory => |addr| .{
  10146                                     .base = .{ .reg = .ds },
  10147                                     .mod = .{ .rm = .{
  10148                                         .size = Memory.Size.fromSize(limb_abi_size),
  10149                                         .disp = math.cast(i32, addr + off) orelse break :direct,
  10150                                     } },
  10151                                 },
  10152                                 .indirect => |reg_off| .{
  10153                                     .base = .{ .reg = reg_off.reg },
  10154                                     .mod = .{ .rm = .{
  10155                                         .size = Memory.Size.fromSize(limb_abi_size),
  10156                                         .disp = reg_off.off + off,
  10157                                     } },
  10158                                 },
  10159                                 .load_frame => |frame_addr| .{
  10160                                     .base = .{ .frame = frame_addr.index },
  10161                                     .mod = .{ .rm = .{
  10162                                         .size = Memory.Size.fromSize(limb_abi_size),
  10163                                         .disp = frame_addr.off + off,
  10164                                     } },
  10165                                 },
  10166                                 else => break :direct,
  10167                             });
  10168                             continue;
  10169                         }
  10170 
  10171                         switch (src_mcv) {
  10172                             .eflags,
  10173                             .register_offset,
  10174                             .lea_symbol,
  10175                             .lea_direct,
  10176                             .lea_got,
  10177                             .lea_tlv,
  10178                             .lea_frame,
  10179                             => {
  10180                                 assert(off == 0);
  10181                                 const reg = try self.copyToTmpRegister(ty, src_mcv);
  10182                                 return self.genBinOpMir(
  10183                                     mir_limb_tag,
  10184                                     ty,
  10185                                     dst_mcv,
  10186                                     .{ .register = reg },
  10187                                 );
  10188                             },
  10189                             .memory,
  10190                             .load_symbol,
  10191                             .load_direct,
  10192                             .load_got,
  10193                             .load_tlv,
  10194                             => {
  10195                                 const ptr_ty = try mod.singleConstPtrType(ty);
  10196                                 const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address());
  10197                                 return self.genBinOpMir(mir_limb_tag, ty, dst_mcv, .{
  10198                                     .indirect = .{ .reg = addr_reg, .off = off },
  10199                                 });
  10200                             },
  10201                             else => unreachable,
  10202                         }
  10203                     },
  10204                     .air_ref => |src_ref| return self.genBinOpMir(
  10205                         mir_tag,
  10206                         ty,
  10207                         dst_mcv,
  10208                         try self.resolveInst(src_ref),
  10209                     ),
  10210                 }
  10211             }
  10212         },
  10213         .memory, .indirect, .load_symbol, .load_got, .load_direct, .load_tlv, .load_frame => {
  10214             const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
  10215             const limb_abi_size: u32 = @min(abi_size, 8);
  10216 
  10217             const dst_info: OpInfo = switch (dst_mcv) {
  10218                 else => unreachable,
  10219                 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: {
  10220                     const dst_addr_reg =
  10221                         (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  10222                     const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
  10223                     errdefer self.register_manager.unlockReg(dst_addr_lock);
  10224 
  10225                     try self.genSetReg(dst_addr_reg, Type.usize, dst_mcv.address());
  10226                     break :dst .{ .addr_reg = dst_addr_reg, .addr_lock = dst_addr_lock };
  10227                 },
  10228                 .load_frame => null,
  10229             };
  10230             defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
  10231 
  10232             const resolved_src_mcv = switch (src_mcv) {
  10233                 else => src_mcv,
  10234                 .air_ref => |src_ref| try self.resolveInst(src_ref),
  10235             };
  10236             const src_info: OpInfo = switch (resolved_src_mcv) {
  10237                 .none,
  10238                 .unreach,
  10239                 .dead,
  10240                 .undef,
  10241                 .register_overflow,
  10242                 .reserved_frame,
  10243                 .air_ref,
  10244                 => unreachable,
  10245                 .immediate,
  10246                 .eflags,
  10247                 .register,
  10248                 .register_pair,
  10249                 .register_offset,
  10250                 .indirect,
  10251                 .lea_direct,
  10252                 .lea_got,
  10253                 .lea_tlv,
  10254                 .load_frame,
  10255                 .lea_frame,
  10256                 .lea_symbol,
  10257                 => null,
  10258                 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: {
  10259                     switch (resolved_src_mcv) {
  10260                         .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr))) != null and
  10261                             math.cast(i32, @as(i64, @bitCast(addr)) + abi_size - limb_abi_size) != null)
  10262                             break :src null,
  10263                         .load_symbol, .load_got, .load_direct, .load_tlv => {},
  10264                         else => unreachable,
  10265                     }
  10266 
  10267                     const src_addr_reg =
  10268                         (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  10269                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
  10270                     errdefer self.register_manager.unlockReg(src_addr_lock);
  10271 
  10272                     try self.genSetReg(src_addr_reg, Type.usize, resolved_src_mcv.address());
  10273                     break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
  10274                 },
  10275             };
  10276             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
  10277 
  10278             const ty_signedness =
  10279                 if (ty.isAbiInt(mod)) ty.intInfo(mod).signedness else .unsigned;
  10280             const limb_ty = if (abi_size <= 8) ty else switch (ty_signedness) {
  10281                 .signed => Type.usize,
  10282                 .unsigned => Type.isize,
  10283             };
  10284             var limb_i: usize = 0;
  10285             var off: i32 = 0;
  10286             while (off < abi_size) : ({
  10287                 limb_i += 1;
  10288                 off += 8;
  10289             }) {
  10290                 const mir_limb_tag: Mir.Inst.FixedTag = switch (limb_i) {
  10291                     0 => mir_tag,
  10292                     else => switch (mir_tag[1]) {
  10293                         .add => .{ ._, .adc },
  10294                         .sub, .cmp => .{ ._, .sbb },
  10295                         .@"or", .@"and", .xor => mir_tag,
  10296                         else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
  10297                             @tagName(mir_tag[1]),
  10298                         }),
  10299                     },
  10300                 };
  10301                 const dst_limb_mem: Memory = switch (dst_mcv) {
  10302                     .memory,
  10303                     .load_symbol,
  10304                     .load_got,
  10305                     .load_direct,
  10306                     .load_tlv,
  10307                     => .{
  10308                         .base = .{ .reg = dst_info.?.addr_reg },
  10309                         .mod = .{ .rm = .{
  10310                             .size = Memory.Size.fromSize(limb_abi_size),
  10311                             .disp = off,
  10312                         } },
  10313                     },
  10314                     .indirect => |reg_off| .{
  10315                         .base = .{ .reg = reg_off.reg },
  10316                         .mod = .{ .rm = .{
  10317                             .size = Memory.Size.fromSize(limb_abi_size),
  10318                             .disp = reg_off.off + off,
  10319                         } },
  10320                     },
  10321                     .load_frame => |frame_addr| .{
  10322                         .base = .{ .frame = frame_addr.index },
  10323                         .mod = .{ .rm = .{
  10324                             .size = Memory.Size.fromSize(limb_abi_size),
  10325                             .disp = frame_addr.off + off,
  10326                         } },
  10327                     },
  10328                     else => unreachable,
  10329                 };
  10330                 switch (resolved_src_mcv) {
  10331                     .none,
  10332                     .unreach,
  10333                     .dead,
  10334                     .undef,
  10335                     .register_overflow,
  10336                     .reserved_frame,
  10337                     .air_ref,
  10338                     => unreachable,
  10339                     .immediate => |src_imm| {
  10340                         const imm: u64 = switch (limb_i) {
  10341                             0 => src_imm,
  10342                             else => switch (ty_signedness) {
  10343                                 .signed => @bitCast(@as(i64, @bitCast(src_imm)) >> 63),
  10344                                 .unsigned => 0,
  10345                             },
  10346                         };
  10347                         switch (self.regBitSize(limb_ty)) {
  10348                             8 => try self.asmMemoryImmediate(
  10349                                 mir_limb_tag,
  10350                                 dst_limb_mem,
  10351                                 if (math.cast(i8, @as(i64, @bitCast(imm)))) |small|
  10352                                     Immediate.s(small)
  10353                                 else
  10354                                     Immediate.u(@as(u8, @intCast(imm))),
  10355                             ),
  10356                             16 => try self.asmMemoryImmediate(
  10357                                 mir_limb_tag,
  10358                                 dst_limb_mem,
  10359                                 if (math.cast(i16, @as(i64, @bitCast(imm)))) |small|
  10360                                     Immediate.s(small)
  10361                                 else
  10362                                     Immediate.u(@as(u16, @intCast(imm))),
  10363                             ),
  10364                             32 => try self.asmMemoryImmediate(
  10365                                 mir_limb_tag,
  10366                                 dst_limb_mem,
  10367                                 if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  10368                                     Immediate.s(small)
  10369                                 else
  10370                                     Immediate.u(@as(u32, @intCast(imm))),
  10371                             ),
  10372                             64 => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  10373                                 try self.asmMemoryImmediate(
  10374                                     mir_limb_tag,
  10375                                     dst_limb_mem,
  10376                                     Immediate.s(small),
  10377                                 )
  10378                             else
  10379                                 try self.asmMemoryRegister(
  10380                                     mir_limb_tag,
  10381                                     dst_limb_mem,
  10382                                     registerAlias(
  10383                                         try self.copyToTmpRegister(limb_ty, .{ .immediate = imm }),
  10384                                         limb_abi_size,
  10385                                     ),
  10386                                 ),
  10387                             else => unreachable,
  10388                         }
  10389                     },
  10390                     .register,
  10391                     .register_pair,
  10392                     .register_offset,
  10393                     .eflags,
  10394                     .memory,
  10395                     .indirect,
  10396                     .load_symbol,
  10397                     .lea_symbol,
  10398                     .load_direct,
  10399                     .lea_direct,
  10400                     .load_got,
  10401                     .lea_got,
  10402                     .load_tlv,
  10403                     .lea_tlv,
  10404                     .load_frame,
  10405                     .lea_frame,
  10406                     => {
  10407                         const src_limb_mcv: MCValue = if (src_info) |info| .{
  10408                             .indirect = .{ .reg = info.addr_reg, .off = off },
  10409                         } else switch (resolved_src_mcv) {
  10410                             .register, .register_pair => .{
  10411                                 .register = resolved_src_mcv.getRegs()[limb_i],
  10412                             },
  10413                             .eflags,
  10414                             .register_offset,
  10415                             .lea_symbol,
  10416                             .lea_direct,
  10417                             .lea_got,
  10418                             .lea_tlv,
  10419                             .lea_frame,
  10420                             => switch (limb_i) {
  10421                                 0 => resolved_src_mcv,
  10422                                 else => .{ .immediate = 0 },
  10423                             },
  10424                             .memory => |addr| .{ .memory = @bitCast(@as(i64, @bitCast(addr)) + off) },
  10425                             .indirect => |reg_off| .{ .indirect = .{
  10426                                 .reg = reg_off.reg,
  10427                                 .off = reg_off.off + off,
  10428                             } },
  10429                             .load_frame => |frame_addr| .{ .load_frame = .{
  10430                                 .index = frame_addr.index,
  10431                                 .off = frame_addr.off + off,
  10432                             } },
  10433                             else => unreachable,
  10434                         };
  10435                         const src_limb_reg = if (src_limb_mcv.isRegister())
  10436                             src_limb_mcv.getReg().?
  10437                         else
  10438                             try self.copyToTmpRegister(limb_ty, src_limb_mcv);
  10439                         try self.asmMemoryRegister(
  10440                             mir_limb_tag,
  10441                             dst_limb_mem,
  10442                             registerAlias(src_limb_reg, limb_abi_size),
  10443                         );
  10444                     },
  10445                 }
  10446             }
  10447         },
  10448     }
  10449 }
  10450 
  10451 /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
  10452 /// Does not support byte-size operands.
  10453 fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
  10454     const mod = self.bin_file.comp.module.?;
  10455     const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
  10456     try self.spillEflagsIfOccupied();
  10457     switch (dst_mcv) {
  10458         .none,
  10459         .unreach,
  10460         .dead,
  10461         .undef,
  10462         .immediate,
  10463         .eflags,
  10464         .register_offset,
  10465         .register_overflow,
  10466         .lea_symbol,
  10467         .lea_direct,
  10468         .lea_got,
  10469         .lea_tlv,
  10470         .lea_frame,
  10471         .reserved_frame,
  10472         .air_ref,
  10473         => unreachable, // unmodifiable destination
  10474         .register => |dst_reg| {
  10475             const dst_alias = registerAlias(dst_reg, abi_size);
  10476             const dst_lock = self.register_manager.lockReg(dst_reg);
  10477             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  10478 
  10479             const resolved_src_mcv = switch (src_mcv) {
  10480                 else => src_mcv,
  10481                 .air_ref => |src_ref| try self.resolveInst(src_ref),
  10482             };
  10483             switch (resolved_src_mcv) {
  10484                 .none,
  10485                 .unreach,
  10486                 .dead,
  10487                 .undef,
  10488                 .register_pair,
  10489                 .register_overflow,
  10490                 .reserved_frame,
  10491                 .air_ref,
  10492                 => unreachable,
  10493                 .register => |src_reg| try self.asmRegisterRegister(
  10494                     .{ .i_, .mul },
  10495                     dst_alias,
  10496                     registerAlias(src_reg, abi_size),
  10497                 ),
  10498                 .immediate => |imm| {
  10499                     if (math.cast(i32, imm)) |small| {
  10500                         try self.asmRegisterRegisterImmediate(
  10501                             .{ .i_, .mul },
  10502                             dst_alias,
  10503                             dst_alias,
  10504                             Immediate.s(small),
  10505                         );
  10506                     } else {
  10507                         const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv);
  10508                         return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
  10509                     }
  10510                 },
  10511                 .register_offset,
  10512                 .eflags,
  10513                 .load_symbol,
  10514                 .lea_symbol,
  10515                 .load_direct,
  10516                 .lea_direct,
  10517                 .load_got,
  10518                 .lea_got,
  10519                 .load_tlv,
  10520                 .lea_tlv,
  10521                 .lea_frame,
  10522                 => try self.asmRegisterRegister(
  10523                     .{ .i_, .mul },
  10524                     dst_alias,
  10525                     registerAlias(try self.copyToTmpRegister(dst_ty, resolved_src_mcv), abi_size),
  10526                 ),
  10527                 .memory, .indirect, .load_frame => try self.asmRegisterMemory(
  10528                     .{ .i_, .mul },
  10529                     dst_alias,
  10530                     switch (resolved_src_mcv) {
  10531                         .memory => |addr| .{
  10532                             .base = .{ .reg = .ds },
  10533                             .mod = .{ .rm = .{
  10534                                 .size = Memory.Size.fromSize(abi_size),
  10535                                 .disp = math.cast(i32, @as(i64, @bitCast(addr))) orelse
  10536                                     return self.asmRegisterRegister(
  10537                                     .{ .i_, .mul },
  10538                                     dst_alias,
  10539                                     registerAlias(
  10540                                         try self.copyToTmpRegister(dst_ty, resolved_src_mcv),
  10541                                         abi_size,
  10542                                     ),
  10543                                 ),
  10544                             } },
  10545                         },
  10546                         .indirect => |reg_off| .{
  10547                             .base = .{ .reg = reg_off.reg },
  10548                             .mod = .{ .rm = .{
  10549                                 .size = Memory.Size.fromSize(abi_size),
  10550                                 .disp = reg_off.off,
  10551                             } },
  10552                         },
  10553                         .load_frame => |frame_addr| .{
  10554                             .base = .{ .frame = frame_addr.index },
  10555                             .mod = .{ .rm = .{
  10556                                 .size = Memory.Size.fromSize(abi_size),
  10557                                 .disp = frame_addr.off,
  10558                             } },
  10559                         },
  10560                         else => unreachable,
  10561                     },
  10562                 ),
  10563             }
  10564         },
  10565         .register_pair => unreachable, // unimplemented
  10566         .memory, .indirect, .load_symbol, .load_direct, .load_got, .load_tlv, .load_frame => {
  10567             const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
  10568             const tmp_mcv = MCValue{ .register = tmp_reg };
  10569             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  10570             defer self.register_manager.unlockReg(tmp_lock);
  10571 
  10572             try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv);
  10573             try self.genCopy(dst_ty, dst_mcv, tmp_mcv);
  10574         },
  10575     }
  10576 }
  10577 
  10578 fn airArg(self: *Self, inst: Air.Inst.Index) !void {
  10579     const mod = self.bin_file.comp.module.?;
  10580     // skip zero-bit arguments as they don't have a corresponding arg instruction
  10581     var arg_index = self.arg_index;
  10582     while (self.args[arg_index] == .none) arg_index += 1;
  10583     self.arg_index = arg_index + 1;
  10584 
  10585     const result: MCValue = if (self.liveness.isUnused(inst)) .unreach else result: {
  10586         const arg_ty = self.typeOfIndex(inst);
  10587         const src_mcv = self.args[arg_index];
  10588         const dst_mcv = switch (src_mcv) {
  10589             .register, .register_pair, .load_frame => dst: {
  10590                 for (src_mcv.getRegs()) |reg| self.register_manager.getRegAssumeFree(reg, inst);
  10591                 break :dst src_mcv;
  10592             },
  10593             .indirect => |reg_off| dst: {
  10594                 self.register_manager.getRegAssumeFree(reg_off.reg, inst);
  10595                 const dst_mcv = try self.allocRegOrMem(inst, false);
  10596                 try self.genCopy(arg_ty, dst_mcv, src_mcv);
  10597                 break :dst dst_mcv;
  10598             },
  10599             else => return self.fail("TODO implement arg for {}", .{src_mcv}),
  10600         };
  10601 
  10602         const src_index = self.air.instructions.items(.data)[@intFromEnum(inst)].arg.src_index;
  10603         const name = mod.getParamName(self.owner.func_index, src_index);
  10604         try self.genArgDbgInfo(arg_ty, name, src_mcv);
  10605 
  10606         break :result dst_mcv;
  10607     };
  10608     return self.finishAir(inst, result, .{ .none, .none, .none });
  10609 }
  10610 
  10611 fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void {
  10612     const mod = self.bin_file.comp.module.?;
  10613     switch (self.debug_output) {
  10614         .dwarf => |dw| {
  10615             const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
  10616                 .register => |reg| .{ .register = reg.dwarfNum() },
  10617                 .register_pair => |regs| .{ .register_pair = .{
  10618                     regs[0].dwarfNum(), regs[1].dwarfNum(),
  10619                 } },
  10620                 // TODO use a frame index
  10621                 .load_frame => return,
  10622                 //.stack_offset => |off| .{
  10623                 //    .stack = .{
  10624                 //        // TODO handle -fomit-frame-pointer
  10625                 //        .fp_register = Register.rbp.dwarfNum(),
  10626                 //        .offset = -off,
  10627                 //    },
  10628                 //},
  10629                 else => unreachable, // not a valid function parameter
  10630             };
  10631             // TODO: this might need adjusting like the linkers do.
  10632             // Instead of flattening the owner and passing Decl.Index here we may
  10633             // want to special case LazySymbol in DWARF linker too.
  10634             try dw.genArgDbgInfo(name, ty, self.owner.getDecl(mod), loc);
  10635         },
  10636         .plan9 => {},
  10637         .none => {},
  10638     }
  10639 }
  10640 
  10641 fn genVarDbgInfo(
  10642     self: Self,
  10643     tag: Air.Inst.Tag,
  10644     ty: Type,
  10645     mcv: MCValue,
  10646     name: [:0]const u8,
  10647 ) !void {
  10648     const mod = self.bin_file.comp.module.?;
  10649     const is_ptr = switch (tag) {
  10650         .dbg_var_ptr => true,
  10651         .dbg_var_val => false,
  10652         else => unreachable,
  10653     };
  10654 
  10655     switch (self.debug_output) {
  10656         .dwarf => |dw| {
  10657             const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
  10658                 .register => |reg| .{ .register = reg.dwarfNum() },
  10659                 // TODO use a frame index
  10660                 .load_frame, .lea_frame => return,
  10661                 //=> |off| .{ .stack = .{
  10662                 //    .fp_register = Register.rbp.dwarfNum(),
  10663                 //    .offset = -off,
  10664                 //} },
  10665                 .memory => |address| .{ .memory = address },
  10666                 .load_symbol => |sym_off| loc: {
  10667                     assert(sym_off.off == 0);
  10668                     break :loc .{ .linker_load = .{ .type = .direct, .sym_index = sym_off.sym } };
  10669                 }, // TODO
  10670                 .load_got => |sym_index| .{ .linker_load = .{ .type = .got, .sym_index = sym_index } },
  10671                 .load_direct => |sym_index| .{
  10672                     .linker_load = .{ .type = .direct, .sym_index = sym_index },
  10673                 },
  10674                 .immediate => |x| .{ .immediate = x },
  10675                 .undef => .undef,
  10676                 .none => .none,
  10677                 else => blk: {
  10678                     log.debug("TODO generate debug info for {}", .{mcv});
  10679                     break :blk .nop;
  10680                 },
  10681             };
  10682             // TODO: this might need adjusting like the linkers do.
  10683             // Instead of flattening the owner and passing Decl.Index here we may
  10684             // want to special case LazySymbol in DWARF linker too.
  10685             try dw.genVarDbgInfo(name, ty, self.owner.getDecl(mod), is_ptr, loc);
  10686         },
  10687         .plan9 => {},
  10688         .none => {},
  10689     }
  10690 }
  10691 
  10692 fn airTrap(self: *Self) !void {
  10693     try self.asmOpOnly(.{ ._, .ud2 });
  10694     self.finishAirBookkeeping();
  10695 }
  10696 
  10697 fn airBreakpoint(self: *Self) !void {
  10698     try self.asmOpOnly(.{ ._, .int3 });
  10699     self.finishAirBookkeeping();
  10700 }
  10701 
  10702 fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void {
  10703     const dst_mcv = try self.allocRegOrMem(inst, true);
  10704     try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } });
  10705     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
  10706 }
  10707 
  10708 fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
  10709     const dst_mcv = try self.allocRegOrMem(inst, true);
  10710     try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } });
  10711     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
  10712 }
  10713 
  10714 fn airFence(self: *Self, inst: Air.Inst.Index) !void {
  10715     const order = self.air.instructions.items(.data)[@intFromEnum(inst)].fence;
  10716     switch (order) {
  10717         .Unordered, .Monotonic => unreachable,
  10718         .Acquire, .Release, .AcqRel => {},
  10719         .SeqCst => try self.asmOpOnly(.{ ._, .mfence }),
  10720     }
  10721     self.finishAirBookkeeping();
  10722 }
  10723 
  10724 fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
  10725     if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{});
  10726 
  10727     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  10728     const extra = self.air.extraData(Air.Call, pl_op.payload);
  10729     const arg_refs: []const Air.Inst.Ref =
  10730         @ptrCast(self.air.extra[extra.end..][0..extra.data.args_len]);
  10731 
  10732     const ExpectedContents = extern struct {
  10733         tys: [16][@sizeOf(Type)]u8 align(@alignOf(Type)),
  10734         vals: [16][@sizeOf(MCValue)]u8 align(@alignOf(MCValue)),
  10735     };
  10736     var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
  10737         std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
  10738     const allocator = stack.get();
  10739 
  10740     const arg_tys = try allocator.alloc(Type, arg_refs.len);
  10741     defer allocator.free(arg_tys);
  10742     for (arg_tys, arg_refs) |*arg_ty, arg_ref| arg_ty.* = self.typeOf(arg_ref);
  10743 
  10744     const arg_vals = try allocator.alloc(MCValue, arg_refs.len);
  10745     defer allocator.free(arg_vals);
  10746     for (arg_vals, arg_refs) |*arg_val, arg_ref| arg_val.* = .{ .air_ref = arg_ref };
  10747 
  10748     const ret = try self.genCall(.{ .air = pl_op.operand }, arg_tys, arg_vals);
  10749 
  10750     var bt = self.liveness.iterateBigTomb(inst);
  10751     try self.feed(&bt, pl_op.operand);
  10752     for (arg_refs) |arg_ref| try self.feed(&bt, arg_ref);
  10753 
  10754     const result = if (self.liveness.isUnused(inst)) .unreach else ret;
  10755     return self.finishAirResult(inst, result);
  10756 }
  10757 
/// Emits a complete call sequence and returns the location of the call's
/// result (`call_info.return_value.short`).
///
/// `info` selects the callee:
///  * `.air`: an AIR ref whose type is a function or a pointer to a function.
///  * `.lib`: a symbol named `callee` (optionally from library `lib`) called
///    with the C calling convention and the given parameter/return types.
///
/// `arg_types` and `args` are parallel slices. Entries beyond the number of
/// declared parameters of the function type are treated as variadic arguments.
fn genCall(self: *Self, info: union(enum) {
    air: Air.Inst.Ref,
    lib: struct {
        return_type: InternPool.Index,
        param_types: []const InternPool.Index,
        lib: ?[]const u8 = null,
        callee: []const u8,
    },
}, arg_types: []const Type, args: []const MCValue) !MCValue {
    const mod = self.bin_file.comp.module.?;

    // Determine the function type being called: directly for `.air` callees
    // (unwrapping one pointer level if needed), synthesized for `.lib` callees.
    const fn_ty = switch (info) {
        .air => |callee| fn_info: {
            const callee_ty = self.typeOf(callee);
            break :fn_info switch (callee_ty.zigTypeTag(mod)) {
                .Fn => callee_ty,
                .Pointer => callee_ty.childType(mod),
                else => unreachable,
            };
        },
        .lib => |lib| try mod.funcType(.{
            .param_types = lib.param_types,
            .return_type = lib.return_type,
            .cc = .C,
        }),
    };
    const fn_info = mod.typeToFunc(fn_ty).?;
    const resolved_cc = abi.resolveCallingConvention(fn_info.cc, self.target.*);

    // Stack buffer sized for 16 entries of each scratch array below; larger
    // requests fall back to `self.gpa`.
    const ExpectedContents = extern struct {
        var_args: [16][@sizeOf(Type)]u8 align(@alignOf(Type)),
        frame_indices: [16]FrameIndex,
        reg_locks: [16][@sizeOf(?RegisterLock)]u8 align(@alignOf(?RegisterLock)),
    };
    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
    const allocator = stack.get();

    // Types of the arguments past the declared parameters (the variadic ones).
    const var_args = try allocator.alloc(Type, args.len - fn_info.param_types.len);
    defer allocator.free(var_args);
    for (var_args, arg_types[fn_info.param_types.len..]) |*var_arg, arg_ty| var_arg.* = arg_ty;

    // Only the entries belonging to `.indirect` arguments are ever written
    // (first argument loop) and read (second argument loop).
    const frame_indices = try allocator.alloc(FrameIndex, args.len);
    defer allocator.free(frame_indices);

    // Locks on the argument registers; all of them are released on exit.
    var reg_locks = std.ArrayList(?RegisterLock).init(allocator);
    defer reg_locks.deinit();
    try reg_locks.ensureTotalCapacity(16);
    defer for (reg_locks.items) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);

    var call_info = try self.resolveCallingConventionValues(fn_info, var_args, .call_frame);
    defer call_info.deinit(self);

    // We need a properly aligned and sized call frame to be able to call this function.
    {
        const needed_call_frame = FrameAlloc.init(.{
            .size = call_info.stack_byte_count,
            .alignment = call_info.stack_align,
        });
        // The shared call frame only ever grows: keep the maximum size and
        // alignment required by any call emitted so far.
        const frame_allocs_slice = self.frame_allocs.slice();
        const stack_frame_size =
            &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)];
        stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
        const stack_frame_align =
            &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)];
        stack_frame_align.* = stack_frame_align.max(needed_call_frame.abi_align);
    }

    try self.spillEflagsIfOccupied();
    try self.spillCallerPreservedRegs(resolved_cc);

    // set stack arguments first because this can clobber registers
    // also clobber spill arguments as we go
    switch (call_info.return_value.long) {
        .none, .unreach => {},
        // Reserve the register that will carry the return-value pointer.
        .indirect => |reg_off| try self.register_manager.getReg(reg_off.reg, null),
        else => unreachable,
    }
    for (call_info.args, arg_types, args, frame_indices) |dst_arg, arg_ty, src_arg, *frame_index|
        switch (dst_arg) {
            .none => {},
            // Register arguments are only reserved and locked here; their
            // values are written in the second loop below.
            .register => |reg| {
                try self.register_manager.getReg(reg, null);
                try reg_locks.append(self.register_manager.lockReg(reg));
            },
            .register_pair => |regs| {
                for (regs) |reg| try self.register_manager.getReg(reg, null);
                try reg_locks.appendSlice(&self.register_manager.lockRegs(2, regs));
            },
            // By-reference arguments: store the value in a fresh frame slot
            // now; the pointer register is loaded in the second loop below.
            .indirect => |reg_off| {
                frame_index.* = try self.allocFrameIndex(FrameAlloc.initType(arg_ty, mod));
                try self.genSetMem(.{ .frame = frame_index.* }, 0, arg_ty, src_arg);
                try self.register_manager.getReg(reg_off.reg, null);
                try reg_locks.append(self.register_manager.lockReg(reg_off.reg));
            },
            // Frame-passed arguments are copied into place immediately.
            .load_frame => {
                try self.genCopy(arg_ty, dst_arg, src_arg);
                try self.freeValue(src_arg);
            },
            else => unreachable,
        };

    // now we are free to set register arguments
    switch (call_info.return_value.long) {
        .none, .unreach => {},
        .indirect => |reg_off| {
            // Allocate a frame slot for the result, pass its address in the
            // return-pointer register, and report that slot as the result.
            const ret_ty = Type.fromInterned(fn_info.return_type);
            const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ret_ty, mod));
            try self.genSetReg(reg_off.reg, Type.usize, .{
                .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
            });
            call_info.return_value.short = .{ .load_frame = .{ .index = frame_index } };
            try reg_locks.append(self.register_manager.lockReg(reg_off.reg));
        },
        else => unreachable,
    }

    for (call_info.args, arg_types, args, frame_indices) |dst_arg, arg_ty, src_arg, frame_index|
        switch (dst_arg) {
            .none, .load_frame => {},
            .register => |dst_reg| switch (fn_info.cc) {
                else => try self.genSetReg(
                    registerAlias(dst_reg, @intCast(arg_ty.abiSize(mod))),
                    arg_ty,
                    src_arg,
                ),
                // For these conventions the argument is set using its promoted
                // integer type, then truncated back if promotion changed it.
                .C, .SysV, .Win64 => {
                    const promoted_ty = self.promoteInt(arg_ty);
                    const promoted_abi_size: u32 = @intCast(promoted_ty.abiSize(mod));
                    const dst_alias = registerAlias(dst_reg, promoted_abi_size);
                    try self.genSetReg(dst_alias, promoted_ty, src_arg);
                    if (promoted_ty.toIntern() != arg_ty.toIntern())
                        try self.truncateRegister(arg_ty, dst_alias);
                },
            },
            .register_pair => try self.genCopy(arg_ty, dst_arg, src_arg),
            // Load the address of the frame slot filled in the first loop.
            .indirect => |reg_off| try self.genSetReg(reg_off.reg, Type.usize, .{
                .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
            }),
            else => unreachable,
        };

    // Variadic callees receive `call_info.fp_count` in `al`.
    if (fn_info.is_var_args)
        try self.asmRegisterImmediate(.{ ._, .mov }, .al, Immediate.u(call_info.fp_count));

    // Due to incremental compilation, how function calls are generated depends
    // on linking.
    switch (info) {
        .air => |callee| if (try self.air.value(callee, mod)) |func_value| {
            // The callee is comptime-known; look through a pointer to a decl
            // to find the function it refers to.
            const func_key = mod.intern_pool.indexToKey(func_value.ip_index);
            switch (switch (func_key) {
                else => func_key,
                .ptr => |ptr| switch (ptr.addr) {
                    .decl => |decl| mod.intern_pool.indexToKey(try mod.declPtr(decl).internValue(mod)),
                    else => func_key,
                },
            }) {
                .func => |func| {
                    try mod.markDeclAlive(mod.declPtr(func.owner_decl));
                    if (self.bin_file.cast(link.File.Elf)) |elf_file| {
                        const sym_index = try elf_file.zigObjectPtr().?.getOrCreateMetadataForDecl(elf_file, func.owner_decl);
                        const sym = elf_file.symbol(sym_index);
                        if (self.mod.pic) {
                            // Pick a scratch register for the callee address.
                            // For variadic SysV calls `rax` is avoided (`al`
                            // was written above); the next unused integer
                            // parameter register or `r10` is used instead.
                            const callee_reg: Register = switch (resolved_cc) {
                                .SysV => callee: {
                                    if (!fn_info.is_var_args) break :callee .rax;
                                    const param_regs = abi.getCAbiIntParamRegs(resolved_cc);
                                    break :callee if (call_info.gp_count < param_regs.len)
                                        param_regs[call_info.gp_count]
                                    else
                                        .r10;
                                },
                                .Win64 => .rax,
                                else => unreachable,
                            };
                            try self.genSetReg(
                                callee_reg,
                                Type.usize,
                                .{ .load_symbol = .{ .sym = sym.esym_index } },
                            );
                            try self.asmRegister(.{ ._, .call }, callee_reg);
                        } else try self.asmMemory(.{ ._, .call }, .{
                            .base = .{ .reloc = .{
                                .atom_index = try self.owner.getSymbolIndex(self),
                                .sym_index = sym.esym_index,
                            } },
                            .mod = .{ .rm = .{ .size = .qword } },
                        });
                    } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
                        const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl);
                        const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
                        try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
                        try self.asmRegister(.{ ._, .call }, .rax);
                    } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
                        const sym_index = try macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, func.owner_decl);
                        const sym = macho_file.getSymbol(sym_index);
                        try self.genSetReg(.rax, Type.usize, .{ .load_symbol = .{ .sym = sym.nlist_idx } });
                        try self.asmRegister(.{ ._, .call }, .rax);
                    } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
                        const atom_index = try p9.seeDecl(func.owner_decl);
                        const atom = p9.getAtom(atom_index);
                        try self.asmMemory(.{ ._, .call }, .{
                            .base = .{ .reg = .ds },
                            .mod = .{ .rm = .{
                                .size = .qword,
                                .disp = @intCast(atom.getOffsetTableAddress(p9)),
                            } },
                        });
                    } else unreachable;
                },
                .extern_func => |extern_func| {
                    const owner_decl = mod.declPtr(extern_func.decl);
                    try mod.markDeclAlive(owner_decl);
                    const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name);
                    const decl_name = mod.intern_pool.stringToSlice(owner_decl.name);
                    try self.genExternSymbolRef(.call, lib_name, decl_name);
                },
                else => return self.fail("TODO implement calling bitcasted functions", .{}),
            }
        } else {
            // Runtime-known callee: it must be a function pointer; call
            // through `rax`.
            assert(self.typeOf(callee).zigTypeTag(mod) == .Pointer);
            try self.genSetReg(.rax, Type.usize, .{ .air_ref = callee });
            try self.asmRegister(.{ ._, .call }, .rax);
        },
        .lib => |lib| try self.genExternSymbolRef(.call, lib.lib, lib.callee),
    }
    return call_info.return_value.short;
}
  10986 
  10987 fn airRet(self: *Self, inst: Air.Inst.Index) !void {
  10988     const mod = self.bin_file.comp.module.?;
  10989     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  10990 
  10991     const ret_ty = self.fn_type.fnReturnType(mod);
  10992     switch (self.ret_mcv.short) {
  10993         .none => {},
  10994         .register,
  10995         .register_pair,
  10996         => try self.genCopy(ret_ty, self.ret_mcv.short, .{ .air_ref = un_op }),
  10997         .indirect => |reg_off| {
  10998             try self.register_manager.getReg(reg_off.reg, null);
  10999             const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg);
  11000             defer self.register_manager.unlockReg(lock);
  11001 
  11002             try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long);
  11003             try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, .{ .air_ref = un_op });
  11004         },
  11005         else => unreachable,
  11006     }
  11007     self.ret_mcv.liveOut(self, inst);
  11008     try self.finishAir(inst, .unreach, .{ un_op, .none, .none });
  11009 
  11010     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
  11011     // which is available if the jump is 127 bytes or less forward.
  11012     const jmp_reloc = try self.asmJmpReloc(undefined);
  11013     try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
  11014 }
  11015 
  11016 fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
  11017     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  11018     const ptr = try self.resolveInst(un_op);
  11019 
  11020     const ptr_ty = self.typeOf(un_op);
  11021     switch (self.ret_mcv.short) {
  11022         .none => {},
  11023         .register, .register_pair => try self.load(self.ret_mcv.short, ptr_ty, ptr),
  11024         .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr),
  11025         else => unreachable,
  11026     }
  11027     self.ret_mcv.liveOut(self, inst);
  11028     try self.finishAir(inst, .unreach, .{ un_op, .none, .none });
  11029 
  11030     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
  11031     // which is available if the jump is 127 bytes or less forward.
  11032     const jmp_reloc = try self.asmJmpReloc(undefined);
  11033     try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
  11034 }
  11035 
  11036 fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
  11037     const mod = self.bin_file.comp.module.?;
  11038     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  11039     const ty = self.typeOf(bin_op.lhs);
  11040 
  11041     const result: Condition = result: {
  11042         switch (ty.zigTypeTag(mod)) {
  11043             .Float => {
  11044                 const float_bits = ty.floatBits(self.target.*);
  11045                 if (switch (float_bits) {
  11046                     16 => !self.hasFeature(.f16c),
  11047                     32, 64 => false,
  11048                     80, 128 => true,
  11049                     else => unreachable,
  11050                 }) {
  11051                     var callee_buf: ["__???f2".len]u8 = undefined;
  11052                     const ret = try self.genCall(.{ .lib = .{
  11053                         .return_type = .i32_type,
  11054                         .param_types = &.{ ty.toIntern(), ty.toIntern() },
  11055                         .callee = std.fmt.bufPrint(&callee_buf, "__{s}{c}f2", .{
  11056                             switch (op) {
  11057                                 .eq => "eq",
  11058                                 .neq => "ne",
  11059                                 .lt => "lt",
  11060                                 .lte => "le",
  11061                                 .gt => "gt",
  11062                                 .gte => "ge",
  11063                             },
  11064                             floatCompilerRtAbiName(float_bits),
  11065                         }) catch unreachable,
  11066                     } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } });
  11067                     try self.genBinOpMir(.{ ._, .@"test" }, Type.i32, ret, ret);
  11068                     break :result switch (op) {
  11069                         .eq => .e,
  11070                         .neq => .ne,
  11071                         .lt => .l,
  11072                         .lte => .le,
  11073                         .gt => .g,
  11074                         .gte => .ge,
  11075                     };
  11076                 }
  11077             },
  11078             else => {},
  11079         }
  11080 
  11081         try self.spillEflagsIfOccupied();
  11082 
  11083         const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11084         const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
  11085             .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
  11086             .register_pair => |lhs_regs| locks: {
  11087                 const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
  11088                 break :locks .{ locks[0], locks[1] };
  11089             },
  11090             .register_offset => |lhs_ro| .{
  11091                 self.register_manager.lockRegAssumeUnused(lhs_ro.reg),
  11092                 null,
  11093             },
  11094             else => .{null} ** 2,
  11095         };
  11096         defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  11097 
  11098         const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11099         const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
  11100             .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
  11101             .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
  11102             .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null },
  11103             else => .{null} ** 2,
  11104         };
  11105         defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11106 
  11107         switch (ty.zigTypeTag(mod)) {
  11108             else => {
  11109                 const abi_size: u16 = @intCast(ty.abiSize(mod));
  11110                 const may_flip: enum {
  11111                     may_flip,
  11112                     must_flip,
  11113                     must_not_flip,
  11114                 } = if (abi_size > 8) switch (op) {
  11115                     .lt, .gte => .must_not_flip,
  11116                     .lte, .gt => .must_flip,
  11117                     .eq, .neq => .may_flip,
  11118                 } else .may_flip;
  11119 
  11120                 const flipped = switch (may_flip) {
  11121                     .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isMemory(),
  11122                     .must_flip => true,
  11123                     .must_not_flip => false,
  11124                 };
  11125                 const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
  11126                 const dst_mcv = if (unmat_dst_mcv.isRegister() or
  11127                     (abi_size <= 8 and unmat_dst_mcv.isMemory())) unmat_dst_mcv else dst: {
  11128                     const dst_mcv = try self.allocTempRegOrMem(ty, true);
  11129                     try self.genCopy(ty, dst_mcv, unmat_dst_mcv);
  11130                     break :dst dst_mcv;
  11131                 };
  11132                 const dst_lock =
  11133                     if (dst_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  11134                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  11135 
  11136                 const src_mcv = try self.resolveInst(if (flipped) bin_op.lhs else bin_op.rhs);
  11137                 const src_lock =
  11138                     if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  11139                 defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  11140 
  11141                 break :result Condition.fromCompareOperator(
  11142                     if (ty.isAbiInt(mod)) ty.intInfo(mod).signedness else .unsigned,
  11143                     result_op: {
  11144                         const flipped_op = if (flipped) op.reverse() else op;
  11145                         if (abi_size > 8) switch (flipped_op) {
  11146                             .lt, .gte => {},
  11147                             .lte, .gt => unreachable,
  11148                             .eq, .neq => {
  11149                                 const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
  11150 
  11151                                 const resolved_dst_mcv = switch (dst_mcv) {
  11152                                     else => dst_mcv,
  11153                                     .air_ref => |dst_ref| try self.resolveInst(dst_ref),
  11154                                 };
  11155                                 const dst_info: OpInfo = switch (resolved_dst_mcv) {
  11156                                     .none,
  11157                                     .unreach,
  11158                                     .dead,
  11159                                     .undef,
  11160                                     .immediate,
  11161                                     .eflags,
  11162                                     .register,
  11163                                     .register_offset,
  11164                                     .register_overflow,
  11165                                     .indirect,
  11166                                     .lea_direct,
  11167                                     .lea_got,
  11168                                     .lea_tlv,
  11169                                     .lea_frame,
  11170                                     .lea_symbol,
  11171                                     .reserved_frame,
  11172                                     .air_ref,
  11173                                     => unreachable,
  11174                                     .register_pair, .load_frame => null,
  11175                                     .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: {
  11176                                         switch (resolved_dst_mcv) {
  11177                                             .memory => |addr| if (math.cast(
  11178                                                 i32,
  11179                                                 @as(i64, @bitCast(addr)),
  11180                                             ) != null and math.cast(
  11181                                                 i32,
  11182                                                 @as(i64, @bitCast(addr)) + abi_size - 8,
  11183                                             ) != null) break :dst null,
  11184                                             .load_symbol, .load_got, .load_direct, .load_tlv => {},
  11185                                             else => unreachable,
  11186                                         }
  11187 
  11188                                         const dst_addr_reg = (try self.register_manager.allocReg(
  11189                                             null,
  11190                                             abi.RegisterClass.gp,
  11191                                         )).to64();
  11192                                         const dst_addr_lock =
  11193                                             self.register_manager.lockRegAssumeUnused(dst_addr_reg);
  11194                                         errdefer self.register_manager.unlockReg(dst_addr_lock);
  11195 
  11196                                         try self.genSetReg(
  11197                                             dst_addr_reg,
  11198                                             Type.usize,
  11199                                             resolved_dst_mcv.address(),
  11200                                         );
  11201                                         break :dst .{
  11202                                             .addr_reg = dst_addr_reg,
  11203                                             .addr_lock = dst_addr_lock,
  11204                                         };
  11205                                     },
  11206                                 };
  11207                                 defer if (dst_info) |info|
  11208                                     self.register_manager.unlockReg(info.addr_lock);
  11209 
  11210                                 const resolved_src_mcv = switch (src_mcv) {
  11211                                     else => src_mcv,
  11212                                     .air_ref => |src_ref| try self.resolveInst(src_ref),
  11213                                 };
  11214                                 const src_info: OpInfo = switch (resolved_src_mcv) {
  11215                                     .none,
  11216                                     .unreach,
  11217                                     .dead,
  11218                                     .undef,
  11219                                     .immediate,
  11220                                     .eflags,
  11221                                     .register,
  11222                                     .register_offset,
  11223                                     .register_overflow,
  11224                                     .indirect,
  11225                                     .lea_symbol,
  11226                                     .lea_direct,
  11227                                     .lea_got,
  11228                                     .lea_tlv,
  11229                                     .lea_frame,
  11230                                     .reserved_frame,
  11231                                     .air_ref,
  11232                                     => unreachable,
  11233                                     .register_pair, .load_frame => null,
  11234                                     .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: {
  11235                                         switch (resolved_src_mcv) {
  11236                                             .memory => |addr| if (math.cast(
  11237                                                 i32,
  11238                                                 @as(i64, @bitCast(addr)),
  11239                                             ) != null and math.cast(
  11240                                                 i32,
  11241                                                 @as(i64, @bitCast(addr)) + abi_size - 8,
  11242                                             ) != null) break :src null,
  11243                                             .load_symbol, .load_got, .load_direct, .load_tlv => {},
  11244                                             else => unreachable,
  11245                                         }
  11246 
  11247                                         const src_addr_reg = (try self.register_manager.allocReg(
  11248                                             null,
  11249                                             abi.RegisterClass.gp,
  11250                                         )).to64();
  11251                                         const src_addr_lock =
  11252                                             self.register_manager.lockRegAssumeUnused(src_addr_reg);
  11253                                         errdefer self.register_manager.unlockReg(src_addr_lock);
  11254 
  11255                                         try self.genSetReg(
  11256                                             src_addr_reg,
  11257                                             Type.usize,
  11258                                             resolved_src_mcv.address(),
  11259                                         );
  11260                                         break :src .{
  11261                                             .addr_reg = src_addr_reg,
  11262                                             .addr_lock = src_addr_lock,
  11263                                         };
  11264                                     },
  11265                                 };
  11266                                 defer if (src_info) |info|
  11267                                     self.register_manager.unlockReg(info.addr_lock);
  11268 
  11269                                 const regs = try self.register_manager.allocRegs(
  11270                                     2,
  11271                                     .{ null, null },
  11272                                     abi.RegisterClass.gp,
  11273                                 );
  11274                                 const acc_reg = regs[0].to64();
  11275                                 const locks = self.register_manager.lockRegsAssumeUnused(2, regs);
  11276                                 defer for (locks) |lock| self.register_manager.unlockReg(lock);
  11277 
  11278                                 const limbs_len = math.divCeil(u16, abi_size, 8) catch unreachable;
  11279                                 var limb_i: u16 = 0;
  11280                                 while (limb_i < limbs_len) : (limb_i += 1) {
  11281                                     const off = limb_i * 8;
  11282                                     const tmp_reg = regs[@min(limb_i, 1)].to64();
  11283 
  11284                                     try self.genSetReg(tmp_reg, Type.usize, if (dst_info) |info| .{
  11285                                         .indirect = .{ .reg = info.addr_reg, .off = off },
  11286                                     } else switch (resolved_dst_mcv) {
  11287                                         .register_pair => |dst_regs| .{ .register = dst_regs[limb_i] },
  11288                                         .memory => |dst_addr| .{
  11289                                             .memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off),
  11290                                         },
  11291                                         .indirect => |reg_off| .{ .indirect = .{
  11292                                             .reg = reg_off.reg,
  11293                                             .off = reg_off.off + off,
  11294                                         } },
  11295                                         .load_frame => |frame_addr| .{ .load_frame = .{
  11296                                             .index = frame_addr.index,
  11297                                             .off = frame_addr.off + off,
  11298                                         } },
  11299                                         else => unreachable,
  11300                                     });
  11301 
  11302                                     try self.genBinOpMir(
  11303                                         .{ ._, .xor },
  11304                                         Type.usize,
  11305                                         .{ .register = tmp_reg },
  11306                                         if (src_info) |info| .{
  11307                                             .indirect = .{ .reg = info.addr_reg, .off = off },
  11308                                         } else switch (resolved_src_mcv) {
  11309                                             .register_pair => |src_regs| .{
  11310                                                 .register = src_regs[limb_i],
  11311                                             },
  11312                                             .memory => |src_addr| .{
  11313                                                 .memory = @bitCast(@as(i64, @bitCast(src_addr)) + off),
  11314                                             },
  11315                                             .indirect => |reg_off| .{ .indirect = .{
  11316                                                 .reg = reg_off.reg,
  11317                                                 .off = reg_off.off + off,
  11318                                             } },
  11319                                             .load_frame => |frame_addr| .{ .load_frame = .{
  11320                                                 .index = frame_addr.index,
  11321                                                 .off = frame_addr.off + off,
  11322                                             } },
  11323                                             else => unreachable,
  11324                                         },
  11325                                     );
  11326 
  11327                                     if (limb_i > 0)
  11328                                         try self.asmRegisterRegister(.{ ._, .@"or" }, acc_reg, tmp_reg);
  11329                                 }
  11330                                 assert(limbs_len >= 2); // use flags from or
  11331                                 break :result_op flipped_op;
  11332                             },
  11333                         };
  11334                         try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv);
  11335                         break :result_op flipped_op;
  11336                     },
  11337                 );
  11338             },
  11339             .Float => {
  11340                 const flipped = switch (op) {
  11341                     .lt, .lte => true,
  11342                     .eq, .gte, .gt, .neq => false,
  11343                 };
  11344 
  11345                 const dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
  11346                 const dst_reg = if (dst_mcv.isRegister())
  11347                     dst_mcv.getReg().?
  11348                 else
  11349                     try self.copyToTmpRegister(ty, dst_mcv);
  11350                 const dst_lock = self.register_manager.lockReg(dst_reg);
  11351                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  11352                 const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
  11353 
  11354                 switch (ty.floatBits(self.target.*)) {
  11355                     16 => {
  11356                         assert(self.hasFeature(.f16c));
  11357                         const tmp1_reg =
  11358                             (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
  11359                         const tmp1_mcv = MCValue{ .register = tmp1_reg };
  11360                         const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg);
  11361                         defer self.register_manager.unlockReg(tmp1_lock);
  11362 
  11363                         const tmp2_reg =
  11364                             (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
  11365                         const tmp2_mcv = MCValue{ .register = tmp2_reg };
  11366                         const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
  11367                         defer self.register_manager.unlockReg(tmp2_lock);
  11368 
  11369                         if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
  11370                             .{ .vp_w, .insr },
  11371                             tmp1_reg,
  11372                             dst_reg.to128(),
  11373                             try src_mcv.mem(self, .word),
  11374                             Immediate.u(1),
  11375                         ) else try self.asmRegisterRegisterRegister(
  11376                             .{ .vp_, .unpcklwd },
  11377                             tmp1_reg,
  11378                             dst_reg.to128(),
  11379                             (if (src_mcv.isRegister())
  11380                                 src_mcv.getReg().?
  11381                             else
  11382                                 try self.copyToTmpRegister(ty, src_mcv)).to128(),
  11383                         );
  11384                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
  11385                         try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
  11386                         try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
  11387                     },
  11388                     32 => try self.genBinOpMir(
  11389                         .{ ._ss, .ucomi },
  11390                         ty,
  11391                         .{ .register = dst_reg },
  11392                         src_mcv,
  11393                     ),
  11394                     64 => try self.genBinOpMir(
  11395                         .{ ._sd, .ucomi },
  11396                         ty,
  11397                         .{ .register = dst_reg },
  11398                         src_mcv,
  11399                     ),
  11400                     else => unreachable,
  11401                 }
  11402 
  11403                 break :result switch (if (flipped) op.reverse() else op) {
  11404                     .lt, .lte => unreachable, // required to have been canonicalized to gt(e)
  11405                     .gt => .a,
  11406                     .gte => .ae,
  11407                     .eq => .z_and_np,
  11408                     .neq => .nz_or_p,
  11409                 };
  11410             },
  11411         }
  11412     };
  11413 
  11414     self.eflags_inst = inst;
  11415     return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none });
  11416 }
  11417 
  11418 fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void {
  11419     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  11420     const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
  11421     const dst_mcv = try self.genBinOp(
  11422         inst,
  11423         Air.Inst.Tag.fromCmpOp(extra.compareOperator(), false),
  11424         extra.lhs,
  11425         extra.rhs,
  11426     );
  11427     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
  11428 }
  11429 
  11430 fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
  11431     const mod = self.bin_file.comp.module.?;
  11432     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  11433 
  11434     const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11435     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  11436     defer self.register_manager.unlockReg(addr_lock);
  11437     try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod));
  11438 
  11439     try self.spillEflagsIfOccupied();
  11440 
  11441     const op_ty = self.typeOf(un_op);
  11442     const op_abi_size: u32 = @intCast(op_ty.abiSize(mod));
  11443     const op_mcv = try self.resolveInst(un_op);
  11444     const dst_reg = switch (op_mcv) {
  11445         .register => |reg| reg,
  11446         else => try self.copyToTmpRegister(op_ty, op_mcv),
  11447     };
  11448     try self.asmRegisterMemory(
  11449         .{ ._, .cmp },
  11450         registerAlias(dst_reg, op_abi_size),
  11451         .{
  11452             .base = .{ .reg = addr_reg },
  11453             .mod = .{ .rm = .{ .size = Memory.Size.fromSize(op_abi_size) } },
  11454         },
  11455     );
  11456 
  11457     self.eflags_inst = inst;
  11458     return self.finishAir(inst, .{ .eflags = .b }, .{ un_op, .none, .none });
  11459 }
  11460 
  11461 fn airTry(self: *Self, inst: Air.Inst.Index) !void {
  11462     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  11463     const extra = self.air.extraData(Air.Try, pl_op.payload);
  11464     const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]);
  11465     const operand_ty = self.typeOf(pl_op.operand);
  11466     const result = try self.genTry(inst, pl_op.operand, body, operand_ty, false);
  11467     return self.finishAir(inst, result, .{ .none, .none, .none });
  11468 }
  11469 
  11470 fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void {
  11471     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  11472     const extra = self.air.extraData(Air.TryPtr, ty_pl.payload);
  11473     const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]);
  11474     const operand_ty = self.typeOf(extra.data.ptr);
  11475     const result = try self.genTry(inst, extra.data.ptr, body, operand_ty, true);
  11476     return self.finishAir(inst, result, .{ .none, .none, .none });
  11477 }
  11478 
  11479 fn genTry(
  11480     self: *Self,
  11481     inst: Air.Inst.Index,
  11482     operand: Air.Inst.Ref,
  11483     body: []const Air.Inst.Index,
  11484     operand_ty: Type,
  11485     operand_is_ptr: bool,
  11486 ) !MCValue {
  11487     const liveness_cond_br = self.liveness.getCondBr(inst);
  11488 
  11489     const operand_mcv = try self.resolveInst(operand);
  11490     const is_err_mcv = if (operand_is_ptr)
  11491         try self.isErrPtr(null, operand_ty, operand_mcv)
  11492     else
  11493         try self.isErr(null, operand_ty, operand_mcv);
  11494 
  11495     const reloc = try self.genCondBrMir(Type.anyerror, is_err_mcv);
  11496 
  11497     if (self.liveness.operandDies(inst, 0)) {
  11498         if (operand.toIndex()) |operand_inst| try self.processDeath(operand_inst);
  11499     }
  11500 
  11501     self.scope_generation += 1;
  11502     const state = try self.saveState();
  11503 
  11504     for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
  11505     try self.genBody(body);
  11506     try self.restoreState(state, &.{}, .{
  11507         .emit_instructions = false,
  11508         .update_tracking = true,
  11509         .resurrect = true,
  11510         .close_scope = true,
  11511     });
  11512 
  11513     try self.performReloc(reloc);
  11514 
  11515     for (liveness_cond_br.then_deaths) |death| try self.processDeath(death);
  11516 
  11517     const result = if (self.liveness.isUnused(inst))
  11518         .unreach
  11519     else if (operand_is_ptr)
  11520         try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand_mcv)
  11521     else
  11522         try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand_mcv);
  11523     return result;
  11524 }
  11525 
  11526 fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void {
  11527     const dbg_stmt = self.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt;
  11528     _ = try self.addInst(.{
  11529         .tag = .pseudo,
  11530         .ops = .pseudo_dbg_line_line_column,
  11531         .data = .{ .line_column = .{
  11532             .line = dbg_stmt.line,
  11533             .column = dbg_stmt.column,
  11534         } },
  11535     });
  11536     self.finishAirBookkeeping();
  11537 }
  11538 
  11539 fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void {
  11540     const ty_fn = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_fn;
  11541     _ = try self.addInst(.{
  11542         .tag = .pseudo,
  11543         .ops = .pseudo_dbg_inline_func,
  11544         .data = .{ .func = ty_fn.func },
  11545     });
  11546     self.finishAirBookkeeping();
  11547 }
  11548 
  11549 fn airDbgBlock(self: *Self, inst: Air.Inst.Index) !void {
  11550     _ = inst;
  11551     // TODO emit debug info lexical block
  11552     self.finishAirBookkeeping();
  11553 }
  11554 
  11555 fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void {
  11556     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  11557     const operand = pl_op.operand;
  11558     const ty = self.typeOf(operand);
  11559     const mcv = try self.resolveInst(operand);
  11560 
  11561     const name = self.air.nullTerminatedString(pl_op.payload);
  11562 
  11563     const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
  11564     try self.genVarDbgInfo(tag, ty, mcv, name);
  11565 
  11566     return self.finishAir(inst, .unreach, .{ operand, .none, .none });
  11567 }
  11568 
  11569 fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index {
  11570     const mod = self.bin_file.comp.module.?;
  11571     const abi_size = ty.abiSize(mod);
  11572     switch (mcv) {
  11573         .eflags => |cc| {
  11574             // Here we map the opposites since the jump is to the false branch.
  11575             return self.asmJccReloc(cc.negate(), undefined);
  11576         },
  11577         .register => |reg| {
  11578             try self.spillEflagsIfOccupied();
  11579             try self.asmRegisterImmediate(.{ ._, .@"test" }, reg.to8(), Immediate.u(1));
  11580             return self.asmJccReloc(.z, undefined);
  11581         },
  11582         .immediate,
  11583         .load_frame,
  11584         => {
  11585             try self.spillEflagsIfOccupied();
  11586             if (abi_size <= 8) {
  11587                 const reg = try self.copyToTmpRegister(ty, mcv);
  11588                 return self.genCondBrMir(ty, .{ .register = reg });
  11589             }
  11590             return self.fail("TODO implement condbr when condition is {} with abi larger than 8 bytes", .{mcv});
  11591         },
  11592         else => return self.fail("TODO implement condbr when condition is {s}", .{@tagName(mcv)}),
  11593     }
  11594 }
  11595 
  11596 fn airCondBr(self: *Self, inst: Air.Inst.Index) !void {
  11597     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  11598     const cond = try self.resolveInst(pl_op.operand);
  11599     const cond_ty = self.typeOf(pl_op.operand);
  11600     const extra = self.air.extraData(Air.CondBr, pl_op.payload);
  11601     const then_body: []const Air.Inst.Index =
  11602         @ptrCast(self.air.extra[extra.end..][0..extra.data.then_body_len]);
  11603     const else_body: []const Air.Inst.Index =
  11604         @ptrCast(self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]);
  11605     const liveness_cond_br = self.liveness.getCondBr(inst);
  11606 
  11607     // If the condition dies here in this condbr instruction, process
  11608     // that death now instead of later as this has an effect on
  11609     // whether it needs to be spilled in the branches
  11610     if (self.liveness.operandDies(inst, 0)) {
  11611         if (pl_op.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
  11612     }
  11613 
  11614     self.scope_generation += 1;
  11615     const state = try self.saveState();
  11616     const reloc = try self.genCondBrMir(cond_ty, cond);
  11617 
  11618     for (liveness_cond_br.then_deaths) |death| try self.processDeath(death);
  11619     try self.genBody(then_body);
  11620     try self.restoreState(state, &.{}, .{
  11621         .emit_instructions = false,
  11622         .update_tracking = true,
  11623         .resurrect = true,
  11624         .close_scope = true,
  11625     });
  11626 
  11627     try self.performReloc(reloc);
  11628 
  11629     for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
  11630     try self.genBody(else_body);
  11631     try self.restoreState(state, &.{}, .{
  11632         .emit_instructions = false,
  11633         .update_tracking = true,
  11634         .resurrect = true,
  11635         .close_scope = true,
  11636     });
  11637 
  11638     // We already took care of pl_op.operand earlier, so there's nothing left to do.
  11639     self.finishAirBookkeeping();
  11640 }
  11641 
/// Emits a null check for the optional value `opt_mcv` of type `opt_ty` and
/// returns the flags condition that holds when the optional is null.
///
/// `inst` is recorded as the owner of the flags before the check is emitted.
/// The one exception is a `.register_overflow` operand, which is answered
/// directly from its already tracked condition without emitting anything.
fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue {
    const mod = self.bin_file.comp.module.?;
    switch (opt_mcv) {
        // The flags already carry the answer; hand back the negated condition.
        .register_overflow => |ro| return .{ .eflags = ro.eflags.negate() },
        else => {},
    }

    try self.spillEflagsIfOccupied();

    const pl_ty = opt_ty.optionalChild(mod);

    // Describes which part of the optional is inspected:
    // - when the optional is represented by its payload, the payload itself
    //   (or the pointer field of a slice payload) at offset 0;
    // - otherwise a `bool` located `abiSize(payload)` bytes into the value.
    const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod))
        .{ .off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty }
    else
        .{ .off = @intCast(pl_ty.abiSize(mod)), .ty = Type.bool };

    self.eflags_inst = inst;
    switch (opt_mcv) {
        .none,
        .unreach,
        .dead,
        .undef,
        .immediate,
        .eflags,
        .register_pair,
        .register_offset,
        .register_overflow,
        .lea_direct,
        .lea_got,
        .lea_tlv,
        .lea_frame,
        .lea_symbol,
        .reserved_frame,
        .air_ref,
        => unreachable,

        .register => |opt_reg| {
            if (some_info.off == 0) {
                // The inspected part starts at offset 0: test the register
                // against itself, so `.z` holds when it is zero.
                const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
                const alias_reg = registerAlias(opt_reg, some_abi_size);
                assert(some_abi_size * 8 == alias_reg.bitSize());
                try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg);
                return .{ .eflags = .z };
            }
            // Separate flag: bit-test the bit at `off * 8` within the
            // register; the optional is null when that bit is clear (`.nc`).
            assert(some_info.ty.ip_index == .bool_type);
            const opt_abi_size: u32 = @intCast(opt_ty.abiSize(mod));
            try self.asmRegisterImmediate(
                .{ ._, .bt },
                registerAlias(opt_reg, opt_abi_size),
                Immediate.u(@as(u6, @intCast(some_info.off * 8))),
            );
            return .{ .eflags = .nc };
        },

        .memory,
        .load_symbol,
        .load_got,
        .load_direct,
        .load_tlv,
        => {
            // Load the value's address into a scratch register first, then
            // compare the inspected part with zero through that register.
            const addr_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
            const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
            defer self.register_manager.unlockReg(addr_reg_lock);

            try self.genSetReg(addr_reg, Type.usize, opt_mcv.address());
            const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
            try self.asmMemoryImmediate(
                .{ ._, .cmp },
                .{
                    .base = .{ .reg = addr_reg },
                    .mod = .{ .rm = .{
                        .size = Memory.Size.fromSize(some_abi_size),
                        .disp = some_info.off,
                    } },
                },
                Immediate.u(0),
            );
            return .{ .eflags = .e };
        },

        .indirect, .load_frame => {
            // The value is addressed relative to a register or a frame
            // index, so the inspected part is compared with zero in place.
            const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
            try self.asmMemoryImmediate(
                .{ ._, .cmp },
                switch (opt_mcv) {
                    .indirect => |reg_off| .{
                        .base = .{ .reg = reg_off.reg },
                        .mod = .{ .rm = .{
                            .size = Memory.Size.fromSize(some_abi_size),
                            .disp = reg_off.off + some_info.off,
                        } },
                    },
                    .load_frame => |frame_addr| .{
                        .base = .{ .frame = frame_addr.index },
                        .mod = .{ .rm = .{
                            .size = Memory.Size.fromSize(some_abi_size),
                            .disp = frame_addr.off + some_info.off,
                        } },
                    },
                    else => unreachable,
                },
                Immediate.u(0),
            );
            return .{ .eflags = .e };
        },
    }
}
  11749 
  11750 fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
  11751     const mod = self.bin_file.comp.module.?;
  11752     const opt_ty = ptr_ty.childType(mod);
  11753     const pl_ty = opt_ty.optionalChild(mod);
  11754 
  11755     try self.spillEflagsIfOccupied();
  11756 
  11757     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod))
  11758         .{ .off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty }
  11759     else
  11760         .{ .off = @intCast(pl_ty.abiSize(mod)), .ty = Type.bool };
  11761 
  11762     const ptr_reg = switch (ptr_mcv) {
  11763         .register => |reg| reg,
  11764         else => try self.copyToTmpRegister(ptr_ty, ptr_mcv),
  11765     };
  11766     const ptr_lock = self.register_manager.lockReg(ptr_reg);
  11767     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  11768 
  11769     const some_abi_size: u32 = @intCast(some_info.ty.abiSize(mod));
  11770     try self.asmMemoryImmediate(
  11771         .{ ._, .cmp },
  11772         .{
  11773             .base = .{ .reg = ptr_reg },
  11774             .mod = .{ .rm = .{
  11775                 .size = Memory.Size.fromSize(some_abi_size),
  11776                 .disp = some_info.off,
  11777             } },
  11778         },
  11779         Immediate.u(0),
  11780     );
  11781 
  11782     self.eflags_inst = inst;
  11783     return .{ .eflags = .e };
  11784 }
  11785 
  /// Emits a comparison of the error field of the error union `eu_mcv` (of type
  /// `eu_ty`) against zero.
  ///
  /// Returns `.{ .immediate = 0 }` without emitting anything when the error set of
  /// `eu_ty` is empty; otherwise returns `.{ .eflags = .a }`. When `maybe_inst` is
  /// non-null it is recorded in `self.eflags_inst`.
  /// Only `.register` and `.load_frame` operands are handled; any other operand
  /// kind fails with a TODO message.
  fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
      const mod = self.bin_file.comp.module.?;
      if (eu_ty.errorUnionSet(mod).errorSetIsEmpty(mod)) {
          // always false
          return MCValue{ .immediate = 0 };
      }

      try self.spillEflagsIfOccupied();

      const payload_ty = eu_ty.errorUnionPayload(mod);
      const err_off: u31 = @intCast(errUnionErrorOffset(payload_ty, mod));
      const zero: MCValue = .{ .immediate = 0 };
      switch (eu_mcv) {
          .register => |eu_reg| {
              // Keep the source register locked while a temporary copy is made.
              const maybe_lock = self.register_manager.lockReg(eu_reg);
              defer if (maybe_lock) |lock| self.register_manager.unlockReg(lock);

              const scratch_reg = try self.copyToTmpRegister(eu_ty, eu_mcv);
              const scratch_mcv: MCValue = .{ .register = scratch_reg };
              if (err_off == 0) {
                  try self.truncateRegister(Type.anyerror, scratch_reg);
              } else {
                  // Shift the copy right by the byte offset of the error field.
                  const shift_bits: u6 = @intCast(err_off * 8);
                  try self.genShiftBinOpMir(
                      .{ ._r, .sh },
                      eu_ty,
                      scratch_mcv,
                      .{ .immediate = shift_bits },
                  );
              }
              try self.genBinOpMir(.{ ._, .cmp }, Type.anyerror, scratch_mcv, zero);
          },
          .load_frame => |frame_addr| {
              // Compare the error field in place at its offset within the frame slot.
              const err_mcv: MCValue = .{ .load_frame = .{
                  .index = frame_addr.index,
                  .off = frame_addr.off + err_off,
              } };
              try self.genBinOpMir(.{ ._, .cmp }, Type.anyerror, err_mcv, zero);
          },
          else => return self.fail("TODO implement isErr for {}", .{eu_mcv}),
      }

      if (maybe_inst) |inst| self.eflags_inst = inst;
      return MCValue{ .eflags = .a };
  }
  11832 
  /// Emits a memory comparison of the error field of the error union pointed to
  /// by `ptr_mcv` (a pointer of type `ptr_ty`) against zero.
  ///
  /// Returns `.{ .immediate = 0 }` without emitting anything when the pointee's
  /// error set is empty; otherwise returns `.{ .eflags = .a }`. When `maybe_inst`
  /// is non-null it is recorded in `self.eflags_inst`.
  fn isErrPtr(self: *Self, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
      const mod = self.bin_file.comp.module.?;
      const eu_ty = ptr_ty.childType(mod);
      if (eu_ty.errorUnionSet(mod).errorSetIsEmpty(mod)) {
          // always false
          return MCValue{ .immediate = 0 };
      }

      try self.spillEflagsIfOccupied();

      // Use the pointer's register directly when it already lives in one;
      // otherwise copy the pointer into a temporary register.
      const base_reg = if (ptr_mcv == .register)
          ptr_mcv.register
      else
          try self.copyToTmpRegister(ptr_ty, ptr_mcv);
      const base_lock = self.register_manager.lockReg(base_reg);
      defer if (base_lock) |lock| self.register_manager.unlockReg(lock);

      const payload_ty = eu_ty.errorUnionPayload(mod);
      const err_off: u31 = @intCast(errUnionErrorOffset(payload_ty, mod));
      try self.asmMemoryImmediate(.{ ._, .cmp }, .{
          .base = .{ .reg = base_reg },
          .mod = .{ .rm = .{
              .size = self.memSize(Type.anyerror),
              .disp = err_off,
          } },
      }, Immediate.u(0));

      if (maybe_inst) |inst| self.eflags_inst = inst;
      return MCValue{ .eflags = .a };
  }
  11864 
  /// Inverse of `isErr`: runs `isErr` on the same operands and flips its answer.
  ///
  /// An `.eflags` result (asserted to be `.a`) is returned with the condition
  /// negated; an `.immediate` result (asserted to be 0) becomes immediate 1.
  /// Any other result kind from `isErr` is treated as unreachable.
  fn isNonErr(self: *Self, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
      return switch (try self.isErr(inst, eu_ty, eu_mcv)) {
          .eflags => |cc| flags: {
              assert(cc == .a);
              break :flags MCValue{ .eflags = cc.negate() };
          },
          .immediate => |imm| known: {
              assert(imm == 0);
              break :known MCValue{ .immediate = @intFromBool(imm == 0) };
          },
          else => unreachable,
      };
  }
  11879 
  /// Inverse of `isErrPtr`: runs `isErrPtr` on the same operands and flips its
  /// answer.
  ///
  /// An `.eflags` result (asserted to be `.a`) is returned with the condition
  /// negated; an `.immediate` result (asserted to be 0) becomes immediate 1.
  /// Any other result kind from `isErrPtr` is treated as unreachable.
  fn isNonErrPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
      return switch (try self.isErrPtr(inst, ptr_ty, ptr_mcv)) {
          .eflags => |cc| flags: {
              assert(cc == .a);
              break :flags MCValue{ .eflags = cc.negate() };
          },
          .immediate => |imm| known: {
              assert(imm == 0);
              break :known MCValue{ .immediate = @intFromBool(imm == 0) };
          },
          else => unreachable,
      };
  }
  11894 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, passes it to
  /// `isNull`, and finishes the instruction with that result.
  fn airIsNull(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const result = try self.isNull(inst, self.typeOf(operand_ref), operand_mcv);
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11902 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, passes it to
  /// `isNullPtr`, and finishes the instruction with that result.
  fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const result = try self.isNullPtr(inst, self.typeOf(operand_ref), operand_mcv);
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11910 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, runs `isNull`
  /// on it, and finishes the instruction with the negated flags condition.
  /// Any `isNull` result other than `.eflags` is treated as unreachable.
  fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const is_null_mcv = try self.isNull(inst, self.typeOf(operand_ref), operand_mcv);
      const result: MCValue = switch (is_null_mcv) {
          .eflags => |cc| .{ .eflags = cc.negate() },
          else => unreachable,
      };
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11921 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, runs
  /// `isNullPtr` on it, and finishes the instruction with the negated flags
  /// condition. Any `isNullPtr` result other than `.eflags` is treated as
  /// unreachable.
  fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const is_null_mcv = try self.isNullPtr(inst, self.typeOf(operand_ref), operand_mcv);
      const result: MCValue = switch (is_null_mcv) {
          .eflags => |cc| .{ .eflags = cc.negate() },
          else => unreachable,
      };
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11932 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, passes it to
  /// `isErr`, and finishes the instruction with that result.
  fn airIsErr(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const result = try self.isErr(inst, self.typeOf(operand_ref), operand_mcv);
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11940 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, passes it to
  /// `isErrPtr`, and finishes the instruction with that result.
  fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const result = try self.isErrPtr(inst, self.typeOf(operand_ref), operand_mcv);
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11948 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, passes it to
  /// `isNonErr`, and finishes the instruction with that result.
  fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const result = try self.isNonErr(inst, self.typeOf(operand_ref), operand_mcv);
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11956 
  /// Handles AIR instruction `inst`: resolves its `un_op` operand, passes it to
  /// `isNonErrPtr`, and finishes the instruction with that result.
  fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void {
      const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const operand_mcv = try self.resolveInst(operand_ref);
      const result = try self.isNonErrPtr(inst, self.typeOf(operand_ref), operand_mcv);
      return self.finishAir(inst, result, .{ operand_ref, .none, .none });
  }
  11964 
  /// Generates the body of the loop instruction `inst`, followed by a jump back
  /// to the first MIR instruction of that body.
  ///
  /// The state saved before the body is restored (emitting instructions, without
  /// updating tracking) just before the backward jump is emitted.
  fn airLoop(self: *Self, inst: Air.Inst.Index) !void {
      // A loop is a setup to be able to jump back to the beginning.
      const payload_index = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
      const block = self.air.extraData(Air.Block, payload_index);
      const body_len = block.data.body_len;
      const body: []const Air.Inst.Index = @ptrCast(self.air.extra[block.end..][0..body_len]);

      self.scope_generation += 1;
      const entry_state = try self.saveState();

      // The next MIR instruction to be emitted is where the back-edge lands.
      const loop_head: Mir.Inst.Index = @intCast(self.mir_instructions.len);
      try self.genBody(body);
      try self.restoreState(entry_state, &.{}, .{
          .emit_instructions = true,
          .update_tracking = false,
          .resurrect = false,
          .close_scope = true,
      });
      _ = try self.asmJmpReloc(loop_head);

      self.finishAirBookkeeping();
  }
  11986 
  /// Generates the body of the block instruction `inst` and then patches every
  /// jump recorded for the block so that it lands just past the body.
  ///
  /// The block is registered in `self.inst_tracking` (initially `.unreach`) and in
  /// `self.blocks` before its body is generated. If any relocations were recorded
  /// for the block, its state is restored (tracking only, no instructions
  /// emitted) before they are patched.
  fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
      // A block is a setup to be able to jump to the end.
      const tracking_index = self.inst_tracking.count();
      self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(.unreach));

      self.scope_generation += 1;
      try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() });
      const liveness = self.liveness.getBlock(inst);

      const payload_index = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
      const block = self.air.extraData(Air.Block, payload_index);
      const body: []const Air.Inst.Index = @ptrCast(self.air.extra[block.end..][0..block.data.body_len]);
      try self.genBody(body);

      var finished = self.blocks.fetchRemove(inst).?;
      defer finished.value.deinit(self.gpa);
      const pending_jumps = finished.value.relocs.items;
      if (pending_jumps.len != 0) {
          try self.restoreState(finished.value.state, liveness.deaths, .{
              .emit_instructions = false,
              .update_tracking = true,
              .resurrect = true,
              .close_scope = true,
          });
          for (pending_jumps) |reloc| try self.performReloc(reloc);
      }

      // The block's tracking entry must still sit at the index it was inserted at.
      if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == tracking_index);
      const tracking = &self.inst_tracking.values()[tracking_index];
      if (self.liveness.isUnused(inst)) try tracking.die(self, inst);
      self.getValueIfFree(tracking.short, inst);
      self.finishAirBookkeeping();
  }
  12019 
  /// Generates code for the switch instruction `inst`: one conditional-jump chain
  /// plus body per case, followed by the else body when it is non-empty.
  ///
  /// For each case, every item gets one conditional jump. All jumps except the
  /// last are taken when the item matches and are patched to land on the case
  /// body; the last jump uses the negated condition and is patched to land just
  /// past the body, i.e. on whatever is emitted next. After each body the state
  /// saved before the first case is restored (tracking only).
  ///
  /// NOTE(review): `relocs.len - 1` assumes every case has at least one item —
  /// confirm this is guaranteed by whoever produces the AIR.
  fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void {
      const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
      const condition = try self.resolveInst(pl_op.operand);
      const condition_ty = self.typeOf(pl_op.operand);
      const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload);
      var extra_index: usize = switch_br.end;
      var case_i: u32 = 0;
      // One death list per case, plus one extra entry used for the else body.
      const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.data.cases_len + 1);
      defer self.gpa.free(liveness.deaths);

      // If the condition dies here in this switch instruction, process
      // that death now instead of later as this has an effect on
      // whether it needs to be spilled in the branches
      if (self.liveness.operandDies(inst, 0)) {
          if (pl_op.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
      }

      self.scope_generation += 1;
      const state = try self.saveState();

      while (case_i < switch_br.data.cases_len) : (case_i += 1) {
          // Each case is laid out in `extra` as: header, items, body.
          const case = self.air.extraData(Air.SwitchBr.Case, extra_index);
          const items: []const Air.Inst.Ref =
              @ptrCast(self.air.extra[case.end..][0..case.data.items_len]);
          const case_body: []const Air.Inst.Index =
              @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]);
          extra_index = case.end + items.len + case_body.len;

          // The slice is only written through element pointers, so the binding
          // itself is never reassigned and can be `const`.
          const relocs = try self.gpa.alloc(Mir.Inst.Index, items.len);
          defer self.gpa.free(relocs);

          try self.spillEflagsIfOccupied();
          for (items, relocs, 0..) |item, *reloc, i| {
              const item_mcv = try self.resolveInst(item);
              const cc: Condition = switch (condition) {
                  // The condition already lives in the flags: the item must be the
                  // immediate 0 or 1, which selects the negated or plain condition.
                  .eflags => |cc| switch (item_mcv.immediate) {
                      0 => cc.negate(),
                      1 => cc,
                      else => unreachable,
                  },
                  // Otherwise compare the condition with the item and test for equality.
                  else => cc: {
                      try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv);
                      break :cc .e;
                  },
              };
              // Non-final items jump on a match; the final item jumps on a mismatch.
              reloc.* = try self.asmJccReloc(if (i < relocs.len - 1) cc else cc.negate(), undefined);
          }

          for (liveness.deaths[case_i]) |operand| try self.processDeath(operand);

          // Matching jumps land here, at the start of the case body.
          for (relocs[0 .. relocs.len - 1]) |reloc| try self.performReloc(reloc);
          try self.genBody(case_body);
          try self.restoreState(state, &.{}, .{
              .emit_instructions = false,
              .update_tracking = true,
              .resurrect = true,
              .close_scope = true,
          });

          // The mismatch jump of the final item lands here, past the case body.
          try self.performReloc(relocs[relocs.len - 1]);
      }

      if (switch_br.data.else_body_len > 0) {
          const else_body: []const Air.Inst.Index =
              @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]);

          // The else branch uses the last death list.
          const else_deaths = liveness.deaths.len - 1;
          for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand);

          try self.genBody(else_body);
          try self.restoreState(state, &.{}, .{
              .emit_instructions = false,
              .update_tracking = true,
              .resurrect = true,
              .close_scope = true,
          });
      }

      // We already took care of pl_op.operand earlier, so there's nothing left to do
      self.finishAirBookkeeping();
  }
  12101 
  /// Patches the jump at MIR index `reloc` so that it targets the next MIR
  /// instruction to be emitted (the current length of `self.mir_instructions`).
  ///
  /// The instruction must be tagged `.j` or `.jmp`, or be a `.pseudo` whose ops
  /// are `.pseudo_j_z_and_np_inst` or `.pseudo_j_nz_or_p_inst`; anything else is
  /// treated as unreachable.
  fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void {
      const target: u32 = @intCast(self.mir_instructions.len);
      const is_jump = switch (self.mir_instructions.items(.tag)[reloc]) {
          .j, .jmp => true,
          .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) {
              .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => true,
              else => false,
          },
          else => false,
      };
      if (!is_jump) unreachable;
      self.mir_instructions.items(.data)[reloc].inst.inst = target;
  }
  12114 
  /// Generates code for the branch instruction `inst`, which leaves the block
  /// `br.block_inst` carrying `br.operand` as the block's result.
  ///
  /// The first branch to a block (no relocations recorded yet) picks where the
  /// block result lives and saves the block's state; every later branch moves
  /// its operand into that same location and restores the saved state. In both
  /// cases a jump with an undefined target is emitted and recorded in the
  /// block's `relocs`.
  fn airBr(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.comp.module.?;
      const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br;
      const target_block = br.block_inst;

      const block_ty = self.typeOfIndex(target_block);
      const result_needed =
          block_ty.hasRuntimeBitsIgnoreComptime(mod) and !self.liveness.isUnused(target_block);
      const block_tracking = self.inst_tracking.getPtr(target_block).?;
      const block_data = self.blocks.getPtr(target_block).?;
      const first_br = block_data.relocs.items.len == 0;
      const block_result: MCValue = if (!result_needed) .none else result: {
          if (!first_br) try self.getValue(block_tracking.short, null);
          const src_mcv = try self.resolveInst(br.operand);

          if (!self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, target_block)) {
              // The operand cannot be reused: copy it into the block result,
              // allocating that location if this is the first branch.
              const dst_mcv = if (first_br)
                  try self.allocRegOrMem(target_block, true)
              else dst: {
                  try self.getValue(block_tracking.short, target_block);
                  break :dst block_tracking.short;
              };
              try self.genCopy(block_ty, dst_mcv, try self.resolveInst(br.operand));
              break :result dst_mcv;
          }

          // The operand is reused; on the first branch it becomes the block result.
          if (first_br) break :result src_mcv;

          try self.getValue(block_tracking.short, target_block);
          // .long = .none to avoid merging operand and block result stack frames.
          const current_tracking: InstTracking = .{ .long = .none, .short = src_mcv };
          try current_tracking.materializeUnsafe(self, target_block, block_tracking.*);
          for (current_tracking.getRegs()) |src_reg| self.register_manager.freeReg(src_reg);
          break :result block_tracking.short;
      };

      // Process operand death so that it is properly accounted for in the State below.
      if (self.liveness.operandDies(inst, 0)) {
          if (br.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
      }

      if (first_br) {
          block_tracking.* = InstTracking.init(block_result);
          try self.saveRetroactiveState(&block_data.state);
      } else {
          try self.restoreState(block_data.state, &.{}, .{
              .emit_instructions = true,
              .update_tracking = false,
              .resurrect = false,
              .close_scope = false,
          });
      }

      // Emit a jump with a relocation. It will be patched up after the block ends.
      // Leave the jump offset undefined
      const pending_jump = try self.asmJmpReloc(undefined);
      try block_data.relocs.append(self.gpa, pending_jump);

      // Stop tracking block result without forgetting tracking info
      try self.freeValue(block_tracking.short);

      self.finishAirBookkeeping();
  }
  12175 
  12176 fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
  12177     const mod = self.bin_file.comp.module.?;
  12178     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  12179     const extra = self.air.extraData(Air.Asm, ty_pl.payload);
  12180     const clobbers_len: u31 = @truncate(extra.data.flags);
  12181     var extra_i: usize = extra.end;
  12182     const outputs: []const Air.Inst.Ref =
  12183         @ptrCast(self.air.extra[extra_i..][0..extra.data.outputs_len]);
  12184     extra_i += outputs.len;
  12185     const inputs: []const Air.Inst.Ref = @ptrCast(self.air.extra[extra_i..][0..extra.data.inputs_len]);
  12186     extra_i += inputs.len;
  12187 
  12188     var result: MCValue = .none;
  12189     var args = std.ArrayList(MCValue).init(self.gpa);
  12190     try args.ensureTotalCapacity(outputs.len + inputs.len);
  12191     defer {
  12192         for (args.items) |arg| if (arg.getReg()) |reg| self.register_manager.unlockReg(.{
  12193             .tracked_index = RegisterManager.indexOfRegIntoTracked(reg) orelse continue,
  12194         });
  12195         args.deinit();
  12196     }
  12197     var arg_map = std.StringHashMap(u8).init(self.gpa);
  12198     try arg_map.ensureTotalCapacity(@intCast(outputs.len + inputs.len));
  12199     defer arg_map.deinit();
  12200 
  12201     var outputs_extra_i = extra_i;
  12202     for (outputs) |output| {
  12203         const extra_bytes = mem.sliceAsBytes(self.air.extra[extra_i..]);
  12204         const constraint = mem.sliceTo(mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
  12205         const name = mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
  12206         // This equation accounts for the fact that even if we have exactly 4 bytes
  12207         // for the string, we still use the next u32 for the null terminator.
  12208         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
  12209 
  12210         const maybe_inst = switch (output) {
  12211             .none => inst,
  12212             else => null,
  12213         };
  12214         const ty = switch (output) {
  12215             .none => self.typeOfIndex(inst),
  12216             else => self.typeOf(output).childType(mod),
  12217         };
  12218         const is_read = switch (constraint[0]) {
  12219             '=' => false,
  12220             '+' => read: {
  12221                 if (output == .none) return self.fail(
  12222                     "read-write constraint unsupported for asm result: '{s}'",
  12223                     .{constraint},
  12224                 );
  12225                 break :read true;
  12226             },
  12227             else => return self.fail("invalid constraint: '{s}'", .{constraint}),
  12228         };
  12229         const is_early_clobber = constraint[1] == '&';
  12230         const rest = constraint[@as(usize, 1) + @intFromBool(is_early_clobber) ..];
  12231         const arg_mcv: MCValue = arg_mcv: {
  12232             const arg_maybe_reg: ?Register = if (mem.eql(u8, rest, "r") or
  12233                 mem.eql(u8, rest, "f") or mem.eql(u8, rest, "x"))
  12234                 registerAlias(
  12235                     self.register_manager.tryAllocReg(maybe_inst, switch (rest[0]) {
  12236                         'r' => abi.RegisterClass.gp,
  12237                         'f' => abi.RegisterClass.x87,
  12238                         'x' => abi.RegisterClass.sse,
  12239                         else => unreachable,
  12240                     }) orelse return self.fail("ran out of registers lowering inline asm", .{}),
  12241                     @intCast(ty.abiSize(mod)),
  12242                 )
  12243             else if (mem.eql(u8, rest, "m"))
  12244                 if (output != .none) null else return self.fail(
  12245                     "memory constraint unsupported for asm result: '{s}'",
  12246                     .{constraint},
  12247                 )
  12248             else if (mem.eql(u8, rest, "g") or
  12249                 mem.eql(u8, rest, "rm") or mem.eql(u8, rest, "mr") or
  12250                 mem.eql(u8, rest, "r,m") or mem.eql(u8, rest, "m,r"))
  12251                 self.register_manager.tryAllocReg(maybe_inst, abi.RegisterClass.gp) orelse
  12252                     if (output != .none)
  12253                     null
  12254                 else
  12255                     return self.fail("ran out of registers lowering inline asm", .{})
  12256             else if (mem.startsWith(u8, rest, "{") and mem.endsWith(u8, rest, "}"))
  12257                 parseRegName(rest["{".len .. rest.len - "}".len]) orelse
  12258                     return self.fail("invalid register constraint: '{s}'", .{constraint})
  12259             else if (rest.len == 1 and std.ascii.isDigit(rest[0])) {
  12260                 const index = std.fmt.charToDigit(rest[0], 10) catch unreachable;
  12261                 if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{
  12262                     constraint,
  12263                 });
  12264                 break :arg_mcv args.items[index];
  12265             } else return self.fail("invalid constraint: '{s}'", .{constraint});
  12266             break :arg_mcv if (arg_maybe_reg) |reg| .{ .register = reg } else arg: {
  12267                 const ptr_mcv = try self.resolveInst(output);
  12268                 switch (ptr_mcv) {
  12269                     .immediate => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |_|
  12270                         break :arg ptr_mcv.deref(),
  12271                     .register, .register_offset, .lea_frame => break :arg ptr_mcv.deref(),
  12272                     else => {},
  12273                 }
  12274                 break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(Type.usize, ptr_mcv) } };
  12275             };
  12276         };
  12277         if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| {
  12278             _ = self.register_manager.lockReg(reg);
  12279         };
  12280         if (!mem.eql(u8, name, "_"))
  12281             arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len));
  12282         args.appendAssumeCapacity(arg_mcv);
  12283         if (output == .none) result = arg_mcv;
  12284         if (is_read) try self.load(arg_mcv, self.typeOf(output), .{ .air_ref = output });
  12285     }
  12286 
  12287     for (inputs) |input| {
  12288         const input_bytes = mem.sliceAsBytes(self.air.extra[extra_i..]);
  12289         const constraint = mem.sliceTo(input_bytes, 0);
  12290         const name = mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
  12291         // This equation accounts for the fact that even if we have exactly 4 bytes
  12292         // for the string, we still use the next u32 for the null terminator.
  12293         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
  12294 
  12295         const ty = self.typeOf(input);
  12296         const input_mcv = try self.resolveInst(input);
  12297         const arg_mcv: MCValue = if (mem.eql(u8, constraint, "r") or
  12298             mem.eql(u8, constraint, "f") or mem.eql(u8, constraint, "x"))
  12299         arg: {
  12300             const rc = switch (constraint[0]) {
  12301                 'r' => abi.RegisterClass.gp,
  12302                 'f' => abi.RegisterClass.x87,
  12303                 'x' => abi.RegisterClass.sse,
  12304                 else => unreachable,
  12305             };
  12306             if (input_mcv.isRegister() and
  12307                 rc.isSet(RegisterManager.indexOfRegIntoTracked(input_mcv.getReg().?).?))
  12308                 break :arg input_mcv;
  12309             const reg = try self.register_manager.allocReg(null, rc);
  12310             try self.genSetReg(reg, ty, input_mcv);
  12311             break :arg .{ .register = registerAlias(reg, @intCast(ty.abiSize(mod))) };
  12312         } else if (mem.eql(u8, constraint, "i") or mem.eql(u8, constraint, "n"))
  12313             switch (input_mcv) {
  12314                 .immediate => |imm| .{ .immediate = imm },
  12315                 else => return self.fail("immediate operand requires comptime value: '{s}'", .{
  12316                     constraint,
  12317                 }),
  12318             }
  12319         else if (mem.eql(u8, constraint, "m")) arg: {
  12320             switch (input_mcv) {
  12321                 .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |_|
  12322                     break :arg input_mcv,
  12323                 .indirect, .load_frame => break :arg input_mcv,
  12324                 .load_symbol, .load_direct, .load_got, .load_tlv => {},
  12325                 else => {
  12326                     const temp_mcv = try self.allocTempRegOrMem(ty, false);
  12327                     try self.genCopy(ty, temp_mcv, input_mcv);
  12328                     break :arg temp_mcv;
  12329                 },
  12330             }
  12331             const addr_reg = self.register_manager.tryAllocReg(null, abi.RegisterClass.gp) orelse {
  12332                 const temp_mcv = try self.allocTempRegOrMem(ty, false);
  12333                 try self.genCopy(ty, temp_mcv, input_mcv);
  12334                 break :arg temp_mcv;
  12335             };
  12336             try self.genSetReg(addr_reg, Type.usize, input_mcv.address());
  12337             break :arg .{ .indirect = .{ .reg = addr_reg } };
  12338         } else if (mem.eql(u8, constraint, "g") or
  12339             mem.eql(u8, constraint, "rm") or mem.eql(u8, constraint, "mr") or
  12340             mem.eql(u8, constraint, "r,m") or mem.eql(u8, constraint, "m,r"))
  12341         arg: {
  12342             switch (input_mcv) {
  12343                 .register, .indirect, .load_frame => break :arg input_mcv,
  12344                 .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |_|
  12345                     break :arg input_mcv,
  12346                 else => {},
  12347             }
  12348             const temp_mcv = try self.allocTempRegOrMem(ty, true);
  12349             try self.genCopy(ty, temp_mcv, input_mcv);
  12350             break :arg temp_mcv;
  12351         } else if (mem.eql(u8, constraint, "X"))
  12352             input_mcv
  12353         else if (mem.startsWith(u8, constraint, "{") and mem.endsWith(u8, constraint, "}")) arg: {
  12354             const reg = parseRegName(constraint["{".len .. constraint.len - "}".len]) orelse
  12355                 return self.fail("invalid register constraint: '{s}'", .{constraint});
  12356             try self.register_manager.getReg(reg, null);
  12357             try self.genSetReg(reg, ty, input_mcv);
  12358             break :arg .{ .register = reg };
  12359         } else if (constraint.len == 1 and std.ascii.isDigit(constraint[0])) arg: {
  12360             const index = std.fmt.charToDigit(constraint[0], 10) catch unreachable;
  12361             if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{constraint});
  12362             break :arg args.items[index];
  12363         } else return self.fail("invalid constraint: '{s}'", .{constraint});
  12364         if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| {
  12365             _ = self.register_manager.lockReg(reg);
  12366         };
  12367         if (!mem.eql(u8, name, "_"))
  12368             arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len));
  12369         args.appendAssumeCapacity(arg_mcv);
  12370     }
  12371 
  12372     {
  12373         var clobber_i: u32 = 0;
  12374         while (clobber_i < clobbers_len) : (clobber_i += 1) {
  12375             const clobber = mem.sliceTo(mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
  12376             // This equation accounts for the fact that even if we have exactly 4 bytes
  12377             // for the string, we still use the next u32 for the null terminator.
  12378             extra_i += clobber.len / 4 + 1;
  12379 
  12380             if (std.mem.eql(u8, clobber, "") or std.mem.eql(u8, clobber, "memory")) {
  12381                 // ok, sure
  12382             } else if (std.mem.eql(u8, clobber, "cc") or
  12383                 std.mem.eql(u8, clobber, "flags") or
  12384                 std.mem.eql(u8, clobber, "eflags") or
  12385                 std.mem.eql(u8, clobber, "rflags"))
  12386             {
  12387                 try self.spillEflagsIfOccupied();
  12388             } else {
  12389                 try self.register_manager.getReg(parseRegName(clobber) orelse
  12390                     return self.fail("invalid clobber: '{s}'", .{clobber}), null);
  12391             }
  12392         }
  12393     }
  12394 
  12395     const Label = struct {
  12396         target: Mir.Inst.Index = undefined,
  12397         pending_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
  12398 
  12399         const Kind = enum { definition, reference };
  12400 
  12401         fn isValid(kind: Kind, name: []const u8) bool {
  12402             for (name, 0..) |c, i| switch (c) {
  12403                 else => return false,
  12404                 '$' => if (i == 0) return false,
  12405                 '.' => {},
  12406                 '0'...'9' => if (i == 0) switch (kind) {
  12407                     .definition => if (name.len != 1) return false,
  12408                     .reference => {
  12409                         if (name.len != 2) return false;
  12410                         switch (name[1]) {
  12411                             else => return false,
  12412                             'B', 'F', 'b', 'f' => {},
  12413                         }
  12414                     },
  12415                 },
  12416                 '@', 'A'...'Z', '_', 'a'...'z' => {},
  12417             };
  12418             return name.len > 0;
  12419         }
  12420     };
  12421     var labels: std.StringHashMapUnmanaged(Label) = .{};
  12422     defer {
  12423         var label_it = labels.valueIterator();
  12424         while (label_it.next()) |label| label.pending_relocs.deinit(self.gpa);
  12425         labels.deinit(self.gpa);
  12426     }
  12427 
  12428     const asm_source = mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len];
  12429     var line_it = mem.tokenizeAny(u8, asm_source, "\n\r;");
  12430     next_line: while (line_it.next()) |line| {
  12431         var mnem_it = mem.tokenizeAny(u8, line, " \t");
  12432         var prefix: Instruction.Prefix = .none;
  12433         const mnem_str = while (mnem_it.next()) |mnem_str| {
  12434             if (mem.startsWith(u8, mnem_str, "#")) continue :next_line;
  12435             if (mem.startsWith(u8, mnem_str, "//")) continue :next_line;
  12436             if (std.meta.stringToEnum(Instruction.Prefix, mnem_str)) |pre| {
  12437                 if (prefix != .none) return self.fail("extra prefix: '{s}'", .{mnem_str});
  12438                 prefix = pre;
  12439                 continue;
  12440             }
  12441             if (!mem.endsWith(u8, mnem_str, ":")) break mnem_str;
  12442             const label_name = mnem_str[0 .. mnem_str.len - ":".len];
  12443             if (!Label.isValid(.definition, label_name))
  12444                 return self.fail("invalid label: '{s}'", .{label_name});
  12445             const label_gop = try labels.getOrPut(self.gpa, label_name);
  12446             if (!label_gop.found_existing) label_gop.value_ptr.* = .{} else {
  12447                 const anon = std.ascii.isDigit(label_name[0]);
  12448                 if (!anon and label_gop.value_ptr.pending_relocs.items.len == 0)
  12449                     return self.fail("redefined label: '{s}'", .{label_name});
  12450                 for (label_gop.value_ptr.pending_relocs.items) |pending_reloc|
  12451                     try self.performReloc(pending_reloc);
  12452                 if (anon)
  12453                     label_gop.value_ptr.pending_relocs.clearRetainingCapacity()
  12454                 else
  12455                     label_gop.value_ptr.pending_relocs.clearAndFree(self.gpa);
  12456             }
  12457             label_gop.value_ptr.target = @intCast(self.mir_instructions.len);
  12458         } else continue;
  12459 
  12460         var mnem_size: ?Memory.Size = null;
  12461         const mnem_tag = mnem: {
  12462             mnem_size = if (mem.endsWith(u8, mnem_str, "b"))
  12463                 .byte
  12464             else if (mem.endsWith(u8, mnem_str, "w"))
  12465                 .word
  12466             else if (mem.endsWith(u8, mnem_str, "l"))
  12467                 .dword
  12468             else if (mem.endsWith(u8, mnem_str, "q"))
  12469                 .qword
  12470             else if (mem.endsWith(u8, mnem_str, "t"))
  12471                 .tbyte
  12472             else
  12473                 break :mnem null;
  12474             break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str[0 .. mnem_str.len - 1]);
  12475         } orelse mnem: {
  12476             mnem_size = null;
  12477             break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str);
  12478         } orelse return self.fail("invalid mnemonic: '{s}'", .{mnem_str});
  12479         if (@as(?Memory.Size, switch (mnem_tag) {
  12480             .fldenv, .fnstenv, .fstenv => .none,
  12481             .ldmxcsr, .stmxcsr, .vldmxcsr, .vstmxcsr => .dword,
  12482             else => null,
  12483         })) |fixed_mnem_size| {
  12484             if (mnem_size) |size| if (size != fixed_mnem_size)
  12485                 return self.fail("invalid size: '{s}'", .{mnem_str});
  12486             mnem_size = fixed_mnem_size;
  12487         }
  12488         const mnem_name = @tagName(mnem_tag);
  12489         const mnem_fixed_tag: Mir.Inst.FixedTag = for (std.enums.values(Mir.Inst.Fixes)) |fixes| {
  12490             const fixes_name = @tagName(fixes);
  12491             const space_i = mem.indexOfScalar(u8, fixes_name, ' ');
  12492             const fixes_prefix = if (space_i) |i|
  12493                 std.meta.stringToEnum(Instruction.Prefix, fixes_name[0..i]).?
  12494             else
  12495                 .none;
  12496             if (fixes_prefix != prefix) continue;
  12497             const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..];
  12498             const wildcard_i = mem.indexOfScalar(u8, pattern, '_').?;
  12499             const mnem_prefix = pattern[0..wildcard_i];
  12500             const mnem_suffix = pattern[wildcard_i + "_".len ..];
  12501             if (!mem.startsWith(u8, mnem_name, mnem_prefix)) continue;
  12502             if (!mem.endsWith(u8, mnem_name, mnem_suffix)) continue;
  12503             break .{ fixes, std.meta.stringToEnum(
  12504                 Mir.Inst.Tag,
  12505                 mnem_name[mnem_prefix.len .. mnem_name.len - mnem_suffix.len],
  12506             ) orelse continue };
  12507         } else {
  12508             assert(prefix != .none); // no combination of fixes produced a known mnemonic
  12509             return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{
  12510                 @tagName(prefix), mnem_str,
  12511             });
  12512         };
  12513 
  12514         const Operand = union(enum) {
  12515             none,
  12516             reg: Register,
  12517             mem: Memory,
  12518             imm: Immediate,
  12519             inst: Mir.Inst.Index,
  12520         };
  12521         var ops: [4]Operand = .{.none} ** 4;
  12522 
  12523         var last_op = false;
  12524         var op_it = mem.splitScalar(u8, mnem_it.rest(), ',');
  12525         next_op: for (&ops) |*op| {
  12526             const op_str = while (!last_op) {
  12527                 const full_str = op_it.next() orelse break :next_op;
  12528                 const code_str = if (mem.indexOfScalar(u8, full_str, '#') orelse
  12529                     mem.indexOf(u8, full_str, "//")) |comment|
  12530                 code: {
  12531                     last_op = true;
  12532                     break :code full_str[0..comment];
  12533                 } else full_str;
  12534                 const trim_str = mem.trim(u8, code_str, " \t*");
  12535                 if (trim_str.len > 0) break trim_str;
  12536             } else break;
  12537             if (mem.startsWith(u8, op_str, "%%")) {
  12538                 const colon = mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':');
  12539                 const reg = parseRegName(op_str["%%".len .. colon orelse op_str.len]) orelse
  12540                     return self.fail("invalid register: '{s}'", .{op_str});
  12541                 if (colon) |colon_pos| {
  12542                     const disp = std.fmt.parseInt(i32, op_str[colon_pos + ":".len ..], 0) catch
  12543                         return self.fail("invalid displacement: '{s}'", .{op_str});
  12544                     op.* = .{ .mem = .{
  12545                         .base = .{ .reg = reg },
  12546                         .mod = .{ .rm = .{
  12547                             .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}),
  12548                             .disp = disp,
  12549                         } },
  12550                     } };
  12551                 } else {
  12552                     if (mnem_size) |size| if (reg.bitSize() != size.bitSize())
  12553                         return self.fail("invalid register size: '{s}'", .{op_str});
  12554                     op.* = .{ .reg = reg };
  12555                 }
  12556             } else if (mem.startsWith(u8, op_str, "%[") and mem.endsWith(u8, op_str, "]")) {
  12557                 const colon = mem.indexOfScalarPos(u8, op_str, "%[".len, ':');
  12558                 const modifier = if (colon) |colon_pos|
  12559                     op_str[colon_pos + ":".len .. op_str.len - "]".len]
  12560                 else
  12561                     "";
  12562                 op.* = switch (args.items[
  12563                     arg_map.get(op_str["%[".len .. colon orelse op_str.len - "]".len]) orelse
  12564                         return self.fail("no matching constraint: '{s}'", .{op_str})
  12565                 ]) {
  12566                     .immediate => |imm| if (mem.eql(u8, modifier, "") or mem.eql(u8, modifier, "c"))
  12567                         .{ .imm = Immediate.u(imm) }
  12568                     else
  12569                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12570                     .register => |reg| if (mem.eql(u8, modifier, ""))
  12571                         .{ .reg = reg }
  12572                     else
  12573                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12574                     .memory => |addr| if (mem.eql(u8, modifier, "") or mem.eql(u8, modifier, "P"))
  12575                         .{ .mem = .{
  12576                             .base = .{ .reg = .ds },
  12577                             .mod = .{ .rm = .{
  12578                                 .size = mnem_size orelse
  12579                                     return self.fail("unknown size: '{s}'", .{op_str}),
  12580                                 .disp = @intCast(@as(i64, @bitCast(addr))),
  12581                             } },
  12582                         } }
  12583                     else
  12584                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12585                     .indirect => |reg_off| if (mem.eql(u8, modifier, ""))
  12586                         .{ .mem = .{
  12587                             .base = .{ .reg = reg_off.reg },
  12588                             .mod = .{ .rm = .{
  12589                                 .size = mnem_size orelse
  12590                                     return self.fail("unknown size: '{s}'", .{op_str}),
  12591                                 .disp = reg_off.off,
  12592                             } },
  12593                         } }
  12594                     else
  12595                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12596                     .load_frame => |frame_addr| if (mem.eql(u8, modifier, ""))
  12597                         .{ .mem = .{
  12598                             .base = .{ .frame = frame_addr.index },
  12599                             .mod = .{ .rm = .{
  12600                                 .size = mnem_size orelse
  12601                                     return self.fail("unknown size: '{s}'", .{op_str}),
  12602                                 .disp = frame_addr.off,
  12603                             } },
  12604                         } }
  12605                     else
  12606                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12607                     .lea_got => |sym_index| if (mem.eql(u8, modifier, "P"))
  12608                         .{ .reg = try self.copyToTmpRegister(Type.usize, .{ .lea_got = sym_index }) }
  12609                     else
  12610                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12611                     .load_symbol => |sym_off| if (mem.eql(u8, modifier, "P"))
  12612                         .{ .reg = try self.copyToTmpRegister(Type.usize, .{ .load_symbol = sym_off }) }
  12613                     else
  12614                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12615                     else => return self.fail("invalid constraint: '{s}'", .{op_str}),
  12616                 };
  12617             } else if (mem.startsWith(u8, op_str, "$")) {
  12618                 if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
  12619                     if (mnem_size) |size| {
  12620                         const max = @as(u64, math.maxInt(u64)) >> @intCast(64 - (size.bitSize() - 1));
  12621                         if ((if (s < 0) ~s else s) > max)
  12622                             return self.fail("invalid immediate size: '{s}'", .{op_str});
  12623                     }
  12624                     op.* = .{ .imm = Immediate.s(s) };
  12625                 } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
  12626                     if (mnem_size) |size| {
  12627                         const max = @as(u64, math.maxInt(u64)) >> @intCast(64 - size.bitSize());
  12628                         if (u > max)
  12629                             return self.fail("invalid immediate size: '{s}'", .{op_str});
  12630                     }
  12631                     op.* = .{ .imm = Immediate.u(u) };
  12632                 } else |_| return self.fail("invalid immediate: '{s}'", .{op_str});
  12633             } else if (mem.endsWith(u8, op_str, ")")) {
  12634                 const open = mem.indexOfScalar(u8, op_str, '(') orelse
  12635                     return self.fail("invalid operand: '{s}'", .{op_str});
  12636                 var sib_it = mem.splitScalar(u8, op_str[open + "(".len .. op_str.len - ")".len], ',');
  12637                 const base_str = sib_it.next() orelse
  12638                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  12639                 if (base_str.len > 0 and !mem.startsWith(u8, base_str, "%%"))
  12640                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  12641                 const index_str = sib_it.next() orelse "";
  12642                 if (index_str.len > 0 and !mem.startsWith(u8, base_str, "%%"))
  12643                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  12644                 const scale_str = sib_it.next() orelse "";
  12645                 if (index_str.len == 0 and scale_str.len > 0)
  12646                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  12647                 const scale: Memory.Scale = if (scale_str.len > 0)
  12648                     switch (std.fmt.parseInt(u4, scale_str, 10) catch
  12649                         return self.fail("invalid scale: '{s}'", .{op_str})) {
  12650                         1 => .@"1",
  12651                         2 => .@"2",
  12652                         4 => .@"4",
  12653                         8 => .@"8",
  12654                         else => return self.fail("invalid scale: '{s}'", .{op_str}),
  12655                     }
  12656                 else
  12657                     .@"1";
  12658                 if (sib_it.next()) |_| return self.fail("invalid memory operand: '{s}'", .{op_str});
  12659                 op.* = .{
  12660                     .mem = .{
  12661                         .base = if (base_str.len > 0)
  12662                             .{ .reg = parseRegName(base_str["%%".len..]) orelse
  12663                                 return self.fail("invalid base register: '{s}'", .{base_str}) }
  12664                         else
  12665                             .none,
  12666                         .mod = .{ .rm = .{
  12667                             .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}),
  12668                             .index = if (index_str.len > 0)
  12669                                 parseRegName(index_str["%%".len..]) orelse
  12670                                     return self.fail("invalid index register: '{s}'", .{op_str})
  12671                             else
  12672                                 .none,
  12673                             .scale = scale,
  12674                             .disp = if (mem.startsWith(u8, op_str[0..open], "%[") and
  12675                                 mem.endsWith(u8, op_str[0..open], "]"))
  12676                             disp: {
  12677                                 const colon = mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':');
  12678                                 const modifier = if (colon) |colon_pos|
  12679                                     op_str[colon_pos + ":".len .. open - "]".len]
  12680                                 else
  12681                                     "";
  12682                                 break :disp switch (args.items[
  12683                                     arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse
  12684                                         return self.fail("no matching constraint: '{s}'", .{op_str})
  12685                                 ]) {
  12686                                     .immediate => |imm| if (mem.eql(u8, modifier, "") or
  12687                                         mem.eql(u8, modifier, "c"))
  12688                                         math.cast(i32, @as(i64, @bitCast(imm))) orelse
  12689                                             return self.fail("invalid displacement: '{s}'", .{op_str})
  12690                                     else
  12691                                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  12692                                     else => return self.fail("invalid constraint: '{s}'", .{op_str}),
  12693                                 };
  12694                             } else if (open > 0)
  12695                                 std.fmt.parseInt(i32, op_str[0..open], 0) catch
  12696                                     return self.fail("invalid displacement: '{s}'", .{op_str})
  12697                             else
  12698                                 0,
  12699                         } },
  12700                     },
  12701                 };
  12702             } else if (Label.isValid(.reference, op_str)) {
  12703                 const anon = std.ascii.isDigit(op_str[0]);
  12704                 const label_gop = try labels.getOrPut(self.gpa, op_str[0..if (anon) 1 else op_str.len]);
  12705                 if (!label_gop.found_existing) label_gop.value_ptr.* = .{};
  12706                 if (anon and (op_str[1] == 'b' or op_str[1] == 'B') and !label_gop.found_existing)
  12707                     return self.fail("undefined label: '{s}'", .{op_str});
  12708                 const pending_relocs = &label_gop.value_ptr.pending_relocs;
  12709                 if (if (anon)
  12710                     op_str[1] == 'f' or op_str[1] == 'F'
  12711                 else
  12712                     !label_gop.found_existing or pending_relocs.items.len > 0)
  12713                     try pending_relocs.append(self.gpa, @intCast(self.mir_instructions.len));
  12714                 op.* = .{ .inst = label_gop.value_ptr.target };
  12715             } else return self.fail("invalid operand: '{s}'", .{op_str});
  12716         } else if (op_it.next()) |op_str| return self.fail("extra operand: '{s}'", .{op_str});
  12717 
  12718         (switch (ops[0]) {
  12719             .none => self.asmOpOnly(mnem_fixed_tag),
  12720             .reg => |reg0| switch (ops[1]) {
  12721                 .none => self.asmRegister(mnem_fixed_tag, reg0),
  12722                 .reg => |reg1| switch (ops[2]) {
  12723                     .none => self.asmRegisterRegister(mnem_fixed_tag, reg1, reg0),
  12724                     .reg => |reg2| switch (ops[3]) {
  12725                         .none => self.asmRegisterRegisterRegister(mnem_fixed_tag, reg2, reg1, reg0),
  12726                         else => error.InvalidInstruction,
  12727                     },
  12728                     .mem => |mem2| switch (ops[3]) {
  12729                         .none => self.asmMemoryRegisterRegister(mnem_fixed_tag, mem2, reg1, reg0),
  12730                         else => error.InvalidInstruction,
  12731                     },
  12732                     else => error.InvalidInstruction,
  12733                 },
  12734                 .mem => |mem1| switch (ops[2]) {
  12735                     .none => self.asmMemoryRegister(mnem_fixed_tag, mem1, reg0),
  12736                     else => error.InvalidInstruction,
  12737                 },
  12738                 else => error.InvalidInstruction,
  12739             },
  12740             .mem => |mem0| switch (ops[1]) {
  12741                 .none => self.asmMemory(mnem_fixed_tag, mem0),
  12742                 .reg => |reg1| switch (ops[2]) {
  12743                     .none => self.asmRegisterMemory(mnem_fixed_tag, reg1, mem0),
  12744                     else => error.InvalidInstruction,
  12745                 },
  12746                 else => error.InvalidInstruction,
  12747             },
  12748             .imm => |imm0| switch (ops[1]) {
  12749                 .none => self.asmImmediate(mnem_fixed_tag, imm0),
  12750                 .reg => |reg1| switch (ops[2]) {
  12751                     .none => self.asmRegisterImmediate(mnem_fixed_tag, reg1, imm0),
  12752                     .reg => |reg2| switch (ops[3]) {
  12753                         .none => self.asmRegisterRegisterImmediate(mnem_fixed_tag, reg2, reg1, imm0),
  12754                         .reg => |reg3| self.asmRegisterRegisterRegisterImmediate(
  12755                             mnem_fixed_tag,
  12756                             reg3,
  12757                             reg2,
  12758                             reg1,
  12759                             imm0,
  12760                         ),
  12761                         else => error.InvalidInstruction,
  12762                     },
  12763                     .mem => |mem2| switch (ops[3]) {
  12764                         .none => self.asmMemoryRegisterImmediate(mnem_fixed_tag, mem2, reg1, imm0),
  12765                         else => error.InvalidInstruction,
  12766                     },
  12767                     else => error.InvalidInstruction,
  12768                 },
  12769                 .mem => |mem1| switch (ops[2]) {
  12770                     .none => self.asmMemoryImmediate(mnem_fixed_tag, mem1, imm0),
  12771                     else => error.InvalidInstruction,
  12772                 },
  12773                 else => error.InvalidInstruction,
  12774             },
  12775             .inst => |inst0| switch (ops[1]) {
  12776                 .none => self.asmReloc(mnem_fixed_tag, inst0),
  12777                 else => error.InvalidInstruction,
  12778             },
  12779         }) catch |err| switch (err) {
  12780             error.InvalidInstruction => return self.fail(
  12781                 "invalid instruction: '{s} {s} {s} {s} {s}'",
  12782                 .{
  12783                     mnem_str,
  12784                     @tagName(ops[0]),
  12785                     @tagName(ops[1]),
  12786                     @tagName(ops[2]),
  12787                     @tagName(ops[3]),
  12788                 },
  12789             ),
  12790             else => |e| return e,
  12791         };
  12792     }
  12793 
  12794     var label_it = labels.iterator();
  12795     while (label_it.next()) |label| if (label.value_ptr.pending_relocs.items.len > 0)
  12796         return self.fail("undefined label: '{s}'", .{label.key_ptr.*});
  12797 
  12798     for (outputs, args.items[0..outputs.len]) |output, arg_mcv| {
  12799         const extra_bytes = mem.sliceAsBytes(self.air.extra[outputs_extra_i..]);
  12800         const constraint =
  12801             mem.sliceTo(mem.sliceAsBytes(self.air.extra[outputs_extra_i..]), 0);
  12802         const name = mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
  12803         // This equation accounts for the fact that even if we have exactly 4 bytes
  12804         // for the string, we still use the next u32 for the null terminator.
  12805         outputs_extra_i += (constraint.len + name.len + (2 + 3)) / 4;
  12806 
  12807         if (output == .none) continue;
  12808         if (arg_mcv != .register) continue;
  12809         if (constraint.len == 2 and std.ascii.isDigit(constraint[1])) continue;
  12810         try self.store(self.typeOf(output), .{ .air_ref = output }, arg_mcv);
  12811     }
  12812 
  12813     simple: {
  12814         var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
  12815         var buf_index: usize = 0;
  12816         for (outputs) |output| {
  12817             if (output == .none) continue;
  12818 
  12819             if (buf_index >= buf.len) break :simple;
  12820             buf[buf_index] = output;
  12821             buf_index += 1;
  12822         }
  12823         if (buf_index + inputs.len > buf.len) break :simple;
  12824         @memcpy(buf[buf_index..][0..inputs.len], inputs);
  12825         return self.finishAir(inst, result, buf);
  12826     }
  12827     var bt = self.liveness.iterateBigTomb(inst);
  12828     for (outputs) |output| if (output != .none) try self.feed(&bt, output);
  12829     for (inputs) |input| try self.feed(&bt, input);
  12830     return self.finishAirResult(inst, result);
  12831 }
  12832 
  12833 const MoveStrategy = union(enum) {
  12834     move: Mir.Inst.FixedTag,
  12835     x87_load_store,
  12836     insert_extract: InsertExtract,
  12837     vex_insert_extract: InsertExtract,
  12838 
  12839     const InsertExtract = struct {
  12840         insert: Mir.Inst.FixedTag,
  12841         extract: Mir.Inst.FixedTag,
  12842     };
  12843 
  12844     pub fn read(strat: MoveStrategy, self: *Self, dst_reg: Register, src_mem: Memory) !void {
  12845         switch (strat) {
  12846             .move => |tag| try self.asmRegisterMemory(tag, dst_reg, src_mem),
  12847             .x87_load_store => {
  12848                 try self.asmMemory(.{ .f_, .ld }, src_mem);
  12849                 assert(dst_reg != .st7);
  12850                 try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1));
  12851             },
  12852             .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
  12853                 ie.insert,
  12854                 dst_reg,
  12855                 src_mem,
  12856                 Immediate.u(0),
  12857             ),
  12858             .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
  12859                 ie.insert,
  12860                 dst_reg,
  12861                 dst_reg,
  12862                 src_mem,
  12863                 Immediate.u(0),
  12864             ),
  12865         }
  12866     }
  12867     pub fn write(strat: MoveStrategy, self: *Self, dst_mem: Memory, src_reg: Register) !void {
  12868         switch (strat) {
  12869             .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_reg),
  12870             .x87_load_store => {
  12871                 try self.asmRegister(.{ .f_, .ld }, src_reg);
  12872                 try self.asmMemory(.{ .f_p, .st }, dst_mem);
  12873             },
  12874             .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate(
  12875                 ie.extract,
  12876                 dst_mem,
  12877                 src_reg,
  12878                 Immediate.u(0),
  12879             ),
  12880         }
  12881     }
  12882 };
  12883 fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !MoveStrategy {
  12884     const mod = self.bin_file.comp.module.?;
  12885     switch (class) {
  12886         .general_purpose, .segment => return .{ .move = .{ ._, .mov } },
  12887         .x87 => return .x87_load_store,
  12888         .mmx => {},
  12889         .sse => switch (ty.zigTypeTag(mod)) {
  12890             else => {
  12891                 const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
  12892                 assert(std.mem.indexOfNone(abi.Class, classes, &.{
  12893                     .integer, .sse, .memory, .float, .float_combine,
  12894                 }) == null);
  12895                 const abi_size = ty.abiSize(mod);
  12896                 if (abi_size < 4 or
  12897                     std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) {
  12898                     1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
  12899                         .insert = .{ .vp_b, .insr },
  12900                         .extract = .{ .vp_b, .extr },
  12901                     } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
  12902                         .insert = .{ .p_b, .insr },
  12903                         .extract = .{ .p_b, .extr },
  12904                     } },
  12905                     2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  12906                         .insert = .{ .vp_w, .insr },
  12907                         .extract = .{ .vp_w, .extr },
  12908                     } } else .{ .insert_extract = .{
  12909                         .insert = .{ .p_w, .insr },
  12910                         .extract = .{ .p_w, .extr },
  12911                     } },
  12912                     3...4 => return .{ .move = if (self.hasFeature(.avx))
  12913                         .{ .v_d, .mov }
  12914                     else
  12915                         .{ ._d, .mov } },
  12916                     5...8 => return .{ .move = if (self.hasFeature(.avx))
  12917                         .{ .v_q, .mov }
  12918                     else
  12919                         .{ ._q, .mov } },
  12920                     9...16 => return .{ .move = if (self.hasFeature(.avx))
  12921                         if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  12922                     else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  12923                     17...32 => if (self.hasFeature(.avx))
  12924                         return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
  12925                     else => {},
  12926                 } else switch (abi_size) {
  12927                     4 => return .{ .move = if (self.hasFeature(.avx))
  12928                         .{ .v_ss, .mov }
  12929                     else
  12930                         .{ ._ss, .mov } },
  12931                     5...8 => return .{ .move = if (self.hasFeature(.avx))
  12932                         .{ .v_sd, .mov }
  12933                     else
  12934                         .{ ._sd, .mov } },
  12935                     9...16 => return .{ .move = if (self.hasFeature(.avx))
  12936                         if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
  12937                     else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
  12938                     17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned)
  12939                         .{ .v_pd, .mova }
  12940                     else
  12941                         .{ .v_pd, .movu } },
  12942                     else => {},
  12943                 }
  12944             },
  12945             .Float => switch (ty.floatBits(self.target.*)) {
  12946                 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  12947                     .insert = .{ .vp_w, .insr },
  12948                     .extract = .{ .vp_w, .extr },
  12949                 } } else .{ .insert_extract = .{
  12950                     .insert = .{ .p_w, .insr },
  12951                     .extract = .{ .p_w, .extr },
  12952                 } },
  12953                 32 => return .{ .move = if (self.hasFeature(.avx))
  12954                     .{ .v_ss, .mov }
  12955                 else
  12956                     .{ ._ss, .mov } },
  12957                 64 => return .{ .move = if (self.hasFeature(.avx))
  12958                     .{ .v_sd, .mov }
  12959                 else
  12960                     .{ ._sd, .mov } },
  12961                 128 => return .{ .move = if (self.hasFeature(.avx))
  12962                     if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  12963                 else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  12964                 else => {},
  12965             },
  12966             .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
  12967                 .Bool => {},
  12968                 .Int => switch (ty.childType(mod).intInfo(mod).bits) {
  12969                     8 => switch (ty.vectorLen(mod)) {
  12970                         1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
  12971                             .insert = .{ .vp_b, .insr },
  12972                             .extract = .{ .vp_b, .extr },
  12973                         } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
  12974                             .insert = .{ .p_b, .insr },
  12975                             .extract = .{ .p_b, .extr },
  12976                         } },
  12977                         2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  12978                             .insert = .{ .vp_w, .insr },
  12979                             .extract = .{ .vp_w, .extr },
  12980                         } } else .{ .insert_extract = .{
  12981                             .insert = .{ .p_w, .insr },
  12982                             .extract = .{ .p_w, .extr },
  12983                         } },
  12984                         3...4 => return .{ .move = if (self.hasFeature(.avx))
  12985                             .{ .v_d, .mov }
  12986                         else
  12987                             .{ ._d, .mov } },
  12988                         5...8 => return .{ .move = if (self.hasFeature(.avx))
  12989                             .{ .v_q, .mov }
  12990                         else
  12991                             .{ ._q, .mov } },
  12992                         9...16 => return .{ .move = if (self.hasFeature(.avx))
  12993                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  12994                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  12995                         17...32 => if (self.hasFeature(.avx))
  12996                             return .{ .move = if (aligned)
  12997                                 .{ .v_, .movdqa }
  12998                             else
  12999                                 .{ .v_, .movdqu } },
  13000                         else => {},
  13001                     },
  13002                     16 => switch (ty.vectorLen(mod)) {
  13003                         1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  13004                             .insert = .{ .vp_w, .insr },
  13005                             .extract = .{ .vp_w, .extr },
  13006                         } } else .{ .insert_extract = .{
  13007                             .insert = .{ .p_w, .insr },
  13008                             .extract = .{ .p_w, .extr },
  13009                         } },
  13010                         2 => return .{ .move = if (self.hasFeature(.avx))
  13011                             .{ .v_d, .mov }
  13012                         else
  13013                             .{ ._d, .mov } },
  13014                         3...4 => return .{ .move = if (self.hasFeature(.avx))
  13015                             .{ .v_q, .mov }
  13016                         else
  13017                             .{ ._q, .mov } },
  13018                         5...8 => return .{ .move = if (self.hasFeature(.avx))
  13019                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13020                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13021                         9...16 => if (self.hasFeature(.avx))
  13022                             return .{ .move = if (aligned)
  13023                                 .{ .v_, .movdqa }
  13024                             else
  13025                                 .{ .v_, .movdqu } },
  13026                         else => {},
  13027                     },
  13028                     32 => switch (ty.vectorLen(mod)) {
  13029                         1 => return .{ .move = if (self.hasFeature(.avx))
  13030                             .{ .v_d, .mov }
  13031                         else
  13032                             .{ ._d, .mov } },
  13033                         2 => return .{ .move = if (self.hasFeature(.avx))
  13034                             .{ .v_q, .mov }
  13035                         else
  13036                             .{ ._q, .mov } },
  13037                         3...4 => return .{ .move = if (self.hasFeature(.avx))
  13038                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13039                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13040                         5...8 => if (self.hasFeature(.avx))
  13041                             return .{ .move = if (aligned)
  13042                                 .{ .v_, .movdqa }
  13043                             else
  13044                                 .{ .v_, .movdqu } },
  13045                         else => {},
  13046                     },
  13047                     64 => switch (ty.vectorLen(mod)) {
  13048                         1 => return .{ .move = if (self.hasFeature(.avx))
  13049                             .{ .v_q, .mov }
  13050                         else
  13051                             .{ ._q, .mov } },
  13052                         2 => return .{ .move = if (self.hasFeature(.avx))
  13053                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13054                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13055                         3...4 => if (self.hasFeature(.avx))
  13056                             return .{ .move = if (aligned)
  13057                                 .{ .v_, .movdqa }
  13058                             else
  13059                                 .{ .v_, .movdqu } },
  13060                         else => {},
  13061                     },
  13062                     128 => switch (ty.vectorLen(mod)) {
  13063                         1 => return .{ .move = if (self.hasFeature(.avx))
  13064                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13065                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13066                         2 => if (self.hasFeature(.avx))
  13067                             return .{ .move = if (aligned)
  13068                                 .{ .v_, .movdqa }
  13069                             else
  13070                                 .{ .v_, .movdqu } },
  13071                         else => {},
  13072                     },
  13073                     256 => switch (ty.vectorLen(mod)) {
  13074                         1 => if (self.hasFeature(.avx))
  13075                             return .{ .move = if (aligned)
  13076                                 .{ .v_, .movdqa }
  13077                             else
  13078                                 .{ .v_, .movdqu } },
  13079                         else => {},
  13080                     },
  13081                     else => {},
  13082                 },
  13083                 .Pointer, .Optional => if (ty.childType(mod).isPtrAtRuntime(mod))
  13084                     switch (ty.vectorLen(mod)) {
  13085                         1 => return .{ .move = if (self.hasFeature(.avx))
  13086                             .{ .v_q, .mov }
  13087                         else
  13088                             .{ ._q, .mov } },
  13089                         2 => return .{ .move = if (self.hasFeature(.avx))
  13090                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13091                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13092                         3...4 => if (self.hasFeature(.avx))
  13093                             return .{ .move = if (aligned)
  13094                                 .{ .v_, .movdqa }
  13095                             else
  13096                                 .{ .v_, .movdqu } },
  13097                         else => {},
  13098                     }
  13099                 else
  13100                     unreachable,
  13101                 .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
  13102                     16 => switch (ty.vectorLen(mod)) {
  13103                         1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  13104                             .insert = .{ .vp_w, .insr },
  13105                             .extract = .{ .vp_w, .extr },
  13106                         } } else .{ .insert_extract = .{
  13107                             .insert = .{ .p_w, .insr },
  13108                             .extract = .{ .p_w, .extr },
  13109                         } },
  13110                         2 => return .{ .move = if (self.hasFeature(.avx))
  13111                             .{ .v_d, .mov }
  13112                         else
  13113                             .{ ._d, .mov } },
  13114                         3...4 => return .{ .move = if (self.hasFeature(.avx))
  13115                             .{ .v_q, .mov }
  13116                         else
  13117                             .{ ._q, .mov } },
  13118                         5...8 => return .{ .move = if (self.hasFeature(.avx))
  13119                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13120                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13121                         9...16 => if (self.hasFeature(.avx))
  13122                             return .{ .move = if (aligned)
  13123                                 .{ .v_, .movdqa }
  13124                             else
  13125                                 .{ .v_, .movdqu } },
  13126                         else => {},
  13127                     },
  13128                     32 => switch (ty.vectorLen(mod)) {
  13129                         1 => return .{ .move = if (self.hasFeature(.avx))
  13130                             .{ .v_ss, .mov }
  13131                         else
  13132                             .{ ._ss, .mov } },
  13133                         2 => return .{ .move = if (self.hasFeature(.avx))
  13134                             .{ .v_sd, .mov }
  13135                         else
  13136                             .{ ._sd, .mov } },
  13137                         3...4 => return .{ .move = if (self.hasFeature(.avx))
  13138                             if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
  13139                         else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
  13140                         5...8 => if (self.hasFeature(.avx))
  13141                             return .{ .move = if (aligned)
  13142                                 .{ .v_ps, .mova }
  13143                             else
  13144                                 .{ .v_ps, .movu } },
  13145                         else => {},
  13146                     },
  13147                     64 => switch (ty.vectorLen(mod)) {
  13148                         1 => return .{ .move = if (self.hasFeature(.avx))
  13149                             .{ .v_sd, .mov }
  13150                         else
  13151                             .{ ._sd, .mov } },
  13152                         2 => return .{ .move = if (self.hasFeature(.avx))
  13153                             if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
  13154                         else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
  13155                         3...4 => if (self.hasFeature(.avx))
  13156                             return .{ .move = if (aligned)
  13157                                 .{ .v_pd, .mova }
  13158                             else
  13159                                 .{ .v_pd, .movu } },
  13160                         else => {},
  13161                     },
  13162                     128 => switch (ty.vectorLen(mod)) {
  13163                         1 => return .{ .move = if (self.hasFeature(.avx))
  13164                             if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
  13165                         else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
  13166                         2 => if (self.hasFeature(.avx))
  13167                             return .{ .move = if (aligned)
  13168                                 .{ .v_, .movdqa }
  13169                             else
  13170                                 .{ .v_, .movdqu } },
  13171                         else => {},
  13172                     },
  13173                     else => {},
  13174                 },
  13175                 else => {},
  13176             },
  13177         },
  13178     }
  13179     return self.fail("TODO moveStrategy for {}", .{ty.fmt(mod)});
  13180 }
  13181 
  13182 fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
            // Emits the code that copies a value of type `ty` from `src_mcv` into `dst_mcv`.
            // The work is dispatched on the destination kind:
            //   * a single register (with or without offset) goes through `genSetReg`,
            //   * a register pair is filled one part at a time (see `splitType`),
            //   * every memory-like destination goes through `genSetMem`.
            // Destinations that cannot be written to (immediates, flags, `lea_*`, ...) are
            // treated as unreachable.
  13183     const mod = self.bin_file.comp.module.?;
  13184 
            // Keep the source register (if there is one) locked until this function returns;
            // presumably so the temporaries allocated below cannot be assigned to it.
  13185     const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  13186     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  13187 
  13188     switch (dst_mcv) {
  13189         .none,
  13190         .unreach,
  13191         .dead,
  13192         .undef,
  13193         .immediate,
  13194         .eflags,
  13195         .register_overflow,
  13196         .lea_direct,
  13197         .lea_got,
  13198         .lea_tlv,
  13199         .lea_frame,
  13200         .lea_symbol,
  13201         .reserved_frame,
  13202         .air_ref,
  13203         => unreachable, // unmodifiable destination
  13204         .register => |reg| try self.genSetReg(reg, ty, src_mcv),
                // Destination is `reg` with an offset: `reg` is set to the source adjusted by
                // `-off` (presumably so that `reg + off` yields the source value). Sources
                // that can absorb an offset are adjusted directly; any other source is first
                // copied into a temporary register, which is then used with offset `-off`.
  13205         .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) {
  13206             .none,
  13207             .unreach,
  13208             .dead,
  13209             .undef,
  13210             .register_overflow,
  13211             .reserved_frame,
  13212             => unreachable,
  13213             .immediate,
  13214             .register,
  13215             .register_offset,
  13216             .lea_frame,
  13217             => src_mcv.offset(-dst_reg_off.off),
  13218             else => .{ .register_offset = .{
  13219                 .reg = try self.copyToTmpRegister(ty, src_mcv),
  13220                 .off = -dst_reg_off.off,
  13221             } },
  13222         }),
  13223         .register_pair => |dst_regs| {
                    // For symbol/direct/GOT/TLV loads the source address is materialized once
                    // in a locked general-purpose register and every part is then read
                    // relative to it. The other supported sources need no address register
                    // (`null`). An `air_ref` source is resolved and the copy is retried;
                    // anything else is reported as a TODO failure.
  13224             const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) {
  13225                 .register_pair, .memory, .indirect, .load_frame => null,
  13226                 .load_symbol, .load_direct, .load_got, .load_tlv => src: {
  13227                     const src_addr_reg =
  13228                         (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  13229                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
                            // Release the lock only if loading the address fails; on success
                            // the lock is handed to `src_info` and released by the `defer`
                            // that follows this switch.
  13230                     errdefer self.register_manager.unlockReg(src_addr_lock);
  13231 
  13232                     try self.genSetReg(src_addr_reg, Type.usize, src_mcv.address());
  13233                     break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
  13234                 },
  13235                 .air_ref => |src_ref| return self.genCopy(ty, dst_mcv, try self.resolveInst(src_ref)),
  13236                 else => return self.fail("TODO implement genCopy for {s} of {}", .{
  13237                     @tagName(src_mcv), ty.fmt(mod),
  13238                 }),
  13239             };
  13240             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
  13241 
                    // Copy part by part: `part_disp` is the running byte offset of the current
                    // part inside the source, advanced by the ABI size of each part type.
                    // NOTE(review): parts are written in order and nothing visible here checks
                    // whether an earlier destination register is also a later source register
                    // or the base register of an `.indirect` source - confirm that callers
                    // rule out such overlap.
  13242             var part_disp: i32 = 0;
  13243             for (dst_regs, try self.splitType(ty), 0..) |dst_reg, dst_ty, part_i| {
  13244                 try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) {
  13245                     .register_pair => |src_regs| .{ .register = src_regs[part_i] },
  13246                     .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(),
  13247                     .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{
  13248                         .reg = src_info.?.addr_reg,
  13249                         .off = part_disp,
  13250                     } },
  13251                     else => unreachable,
  13252                 });
  13253                 part_disp += @intCast(dst_ty.abiSize(mod));
  13254             }
  13255         },
  13256         .indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv),
  13257         .memory, .load_symbol, .load_direct, .load_got, .load_tlv => {
                    // Fast path: an absolute address that fits in a signed 32-bit displacement
                    // is stored to directly, using `ds` as the base. Every other case falls
                    // through to the generic path below.
  13258             switch (dst_mcv) {
  13259                 .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
  13260                     return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv),
  13261                 .load_symbol, .load_direct, .load_got, .load_tlv => {},
  13262                 else => unreachable,
  13263             }
  13264 
                    // Generic path: load the destination address into a temporary register,
                    // keep that register locked while storing, and store at offset 0 from it.
  13265             const addr_reg = try self.copyToTmpRegister(Type.usize, dst_mcv.address());
  13266             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  13267             defer self.register_manager.unlockReg(addr_lock);
  13268 
  13269             try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv);
  13270         },
  13271         .load_frame => |frame_addr| try self.genSetMem(
  13272             .{ .frame = frame_addr.index },
  13273             frame_addr.off,
  13274             ty,
  13275             src_mcv,
  13276         ),
  13277     }
  13278 }
  13279 
  13280 fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void {
            // Emits the code that loads the value of type `ty` described by `src_mcv` into
            // `dst_reg`. Fails with a codegen error when `ty` has more bits than `dst_reg`,
            // and for the combinations marked TODO below. Source kinds that carry no value
            // (`none`, `unreach`, `dead`, ...) are treated as unreachable.
  13281     const mod = self.bin_file.comp.module.?;
  13282     const abi_size: u32 = @intCast(ty.abiSize(mod));
  13283     if (ty.bitSize(mod) > dst_reg.bitSize())
  13284         return self.fail("genSetReg called with a value larger than dst_reg", .{});
  13285     switch (src_mcv) {
  13286         .none,
  13287         .unreach,
  13288         .dead,
  13289         .register_overflow,
  13290         .reserved_frame,
  13291         => unreachable,
                // An undefined value needs no code: the register keeps whatever it holds.
  13292         .undef => {},
                // Materialize the condition code with a setcc into the 8-bit alias.
  13293         .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()),
  13294         .immediate => |imm| {
  13295             if (imm == 0) {
  13296                 // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
  13297                 // register is the fastest way to zero a register.
  13298                 try self.spillEflagsIfOccupied();
  13299                 try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
  13300             } else if (abi_size > 4 and math.cast(u32, imm) != null) {
  13301                 // 32-bit moves zero-extend to 64-bit.
  13302                 try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm));
  13303             } else if (abi_size <= 4 and @as(i64, @bitCast(imm)) < 0) {
                        // A value of at most 4 bytes whose bit pattern is negative as an i64
                        // is emitted as a signed immediate at the ABI-sized register alias.
  13304                 try self.asmRegisterImmediate(
  13305                     .{ ._, .mov },
  13306                     registerAlias(dst_reg, abi_size),
  13307                     Immediate.s(@intCast(@as(i64, @bitCast(imm)))),
  13308                 );
  13309             } else {
                        // Everything else: unsigned immediate at the ABI-sized register alias.
  13310                 try self.asmRegisterImmediate(
  13311                     .{ ._, .mov },
  13312                     registerAlias(dst_reg, abi_size),
  13313                     Immediate.u(imm),
  13314                 );
  13315             }
  13316         },
                // Register-to-register: nothing is emitted when both names share the same id.
                // Otherwise the instruction is chosen by destination class, then source class.
  13317         .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
  13318             .general_purpose => switch (src_reg.class()) {
  13319                 .general_purpose => try self.asmRegisterRegister(
  13320                     .{ ._, .mov },
  13321                     registerAlias(dst_reg, abi_size),
  13322                     registerAlias(src_reg, abi_size),
  13323                 ),
  13324                 .segment => try self.asmRegisterRegister(
  13325                     .{ ._, .mov },
  13326                     registerAlias(dst_reg, abi_size),
  13327                     src_reg,
  13328                 ),
  13329                 .x87, .mmx => unreachable,
                        // sse -> general purpose: a d-sized move for up to 4 bytes, a q-sized
                        // move for up to 8 (VEX forms when AVX is available). The destination
                        // alias is never narrower than 4 bytes; larger sizes are unreachable.
  13330                 .sse => try self.asmRegisterRegister(
  13331                     switch (abi_size) {
  13332                         1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
  13333                         5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
  13334                         else => unreachable,
  13335                     },
  13336                     registerAlias(dst_reg, @max(abi_size, 4)),
  13337                     src_reg.to128(),
  13338                 ),
  13339             },
                    // Segment destination: an sse source is first copied into a temporary
                    // register (via `copyToTmpRegister`), which is then moved into `dst_reg`.
  13340             .segment => try self.asmRegisterRegister(
  13341                 .{ ._, .mov },
  13342                 dst_reg,
  13343                 switch (src_reg.class()) {
  13344                     .general_purpose, .segment => registerAlias(src_reg, abi_size),
  13345                     .x87, .mmx => unreachable,
  13346                     .sse => try self.copyToTmpRegister(ty, src_mcv),
  13347                 },
  13348             ),
  13349             .x87 => switch (src_reg.class()) {
  13350                 .general_purpose, .segment => unreachable,
  13351                 .x87 => switch (src_reg) {
                            // From the stack top a plain store into `dst_reg` is enough.
  13352                     .st0 => try self.asmRegister(.{ .f_, .st }, dst_reg),
                            // From st1..st6: load `src_reg`, then store-and-pop into the slot
                            // one past `dst_reg`. Presumably the load pushes onto the x87
                            // stack and shifts every slot by one, which would explain both
                            // the `+ 1` and why `dst_reg` must not be st7 - TODO confirm.
  13353                     .st1, .st2, .st3, .st4, .st5, .st6 => {
  13354                         try self.asmRegister(.{ .f_, .ld }, src_reg);
  13355                         assert(dst_reg != .st7);
  13356                         try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1));
  13357                     },
  13358                     else => unreachable,
  13359                 },
  13360                 .mmx, .sse => unreachable,
  13361             },
  13362             .mmx => unreachable,
  13363             .sse => switch (src_reg.class()) {
                        // general purpose -> sse: mirror image of the sse -> general purpose
                        // case above (d/q-sized move, source alias at least 4 bytes wide).
  13364                 .general_purpose => try self.asmRegisterRegister(
  13365                     switch (abi_size) {
  13366                         1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
  13367                         5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
  13368                         else => unreachable,
  13369                     },
  13370                     dst_reg.to128(),
  13371                     registerAlias(src_reg, @max(abi_size, 4)),
  13372                 ),
                        // segment -> sse: copy into a temporary register first, then recurse
                        // with that register as the source.
  13373                 .segment => try self.genSetReg(
  13374                     dst_reg,
  13375                     ty,
  13376                     .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
  13377                 ),
  13378                 .x87, .mmx => unreachable,
                        // sse -> sse: the move is selected from the scalar element type and
                        // the ABI size. A `null` selection (for example 80-bit floats, or
                        // 17..32 bytes without AVX) is reported as a TODO failure.
  13379                 .sse => try self.asmRegisterRegister(
  13380                     @as(?Mir.Inst.FixedTag, switch (ty.scalarType(mod).zigTypeTag(mod)) {
  13381                         else => switch (abi_size) {
  13382                             1...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
  13383                             17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
  13384                             else => null,
  13385                         },
  13386                         .Float => switch (ty.scalarType(mod).floatBits(self.target.*)) {
  13387                             16, 128 => switch (abi_size) {
  13388                                 2...16 => if (self.hasFeature(.avx))
  13389                                     .{ .v_, .movdqa }
  13390                                 else
  13391                                     .{ ._, .movdqa },
  13392                                 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
  13393                                 else => null,
  13394                             },
  13395                             32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
  13396                             64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
  13397                             80 => null,
  13398                             else => unreachable,
  13399                         },
  13400                     }) orelse return self.fail("TODO implement genSetReg for {}", .{ty.fmt(mod)}),
  13401                     registerAlias(dst_reg, abi_size),
  13402                     registerAlias(src_reg, abi_size),
  13403                 ),
  13404             },
  13405         },
                // Only the first register of the pair is copied; the size check at the top of
                // this function has already established that `ty` fits in `dst_reg`.
  13406         .register_pair => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }),
                // Sources addressed as base + displacement. First a `MoveStrategy` is chosen:
                //   * register_offset: offset 0 degenerates to a plain register copy
                //     (early return); any other offset is computed with `lea`,
                //   * indirect: `moveStrategy` with the aligned flag set to false,
                //   * load_frame: `moveStrategy`, aligned when the frame address alignment is
                //     at least the ABI alignment of `ty`,
                //   * lea_frame: `lea`.
                // Then the strategy's `read` is invoked on the matching memory operand.
  13407         .register_offset,
  13408         .indirect,
  13409         .load_frame,
  13410         .lea_frame,
  13411         => try @as(MoveStrategy, switch (src_mcv) {
  13412             .register_offset => |reg_off| switch (reg_off.off) {
  13413                 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
  13414                 else => .{ .move = .{ ._, .lea } },
  13415             },
  13416             .indirect => try self.moveStrategy(ty, dst_reg.class(), false),
  13417             .load_frame => |frame_addr| try self.moveStrategy(
  13418                 ty,
  13419                 dst_reg.class(),
  13420                 self.getFrameAddrAlignment(frame_addr).compare(.gte, ty.abiAlignment(mod)),
  13421             ),
  13422             .lea_frame => .{ .move = .{ ._, .lea } },
  13423             else => unreachable,
  13424         }).read(self, registerAlias(dst_reg, abi_size), switch (src_mcv) {
  13425             .register_offset, .indirect => |reg_off| .{
  13426                 .base = .{ .reg = reg_off.reg },
  13427                 .mod = .{ .rm = .{
  13428                     .size = self.memSize(ty),
  13429                     .disp = reg_off.off,
  13430                 } },
  13431             },
  13432             .load_frame, .lea_frame => |frame_addr| .{
  13433                 .base = .{ .frame = frame_addr.index },
  13434                 .mod = .{ .rm = .{
  13435                     .size = self.memSize(ty),
  13436                     .disp = frame_addr.off,
  13437                 } },
  13438             },
  13439             else => unreachable,
  13440         }),
  13441         .memory, .load_symbol, .load_direct, .load_got, .load_tlv => {
                    // Fast paths that return early:
                    //   * `.memory` whose address fits in a signed 32-bit displacement is read
                    //     directly with `ds` as the base,
                    //   * `.load_symbol` / `.load_direct` into a general-purpose register are
                    //     a single relocated `mov`.
                    // Everything else (x87/sse destinations, `.load_got`, `.load_tlv`, larger
                    // `.memory` addresses) falls through to the generic path after the switch.
  13442             switch (src_mcv) {
  13443                 .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
                            // The aligned flag comes from `check` on the type's ABI alignment
                            // (presumably: whether the address satisfies that alignment).
  13444                     return (try self.moveStrategy(
  13445                         ty,
  13446                         dst_reg.class(),
  13447                         ty.abiAlignment(mod).check(@as(u32, @bitCast(small_addr))),
  13448                     )).read(self, registerAlias(dst_reg, abi_size), .{
  13449                         .base = .{ .reg = .ds },
  13450                         .mod = .{ .rm = .{
  13451                             .size = self.memSize(ty),
  13452                             .disp = small_addr,
  13453                         } },
  13454                     }),
  13455                 .load_symbol => |sym_off| switch (dst_reg.class()) {
  13456                     .general_purpose => {
                                // Only a zero symbol offset is expected on this path.
  13457                         assert(sym_off.off == 0);
  13458                         try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{
  13459                             .base = .{ .reloc = .{
  13460                                 .atom_index = try self.owner.getSymbolIndex(self),
  13461                                 .sym_index = sym_off.sym,
  13462                             } },
  13463                             .mod = .{ .rm = .{
  13464                                 .size = self.memSize(ty),
  13465                                 .disp = sym_off.off,
  13466                             } },
  13467                         });
  13468                         return;
  13469                     },
  13470                     .segment, .mmx => unreachable,
  13471                     .x87, .sse => {},
  13472                 },
  13473                 .load_direct => |sym_index| switch (dst_reg.class()) {
  13474                     .general_purpose => {
  13475                         _ = try self.addInst(.{
  13476                             .tag = .mov,
  13477                             .ops = .direct_reloc,
  13478                             .data = .{ .rx = .{
  13479                                 .r1 = registerAlias(dst_reg, abi_size),
  13480                                 .payload = try self.addExtra(bits.Symbol{
  13481                                     .atom_index = try self.owner.getSymbolIndex(self),
  13482                                     .sym_index = sym_index,
  13483                                 }),
  13484                             } },
  13485                         });
  13486                         return;
  13487                     },
  13488                     .segment, .mmx => unreachable,
  13489                     .x87, .sse => {},
  13490                 },
  13491                 .load_got, .load_tlv => {},
  13492                 else => unreachable,
  13493             }
  13494 
                    // Generic path: load the source address into a temporary register, keep it
                    // locked for the read, and read from offset 0 with the aligned flag false.
  13495             const addr_reg = try self.copyToTmpRegister(Type.usize, src_mcv.address());
  13496             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  13497             defer self.register_manager.unlockReg(addr_lock);
  13498 
  13499             try (try self.moveStrategy(ty, dst_reg.class(), false)).read(
  13500                 self,
  13501                 registerAlias(dst_reg, abi_size),
  13502                 .{
  13503                     .base = .{ .reg = addr_reg },
  13504                     .mod = .{ .rm = .{ .size = self.memSize(ty) } },
  13505                 },
  13506             );
  13507         },
                // Address of a symbol: a relocated `lea` into the 64-bit alias. This is only
                // implemented when the output file is ELF; other formats fail with a TODO.
                // Note that the capture is named `sym_index` but carries `.sym` and `.off`.
  13508         .lea_symbol => |sym_index| {
  13509             const atom_index = try self.owner.getSymbolIndex(self);
  13510             if (self.bin_file.cast(link.File.Elf)) |_| {
  13511                 try self.asmRegisterMemory(
  13512                     .{ ._, .lea },
  13513                     dst_reg.to64(),
  13514                     .{
  13515                         .base = .{ .reloc = .{
  13516                             .atom_index = atom_index,
  13517                             .sym_index = sym_index.sym,
  13518                         } },
  13519                         .mod = .{ .rm = .{
  13520                             .size = .qword,
  13521                             .disp = sym_index.off,
  13522                         } },
  13523                     },
  13524                 );
  13525             } else return self.fail("TODO emit symbol sequence on {s}", .{
  13526                 @tagName(self.bin_file.tag),
  13527             });
  13528         },
                // `lea_direct` becomes a `lea` with a direct relocation, `lea_got` becomes a
                // `mov` with a GOT relocation; both target the 64-bit alias of `dst_reg`.
  13529         .lea_direct, .lea_got => |sym_index| {
  13530             const atom_index = try self.owner.getSymbolIndex(self);
  13531             _ = try self.addInst(.{
  13532                 .tag = switch (src_mcv) {
  13533                     .lea_direct => .lea,
  13534                     .lea_got => .mov,
  13535                     else => unreachable,
  13536                 },
  13537                 .ops = switch (src_mcv) {
  13538                     .lea_direct => .direct_reloc,
  13539                     .lea_got => .got_reloc,
  13540                     else => unreachable,
  13541                 },
  13542                 .data = .{ .rx = .{
  13543                     .r1 = dst_reg.to64(),
  13544                     .payload = try self.addExtra(bits.Symbol{
  13545                         .atom_index = atom_index,
  13546                         .sym_index = sym_index,
  13547                     }),
  13548                 } },
  13549             });
  13550         },
  13551         .lea_tlv => unreachable, // TODO: remove this
                // An AIR reference is resolved to its current machine value, then retried.
  13552         .air_ref => |src_ref| try self.genSetReg(dst_reg, ty, try self.resolveInst(src_ref)),
  13553     }
  13554 }
  13555 
  13556 fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCValue) InnerError!void {
  13557     const mod = self.bin_file.comp.module.?;
  13558     const abi_size: u32 = @intCast(ty.abiSize(mod));
  13559     const dst_ptr_mcv: MCValue = switch (base) {
  13560         .none => .{ .immediate = @bitCast(@as(i64, disp)) },
  13561         .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } },
  13562         .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } },
  13563         .reloc => |base_symbol| .{ .lea_symbol = .{ .sym = base_symbol.sym_index, .off = disp } },
  13564     };
  13565     switch (src_mcv) {
  13566         .none, .unreach, .dead, .reserved_frame => unreachable,
  13567         .undef => try self.genInlineMemset(
  13568             dst_ptr_mcv,
  13569             .{ .immediate = 0xaa },
  13570             .{ .immediate = abi_size },
  13571         ),
  13572         .immediate => |imm| switch (abi_size) {
  13573             1, 2, 4 => {
  13574                 const immediate = switch (if (ty.isAbiInt(mod))
  13575                     ty.intInfo(mod).signedness
  13576                 else
  13577                     .unsigned) {
  13578                     .signed => Immediate.s(@truncate(@as(i64, @bitCast(imm)))),
  13579                     .unsigned => Immediate.u(@as(u32, @intCast(imm))),
  13580                 };
  13581                 try self.asmMemoryImmediate(
  13582                     .{ ._, .mov },
  13583                     .{ .base = base, .mod = .{ .rm = .{
  13584                         .size = Memory.Size.fromSize(abi_size),
  13585                         .disp = disp,
  13586                     } } },
  13587                     immediate,
  13588                 );
  13589             },
  13590             3, 5...7 => unreachable,
  13591             else => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| {
  13592                 try self.asmMemoryImmediate(
  13593                     .{ ._, .mov },
  13594                     .{ .base = base, .mod = .{ .rm = .{
  13595                         .size = Memory.Size.fromSize(abi_size),
  13596                         .disp = disp,
  13597                     } } },
  13598                     Immediate.s(small),
  13599                 );
  13600             } else {
  13601                 var offset: i32 = 0;
  13602                 while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate(
  13603                     .{ ._, .mov },
  13604                     .{ .base = base, .mod = .{ .rm = .{
  13605                         .size = .dword,
  13606                         .disp = disp + offset,
  13607                     } } },
  13608                     if (ty.isSignedInt(mod)) Immediate.s(
  13609                         @truncate(@as(i64, @bitCast(imm)) >> (math.cast(u6, offset * 8) orelse 63)),
  13610                     ) else Immediate.u(
  13611                         @as(u32, @truncate(if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0)),
  13612                     ),
  13613                 );
  13614             },
  13615         },
  13616         .eflags => |cc| try self.asmSetccMemory(cc, .{ .base = base, .mod = .{
  13617             .rm = .{ .size = .byte, .disp = disp },
  13618         } }),
  13619         .register => |src_reg| {
  13620             const mem_size = switch (base) {
  13621                 .frame => |base_fi| mem_size: {
  13622                     assert(disp >= 0);
  13623                     const frame_abi_size = self.frame_allocs.items(.abi_size)[@intFromEnum(base_fi)];
  13624                     const frame_spill_pad = self.frame_allocs.items(.spill_pad)[@intFromEnum(base_fi)];
  13625                     assert(frame_abi_size - frame_spill_pad - disp >= abi_size);
  13626                     break :mem_size if (frame_abi_size - frame_spill_pad - disp == abi_size)
  13627                         frame_abi_size
  13628                     else
  13629                         abi_size;
  13630                 },
  13631                 else => abi_size,
  13632             };
  13633             const src_alias = registerAlias(src_reg, abi_size);
  13634             const src_size: u32 = @intCast(switch (src_alias.class()) {
  13635                 .general_purpose, .segment, .x87 => @divExact(src_alias.bitSize(), 8),
  13636                 .mmx, .sse => abi_size,
  13637             });
  13638             if (src_size > mem_size) {
  13639                 const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{
  13640                     .size = src_size,
  13641                     .alignment = Alignment.fromNonzeroByteUnits(src_size),
  13642                 }));
  13643                 const frame_mcv: MCValue = .{ .load_frame = .{ .index = frame_index } };
  13644                 try (try self.moveStrategy(ty, src_alias.class(), true)).write(
  13645                     self,
  13646                     .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{
  13647                         .size = Memory.Size.fromSize(src_size),
  13648                     } } },
  13649                     src_alias,
  13650                 );
  13651                 try self.genSetMem(base, disp, ty, frame_mcv);
  13652                 try self.freeValue(frame_mcv);
  13653             } else try (try self.moveStrategy(ty, src_alias.class(), switch (base) {
  13654                 .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
  13655                 .reg => |reg| switch (reg) {
  13656                     .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
  13657                     else => false,
  13658                 },
  13659                 .frame => |frame_index| self.getFrameAddrAlignment(
  13660                     .{ .index = frame_index, .off = disp },
  13661                 ).compare(.gte, ty.abiAlignment(mod)),
  13662                 .reloc => false,
  13663             })).write(
  13664                 self,
  13665                 .{ .base = base, .mod = .{ .rm = .{
  13666                     .size = self.memSize(ty),
  13667                     .disp = disp,
  13668                 } } },
  13669                 src_alias,
  13670             );
  13671         },
  13672         .register_pair => |src_regs| {
  13673             var part_disp: i32 = disp;
  13674             for (try self.splitType(ty), src_regs) |src_ty, src_reg| {
  13675                 try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg });
  13676                 part_disp += @intCast(src_ty.abiSize(mod));
  13677             }
  13678         },
  13679         .register_overflow => |ro| switch (ty.zigTypeTag(mod)) {
  13680             .Struct => {
  13681                 try self.genSetMem(
  13682                     base,
  13683                     disp + @as(i32, @intCast(ty.structFieldOffset(0, mod))),
  13684                     ty.structFieldType(0, mod),
  13685                     .{ .register = ro.reg },
  13686                 );
  13687                 try self.genSetMem(
  13688                     base,
  13689                     disp + @as(i32, @intCast(ty.structFieldOffset(1, mod))),
  13690                     ty.structFieldType(1, mod),
  13691                     .{ .eflags = ro.eflags },
  13692                 );
  13693             },
  13694             .Optional => {
  13695                 assert(!ty.optionalReprIsPayload(mod));
  13696                 const child_ty = ty.optionalChild(mod);
  13697                 try self.genSetMem(base, disp, child_ty, .{ .register = ro.reg });
  13698                 try self.genSetMem(
  13699                     base,
  13700                     disp + @as(i32, @intCast(child_ty.abiSize(mod))),
  13701                     Type.bool,
  13702                     .{ .eflags = ro.eflags },
  13703                 );
  13704             },
  13705             else => return self.fail("TODO implement genSetMem for {s} of {}", .{
  13706                 @tagName(src_mcv), ty.fmt(mod),
  13707             }),
  13708         },
  13709         .register_offset,
  13710         .memory,
  13711         .indirect,
  13712         .load_direct,
  13713         .lea_direct,
  13714         .load_got,
  13715         .lea_got,
  13716         .load_tlv,
  13717         .lea_tlv,
  13718         .load_frame,
  13719         .lea_frame,
  13720         .load_symbol,
  13721         .lea_symbol,
  13722         => switch (abi_size) {
  13723             0 => {},
  13724             1, 2, 4, 8 => {
  13725                 const src_reg = try self.copyToTmpRegister(ty, src_mcv);
  13726                 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  13727                 defer self.register_manager.unlockReg(src_lock);
  13728 
  13729                 try self.genSetMem(base, disp, ty, .{ .register = src_reg });
  13730             },
  13731             else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }),
  13732         },
  13733         .air_ref => |src_ref| try self.genSetMem(base, disp, ty, try self.resolveInst(src_ref)),
  13734     }
  13735 }
  13736 
  13737 fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void {
  13738     try self.spillRegisters(&.{ .rsi, .rdi, .rcx });
  13739     try self.genSetReg(.rsi, Type.usize, src_ptr);
  13740     try self.genSetReg(.rdi, Type.usize, dst_ptr);
  13741     try self.genSetReg(.rcx, Type.usize, len);
  13742     try self.asmOpOnly(.{ .@"rep _sb", .mov });
  13743 }
  13744 
  13745 fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void {
  13746     try self.spillRegisters(&.{ .rdi, .al, .rcx });
  13747     try self.genSetReg(.rdi, Type.usize, dst_ptr);
  13748     try self.genSetReg(.al, Type.u8, value);
  13749     try self.genSetReg(.rcx, Type.usize, len);
  13750     try self.asmOpOnly(.{ .@"rep _sb", .sto });
  13751 }
  13752 
  13753 fn genExternSymbolRef(
  13754     self: *Self,
  13755     comptime tag: Mir.Inst.Tag,
  13756     lib: ?[]const u8,
  13757     callee: []const u8,
  13758 ) InnerError!void {
  13759     const atom_index = try self.owner.getSymbolIndex(self);
  13760     if (self.bin_file.cast(link.File.Elf)) |elf_file| {
  13761         _ = try self.addInst(.{
  13762             .tag = tag,
  13763             .ops = .extern_fn_reloc,
  13764             .data = .{ .reloc = .{
  13765                 .atom_index = atom_index,
  13766                 .sym_index = try elf_file.getGlobalSymbol(callee, lib),
  13767             } },
  13768         });
  13769     } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
  13770         const global_index = try coff_file.getGlobalSymbol(callee, lib);
  13771         _ = try self.addInst(.{
  13772             .tag = .mov,
  13773             .ops = .import_reloc,
  13774             .data = .{ .rx = .{
  13775                 .r1 = .rax,
  13776                 .payload = try self.addExtra(bits.Symbol{
  13777                     .atom_index = atom_index,
  13778                     .sym_index = link.File.Coff.global_symbol_bit | global_index,
  13779                 }),
  13780             } },
  13781         });
  13782         switch (tag) {
  13783             .mov => {},
  13784             .call => try self.asmRegister(.{ ._, .call }, .rax),
  13785             else => unreachable,
  13786         }
  13787     } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
  13788         _ = try self.addInst(.{
  13789             .tag = .call,
  13790             .ops = .extern_fn_reloc,
  13791             .data = .{ .reloc = .{
  13792                 .atom_index = atom_index,
  13793                 .sym_index = try macho_file.getGlobalSymbol(callee, lib),
  13794             } },
  13795         });
  13796     } else return self.fail("TODO implement calling extern functions", .{});
  13797 }
  13798 
  13799 fn genLazySymbolRef(
  13800     self: *Self,
  13801     comptime tag: Mir.Inst.Tag,
  13802     reg: Register,
  13803     lazy_sym: link.File.LazySymbol,
  13804 ) InnerError!void {
  13805     if (self.bin_file.cast(link.File.Elf)) |elf_file| {
  13806         const sym_index = elf_file.zigObjectPtr().?.getOrCreateMetadataForLazySymbol(elf_file, lazy_sym) catch |err|
  13807             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  13808         const sym = elf_file.symbol(sym_index);
  13809         if (self.mod.pic) {
  13810             switch (tag) {
  13811                 .lea, .call => try self.genSetReg(reg, Type.usize, .{
  13812                     .load_symbol = .{ .sym = sym.esym_index },
  13813                 }),
  13814                 .mov => try self.genSetReg(reg, Type.usize, .{
  13815                     .load_symbol = .{ .sym = sym.esym_index },
  13816                 }),
  13817                 else => unreachable,
  13818             }
  13819             switch (tag) {
  13820                 .lea, .mov => {},
  13821                 .call => try self.asmRegister(.{ ._, .call }, reg),
  13822                 else => unreachable,
  13823             }
  13824         } else {
  13825             const reloc = bits.Symbol{
  13826                 .atom_index = try self.owner.getSymbolIndex(self),
  13827                 .sym_index = sym.esym_index,
  13828             };
  13829             switch (tag) {
  13830                 .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), .{
  13831                     .base = .{ .reloc = reloc },
  13832                     .mod = .{ .rm = .{ .size = .qword } },
  13833                 }),
  13834                 .call => try self.asmMemory(.{ ._, .call }, .{
  13835                     .base = .{ .reloc = reloc },
  13836                     .mod = .{ .rm = .{ .size = .qword } },
  13837                 }),
  13838                 else => unreachable,
  13839             }
  13840         }
  13841     } else if (self.bin_file.cast(link.File.Plan9)) |p9_file| {
  13842         const atom_index = p9_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
  13843             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  13844         var atom = p9_file.getAtom(atom_index);
  13845         _ = atom.getOrCreateOffsetTableEntry(p9_file);
  13846         const got_addr = atom.getOffsetTableAddress(p9_file);
  13847         const got_mem: Memory = .{
  13848             .base = .{ .reg = .ds },
  13849             .mod = .{ .rm = .{
  13850                 .size = .qword,
  13851                 .disp = @intCast(got_addr),
  13852             } },
  13853         };
  13854         switch (tag) {
  13855             .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem),
  13856             .call => try self.asmMemory(.{ ._, .call }, got_mem),
  13857             else => unreachable,
  13858         }
  13859         switch (tag) {
  13860             .lea, .call => {},
  13861             .mov => try self.asmRegisterMemory(
  13862                 .{ ._, tag },
  13863                 reg.to64(),
  13864                 Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }),
  13865             ),
  13866             else => unreachable,
  13867         }
  13868     } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
  13869         const atom_index = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
  13870             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  13871         const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
  13872         switch (tag) {
  13873             .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }),
  13874             .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }),
  13875             else => unreachable,
  13876         }
  13877         switch (tag) {
  13878             .lea, .mov => {},
  13879             .call => try self.asmRegister(.{ ._, .call }, reg),
  13880             else => unreachable,
  13881         }
  13882     } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
  13883         const sym_index = macho_file.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_file, lazy_sym) catch |err|
  13884             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  13885         const sym = macho_file.getSymbol(sym_index);
  13886         switch (tag) {
  13887             .lea, .call => try self.genSetReg(reg, Type.usize, .{ .load_symbol = .{ .sym = sym.nlist_idx } }),
  13888             .mov => try self.genSetReg(reg, Type.usize, .{ .load_symbol = .{ .sym = sym.nlist_idx } }),
  13889             else => unreachable,
  13890         }
  13891         switch (tag) {
  13892             .lea, .mov => {},
  13893             .call => try self.asmRegister(.{ ._, .call }, reg),
  13894             else => unreachable,
  13895         }
  13896     } else {
  13897         return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)});
  13898     }
  13899 }
  13900 
  13901 fn airIntFromPtr(self: *Self, inst: Air.Inst.Index) !void {
  13902     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  13903     const result = result: {
  13904         // TODO: handle case where the operand is a slice not a raw pointer
  13905         const src_mcv = try self.resolveInst(un_op);
  13906         if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv;
  13907 
  13908         const dst_mcv = try self.allocRegOrMem(inst, true);
  13909         const dst_ty = self.typeOfIndex(inst);
  13910         try self.genCopy(dst_ty, dst_mcv, src_mcv);
  13911         break :result dst_mcv;
  13912     };
  13913     return self.finishAir(inst, result, .{ un_op, .none, .none });
  13914 }
  13915 
  13916 fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
  13917     const mod = self.bin_file.comp.module.?;
  13918     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13919     const dst_ty = self.typeOfIndex(inst);
  13920     const src_ty = self.typeOf(ty_op.operand);
  13921 
  13922     const result = result: {
  13923         const dst_rc = self.regClassForType(dst_ty);
  13924         const src_rc = self.regClassForType(src_ty);
  13925         const src_mcv = try self.resolveInst(ty_op.operand);
  13926 
  13927         const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  13928         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  13929 
  13930         const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(mod) <= src_ty.abiSize(mod) and
  13931             self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
  13932             const dst_mcv = try self.allocRegOrMem(inst, true);
  13933             try self.genCopy(switch (math.order(dst_ty.abiSize(mod), src_ty.abiSize(mod))) {
  13934                 .lt => dst_ty,
  13935                 .eq => if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
  13936                 .gt => src_ty,
  13937             }, dst_mcv, src_mcv);
  13938             break :dst dst_mcv;
  13939         };
  13940 
  13941         if (dst_ty.isRuntimeFloat()) break :result dst_mcv;
  13942 
  13943         if (dst_ty.isAbiInt(mod) and src_ty.isAbiInt(mod) and
  13944             dst_ty.intInfo(mod).signedness == src_ty.intInfo(mod).signedness) break :result dst_mcv;
  13945 
  13946         const abi_size = dst_ty.abiSize(mod);
  13947         const bit_size = dst_ty.bitSize(mod);
  13948         if (abi_size * 8 <= bit_size or dst_ty.isVector(mod)) break :result dst_mcv;
  13949 
  13950         const dst_limbs_len = math.divCeil(i32, @intCast(bit_size), 64) catch unreachable;
  13951         const high_mcv: MCValue = switch (dst_mcv) {
  13952             .register => |dst_reg| .{ .register = dst_reg },
  13953             .register_pair => |dst_regs| .{ .register = dst_regs[1] },
  13954             else => dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
  13955         };
  13956         const high_reg = if (high_mcv.isRegister())
  13957             high_mcv.getReg().?
  13958         else
  13959             try self.copyToTmpRegister(Type.usize, high_mcv);
  13960         const high_lock = self.register_manager.lockReg(high_reg);
  13961         defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
  13962 
  13963         try self.truncateRegister(dst_ty, high_reg);
  13964         if (!high_mcv.isRegister()) try self.genCopy(
  13965             if (abi_size <= 8) dst_ty else Type.usize,
  13966             high_mcv,
  13967             .{ .register = high_reg },
  13968         );
  13969         break :result dst_mcv;
  13970     };
  13971     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13972 }
  13973 
  13974 fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
  13975     const mod = self.bin_file.comp.module.?;
  13976     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13977 
  13978     const slice_ty = self.typeOfIndex(inst);
  13979     const ptr_ty = self.typeOf(ty_op.operand);
  13980     const ptr = try self.resolveInst(ty_op.operand);
  13981     const array_ty = ptr_ty.childType(mod);
  13982     const array_len = array_ty.arrayLen(mod);
  13983 
  13984     const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod));
  13985     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
  13986     try self.genSetMem(
  13987         .{ .frame = frame_index },
  13988         @intCast(ptr_ty.abiSize(mod)),
  13989         Type.usize,
  13990         .{ .immediate = array_len },
  13991     );
  13992 
  13993     const result = MCValue{ .load_frame = .{ .index = frame_index } };
  13994     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13995 }
  13996 
  13997 fn airFloatFromInt(self: *Self, inst: Air.Inst.Index) !void {
  13998     const mod = self.bin_file.comp.module.?;
  13999     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14000 
  14001     const dst_ty = self.typeOfIndex(inst);
  14002     const dst_bits = dst_ty.floatBits(self.target.*);
  14003 
  14004     const src_ty = self.typeOf(ty_op.operand);
  14005     const src_bits: u32 = @intCast(src_ty.bitSize(mod));
  14006     const src_signedness =
  14007         if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
  14008     const src_size = math.divCeil(u32, @max(switch (src_signedness) {
  14009         .signed => src_bits,
  14010         .unsigned => src_bits + 1,
  14011     }, 32), 8) catch unreachable;
  14012 
  14013     const result = result: {
  14014         if (switch (dst_bits) {
  14015             16, 80, 128 => true,
  14016             32, 64 => src_size > 8,
  14017             else => unreachable,
  14018         }) {
  14019             if (src_bits > 128) return self.fail("TODO implement airFloatFromInt from {} to {}", .{
  14020                 src_ty.fmt(mod), dst_ty.fmt(mod),
  14021             });
  14022 
  14023             var callee_buf: ["__floatun?i?f".len]u8 = undefined;
  14024             break :result try self.genCall(.{ .lib = .{
  14025                 .return_type = dst_ty.toIntern(),
  14026                 .param_types = &.{src_ty.toIntern()},
  14027                 .callee = std.fmt.bufPrint(&callee_buf, "__float{s}{c}i{c}f", .{
  14028                     switch (src_signedness) {
  14029                         .signed => "",
  14030                         .unsigned => "un",
  14031                     },
  14032                     intCompilerRtAbiName(src_bits),
  14033                     floatCompilerRtAbiName(dst_bits),
  14034                 }) catch unreachable,
  14035             } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
  14036         }
  14037 
  14038         const src_mcv = try self.resolveInst(ty_op.operand);
  14039         const src_reg = if (src_mcv.isRegister())
  14040             src_mcv.getReg().?
  14041         else
  14042             try self.copyToTmpRegister(src_ty, src_mcv);
  14043         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  14044         defer self.register_manager.unlockReg(src_lock);
  14045 
  14046         if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);
  14047 
  14048         const dst_reg = try self.register_manager.allocReg(inst, self.regClassForType(dst_ty));
  14049         const dst_mcv = MCValue{ .register = dst_reg };
  14050         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  14051         defer self.register_manager.unlockReg(dst_lock);
  14052 
  14053         const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(mod)) {
  14054             .Float => switch (dst_ty.floatBits(self.target.*)) {
  14055                 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 },
  14056                 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 },
  14057                 16, 80, 128 => null,
  14058                 else => unreachable,
  14059             },
  14060             else => null,
  14061         }) orelse return self.fail("TODO implement airFloatFromInt from {} to {}", .{
  14062             src_ty.fmt(mod), dst_ty.fmt(mod),
  14063         });
  14064         const dst_alias = dst_reg.to128();
  14065         const src_alias = registerAlias(src_reg, src_size);
  14066         switch (mir_tag[0]) {
  14067             .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias),
  14068             else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias),
  14069         }
  14070 
  14071         break :result dst_mcv;
  14072     };
  14073     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  14074 }
  14075 
  14076 fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void {
  14077     const mod = self.bin_file.comp.module.?;
  14078     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14079 
  14080     const dst_ty = self.typeOfIndex(inst);
  14081     const dst_bits: u32 = @intCast(dst_ty.bitSize(mod));
  14082     const dst_signedness =
  14083         if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned;
  14084     const dst_size = math.divCeil(u32, @max(switch (dst_signedness) {
  14085         .signed => dst_bits,
  14086         .unsigned => dst_bits + 1,
  14087     }, 32), 8) catch unreachable;
  14088 
  14089     const src_ty = self.typeOf(ty_op.operand);
  14090     const src_bits = src_ty.floatBits(self.target.*);
  14091 
  14092     const result = result: {
  14093         if (switch (src_bits) {
  14094             16, 80, 128 => true,
  14095             32, 64 => dst_size > 8,
  14096             else => unreachable,
  14097         }) {
  14098             if (dst_bits > 128) return self.fail("TODO implement airIntFromFloat from {} to {}", .{
  14099                 src_ty.fmt(mod), dst_ty.fmt(mod),
  14100             });
  14101 
  14102             var callee_buf: ["__fixuns?f?i".len]u8 = undefined;
  14103             break :result try self.genCall(.{ .lib = .{
  14104                 .return_type = dst_ty.toIntern(),
  14105                 .param_types = &.{src_ty.toIntern()},
  14106                 .callee = std.fmt.bufPrint(&callee_buf, "__fix{s}{c}f{c}i", .{
  14107                     switch (dst_signedness) {
  14108                         .signed => "",
  14109                         .unsigned => "uns",
  14110                     },
  14111                     floatCompilerRtAbiName(src_bits),
  14112                     intCompilerRtAbiName(dst_bits),
  14113                 }) catch unreachable,
  14114             } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
  14115         }
  14116 
  14117         const src_mcv = try self.resolveInst(ty_op.operand);
  14118         const src_reg = if (src_mcv.isRegister())
  14119             src_mcv.getReg().?
  14120         else
  14121             try self.copyToTmpRegister(src_ty, src_mcv);
  14122         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  14123         defer self.register_manager.unlockReg(src_lock);
  14124 
  14125         const dst_reg = try self.register_manager.allocReg(inst, self.regClassForType(dst_ty));
  14126         const dst_mcv = MCValue{ .register = dst_reg };
  14127         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  14128         defer self.register_manager.unlockReg(dst_lock);
  14129 
  14130         try self.asmRegisterRegister(
  14131             switch (src_bits) {
  14132                 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si },
  14133                 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si },
  14134                 else => unreachable,
  14135             },
  14136             registerAlias(dst_reg, dst_size),
  14137             src_reg.to128(),
  14138         );
  14139 
  14140         if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg);
  14141 
  14142         break :result dst_mcv;
  14143     };
  14144     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  14145 }
  14146 
  14147 fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an AIR cmpxchg: values of at most 8 bytes use `lock cmpxchg` with a register
            // operand, wider values use the `lock` `_16b` form of cmpxchg with fixed registers.
  14148     const mod = self.bin_file.comp.module.?;
  14149     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  14150     const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data;
  14151 
  14152     const ptr_ty = self.typeOf(extra.ptr);
  14153     const val_ty = self.typeOf(extra.expected_value);
  14154     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
  14155 
            // rax/rdx receive the expected value and rbx/rcx the new value below, so evict
            // whatever lives in them and keep them locked for the rest of this function.
  14156     try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
  14157     const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
  14158     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
  14159 
            // Expected value: rax alone, or rax (bytes 0..8) and rdx (bytes 8..16) when wide.
  14160     const exp_mcv = try self.resolveInst(extra.expected_value);
  14161     if (val_abi_size > 8) {
                // Wide values are read through their address; anything that is not already a
                // memory-like MCValue has its address materialized in a temporary register.
  14162         const exp_addr_mcv: MCValue = switch (exp_mcv) {
  14163             .memory, .indirect, .load_frame => exp_mcv.address(),
  14164             else => .{ .register = try self.copyToTmpRegister(Type.usize, exp_mcv.address()) },
  14165         };
  14166         const exp_addr_lock =
  14167             if (exp_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  14168         defer if (exp_addr_lock) |lock| self.register_manager.unlockReg(lock);
  14169 
  14170         try self.genSetReg(.rax, Type.usize, exp_addr_mcv.deref());
  14171         try self.genSetReg(.rdx, Type.usize, exp_addr_mcv.offset(8).deref());
  14172     } else try self.genSetReg(.rax, val_ty, exp_mcv);
  14173 
            // New value: rbx (bytes 0..8) and rcx (bytes 8..16) when wide, in which case
            // `new_reg` is null; otherwise a temporary register that is locked below.
  14174     const new_mcv = try self.resolveInst(extra.new_value);
  14175     const new_reg = if (val_abi_size > 8) new: {
  14176         const new_addr_mcv: MCValue = switch (new_mcv) {
  14177             .memory, .indirect, .load_frame => new_mcv.address(),
  14178             else => .{ .register = try self.copyToTmpRegister(Type.usize, new_mcv.address()) },
  14179         };
  14180         const new_addr_lock =
  14181             if (new_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  14182         defer if (new_addr_lock) |lock| self.register_manager.unlockReg(lock);
  14183 
  14184         try self.genSetReg(.rbx, Type.usize, new_addr_mcv.deref());
  14185         try self.genSetReg(.rcx, Type.usize, new_addr_mcv.offset(8).deref());
  14186         break :new null;
  14187     } else try self.copyToTmpRegister(val_ty, new_mcv);
  14188     const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
  14189     defer if (new_lock) |lock| self.register_manager.unlockReg(lock);
  14190 
            // Memory operand for the target: pointer values of the listed kinds are dereferenced
            // directly, every other kind is first copied into a temporary base register.
  14191     const ptr_mcv = try self.resolveInst(extra.ptr);
  14192     const mem_size = Memory.Size.fromSize(val_abi_size);
  14193     const ptr_mem: Memory = switch (ptr_mcv) {
  14194         .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size),
  14195         else => .{
  14196             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
  14197             .mod = .{ .rm = .{ .size = mem_size } },
  14198         },
  14199     };
            // Only the `rm` addressing form is handled; `off` is reported as a TODO failure.
  14200     switch (ptr_mem.mod) {
  14201         .rm => {},
  14202         .off => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
  14203     }
            // Keep the base register (if any) locked until the instruction has been emitted.
  14204     const ptr_lock = switch (ptr_mem.base) {
  14205         .none, .frame, .reloc => null,
  14206         .reg => |reg| self.register_manager.lockReg(reg),
  14207     };
  14208     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  14209 
            // The outcome is read back from eflags below, so spill any value tracked there first.
  14210     try self.spillEflagsIfOccupied();
  14211     if (val_abi_size <= 8) try self.asmMemoryRegister(
  14212         .{ .@"lock _", .cmpxchg },
  14213         ptr_mem,
  14214         registerAlias(new_reg.?, val_abi_size),
  14215     ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
  14216 
  14217     const result: MCValue = result: {
  14218         if (self.liveness.isUnused(inst)) break :result .unreach;
  14219 
                // Narrow case: no copy is emitted; the result is tracked as rax together with
                // the `ne` condition, and eflags is recorded as owned by this instruction.
  14220         if (val_abi_size <= 8) {
  14221             self.eflags_inst = inst;
  14222             break :result .{ .register_overflow = .{ .reg = .rax, .eflags = .ne } };
  14223         }
  14224 
                // Wide case: store rax at offset 0, rdx at offset 8 and the `ne` condition as a
                // bool at offset 16 of the freshly allocated result.
  14225         const dst_mcv = try self.allocRegOrMem(inst, false);
  14226         try self.genCopy(Type.usize, dst_mcv, .{ .register = .rax });
  14227         try self.genCopy(Type.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx });
  14228         try self.genCopy(Type.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne });
  14229         break :result dst_mcv;
  14230     };
  14231     return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
  14232 }
  14233 
  14234 fn atomicOp(
  14235     self: *Self,
  14236     ptr_mcv: MCValue,
  14237     val_mcv: MCValue,
  14238     ptr_ty: Type,
  14239     val_ty: Type,
  14240     unused: bool,
  14241     rmw_op: ?std.builtin.AtomicRmwOp,
  14242     order: std.builtin.AtomicOrder,
  14243 ) InnerError!MCValue {
            // Emits an atomic read-modify-write (`rmw_op` non-null) or an atomic store
            // (`rmw_op` null) of `val_mcv` to the memory addressed by `ptr_mcv`.
            //
            // `unused` tells whether the caller needs the result; when it is set `.unreach` is
            // returned. `order` is only consulted for the store case.
            //
            // NOTE: the `.loop` strategy writes rax (and, for values wider than 8 bytes, rdx,
            // rbx and rcx) directly without allocating them here; `airAtomicRmw` spills and
            // locks those registers before calling.
  14244     const mod = self.bin_file.comp.module.?;
  14245     const ptr_lock = switch (ptr_mcv) {
  14246         .register => |reg| self.register_manager.lockReg(reg),
  14247         else => null,
  14248     };
  14249     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  14250 
  14251     const val_lock = switch (val_mcv) {
  14252         .register => |reg| self.register_manager.lockReg(reg),
  14253         else => null,
  14254     };
  14255     defer if (val_lock) |lock| self.register_manager.unlockReg(lock);
  14256 
  14257     const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
  14258     const mem_size = Memory.Size.fromSize(val_abi_size);
            // Pointer values of the listed kinds are dereferenced directly; every other kind is
            // first copied into a temporary base register.
  14259     const ptr_mem: Memory = switch (ptr_mcv) {
  14260         .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size),
  14261         else => .{
  14262             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
  14263             .mod = .{ .rm = .{ .size = mem_size } },
  14264         },
  14265     };
  14266     switch (ptr_mem.mod) {
  14267         .rm => {},
                .off => return self.fail("TODO atomicOp with {s}", .{@tagName(ptr_mcv)}),
  14269     }
  14270     const mem_lock = switch (ptr_mem.base) {
  14271         .none, .frame, .reloc => null,
  14272         .reg => |reg| self.register_manager.lockReg(reg),
  14273     };
  14274     defer if (mem_lock) |lock| self.register_manager.unlockReg(lock);
  14275 
            // A null `rmw_op` (plain store) is treated as `.Xchg` when picking the strategy:
            //   .lock    - one instruction, lock-prefixed where needed
            //   .loop    - load, compute, `lock cmpxchg`, retry on failure
            //   .libcall - not implemented
  14276     const use_sse = rmw_op orelse .Xchg != .Xchg and val_ty.isRuntimeFloat();
  14277     const strat: enum { lock, loop, libcall } = if (use_sse) .loop else switch (rmw_op orelse .Xchg) {
  14278         .Xchg,
  14279         .Add,
  14280         .Sub,
  14281         => if (val_abi_size <= 8) .lock else if (val_abi_size <= 16) .loop else .libcall,
  14282         .And,
  14283         .Or,
  14284         .Xor,
  14285         => if (val_abi_size <= 8 and unused) .lock else if (val_abi_size <= 16) .loop else .libcall,
  14286         .Nand,
  14287         .Max,
  14288         .Min,
  14289         => if (val_abi_size <= 16) .loop else .libcall,
  14290     };
  14291     switch (strat) {
  14292         .lock => {
                    // When the result is unused the plain mov/add/sub forms are chosen instead
                    // of xchg/xadd. For a store the ordering picks between `mov` and `xchg`.
  14293             const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) {
  14294                 .Xchg => if (unused) .mov else .xchg,
  14295                 .Add => if (unused) .add else .xadd,
  14296                 .Sub => if (unused) .sub else .xadd,
  14297                 .And => .@"and",
  14298                 .Or => .@"or",
  14299                 .Xor => .xor,
  14300                 else => unreachable,
  14301             } else switch (order) {
  14302                 .Unordered, .Monotonic, .Release, .AcqRel => .mov,
  14303                 .Acquire => unreachable,
  14304                 .SeqCst => .xchg,
  14305             };
  14306 
  14307             const dst_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14308             const dst_mcv = MCValue{ .register = dst_reg };
  14309             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  14310             defer self.register_manager.unlockReg(dst_lock);
  14311 
  14312             try self.genSetReg(dst_reg, val_ty, val_mcv);
                    // A `Sub` whose result is used is emitted as xadd of the negated operand.
  14313             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
  14314                 try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv);
  14315             }
                    // mov and xchg are emitted without a lock prefix; the rest get one.
  14316             try self.asmMemoryRegister(
  14317                 switch (tag) {
  14318                     .mov, .xchg => .{ ._, tag },
  14319                     .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag },
  14320                     else => unreachable,
  14321                 },
  14322                 ptr_mem,
  14323                 registerAlias(dst_reg, val_abi_size),
  14324             );
  14325 
  14326             return if (unused) .unreach else dst_mcv;
  14327         },
  14328         .loop => _ = if (val_abi_size <= 8) {
                    // Values of at most 8 bytes: rax holds the loaded value, `tmp_reg` the
                    // value to write, and `sse_reg` is only allocated for float operations.
  14329             const sse_reg: Register = if (use_sse)
  14330                 try self.register_manager.allocReg(null, abi.RegisterClass.sse)
  14331             else
  14332                 undefined;
  14333             const sse_lock =
  14334                 if (use_sse) self.register_manager.lockRegAssumeUnused(sse_reg) else undefined;
  14335             defer if (use_sse) self.register_manager.unlockReg(sse_lock);
  14336 
  14337             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14338             const tmp_mcv = MCValue{ .register = tmp_reg };
  14339             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  14340             defer self.register_manager.unlockReg(tmp_lock);
  14341 
  14342             try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem);
                    // Index of the next MIR instruction: the target of the retry jump below.
  14343             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  14344             if (!use_sse and rmw_op orelse .Xchg != .Xchg) {
  14345                 try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax });
  14346             }
  14347             if (rmw_op) |op| if (use_sse) {
                        // Float path: only 32- and 64-bit floats have an instruction selected
                        // here; other widths fail with a TODO.
  14348                 const mir_tag = @as(?Mir.Inst.FixedTag, switch (op) {
  14349                     .Add => switch (val_ty.floatBits(self.target.*)) {
  14350                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
  14351                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
  14352                         else => null,
  14353                     },
  14354                     .Sub => switch (val_ty.floatBits(self.target.*)) {
  14355                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
  14356                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
  14357                         else => null,
  14358                     },
  14359                     .Min => switch (val_ty.floatBits(self.target.*)) {
  14360                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
  14361                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
  14362                         else => null,
  14363                     },
  14364                     .Max => switch (val_ty.floatBits(self.target.*)) {
  14365                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
  14366                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
  14367                         else => null,
  14368                     },
  14369                     else => unreachable,
  14370                 }) orelse return self.fail("TODO implement atomicOp of {s} for {}", .{
  14371                     @tagName(op), val_ty.fmt(mod),
  14372                 });
  14373                 try self.genSetReg(sse_reg, val_ty, .{ .register = .rax });
  14374                 switch (mir_tag[0]) {
  14375                     .v_ss, .v_sd => if (val_mcv.isMemory()) try self.asmRegisterRegisterMemory(
  14376                         mir_tag,
  14377                         sse_reg.to128(),
  14378                         sse_reg.to128(),
  14379                         try val_mcv.mem(self, self.memSize(val_ty)),
  14380                     ) else try self.asmRegisterRegisterRegister(
  14381                         mir_tag,
  14382                         sse_reg.to128(),
  14383                         sse_reg.to128(),
  14384                         (if (val_mcv.isRegister())
  14385                             val_mcv.getReg().?
  14386                         else
  14387                             try self.copyToTmpRegister(val_ty, val_mcv)).to128(),
  14388                     ),
  14389                     ._ss, ._sd => if (val_mcv.isMemory()) try self.asmRegisterMemory(
  14390                         mir_tag,
  14391                         sse_reg.to128(),
  14392                         try val_mcv.mem(self, self.memSize(val_ty)),
  14393                     ) else try self.asmRegisterRegister(
  14394                         mir_tag,
  14395                         sse_reg.to128(),
  14396                         (if (val_mcv.isRegister())
  14397                             val_mcv.getReg().?
  14398                         else
  14399                             try self.copyToTmpRegister(val_ty, val_mcv)).to128(),
  14400                     ),
  14401                     else => unreachable,
  14402                 }
  14403                 try self.genSetReg(tmp_reg, val_ty, .{ .register = sse_reg });
  14404             } else switch (op) {
  14405                 .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv),
  14406                 .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv),
  14407                 .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv),
  14408                 .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv),
  14409                 .Nand => {
  14410                     try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv);
  14411                     try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv);
  14412                 },
  14413                 .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv),
  14414                 .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv),
  14415                 .Min, .Max => {
                            // Condition under which the operand replaces the loaded value in
                            // `tmp_reg`; non-integer types are compared as unsigned.
  14416                     const cc: Condition = switch (if (val_ty.isAbiInt(mod))
  14417                         val_ty.intInfo(mod).signedness
  14418                     else
  14419                         .unsigned) {
  14420                         .unsigned => switch (op) {
  14421                             .Min => .a,
  14422                             .Max => .b,
  14423                             else => unreachable,
  14424                         },
  14425                         .signed => switch (op) {
  14426                             .Min => .g,
  14427                             .Max => .l,
  14428                             else => unreachable,
  14429                         },
  14430                     };
  14431 
                            // The cmov is emitted with an operand size of at least 2 bytes.
  14432                     const cmov_abi_size = @max(val_abi_size, 2);
  14433                     switch (val_mcv) {
  14434                         .register => |val_reg| {
  14435                             try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
  14436                             try self.asmCmovccRegisterRegister(
  14437                                 cc,
  14438                                 registerAlias(tmp_reg, cmov_abi_size),
  14439                                 registerAlias(val_reg, cmov_abi_size),
  14440                             );
  14441                         },
  14442                         .memory, .indirect, .load_frame => {
  14443                             try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
  14444                             try self.asmCmovccRegisterMemory(
  14445                                 cc,
  14446                                 registerAlias(tmp_reg, cmov_abi_size),
  14447                                 try val_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)),
  14448                             );
  14449                         },
  14450                         else => {
  14451                             const mat_reg = try self.copyToTmpRegister(val_ty, val_mcv);
  14452                             const mat_lock = self.register_manager.lockRegAssumeUnused(mat_reg);
  14453                             defer self.register_manager.unlockReg(mat_lock);
  14454 
  14455                             try self.genBinOpMir(
  14456                                 .{ ._, .cmp },
  14457                                 val_ty,
  14458                                 tmp_mcv,
  14459                                 .{ .register = mat_reg },
  14460                             );
  14461                             try self.asmCmovccRegisterRegister(
  14462                                 cc,
  14463                                 registerAlias(tmp_reg, cmov_abi_size),
  14464                                 registerAlias(mat_reg, cmov_abi_size),
  14465                             );
  14466                         },
  14467                     }
  14468                 },
  14469             };
                    // Try to publish `tmp_reg`; jump back to `loop` when the condition is `ne`.
  14470             try self.asmMemoryRegister(
  14471                 .{ .@"lock _", .cmpxchg },
  14472                 ptr_mem,
  14473                 registerAlias(tmp_reg, val_abi_size),
  14474             );
  14475             _ = try self.asmJccReloc(.ne, loop);
  14476             return if (unused) .unreach else .{ .register = .rax };
  14477         } else {
                    // Values wider than 8 bytes: load the current contents into rax (bytes
                    // 0..8) and rdx (bytes 8..16); the value to write is built in rbx/rcx.
  14478             try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{
  14479                 .base = ptr_mem.base,
  14480                 .mod = .{ .rm = .{
  14481                     .size = .qword,
  14482                     .index = ptr_mem.mod.rm.index,
  14483                     .scale = ptr_mem.mod.rm.scale,
  14484                     .disp = ptr_mem.mod.rm.disp + 0,
  14485                 } },
  14486             });
  14487             try self.asmRegisterMemory(.{ ._, .mov }, .rdx, .{
  14488                 .base = ptr_mem.base,
  14489                 .mod = .{ .rm = .{
  14490                     .size = .qword,
  14491                     .index = ptr_mem.mod.rm.index,
  14492                     .scale = ptr_mem.mod.rm.scale,
  14493                     .disp = ptr_mem.mod.rm.disp + 8,
  14494                 } },
  14495             });
  14496             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
                    // The operand is accessed as two qwords in memory; other kinds of MCValue
                    // have their address copied into a temporary register first.
  14497             const val_mem_mcv: MCValue = switch (val_mcv) {
  14498                 .memory, .indirect, .load_frame => val_mcv,
  14499                 else => .{ .indirect = .{
  14500                     .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()),
  14501                 } },
  14502             };
  14503             const val_lo_mem = try val_mem_mcv.mem(self, .qword);
  14504             const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .qword);
                    // A null `rmw_op` is a plain store. Treat it as `.Xchg`, exactly like the
                    // strategy selection above, so that rbx/rcx are loaded with the operand.
                    // Comparing the optional itself against `.Xchg` made the store path copy the
                    // old value into rbx/rcx and then skip the operation switch entirely, which
                    // wrote the previous contents back instead of the new value.
                    const op = rmw_op orelse .Xchg;
                    if (op != .Xchg) {
  14506                 try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax);
  14507                 try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx);
  14508             }
                    switch (op) {
  14510                 .Xchg => {
  14511                     try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem);
  14512                     try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem);
  14513                 },
  14514                 .Add => {
  14515                     try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem);
  14516                     try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem);
  14517                 },
  14518                 .Sub => {
  14519                     try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem);
  14520                     try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem);
  14521                 },
  14522                 .And => {
  14523                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
  14524                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
  14525                 },
  14526                 .Nand => {
  14527                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
  14528                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
  14529                     try self.asmRegister(.{ ._, .not }, .rbx);
  14530                     try self.asmRegister(.{ ._, .not }, .rcx);
  14531                 },
  14532                 .Or => {
  14533                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem);
  14534                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem);
  14535                 },
  14536                 .Xor => {
  14537                     try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem);
  14538                     try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem);
  14539                 },
  14540                 .Min, .Max => {
  14541                     const cc: Condition = switch (if (val_ty.isAbiInt(mod))
  14542                         val_ty.intInfo(mod).signedness
  14543                     else
  14544                         .unsigned) {
  14545                         .unsigned => switch (op) {
  14546                             .Min => .a,
  14547                             .Max => .b,
  14548                             else => unreachable,
  14549                         },
  14550                         .signed => switch (op) {
  14551                             .Min => .g,
  14552                             .Max => .l,
  14553                             else => unreachable,
  14554                         },
  14555                     };
  14556 
                            // Compare via cmp/sbb on a scratch copy of rcx so that rbx/rcx
                            // themselves are only changed by the cmovs.
  14557                     const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .register = .rcx });
  14558                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  14559                     defer self.register_manager.unlockReg(tmp_lock);
  14560 
  14561                     try self.asmRegisterMemory(.{ ._, .cmp }, .rbx, val_lo_mem);
  14562                     try self.asmRegisterMemory(.{ ._, .sbb }, tmp_reg, val_hi_mem);
  14563                     try self.asmCmovccRegisterMemory(cc, .rbx, val_lo_mem);
  14564                     try self.asmCmovccRegisterMemory(cc, .rcx, val_hi_mem);
  14565                 },
                    }
  14567             try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
  14568             _ = try self.asmJccReloc(.ne, loop);
  14569 
  14570             if (unused) return .unreach;
                    // Copy rax/rdx into a fresh temporary. The `load_frame` payload is accessed
                    // directly, so this relies on the allocation being a frame slot.
  14571             const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
  14572             try self.asmMemoryRegister(.{ ._, .mov }, .{
  14573                 .base = .{ .frame = dst_mcv.load_frame.index },
  14574                 .mod = .{ .rm = .{
  14575                     .size = .qword,
  14576                     .disp = dst_mcv.load_frame.off + 0,
  14577                 } },
  14578             }, .rax);
  14579             try self.asmMemoryRegister(.{ ._, .mov }, .{
  14580                 .base = .{ .frame = dst_mcv.load_frame.index },
  14581                 .mod = .{ .rm = .{
  14582                     .size = .qword,
  14583                     .disp = dst_mcv.load_frame.off + 8,
  14584                 } },
  14585             }, .rdx);
  14586             return dst_mcv;
  14587         },
  14588         .libcall => return self.fail("TODO implement x86 atomic libcall", .{}),
  14589     }
  14590 }
  14591 
  14592 fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an AIR atomic read-modify-write by handing both operands to `atomicOp`.
            const rmw_data = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
            const rmw = self.air.extraData(Air.AtomicRmw, rmw_data.payload).data;
            const ptr_ref = rmw_data.operand;
            const val_ref = rmw.operand;

            // Evict and pin the fixed registers before any operand is resolved.
            try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
            const fixed_locks = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
            defer for (fixed_locks) |fixed_lock| self.register_manager.unlockReg(fixed_lock);

            const ptr_mcv = try self.resolveInst(ptr_ref);
            const val_mcv = try self.resolveInst(val_ref);

            const result = try self.atomicOp(
                ptr_mcv,
                val_mcv,
                self.typeOf(ptr_ref),
                self.typeOf(val_ref),
                self.liveness.isUnused(inst),
                rmw.op(),
                rmw.ordering(),
            );
            return self.finishAir(inst, result, .{ ptr_ref, val_ref, .none });
  14611 }
  14612 
  14613 fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an AIR atomic load as a load through the pointer operand.
            const ptr_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load.ptr;
            const ptr_mcv = try self.resolveInst(ptr_ref);

            // Pin the pointer register, if there is one, until the load has been emitted.
            const ptr_lock: ?RegisterLock = if (ptr_mcv == .register)
                self.register_manager.lockRegAssumeUnused(ptr_mcv.register)
            else
                null;
            defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);

            // Prefer reusing the operand's location for the result; otherwise allocate one.
            const dst_mcv = dst: {
                if (self.reuseOperand(inst, ptr_ref, 0, ptr_mcv)) break :dst ptr_mcv;
                break :dst try self.allocRegOrMem(inst, true);
            };

            try self.load(dst_mcv, self.typeOf(ptr_ref), ptr_mcv);
            return self.finishAir(inst, dst_mcv, .{ ptr_ref, .none, .none });
  14632 }
  14633 
  14634 fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
            // Lowers an AIR atomic store by calling `atomicOp` with a null read-modify-write
            // operation and `unused` set, since a store produces no result.
            const mod = self.bin_file.comp.module.?;
  14635     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  14636 
  14637     const ptr_ty = self.typeOf(bin_op.lhs);
            const val_ty = self.typeOf(bin_op.rhs);

            // For values wider than 8 bytes `atomicOp` selects its loop strategy, which writes
            // rax, rdx, rbx and rcx directly without allocating them. Evict and pin them before
            // the operands are resolved (as `airAtomicRmw` does) so no live value is clobbered.
            // Narrower stores go through an allocated register and need no reservation.
            const needs_fixed_regs = val_ty.abiSize(mod) > 8;
            if (needs_fixed_regs) try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
            const regs_lock: ?[4]RegisterLock = if (needs_fixed_regs)
                self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx })
            else
                null;
            defer if (regs_lock) |locks| for (locks) |lock| self.register_manager.unlockReg(lock);

            const ptr_mcv = try self.resolveInst(bin_op.lhs);
            const val_mcv = try self.resolveInst(bin_op.rhs);
  14642 
  14643     const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
  14644     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  14645 }
  14646 
  14647 fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
            // Lowers an AIR memset. One-byte elements go through `genInlineMemset`; larger
            // elements store the first element and then copy it forward with `genInlineMemcpy`.
  14648     const mod = self.bin_file.comp.module.?;
  14649     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  14650 
  14651     result: {
                // Without safety, setting to an undefined value emits nothing at all.
  14652         if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;
  14653 
                // Evict and pin these four registers for the duration of the lowering.
  14654         try self.spillRegisters(&.{ .rax, .rdi, .rsi, .rcx });
  14655         const reg_locks = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdi, .rsi, .rcx });
  14656         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  14657 
  14658         const dst_ptr = try self.resolveInst(bin_op.lhs);
  14659         const dst_ptr_ty = self.typeOf(bin_op.lhs);
  14660         const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) {
  14661             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  14662             else => null,
  14663         };
  14664         defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);
  14665 
  14666         const src_val = try self.resolveInst(bin_op.rhs);
  14667         const elem_ty = self.typeOf(bin_op.rhs);
  14668         const src_val_lock: ?RegisterLock = switch (src_val) {
  14669             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  14670             else => null,
  14671         };
  14672         defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock);
  14673 
  14674         const elem_abi_size: u31 = @intCast(elem_ty.abiSize(mod));
  14675 
                // Byte-sized elements: a single inline memset over the whole destination.
  14676         if (elem_abi_size == 1) {
  14677             const ptr: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
  14678                 // TODO: this only handles slices stored in the stack
  14679                 .Slice => dst_ptr,
  14680                 .One => dst_ptr,
  14681                 .C, .Many => unreachable,
  14682             };
                    // Length: for a slice, read from offset 8 of the slice value; for a
                    // single-item pointer, the array length of the pointee type.
  14683             const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
  14684                 // TODO: this only handles slices stored in the stack
  14685                 .Slice => dst_ptr.address().offset(8).deref(),
  14686                 .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) },
  14687                 .C, .Many => unreachable,
  14688             };
  14689             const len_lock: ?RegisterLock = switch (len) {
  14690                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  14691                 else => null,
  14692             };
  14693             defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
  14694 
  14695             try self.genInlineMemset(ptr, src_val, len);
  14696             break :result;
  14697         }
  14698 
  14699         // Store the first element, and then rely on memcpy copying forwards.
  14700         // Length zero requires a runtime check - so we handle arrays specially
  14701         // here to elide it.
  14702         switch (dst_ptr_ty.ptrSize(mod)) {
  14703             .Slice => {
  14704                 const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(mod);
  14705 
  14706                 // TODO: this only handles slices stored in the stack
  14707                 const ptr = dst_ptr;
  14708                 const len = dst_ptr.address().offset(8).deref();
  14709 
  14710                 // Used to store the number of elements for comparison.
  14711                 // After comparison, updated to store number of bytes needed to copy.
  14712                 const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14713                 const len_mcv: MCValue = .{ .register = len_reg };
  14714                 const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
  14715                 defer self.register_manager.unlockReg(len_lock);
  14716 
  14717                 try self.genSetReg(len_reg, Type.usize, len);
  14718                 try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg);
  14719 
                        // Jump over the store and copy when the length is zero; the jump
                        // target is filled in by `performReloc` at the end of this prong.
  14720                 const skip_reloc = try self.asmJccReloc(.z, undefined);
  14721                 try self.store(slice_ptr_ty, ptr, src_val);
  14722 
  14723                 const second_elem_ptr_reg =
  14724                     try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14725                 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
  14726                 const second_elem_ptr_lock =
  14727                     self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
  14728                 defer self.register_manager.unlockReg(second_elem_ptr_lock);
  14729 
                        // second_elem_ptr = ptr + elem_abi_size
  14730                 try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
  14731                     .reg = try self.copyToTmpRegister(Type.usize, ptr),
  14732                     .off = elem_abi_size,
  14733                 } });
  14734 
                        // len_reg = (len - 1) * elem_abi_size, the byte count left to copy.
  14735                 try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 });
  14736                 try self.asmRegisterRegisterImmediate(
  14737                     .{ .i_, .mul },
  14738                     len_reg,
  14739                     len_reg,
  14740                     Immediate.s(elem_abi_size),
  14741                 );
  14742                 try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv);
  14743 
  14744                 try self.performReloc(skip_reloc);
  14745             },
  14746             .One => {
                        // Pointer to array: the length is known at compile time, so no
                        // runtime zero-length check is emitted.
  14747                 const elem_ptr_ty = try mod.singleMutPtrType(elem_ty);
  14748 
  14749                 const len = dst_ptr_ty.childType(mod).arrayLen(mod);
  14750 
  14751                 assert(len != 0); // prevented by Sema
  14752                 try self.store(elem_ptr_ty, dst_ptr, src_val);
  14753 
  14754                 const second_elem_ptr_reg =
  14755                     try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14756                 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
  14757                 const second_elem_ptr_lock =
  14758                     self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
  14759                 defer self.register_manager.unlockReg(second_elem_ptr_lock);
  14760 
                        // second_elem_ptr = dst_ptr + elem_abi_size
  14761                 try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
  14762                     .reg = try self.copyToTmpRegister(Type.usize, dst_ptr),
  14763                     .off = elem_abi_size,
  14764                 } });
  14765 
  14766                 const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
  14767                 try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy);
  14768             },
  14769             .C, .Many => unreachable,
  14770         }
  14771     }
  14772     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  14773 }
  14774 
  /// Lowers the AIR `memcpy` instruction: copies bytes from the source pointer
  /// (`bin_op.rhs`) to the destination (`bin_op.lhs`) using `genInlineMemcpy`.
  ///
  /// The byte count is derived from the destination pointer type:
  /// * slice: the qword stored 8 bytes into the destination value, multiplied at
  ///   runtime (`imul`) by the element ABI size;
  /// * single-item pointer: array length times element ABI size, as an immediate.
  /// Many-item and C pointers are treated as unreachable here.
  fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.comp.module.?;
      const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

      // rdi, rsi and rcx are freed up and kept locked for the whole lowering.
      // NOTE(review): presumably these are the registers `genInlineMemcpy` uses
      // for its string-copy sequence -- confirm against that helper.
      try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
      const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
      defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

      const dst_ptr = try self.resolveInst(bin_op.lhs);
      const dst_ptr_ty = self.typeOf(bin_op.lhs);
      const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) {
          .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
          else => null,
      };
      defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);

      const src_ptr = try self.resolveInst(bin_op.rhs);
      // The source may live in the very register that was just locked for the
      // destination (e.g. both operands are the same AIR value). `lockReg`
      // yields null for an already locked register instead of asserting, so the
      // register is then simply released once, by the destination lock.
      const src_ptr_lock: ?RegisterLock = switch (src_ptr) {
          .register => |reg| self.register_manager.lockReg(reg),
          else => null,
      };
      defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock);

      const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
          .Slice => len: {
              const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
              const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
              defer self.register_manager.unlockReg(len_lock);

              // len_reg = [dst + 8] * elem_abi_size
              try self.asmRegisterMemoryImmediate(
                  .{ .i_, .mul },
                  len_reg,
                  try dst_ptr.address().offset(8).deref().mem(self, .qword),
                  Immediate.s(@intCast(dst_ptr_ty.childType(mod).abiSize(mod))),
              );
              break :len .{ .register = len_reg };
          },
          .One => len: {
              const array_ty = dst_ptr_ty.childType(mod);
              break :len .{ .immediate = array_ty.arrayLen(mod) * array_ty.childType(mod).abiSize(mod) };
          },
          .C, .Many => unreachable,
      };
      // Keep a runtime length register locked until the copy has been emitted.
      const len_lock: ?RegisterLock = switch (len) {
          .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
          else => null,
      };
      defer if (len_lock) |lock| self.register_manager.unlockReg(lock);

      // TODO: dst_ptr and src_ptr could be slices rather than raw pointers
      try self.genInlineMemcpy(dst_ptr, src_ptr, len);

      return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  }
  14829 
  /// Lowers the AIR `tag_name` instruction by calling the lazily generated code
  /// symbol associated with the enum type's owner decl.
  ///
  /// The first integer parameter register receives the address of the result
  /// location and the second one receives the enum operand.
  fn airTagName(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.comp.module.?;
      const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
      const result_ty = self.typeOfIndex(inst);
      const tag_ty = self.typeOf(un_op);
      const cc = abi.resolveCallingConvention(.Unspecified, self.target.*);

      // The call needs a call frame that is at least as large and as aligned as
      // the frame allocation computed from the result type; grow it if necessary.
      const wanted_frame = FrameAlloc.init(.{
          .size = result_ty.abiSize(mod),
          .alignment = result_ty.abiAlignment(mod),
      });
      const call_frame = @intFromEnum(FrameIndex.call_frame);
      const frames = self.frame_allocs.slice();
      const frame_sizes = frames.items(.abi_size);
      frame_sizes[call_frame] = @max(frame_sizes[call_frame], wanted_frame.abi_size);
      const frame_aligns = frames.items(.abi_align);
      frame_aligns[call_frame] = frame_aligns[call_frame].max(wanted_frame.abi_align);

      // Get live values out of the way of the call.
      try self.spillEflagsIfOccupied();
      try self.spillCallerPreservedRegs(cc);

      const arg_regs = abi.getCAbiIntParamRegs(cc);

      // Argument 0: address of the result location.
      const result_mcv = try self.allocRegOrMem(inst, false);
      try self.genSetReg(arg_regs[0], Type.usize, result_mcv.address());

      // Argument 1: the enum value itself.
      const tag_mcv = try self.resolveInst(un_op);
      try self.genSetReg(arg_regs[1], tag_ty, tag_mcv);

      const tag_name_sym = link.File.LazySymbol.initDecl(.code, tag_ty.getOwnerDecl(mod), mod);
      try self.genLazySymbolRef(.call, .rax, tag_name_sym);

      return self.finishAir(inst, result_mcv, .{ un_op, .none, .none });
  }
  14871 
  /// Lowers the AIR `error_name` instruction.
  ///
  /// Two consecutive dwords are read from the lazy `const_data` symbol at
  /// `base + 4 * err + 4` and `base + 4 * err + 8`. The result written to the
  /// stack consists of two qwords: `base + first` followed by
  /// `second - first - 1`.
  /// NOTE(review): presumably the symbol is a table of name offsets followed by
  /// the names themselves, making the result a (pointer, length) pair that
  /// excludes a terminating byte -- confirm against the lazy symbol generator.
  fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.comp.module.?;
      const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;

      // Error value in a scratch register; it is used as a scaled index below.
      const err_ty = self.typeOf(un_op);
      const err_reg = try self.copyToTmpRegister(err_ty, try self.resolveInst(un_op));
      const err_lock = self.register_manager.lockRegAssumeUnused(err_reg);
      defer self.register_manager.unlockReg(err_lock);

      // Base address of the lazy const data symbol.
      const table_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
      const table_lock = self.register_manager.lockRegAssumeUnused(table_reg);
      defer self.register_manager.unlockReg(table_lock);
      try self.genLazySymbolRef(.lea, table_reg, link.File.LazySymbol.initDecl(.const_data, null, mod));

      // First holds the first dword, later the first result qword.
      const ptr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
      const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
      defer self.register_manager.unlockReg(ptr_lock);

      // First holds the second dword, later the second result qword.
      const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
      const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
      defer self.register_manager.unlockReg(len_lock);

      try self.truncateRegister(err_ty, err_reg.to32());

      const table_base = table_reg.to64();
      const err_index = err_reg.to64();

      // ptr32 = [base + err * 4 + 4]
      try self.asmRegisterMemory(.{ ._, .mov }, ptr_reg.to32(), .{
          .base = .{ .reg = table_base },
          .mod = .{ .rm = .{
              .size = .dword,
              .index = err_index,
              .scale = .@"4",
              .disp = 4,
          } },
      });
      // len32 = [base + err * 4 + 8]
      try self.asmRegisterMemory(.{ ._, .mov }, len_reg.to32(), .{
          .base = .{ .reg = table_base },
          .mod = .{ .rm = .{
              .size = .dword,
              .index = err_index,
              .scale = .@"4",
              .disp = 8,
          } },
      });
      // len32 -= ptr32
      try self.asmRegisterRegister(.{ ._, .sub }, len_reg.to32(), ptr_reg.to32());
      // ptr64 = base + ptr64
      try self.asmRegisterMemory(.{ ._, .lea }, ptr_reg.to64(), .{
          .base = .{ .reg = table_base },
          .mod = .{ .rm = .{
              .size = .dword,
              .index = ptr_reg.to64(),
          } },
      });
      // len32 = len - 1
      try self.asmRegisterMemory(.{ ._, .lea }, len_reg.to32(), .{
          .base = .{ .reg = len_reg.to64() },
          .mod = .{ .rm = .{
              .size = .byte,
              .disp = -1,
          } },
      });

      // Store both qwords into the result's stack slot.
      const dst_mcv = try self.allocRegOrMem(inst, false);
      const dst_frame = dst_mcv.load_frame;
      try self.asmMemoryRegister(.{ ._, .mov }, .{
          .base = .{ .frame = dst_frame.index },
          .mod = .{ .rm = .{
              .size = .qword,
              .disp = dst_frame.off,
          } },
      }, ptr_reg.to64());
      try self.asmMemoryRegister(.{ ._, .mov }, .{
          .base = .{ .frame = dst_frame.index },
          .mod = .{ .rm = .{
              .size = .qword,
              .disp = dst_frame.off + 8,
          } },
      }, len_reg.to64());

      return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  }
  14973 
  /// Lowers the AIR `splat` instruction: broadcasts the scalar operand into every
  /// element of the result vector.
  ///
  /// Handled here:
  /// * `bool` scalars: the result is a bit mask in a general purpose register;
  /// * integer scalars: AVX2 broadcast instructions, or an unpack/shuffle
  ///   sequence otherwise;
  /// * `f32`, `f64` and `f128` scalars for the vector lengths covered below.
  /// Every other combination fails with a "TODO implement airSplat" error.
  fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.comp.module.?;
      const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
      const vector_ty = self.typeOfIndex(inst);
      const vector_len = vector_ty.vectorLen(mod);
      const dst_rc = self.regClassForType(vector_ty);
      const scalar_ty = self.typeOf(ty_op.operand);

      const result: MCValue = result: {
          switch (scalar_ty.zigTypeTag(mod)) {
              else => {},
              .Bool => {
                  // regs[0] is the result (tracked for `inst`), regs[1] is a scratch
                  // register holding the all-ones mask.
                  const regs =
                      try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
                  const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
                  defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

                  // The result starts out as "all false"; the conditional move below
                  // only overwrites it when the scalar is true, so it must be
                  // initialised here.
                  try self.genSetReg(regs[0], vector_ty, .{ .immediate = 0 });
                  try self.genSetReg(
                      regs[1],
                      vector_ty,
                      .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - vector_len) },
                  );
                  const src_mcv = try self.resolveInst(ty_op.operand);
                  // Operate on an alias of at least 4 bytes.
                  const abi_size = @max(math.divCeil(u32, vector_len, 8) catch unreachable, 4);
                  try self.asmCmovccRegisterRegister(
                      switch (src_mcv) {
                          .eflags => |cc| cc,
                          .register => |src_reg| cc: {
                              try self.asmRegisterImmediate(
                                  .{ ._, .@"test" },
                                  src_reg.to8(),
                                  Immediate.u(1),
                              );
                              break :cc .nz;
                          },
                          else => cc: {
                              try self.asmMemoryImmediate(
                                  .{ ._, .@"test" },
                                  try src_mcv.mem(self, .byte),
                                  Immediate.u(1),
                              );
                              break :cc .nz;
                          },
                      },
                      registerAlias(regs[0], abi_size),
                      registerAlias(regs[1], abi_size),
                  );
                  break :result .{ .register = regs[0] };
              },
              .Int => if (self.hasFeature(.avx2)) avx2: {
                  // Pick the broadcast instruction by element width; combinations
                  // that do not fit fall through to the TODO failure.
                  const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(mod).bits) {
                      else => null,
                      1...8 => switch (vector_len) {
                          else => null,
                          1...32 => .{ .vp_b, .broadcast },
                      },
                      9...16 => switch (vector_len) {
                          else => null,
                          1...16 => .{ .vp_w, .broadcast },
                      },
                      17...32 => switch (vector_len) {
                          else => null,
                          1...8 => .{ .vp_d, .broadcast },
                      },
                      33...64 => switch (vector_len) {
                          else => null,
                          1...4 => .{ .vp_q, .broadcast },
                      },
                      65...128 => switch (vector_len) {
                          else => null,
                          1...2 => .{ .vp_i128, .broadcast },
                      },
                  }) orelse break :avx2;

                  const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
                  const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
                  defer self.register_manager.unlockReg(dst_lock);

                  const src_mcv = try self.resolveInst(ty_op.operand);
                  if (src_mcv.isMemory()) try self.asmRegisterMemory(
                      mir_tag,
                      registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
                      try src_mcv.mem(self, self.memSize(scalar_ty)),
                  ) else {
                      // The 128-bit broadcast is only emitted from memory here.
                      if (mir_tag[0] == .vp_i128) break :avx2;
                      try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                      try self.asmRegisterRegister(
                          mir_tag,
                          registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
                          registerAlias(dst_reg, @intCast(scalar_ty.abiSize(mod))),
                      );
                  }
                  break :result .{ .register = dst_reg };
              } else {
                  const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
                  const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
                  defer self.register_manager.unlockReg(dst_lock);

                  try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand });
                  if (vector_len == 1) break :result .{ .register = dst_reg };

                  // Widen the replicated unit step by step:
                  // byte -> word (punpcklbw), word -> dword (pshuflw),
                  // dword/qword -> whole register (pshufd).
                  const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod)));
                  const scalar_bits = scalar_ty.intInfo(mod).bits;
                  if (switch (scalar_bits) {
                      1...8 => true,
                      9...128 => false,
                      else => unreachable,
                  }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister(
                      .{ .vp_, .unpcklbw },
                      dst_alias,
                      dst_alias,
                      dst_alias,
                  ) else try self.asmRegisterRegister(
                      .{ .p_, .unpcklbw },
                      dst_alias,
                      dst_alias,
                  );
                  if (switch (scalar_bits) {
                      1...8 => vector_len > 2,
                      9...16 => true,
                      17...128 => false,
                      else => unreachable,
                  }) try self.asmRegisterRegisterImmediate(
                      .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
                      dst_alias,
                      dst_alias,
                      Immediate.u(0),
                  );
                  if (switch (scalar_bits) {
                      1...8 => vector_len > 4,
                      9...16 => vector_len > 2,
                      17...64 => true,
                      65...128 => false,
                      else => unreachable,
                  }) try self.asmRegisterRegisterImmediate(
                      .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf },
                      dst_alias,
                      dst_alias,
                      // Up to 32 bits the replicated unit is dword 0. A 33..64-bit
                      // element spans dwords 0 and 1, so both must be repeated as a
                      // pair; selecting dword 0 everywhere would duplicate the low
                      // half of the element into its high half.
                      Immediate.u(if (scalar_bits <= 32) 0b00_00_00_00 else 0b01_00_01_00),
                  );
                  break :result .{ .register = dst_reg };
              },
              .Float => switch (scalar_ty.floatBits(self.target.*)) {
                  32 => switch (vector_len) {
                      1 => {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                          break :result .{ .register = dst_reg };
                      },
                      2...4 => {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          if (self.hasFeature(.avx)) {
                              const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                              if (src_mcv.isMemory()) try self.asmRegisterMemory(
                                  .{ .v_ss, .broadcast },
                                  dst_reg.to128(),
                                  try src_mcv.mem(self, .dword),
                              ) else {
                                  const src_reg = if (src_mcv.isRegister())
                                      src_mcv.getReg().?
                                  else
                                      try self.copyToTmpRegister(scalar_ty, src_mcv);
                                  try self.asmRegisterRegisterRegisterImmediate(
                                      .{ .v_ps, .shuf },
                                      dst_reg.to128(),
                                      src_reg.to128(),
                                      src_reg.to128(),
                                      Immediate.u(0),
                                  );
                              }
                              break :result .{ .register = dst_reg };
                          } else {
                              // Non-AVX shufps is destructive, so shuffle in place.
                              const dst_mcv = if (src_mcv.isRegister() and
                                  self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                                  src_mcv
                              else
                                  try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
                              const dst_reg = dst_mcv.getReg().?;
                              try self.asmRegisterRegisterImmediate(
                                  .{ ._ps, .shuf },
                                  dst_reg.to128(),
                                  dst_reg.to128(),
                                  Immediate.u(0),
                              );
                              break :result dst_mcv;
                          }
                      },
                      5...8 => if (self.hasFeature(.avx)) {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          if (src_mcv.isMemory()) try self.asmRegisterMemory(
                              .{ .v_ss, .broadcast },
                              dst_reg.to256(),
                              try src_mcv.mem(self, .dword),
                          ) else {
                              const src_reg = if (src_mcv.isRegister())
                                  src_mcv.getReg().?
                              else
                                  try self.copyToTmpRegister(scalar_ty, src_mcv);
                              if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
                                  .{ .v_ss, .broadcast },
                                  dst_reg.to256(),
                                  src_reg.to128(),
                              ) else {
                                  // Fill the low 128 bits, then mirror them into the
                                  // high 128 bits.
                                  try self.asmRegisterRegisterRegisterImmediate(
                                      .{ .v_ps, .shuf },
                                      dst_reg.to128(),
                                      src_reg.to128(),
                                      src_reg.to128(),
                                      Immediate.u(0),
                                  );
                                  try self.asmRegisterRegisterRegisterImmediate(
                                      .{ .v_f128, .insert },
                                      dst_reg.to256(),
                                      dst_reg.to256(),
                                      dst_reg.to128(),
                                      Immediate.u(1),
                                  );
                              }
                          }
                          break :result .{ .register = dst_reg };
                      },
                      else => {},
                  },
                  64 => switch (vector_len) {
                      1 => {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                          break :result .{ .register = dst_reg };
                      },
                      2 => {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          if (self.hasFeature(.sse3)) {
                              if (src_mcv.isMemory()) try self.asmRegisterMemory(
                                  if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                  dst_reg.to128(),
                                  try src_mcv.mem(self, .qword),
                              ) else try self.asmRegisterRegister(
                                  if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                  dst_reg.to128(),
                                  (if (src_mcv.isRegister())
                                      src_mcv.getReg().?
                                  else
                                      try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
                              );
                              break :result .{ .register = dst_reg };
                          } else {
                              // No movddup without SSE3: put the scalar into the low
                              // qword of the destination, then copy that low qword
                              // into the high qword with movlhps.
                              try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                              try self.asmRegisterRegister(
                                  .{ ._ps, .movlh },
                                  dst_reg.to128(),
                                  dst_reg.to128(),
                              );
                              break :result .{ .register = dst_reg };
                          }
                      },
                      3...4 => if (self.hasFeature(.avx)) {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          if (src_mcv.isMemory()) try self.asmRegisterMemory(
                              .{ .v_sd, .broadcast },
                              dst_reg.to256(),
                              try src_mcv.mem(self, .qword),
                          ) else {
                              const src_reg = if (src_mcv.isRegister())
                                  src_mcv.getReg().?
                              else
                                  try self.copyToTmpRegister(scalar_ty, src_mcv);
                              if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
                                  .{ .v_sd, .broadcast },
                                  dst_reg.to256(),
                                  src_reg.to128(),
                              ) else {
                                  // Fill the low 128 bits, then mirror them into the
                                  // high 128 bits.
                                  try self.asmRegisterRegister(
                                      .{ .v_, .movddup },
                                      dst_reg.to128(),
                                      src_reg.to128(),
                                  );
                                  try self.asmRegisterRegisterRegisterImmediate(
                                      .{ .v_f128, .insert },
                                      dst_reg.to256(),
                                      dst_reg.to256(),
                                      dst_reg.to128(),
                                      Immediate.u(1),
                                  );
                              }
                          }
                          break :result .{ .register = dst_reg };
                      },
                      else => {},
                  },
                  128 => switch (vector_len) {
                      1 => {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                          break :result .{ .register = dst_reg };
                      },
                      2 => if (self.hasFeature(.avx)) {
                          const src_mcv = try self.resolveInst(ty_op.operand);
                          const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                          if (src_mcv.isMemory()) try self.asmRegisterMemory(
                              .{ .v_f128, .broadcast },
                              dst_reg.to256(),
                              try src_mcv.mem(self, .xword),
                          ) else {
                              const src_reg = if (src_mcv.isRegister())
                                  src_mcv.getReg().?
                              else
                                  try self.copyToTmpRegister(scalar_ty, src_mcv);
                              // dst = src with its low 128 bits also placed in the
                              // high 128 bits.
                              try self.asmRegisterRegisterRegisterImmediate(
                                  .{ .v_f128, .insert },
                                  dst_reg.to256(),
                                  src_reg.to256(),
                                  src_reg.to128(),
                                  Immediate.u(1),
                              );
                          }
                          break :result .{ .register = dst_reg };
                      },
                      else => {},
                  },
                  16, 80 => {},
                  else => unreachable,
              },
          }
          return self.fail("TODO implement airSplat for {}", .{vector_ty.fmt(mod)});
      };
      return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  }
  15310 
  /// Lowering of the AIR `select` instruction; not implemented for x86_64 yet.
  /// Always fails with a TODO error after decoding the instruction payload.
  fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
      const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
      // Decoded but unused until this is implemented. Once a result exists, finish with:
      //   self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs })
      _ = self.air.extraData(Air.Bin, pl_op.payload).data;
      return self.fail("TODO implement airSelect for x86_64", .{});
  }
  15318 
  /// Lowering of the AIR `shuffle` instruction; not implemented for x86_64 yet.
  /// Always fails with a TODO error.
  fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
      // The instruction data is read but not used until this is implemented;
      // an implementation would end with a `self.finishAir(inst, result, ...)` call.
      _ = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
      return self.fail("TODO implement airShuffle for x86_64", .{});
  }
  15325 
  /// Lowers the AIR `reduce` instruction.
  ///
  /// Only `.Or` and `.And` over vectors of `bool` with fewer than 64 elements
  /// are implemented; the operand is treated as a bit mask and the result is
  /// returned as a condition code in eflags. Everything else fails with a
  /// "TODO implement airReduce" error.
  fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
      const mod = self.bin_file.comp.module.?;
      const reduce = self.air.instructions.items(.data)[@intFromEnum(inst)].reduce;

      const result: MCValue = result: {
          const operand_ty = self.typeOf(reduce.operand);
          if (operand_ty.isVector(mod) and operand_ty.childType(mod).toIntern() == .bool_type) {
              // The result lives in eflags, so whatever is there must be saved first.
              try self.spillEflagsIfOccupied();

              const operand_mcv = try self.resolveInst(reduce.operand);
              const mask_len = (math.cast(u6, operand_ty.vectorLen(mod)) orelse
                  return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}));
              // One set bit per vector element.
              const mask = (@as(u64, 1) << mask_len) - 1;
              const abi_size: u32 = @intCast(operand_ty.abiSize(mod));
              switch (reduce.operation) {
                  .Or => {
                      // Any element bit set <=> `test` result is nonzero.
                      if (operand_mcv.isMemory()) try self.asmMemoryImmediate(
                          .{ ._, .@"test" },
                          try operand_mcv.mem(self, Memory.Size.fromSize(abi_size)),
                          Immediate.u(mask),
                      ) else {
                          const operand_reg = registerAlias(if (operand_mcv.isRegister())
                              operand_mcv.getReg().?
                          else
                              try self.copyToTmpRegister(operand_ty, operand_mcv), abi_size);
                          if (mask_len < abi_size * 8) try self.asmRegisterImmediate(
                              .{ ._, .@"test" },
                              operand_reg,
                              Immediate.u(mask),
                          ) else try self.asmRegisterRegister(
                              .{ ._, .@"test" },
                              operand_reg,
                              operand_reg,
                          );
                      }
                      break :result .{ .eflags = .nz };
                  },
                  .And => {
                      // All element bits set <=> the inverted value has no element
                      // bit set.
                      const tmp_reg = try self.copyToTmpRegister(operand_ty, operand_mcv);
                      const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                      defer self.register_manager.unlockReg(tmp_lock);

                      // Work on the alias matching the operand's ABI size. Inverting
                      // the full-width register would set the bits above the operand,
                      // and the unmasked `test reg, reg` below (used when the
                      // elements fill the ABI size exactly) would then never see zero.
                      const tmp_alias = registerAlias(tmp_reg, abi_size);
                      try self.asmRegister(.{ ._, .not }, tmp_alias);
                      if (mask_len < abi_size * 8)
                          try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_alias, Immediate.u(mask))
                      else
                          try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_alias, tmp_alias);
                      break :result .{ .eflags = .z };
                  },
                  else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}),
              }
          }
          return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)});
      };
      return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
  }
  15382 
  15383 fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers an aggregate-init instruction: builds a struct, array or vector value of the
            // instruction's result type from the element operands stored in `air.extra`. Structs and
            // ordinary arrays/vectors are assembled in a frame allocation made here; vectors whose
            // elements are 1 bit wide are packed into a general-purpose register instead.
  15384     const mod = self.bin_file.comp.module.?;
  15385     const result_ty = self.typeOfIndex(inst);
            // `arrayLen` supplies the element count for every aggregate kind handled below,
            // structs included.
  15386     const len: usize = @intCast(result_ty.arrayLen(mod));
  15387     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
            // The operand refs occupy `len` consecutive slots of `air.extra`, starting at the payload
            // index.
  15388     const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra[ty_pl.payload..][0..len]);
  15389     const result: MCValue = result: {
  15390         switch (result_ty.zigTypeTag(mod)) {
  15391             .Struct => {
  15392                 const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
  15393                 if (result_ty.containerLayout(mod) == .Packed) {
                            // Packed layout: fill the whole allocation with zero first, then OR each
                            // runtime field into place.
  15394                     const struct_type = mod.typeToStruct(result_ty).?;
  15395                     try self.genInlineMemset(
  15396                         .{ .lea_frame = .{ .index = frame_index } },
  15397                         .{ .immediate = 0 },
  15398                         .{ .immediate = result_ty.abiSize(mod) },
  15399                     );
  15400                     for (elements, 0..) |elem, elem_i_usize| {
  15401                         const elem_i: u32 = @intCast(elem_i_usize);
                                // Fields that have a comptime-known value are skipped.
  15402                         if ((try result_ty.structFieldValueComptime(mod, elem_i)) != null) continue;
  15403 
  15404                         const elem_ty = result_ty.structFieldType(elem_i, mod);
  15405                         const elem_bit_size: u32 = @intCast(elem_ty.bitSize(mod));
                                // Only fields of at most 64 bits are handled by this path.
  15406                         if (elem_bit_size > 64) {
  15407                             return self.fail(
  15408                                 "TODO airAggregateInit implement packed structs with large fields",
  15409                                 .{},
  15410                             );
  15411                         }
  15412                         const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
  15413                         const elem_abi_bits = elem_abi_size * 8;
                                // Split the field's bit offset into the byte offset of the ABI-sized unit
                                // that contains it and the bit offset inside that unit.
  15414                         const elem_off = mod.structPackedFieldBitOffset(struct_type, elem_i);
  15415                         const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
  15416                         const elem_bit_off = elem_off % elem_abi_bits;
  15417                         const elem_mcv = try self.resolveInst(elem);
                                // A `.load_tlv` operand is replaced by the matching `.lea_tlv` before use.
  15418                         const mat_elem_mcv = switch (elem_mcv) {
  15419                             .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  15420                             else => elem_mcv,
  15421                         };
                                // A register operand is locked for the rest of this iteration, presumably
                                // so the temporaries allocated below cannot take it.
  15422                         const elem_lock = switch (mat_elem_mcv) {
  15423                             .register => |reg| self.register_manager.lockReg(reg),
  15424                             .immediate => |imm| lock: {
                                        // The allocation was already zero-filled, so a zero immediate
                                        // needs no code.
  15425                                 if (imm == 0) continue;
  15426                                 break :lock null;
  15427                             },
  15428                             else => null,
  15429                         };
  15430                         defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
  15431                         const elem_reg = registerAlias(
  15432                             try self.copyToTmpRegister(elem_ty, mat_elem_mcv),
  15433                             elem_abi_size,
  15434                         );
                                // NOTE(review): `regExtraBits` presumably reports how many register bits
                                // lie above the type's bit width; confirm at its definition.
  15435                         const elem_extra_bits = self.regExtraBits(elem_ty);
  15436                         if (elem_bit_off < elem_extra_bits) {
  15437                             try self.truncateRegister(elem_ty, elem_reg);
  15438                         }
                                // Shift the value left to its bit position and OR it into the unit at
                                // `elem_byte_off`.
  15439                         if (elem_bit_off > 0) try self.genShiftBinOpMir(
  15440                             .{ ._l, .sh },
  15441                             elem_ty,
  15442                             .{ .register = elem_reg },
  15443                             .{ .immediate = elem_bit_off },
  15444                         );
  15445                         try self.genBinOpMir(
  15446                             .{ ._, .@"or" },
  15447                             elem_ty,
  15448                             .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
  15449                             .{ .register = elem_reg },
  15450                         );
                                // In this case a second copy of the field is shifted right by
                                // `elem_abi_bits - elem_bit_off` and ORed into the following unit, at
                                // `elem_byte_off + elem_abi_size`.
  15451                         if (elem_bit_off > elem_extra_bits) {
  15452                             const reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv);
  15453                             if (elem_extra_bits > 0) {
  15454                                 try self.truncateRegister(elem_ty, registerAlias(reg, elem_abi_size));
  15455                             }
  15456                             try self.genShiftBinOpMir(
  15457                                 .{ ._r, .sh },
  15458                                 elem_ty,
  15459                                 .{ .register = reg },
  15460                                 .{ .immediate = elem_abi_bits - elem_bit_off },
  15461                             );
  15462                             try self.genBinOpMir(
  15463                                 .{ ._, .@"or" },
  15464                                 elem_ty,
  15465                                 .{ .load_frame = .{
  15466                                     .index = frame_index,
  15467                                     .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)),
  15468                                 } },
  15469                                 .{ .register = reg },
  15470                             );
  15471                         }
  15472                     }
  15473                 } else for (elements, 0..) |elem, elem_i| {
                            // Non-packed layout: store each runtime field at its `structFieldOffset`,
                            // again skipping fields with a comptime-known value.
  15474                     if ((try result_ty.structFieldValueComptime(mod, elem_i)) != null) continue;
  15475 
  15476                     const elem_ty = result_ty.structFieldType(elem_i, mod);
  15477                     const elem_off: i32 = @intCast(result_ty.structFieldOffset(elem_i, mod));
  15478                     const elem_mcv = try self.resolveInst(elem);
  15479                     const mat_elem_mcv = switch (elem_mcv) {
  15480                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  15481                         else => elem_mcv,
  15482                     };
  15483                     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
  15484                 }
  15485                 break :result .{ .load_frame = .{ .index = frame_index } };
  15486             },
  15487             .Array, .Vector => {
  15488                 const elem_ty = result_ty.childType(mod);
  15489                 if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) {
                            // Vector with 1-bit elements: accumulate the bits in a general-purpose
                            // register that is first cleared with `xor`.
  15490                     const result_size: u32 = @intCast(result_ty.abiSize(mod));
  15491                     const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  15492                     try self.asmRegisterRegister(
  15493                         .{ ._, .xor },
  15494                         registerAlias(dst_reg, @min(result_size, 4)),
  15495                         registerAlias(dst_reg, @min(result_size, 4)),
  15496                     );
  15497 
  15498                     for (elements, 0..) |elem, elem_i| {
                                // Per element: copy to a temporary, mask with 1, shift left by the
                                // element index (skipped for index 0), and OR into the destination.
  15499                         const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
  15500                         const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
  15501                         defer self.register_manager.unlockReg(elem_lock);
  15502 
  15503                         try self.asmRegisterImmediate(
  15504                             .{ ._, .@"and" },
  15505                             registerAlias(elem_reg, @min(result_size, 4)),
  15506                             Immediate.u(1),
  15507                         );
  15508                         if (elem_i > 0) try self.asmRegisterImmediate(
  15509                             .{ ._l, .sh },
  15510                             registerAlias(elem_reg, result_size),
  15511                             Immediate.u(@intCast(elem_i)),
  15512                         );
  15513                         try self.asmRegisterRegister(
  15514                             .{ ._, .@"or" },
  15515                             registerAlias(dst_reg, result_size),
  15516                             registerAlias(elem_reg, result_size),
  15517                         );
  15518                     }
  15519                     break :result .{ .register = dst_reg };
  15520                 } else {
                            // Every other array or vector: store element `i` at byte offset
                            // `elem_size * i` of a new frame allocation.
  15521                     const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
  15522                     const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
  15523 
  15524                     for (elements, 0..) |elem, elem_i| {
  15525                         const elem_mcv = try self.resolveInst(elem);
  15526                         const mat_elem_mcv = switch (elem_mcv) {
  15527                             .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  15528                             else => elem_mcv,
  15529                         };
  15530                         const elem_off: i32 = @intCast(elem_size * elem_i);
  15531                         try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
  15532                     }
                            // If the type has a sentinel, it is stored right after the last element.
  15533                     if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem(
  15534                         .{ .frame = frame_index },
  15535                         @intCast(elem_size * elements.len),
  15536                         elem_ty,
  15537                         try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }),
  15538                     );
  15539                     break :result .{ .load_frame = .{ .index = frame_index } };
  15540                 }
  15541             },
  15542             else => unreachable,
  15543         }
  15544     };
  15545 
            // Up to `Liveness.bpi - 1` operands are handed to `finishAir` in a fixed-size buffer padded
            // with `.none`; longer operand lists are fed one by one through the big-tomb iterator.
  15546     if (elements.len <= Liveness.bpi - 1) {
  15547         var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
  15548         @memcpy(buf[0..elements.len], elements);
  15549         return self.finishAir(inst, result, buf);
  15550     }
  15551     var bt = self.liveness.iterateBigTomb(inst);
  15552     for (elements) |elem| try self.feed(&bt, elem);
  15553     return self.finishAirResult(inst, result);
  15554 }
  15555 
  15556 fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a union-init instruction: produces a value of the instruction's union type from
            // the single initializer operand `extra.init` for the field `extra.field_index`.
  15557     const mod = self.bin_file.comp.module.?;
  15558     const ip = &mod.intern_pool;
  15559     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  15560     const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data;
  15561     const result: MCValue = result: {
  15562         const union_ty = self.typeOfIndex(inst);
  15563         const layout = union_ty.unionGetLayout(mod);
  15564 
  15565         const src_ty = self.typeOf(extra.init);
  15566         const src_mcv = try self.resolveInst(extra.init);
                // No tag to store: reuse the operand's location when the union's ABI size does not
                // exceed the source type's and `reuseOperand` succeeds; otherwise copy the source into
                // a newly allocated destination.
  15567         if (layout.tag_size == 0) {
  15568             if (layout.abi_size <= src_ty.abiSize(mod) and
  15569                 self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
  15570 
  15571             const dst_mcv = try self.allocRegOrMem(inst, true);
  15572             try self.genCopy(src_ty, dst_mcv, src_mcv);
  15573             break :result dst_mcv;
  15574         }
  15575 
                // Tagged union: the tag and the payload are written separately into the destination.
  15576         const dst_mcv = try self.allocRegOrMem(inst, false);
  15577 
                // Look up the enum field that has the same name as the union field and take its
                // integer value as the tag to store.
  15578         const union_obj = mod.typeToUnion(union_ty).?;
  15579         const field_name = union_obj.field_names.get(ip)[extra.field_index];
  15580         const tag_ty = Type.fromInterned(union_obj.enum_tag_ty);
  15581         const field_index = tag_ty.enumFieldIndex(field_name, mod).?;
  15582         const tag_val = try mod.enumValueFieldIndex(tag_ty, field_index);
  15583         const tag_int_val = try tag_val.intFromEnum(tag_ty, mod);
  15584         const tag_int = tag_int_val.toUnsignedInt(mod);
                // When the tag's alignment is lower than the payload's, the payload sits at offset 0
                // and the tag follows at `payload_size`; otherwise the tag sits at offset 0 and the
                // payload follows at `tag_size`.
  15585         const tag_off: i32 = if (layout.tag_align.compare(.lt, layout.payload_align))
  15586             @intCast(layout.payload_size)
  15587         else
  15588             0;
  15589         try self.genCopy(tag_ty, dst_mcv.address().offset(tag_off).deref(), .{ .immediate = tag_int });
  15590 
  15591         const pl_off: i32 = if (layout.tag_align.compare(.lt, layout.payload_align))
  15592             0
  15593         else
  15594             @intCast(layout.tag_size);
  15595         try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv);
  15596 
  15597         break :result dst_mcv;
  15598     };
  15599     return self.finishAir(inst, result, .{ extra.init, .none, .none });
  15600 }
  15601 
  15602 fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
            // Handles a prefetch instruction. This function emits no instruction of its own: it reads
            // the prefetch data and finishes with an `.unreach` result, passing `prefetch.ptr` as the
            // only operand.
  15603     const prefetch = self.air.instructions.items(.data)[@intFromEnum(inst)].prefetch;
  15604     return self.finishAir(inst, .unreach, .{ prefetch.ptr, .none, .none });
  15605 }
  15606 
  15607 fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers a multiply-add over the operands `extra.lhs`, `extra.rhs` and `pl_op.operand`,
            // either as a call to an `fma` library routine or as one of the `fmadd132`, `fmadd213`
            // or `fmadd231` instructions.
  15608     const mod = self.bin_file.comp.module.?;
  15609     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  15610     const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
  15611     const ty = self.typeOfIndex(inst);
  15612 
  15613     const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand };
  15614     const result = result: {
                // 16-, 80- and 128-bit floats always take the library-call path; 32- and 64-bit floats
                // take it only when the `fma` feature is not available.
  15615         if (switch (ty.scalarType(mod).floatBits(self.target.*)) {
  15616             16, 80, 128 => true,
  15617             32, 64 => !self.hasFeature(.fma),
  15618             else => unreachable,
  15619         }) {
                    // The library-call path handles scalar floats only.
  15620             if (ty.zigTypeTag(mod) != .Float) return self.fail("TODO implement airMulAdd for {}", .{
  15621                 ty.fmt(mod),
  15622             });
  15623 
                    // The callee name is `<prefix>fma<suffix>`, formatted into a buffer sized for
                    // "__fma?".
  15624             var callee_buf: ["__fma?".len]u8 = undefined;
  15625             break :result try self.genCall(.{ .lib = .{
  15626                 .return_type = ty.toIntern(),
  15627                 .param_types = &.{ ty.toIntern(), ty.toIntern(), ty.toIntern() },
  15628                 .callee = std.fmt.bufPrint(&callee_buf, "{s}fma{s}", .{
  15629                     floatLibcAbiPrefix(ty),
  15630                     floatLibcAbiSuffix(ty),
  15631                 }) catch unreachable,
  15632             } }, &.{ ty, ty, ty }, &.{
  15633                 .{ .air_ref = extra.lhs }, .{ .air_ref = extra.rhs }, .{ .air_ref = pl_op.operand },
  15634             });
  15635         }
  15636 
                // `order[i]` is the 1-based instruction operand slot assigned to `ops[i]`, with 0
                // meaning "not assigned yet". `unused` has one bit per slot that is still free.
  15637         var mcvs: [3]MCValue = undefined;
  15638         var locks = [1]?RegisterManager.RegisterLock{null} ** 3;
  15639         defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
  15640         var order = [1]u2{0} ** 3;
  15641         var unused = std.StaticBitSet(3).initFull();
                // First pass: while slot 1 is free, a register operand that `reuseOperand` accepts gets
                // slot 1; otherwise, while slot 3 is free, a memory operand gets slot 3. Every operand
                // that resolves to `.register` is locked.
  15642         for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| {
  15643             const op_index: u2 = @intCast(op_i);
  15644             mcv.* = try self.resolveInst(op);
  15645             if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) {
  15646                 order[op_index] = 1;
  15647                 unused.unset(0);
  15648             } else if (unused.isSet(2) and mcv.isMemory()) {
  15649                 order[op_index] = 3;
  15650                 unused.unset(2);
  15651             }
  15652             switch (mcv.*) {
  15653                 .register => |reg| lock.* = self.register_manager.lockReg(reg),
  15654                 else => {},
  15655             }
  15656         }
                // Second pass: each operand still unassigned takes the lowest free slot. An operand
                // that is already in a register stays where it is unless it landed in slot 1; in every
                // other case the value is copied into a temporary register, whose lock replaces any
                // previous lock for that operand.
  15657         for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| {
  15658             if (mop_index.* != 0) continue;
  15659             mop_index.* = 1 + @as(u2, @intCast(unused.toggleFirstSet().?));
  15660             if (mop_index.* > 1 and mcv.isRegister()) continue;
  15661             const reg = try self.copyToTmpRegister(ty, mcv.*);
  15662             mcv.* = .{ .register = reg };
  15663             if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock);
  15664             lock.* = self.register_manager.lockRegAssumeUnused(reg);
  15665         }
  15666 
                // Select the instruction from the slot assignment. Each fmadd variant serves the two
                // orderings in which the slots of lhs and rhs are swapped. Scalars and length-1 vectors
                // use the `v_ss`/`v_sd` forms, vectors of 2...8 32-bit floats use `v_ps`, and vectors
                // of 2...4 64-bit floats use `v_pd`. The remaining listed cases yield `null`, which is
                // reported as a TODO failure.
  15667         const mir_tag = @as(?Mir.Inst.FixedTag, if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or
  15668             mem.eql(u2, &order, &.{ 3, 1, 2 }))
  15669             switch (ty.zigTypeTag(mod)) {
  15670                 .Float => switch (ty.floatBits(self.target.*)) {
  15671                     32 => .{ .v_ss, .fmadd132 },
  15672                     64 => .{ .v_sd, .fmadd132 },
  15673                     16, 80, 128 => null,
  15674                     else => unreachable,
  15675                 },
  15676                 .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
  15677                     .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
  15678                         32 => switch (ty.vectorLen(mod)) {
  15679                             1 => .{ .v_ss, .fmadd132 },
  15680                             2...8 => .{ .v_ps, .fmadd132 },
  15681                             else => null,
  15682                         },
  15683                         64 => switch (ty.vectorLen(mod)) {
  15684                             1 => .{ .v_sd, .fmadd132 },
  15685                             2...4 => .{ .v_pd, .fmadd132 },
  15686                             else => null,
  15687                         },
  15688                         16, 80, 128 => null,
  15689                         else => unreachable,
  15690                     },
  15691                     else => unreachable,
  15692                 },
  15693                 else => unreachable,
  15694             }
  15695         else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 }))
  15696             switch (ty.zigTypeTag(mod)) {
  15697                 .Float => switch (ty.floatBits(self.target.*)) {
  15698                     32 => .{ .v_ss, .fmadd213 },
  15699                     64 => .{ .v_sd, .fmadd213 },
  15700                     16, 80, 128 => null,
  15701                     else => unreachable,
  15702                 },
  15703                 .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
  15704                     .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
  15705                         32 => switch (ty.vectorLen(mod)) {
  15706                             1 => .{ .v_ss, .fmadd213 },
  15707                             2...8 => .{ .v_ps, .fmadd213 },
  15708                             else => null,
  15709                         },
  15710                         64 => switch (ty.vectorLen(mod)) {
  15711                             1 => .{ .v_sd, .fmadd213 },
  15712                             2...4 => .{ .v_pd, .fmadd213 },
  15713                             else => null,
  15714                         },
  15715                         16, 80, 128 => null,
  15716                         else => unreachable,
  15717                     },
  15718                     else => unreachable,
  15719                 },
  15720                 else => unreachable,
  15721             }
  15722         else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 }))
  15723             switch (ty.zigTypeTag(mod)) {
  15724                 .Float => switch (ty.floatBits(self.target.*)) {
  15725                     32 => .{ .v_ss, .fmadd231 },
  15726                     64 => .{ .v_sd, .fmadd231 },
  15727                     16, 80, 128 => null,
  15728                     else => unreachable,
  15729                 },
  15730                 .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
  15731                     .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
  15732                         32 => switch (ty.vectorLen(mod)) {
  15733                             1 => .{ .v_ss, .fmadd231 },
  15734                             2...8 => .{ .v_ps, .fmadd231 },
  15735                             else => null,
  15736                         },
  15737                         64 => switch (ty.vectorLen(mod)) {
  15738                             1 => .{ .v_sd, .fmadd231 },
  15739                             2...4 => .{ .v_pd, .fmadd231 },
  15740                             else => null,
  15741                         },
  15742                         16, 80, 128 => null,
  15743                         else => unreachable,
  15744                     },
  15745                     else => unreachable,
  15746                 },
  15747                 else => unreachable,
  15748             }
  15749         else
  15750             unreachable) orelse return self.fail("TODO implement airMulAdd for {}", .{ty.fmt(mod)});
  15751 
                // Invert `order`: `mops[slot - 1]` is the value that occupies that slot.
  15752         var mops: [3]MCValue = undefined;
  15753         for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv;
  15754 
                // Slots 1 and 2 are unwrapped as registers; slot 3 is emitted either as a register or
                // as a memory operand. The value in slot 1 is returned as the result.
  15755         const abi_size: u32 = @intCast(ty.abiSize(mod));
  15756         const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
  15757         const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
  15758         if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
  15759             mir_tag,
  15760             mop1_reg,
  15761             mop2_reg,
  15762             registerAlias(mops[2].getReg().?, abi_size),
  15763         ) else try self.asmRegisterRegisterMemory(
  15764             mir_tag,
  15765             mop1_reg,
  15766             mop2_reg,
  15767             try mops[2].mem(self, Memory.Size.fromSize(abi_size)),
  15768         );
  15769         break :result mops[0];
  15770     };
  15771     return self.finishAir(inst, result, ops);
  15772 }
  15773 
  15774 fn airVaStart(self: *Self, inst: Air.Inst.Index) !void {
            // Lowers `c_va_start`. For the SysV calling convention it fills a newly allocated frame
            // slot of the `va_list` type with four fields, written in order at increasing offsets.
            // Win64 is reported as a TODO failure, and any other resolved calling convention is
            // treated as unreachable.
  15775     const mod = self.bin_file.comp.module.?;
  15776     const va_list_ty = self.air.instructions.items(.data)[@intFromEnum(inst)].ty;
  15777     const ptr_anyopaque_ty = try mod.singleMutPtrType(Type.anyopaque);
  15778 
  15779     const result: MCValue = switch (abi.resolveCallingConvention(
  15780         self.fn_type.fnCallingConvention(mod),
  15781         self.target.*,
  15782     )) {
  15783         .SysV => result: {
  15784             const info = self.va_info.sysv;
  15785             const dst_fi = try self.allocFrameIndex(FrameAlloc.initSpill(va_list_ty, mod));
                    // Running byte offset of the field being written; advanced by the ABI size of each
                    // field's type after it is stored.
  15786             var field_off: u31 = 0;
  15787             // gp_offset: c_uint,
                    // Stored as `info.gp_count * 8`.
                    // NOTE(review): `gp_count` is presumably the number of general-purpose parameter
                    // registers already consumed; confirm where `va_info` is filled in.
  15788             try self.genSetMem(
  15789                 .{ .frame = dst_fi },
  15790                 field_off,
  15791                 Type.c_uint,
  15792                 .{ .immediate = info.gp_count * 8 },
  15793             );
  15794             field_off += @intCast(Type.c_uint.abiSize(mod));
  15795             // fp_offset: c_uint,
                    // Stored as 8 bytes for every integer parameter register plus `info.fp_count * 16`.
  15796             try self.genSetMem(
  15797                 .{ .frame = dst_fi },
  15798                 field_off,
  15799                 Type.c_uint,
  15800                 .{ .immediate = abi.SysV.c_abi_int_param_regs.len * 8 + info.fp_count * 16 },
  15801             );
  15802             field_off += @intCast(Type.c_uint.abiSize(mod));
  15803             // overflow_arg_area: *anyopaque,
                    // Stored as the address of the frame location `info.overflow_arg_area`.
  15804             try self.genSetMem(
  15805                 .{ .frame = dst_fi },
  15806                 field_off,
  15807                 ptr_anyopaque_ty,
  15808                 .{ .lea_frame = info.overflow_arg_area },
  15809             );
  15810             field_off += @intCast(ptr_anyopaque_ty.abiSize(mod));
  15811             // reg_save_area: *anyopaque,
                    // Stored as the address of the frame location `info.reg_save_area`.
  15812             try self.genSetMem(
  15813                 .{ .frame = dst_fi },
  15814                 field_off,
  15815                 ptr_anyopaque_ty,
  15816                 .{ .lea_frame = info.reg_save_area },
  15817             );
                    // `field_off` is not read again after this final increment.
  15818             field_off += @intCast(ptr_anyopaque_ty.abiSize(mod));
  15819             break :result .{ .load_frame = .{ .index = dst_fi } };
  15820         },
  15821         .Win64 => return self.fail("TODO implement c_va_start for Win64", .{}),
  15822         else => unreachable,
  15823     };
  15824     return self.finishAir(inst, result, .{ .none, .none, .none });
  15825 }
  15826 
  15827 fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
  15828     const mod = self.bin_file.comp.module.?;
  15829     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  15830     const ty = self.typeOfIndex(inst);
  15831     const promote_ty = self.promoteVarArg(ty);
  15832     const ptr_anyopaque_ty = try mod.singleMutPtrType(Type.anyopaque);
  15833     const unused = self.liveness.isUnused(inst);
  15834 
  15835     const result: MCValue = switch (abi.resolveCallingConvention(
  15836         self.fn_type.fnCallingConvention(mod),
  15837         self.target.*,
  15838     )) {
  15839         .SysV => result: {
  15840             try self.spillEflagsIfOccupied();
  15841 
  15842             const tmp_regs =
  15843                 try self.register_manager.allocRegs(2, .{ null, null }, abi.RegisterClass.gp);
  15844             const offset_reg = tmp_regs[0].to32();
  15845             const addr_reg = tmp_regs[1].to64();
  15846             const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
  15847             defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
  15848 
  15849             const promote_mcv = try self.allocTempRegOrMem(promote_ty, true);
  15850             const promote_lock = switch (promote_mcv) {
  15851                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  15852                 else => null,
  15853             };
  15854             defer if (promote_lock) |lock| self.register_manager.unlockReg(lock);
  15855 
  15856             const ptr_arg_list_reg =
  15857                 try self.copyToTmpRegister(self.typeOf(ty_op.operand), .{ .air_ref = ty_op.operand });
  15858             const ptr_arg_list_lock = self.register_manager.lockRegAssumeUnused(ptr_arg_list_reg);
  15859             defer self.register_manager.unlockReg(ptr_arg_list_lock);
  15860 
  15861             const gp_offset: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 0 } };
  15862             const fp_offset: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 4 } };
  15863             const overflow_arg_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 8 } };
  15864             const reg_save_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 16 } };
  15865 
  15866             const classes = mem.sliceTo(&abi.classifySystemV(promote_ty, mod, .arg), .none);
  15867             switch (classes[0]) {
  15868                 .integer => {
  15869                     assert(classes.len == 1);
  15870 
  15871                     try self.genSetReg(offset_reg, Type.c_uint, gp_offset);
  15872                     try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, Immediate.u(
  15873                         abi.SysV.c_abi_int_param_regs.len * 8,
  15874                     ));
  15875                     const mem_reloc = try self.asmJccReloc(.ae, undefined);
  15876 
  15877                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area);
  15878                     if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
  15879                         .base = .{ .reg = addr_reg },
  15880                         .mod = .{ .rm = .{
  15881                             .size = .qword,
  15882                             .index = offset_reg.to64(),
  15883                         } },
  15884                     });
  15885                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
  15886                         .base = .{ .reg = offset_reg.to64() },
  15887                         .mod = .{ .rm = .{
  15888                             .size = .qword,
  15889                             .disp = 8,
  15890                         } },
  15891                     });
  15892                     try self.genCopy(Type.c_uint, gp_offset, .{ .register = offset_reg });
  15893                     const done_reloc = try self.asmJmpReloc(undefined);
  15894 
  15895                     try self.performReloc(mem_reloc);
  15896                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area);
  15897                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
  15898                         .base = .{ .reg = addr_reg },
  15899                         .mod = .{ .rm = .{
  15900                             .size = .qword,
  15901                             .disp = @intCast(@max(promote_ty.abiSize(mod), 8)),
  15902                         } },
  15903                     });
  15904                     try self.genCopy(
  15905                         ptr_anyopaque_ty,
  15906                         overflow_arg_area,
  15907                         .{ .register = offset_reg.to64() },
  15908                     );
  15909 
  15910                     try self.performReloc(done_reloc);
  15911                     if (!unused) try self.genCopy(promote_ty, promote_mcv, .{
  15912                         .indirect = .{ .reg = addr_reg },
  15913                     });
  15914                 },
  15915                 .sse => {
  15916                     assert(classes.len == 1);
  15917 
  15918                     try self.genSetReg(offset_reg, Type.c_uint, fp_offset);
  15919                     try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, Immediate.u(
  15920                         abi.SysV.c_abi_int_param_regs.len * 8 + abi.SysV.c_abi_sse_param_regs.len * 16,
  15921                     ));
  15922                     const mem_reloc = try self.asmJccReloc(.ae, undefined);
  15923 
  15924                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area);
  15925                     if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
  15926                         .base = .{ .reg = addr_reg },
  15927                         .mod = .{ .rm = .{
  15928                             .size = .qword,
  15929                             .index = offset_reg.to64(),
  15930                         } },
  15931                     });
  15932                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
  15933                         .base = .{ .reg = offset_reg.to64() },
  15934                         .mod = .{ .rm = .{
  15935                             .size = .qword,
  15936                             .disp = 16,
  15937                         } },
  15938                     });
  15939                     try self.genCopy(Type.c_uint, fp_offset, .{ .register = offset_reg });
  15940                     const done_reloc = try self.asmJmpReloc(undefined);
  15941 
  15942                     try self.performReloc(mem_reloc);
  15943                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area);
  15944                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
  15945                         .base = .{ .reg = addr_reg },
  15946                         .mod = .{ .rm = .{
  15947                             .size = .qword,
  15948                             .disp = @intCast(@max(promote_ty.abiSize(mod), 8)),
  15949                         } },
  15950                     });
  15951                     try self.genCopy(
  15952                         ptr_anyopaque_ty,
  15953                         overflow_arg_area,
  15954                         .{ .register = offset_reg.to64() },
  15955                     );
  15956 
  15957                     try self.performReloc(done_reloc);
  15958                     if (!unused) try self.genCopy(promote_ty, promote_mcv, .{
  15959                         .indirect = .{ .reg = addr_reg },
  15960                     });
  15961                 },
  15962                 .memory => {
  15963                     assert(classes.len == 1);
  15964                     unreachable;
  15965                 },
  15966                 else => return self.fail("TODO implement c_va_arg for {} on SysV", .{
  15967                     promote_ty.fmt(mod),
  15968                 }),
  15969             }
  15970 
  15971             if (unused) break :result .unreach;
  15972             if (ty.toIntern() == promote_ty.toIntern()) break :result promote_mcv;
  15973 
  15974             if (!promote_ty.isRuntimeFloat()) {
  15975                 const dst_mcv = try self.allocRegOrMem(inst, true);
  15976                 try self.genCopy(ty, dst_mcv, promote_mcv);
  15977                 break :result dst_mcv;
  15978             }
  15979 
  15980             assert(ty.toIntern() == .f32_type and promote_ty.toIntern() == .f64_type);
  15981             const dst_mcv = if (promote_mcv.isRegister())
  15982                 promote_mcv
  15983             else
  15984                 try self.copyToRegisterWithInstTracking(inst, ty, promote_mcv);
  15985             const dst_reg = dst_mcv.getReg().?.to128();
  15986             const dst_lock = self.register_manager.lockReg(dst_reg);
  15987             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  15988 
  15989             if (self.hasFeature(.avx)) if (promote_mcv.isMemory()) try self.asmRegisterRegisterMemory(
  15990                 .{ .v_ss, .cvtsd2 },
  15991                 dst_reg,
  15992                 dst_reg,
  15993                 try promote_mcv.mem(self, .qword),
  15994             ) else try self.asmRegisterRegisterRegister(
  15995                 .{ .v_ss, .cvtsd2 },
  15996                 dst_reg,
  15997                 dst_reg,
  15998                 (if (promote_mcv.isRegister())
  15999                     promote_mcv.getReg().?
  16000                 else
  16001                     try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(),
  16002             ) else if (promote_mcv.isMemory()) try self.asmRegisterMemory(
  16003                 .{ ._ss, .cvtsd2 },
  16004                 dst_reg,
  16005                 try promote_mcv.mem(self, .qword),
  16006             ) else try self.asmRegisterRegister(
  16007                 .{ ._ss, .cvtsd2 },
  16008                 dst_reg,
  16009                 (if (promote_mcv.isRegister())
  16010                     promote_mcv.getReg().?
  16011                 else
  16012                     try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(),
  16013             );
  16014             break :result promote_mcv;
  16015         },
  16016         .Win64 => return self.fail("TODO implement c_va_arg for Win64", .{}),
  16017         else => unreachable,
  16018     };
  16019     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  16020 }
  16021 
  16022 fn airVaCopy(self: *Self, inst: Air.Inst.Index) !void {
  16023     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  16024     const ptr_va_list_ty = self.typeOf(ty_op.operand);
  16025 
  16026     const dst_mcv = try self.allocRegOrMem(inst, true);
  16027     try self.load(dst_mcv, ptr_va_list_ty, .{ .air_ref = ty_op.operand });
  16028     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  16029 }
  16030 
  16031 fn airVaEnd(self: *Self, inst: Air.Inst.Index) !void {
  16032     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  16033     return self.finishAir(inst, .unreach, .{ un_op, .none, .none });
  16034 }
  16035 
  16036 fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue {
  16037     const mod = self.bin_file.comp.module.?;
  16038     const ty = self.typeOf(ref);
  16039 
  16040     // If the type has no codegen bits, no need to store it.
  16041     if (!ty.hasRuntimeBitsIgnoreComptime(mod)) return .none;
  16042 
  16043     const mcv = if (ref.toIndex()) |inst| mcv: {
  16044         break :mcv self.inst_tracking.getPtr(inst).?.short;
  16045     } else mcv: {
  16046         const ip_index = ref.toInterned().?;
  16047         const gop = try self.const_tracking.getOrPut(self.gpa, ip_index);
  16048         if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(init: {
  16049             const const_mcv = try self.genTypedValue(.{ .ty = ty, .val = Value.fromInterned(ip_index) });
  16050             switch (const_mcv) {
  16051                 .lea_tlv => |tlv_sym| if (self.bin_file.cast(link.File.Elf)) |_| {
  16052                     if (self.mod.pic) {
  16053                         try self.spillRegisters(&.{ .rdi, .rax });
  16054                     } else {
  16055                         try self.spillRegisters(&.{.rax});
  16056                     }
  16057                     const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{
  16058                         .size = 8,
  16059                         .alignment = .@"8",
  16060                     }));
  16061                     try self.genSetMem(
  16062                         .{ .frame = frame_index },
  16063                         0,
  16064                         Type.usize,
  16065                         .{ .lea_symbol = .{ .sym = tlv_sym } },
  16066                     );
  16067                     break :init .{ .load_frame = .{ .index = frame_index } };
  16068                 } else if (self.bin_file.cast(link.File.MachO)) |_| {
  16069                     return self.fail("TODO implement lowering TLV variable to stack", .{});
  16070                 } else break :init const_mcv,
  16071                 else => break :init const_mcv,
  16072             }
  16073         });
  16074         break :mcv gop.value_ptr.short;
  16075     };
  16076 
  16077     switch (mcv) {
  16078         .none, .unreach, .dead => unreachable,
  16079         else => return mcv,
  16080     }
  16081 }
  16082 
  16083 fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) *InstTracking {
  16084     const tracking = self.inst_tracking.getPtr(inst).?;
  16085     return switch (tracking.short) {
  16086         .none, .unreach, .dead => unreachable,
  16087         else => tracking,
  16088     };
  16089 }
  16090 
  16091 /// If the MCValue is an immediate, and it does not fit within this type,
  16092 /// we put it in a register.
  16093 /// A potential opportunity for future optimization here would be keeping track
  16094 /// of the fact that the instruction is available both as an immediate
  16095 /// and as a register.
  16096 fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCValue {
  16097     const mcv = try self.resolveInst(operand);
  16098     const ti = @typeInfo(T).Int;
  16099     switch (mcv) {
  16100         .immediate => |imm| {
  16101             // This immediate is unsigned.
  16102             const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed));
  16103             if (imm >= math.maxInt(U)) {
  16104                 return MCValue{ .register = try self.copyToTmpRegister(Type.usize, mcv) };
  16105             }
  16106         },
  16107         else => {},
  16108     }
  16109     return mcv;
  16110 }
  16111 
  16112 fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue {
  16113     const mod = self.bin_file.comp.module.?;
  16114     return switch (try codegen.genTypedValue(self.bin_file, self.src_loc, arg_tv, self.owner.getDecl(mod))) {
  16115         .mcv => |mcv| switch (mcv) {
  16116             .none => .none,
  16117             .undef => .undef,
  16118             .immediate => |imm| .{ .immediate = imm },
  16119             .memory => |addr| .{ .memory = addr },
  16120             .load_symbol => |sym_index| .{ .load_symbol = .{ .sym = sym_index } },
  16121             .load_direct => |sym_index| .{ .load_direct = sym_index },
  16122             .load_got => |sym_index| .{ .lea_got = sym_index },
  16123             .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
  16124         },
  16125         .fail => |msg| {
  16126             self.err_msg = msg;
  16127             return error.CodegenFail;
  16128         },
  16129     };
  16130 }
  16131 
  16132 const CallMCValues = struct {
  16133     args: []MCValue,
  16134     return_value: InstTracking,
  16135     stack_byte_count: u31,
  16136     stack_align: Alignment,
  16137     gp_count: u32,
  16138     fp_count: u32,
  16139 
  16140     fn deinit(self: *CallMCValues, func: *Self) void {
  16141         func.gpa.free(self.args);
  16142         self.* = undefined;
  16143     }
  16144 };
  16145 
  16146 /// Caller must call `CallMCValues.deinit`.
  16147 fn resolveCallingConventionValues(
  16148     self: *Self,
  16149     fn_info: InternPool.Key.FuncType,
  16150     var_args: []const Type,
  16151     stack_frame_base: FrameIndex,
  16152 ) !CallMCValues {
  16153     const mod = self.bin_file.comp.module.?;
  16154     const ip = &mod.intern_pool;
  16155     const cc = fn_info.cc;
  16156     const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len);
  16157     defer self.gpa.free(param_types);
  16158 
  16159     for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src| {
  16160         dest.* = Type.fromInterned(src);
  16161     }
  16162     for (param_types[fn_info.param_types.len..], var_args) |*param_ty, arg_ty|
  16163         param_ty.* = self.promoteVarArg(arg_ty);
  16164 
  16165     var result: CallMCValues = .{
  16166         .args = try self.gpa.alloc(MCValue, param_types.len),
  16167         // These undefined values must be populated before returning from this function.
  16168         .return_value = undefined,
  16169         .stack_byte_count = 0,
  16170         .stack_align = undefined,
  16171         .gp_count = 0,
  16172         .fp_count = 0,
  16173     };
  16174     errdefer self.gpa.free(result.args);
  16175 
  16176     const ret_ty = Type.fromInterned(fn_info.return_type);
  16177 
  16178     const resolved_cc = abi.resolveCallingConvention(cc, self.target.*);
  16179     switch (cc) {
  16180         .Naked => {
  16181             assert(result.args.len == 0);
  16182             result.return_value = InstTracking.init(.unreach);
  16183             result.stack_align = .@"8";
  16184         },
  16185         .C, .SysV, .Win64 => {
  16186             var ret_int_reg_i: u32 = 0;
  16187             var ret_sse_reg_i: u32 = 0;
  16188             var param_int_reg_i: u32 = 0;
  16189             var param_sse_reg_i: u32 = 0;
  16190             result.stack_align = .@"16";
  16191 
  16192             switch (resolved_cc) {
  16193                 .SysV => {},
  16194                 .Win64 => {
  16195                     // Align the stack to 16bytes before allocating shadow stack space (if any).
  16196                     result.stack_byte_count += @intCast(4 * Type.usize.abiSize(mod));
  16197                 },
  16198                 else => unreachable,
  16199             }
  16200 
  16201             // Return values
  16202             if (ret_ty.zigTypeTag(mod) == .NoReturn) {
  16203                 result.return_value = InstTracking.init(.unreach);
  16204             } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) {
  16205                 // TODO: is this even possible for C calling convention?
  16206                 result.return_value = InstTracking.init(.none);
  16207             } else {
  16208                 var ret_tracking: [2]InstTracking = undefined;
  16209                 var ret_tracking_i: usize = 0;
  16210 
  16211                 const classes = switch (resolved_cc) {
  16212                     .SysV => mem.sliceTo(&abi.classifySystemV(ret_ty, mod, .ret), .none),
  16213                     .Win64 => &.{abi.classifyWindows(ret_ty, mod)},
  16214                     else => unreachable,
  16215                 };
  16216                 for (classes) |class| switch (class) {
  16217                     .integer => {
  16218                         const ret_int_reg = registerAlias(
  16219                             abi.getCAbiIntReturnRegs(resolved_cc)[ret_int_reg_i],
  16220                             @intCast(@min(ret_ty.abiSize(mod), 8)),
  16221                         );
  16222                         ret_int_reg_i += 1;
  16223 
  16224                         ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = ret_int_reg });
  16225                         ret_tracking_i += 1;
  16226                     },
  16227                     .sse, .float, .float_combine, .win_i128 => {
  16228                         const ret_sse_reg = registerAlias(
  16229                             abi.getCAbiSseReturnRegs(resolved_cc)[ret_sse_reg_i],
  16230                             @intCast(ret_ty.abiSize(mod)),
  16231                         );
  16232                         ret_sse_reg_i += 1;
  16233 
  16234                         ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = ret_sse_reg });
  16235                         ret_tracking_i += 1;
  16236                     },
  16237                     .sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse),
  16238                     .x87 => {
  16239                         ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = .st0 });
  16240                         ret_tracking_i += 1;
  16241                     },
  16242                     .x87up => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .x87),
  16243                     .complex_x87 => {
  16244                         ret_tracking[ret_tracking_i] =
  16245                             InstTracking.init(.{ .register_pair = .{ .st0, .st1 } });
  16246                         ret_tracking_i += 1;
  16247                     },
  16248                     .memory => {
  16249                         const ret_int_reg = abi.getCAbiIntReturnRegs(resolved_cc)[ret_int_reg_i].to64();
  16250                         ret_int_reg_i += 1;
  16251                         const ret_indirect_reg = abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i];
  16252                         param_int_reg_i += 1;
  16253 
  16254                         ret_tracking[ret_tracking_i] = .{
  16255                             .short = .{ .indirect = .{ .reg = ret_int_reg } },
  16256                             .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
  16257                         };
  16258                         ret_tracking_i += 1;
  16259                     },
  16260                     .none => unreachable,
  16261                 };
  16262                 result.return_value = switch (ret_tracking_i) {
  16263                     else => unreachable,
  16264                     1 => ret_tracking[0],
  16265                     2 => InstTracking.init(.{ .register_pair = .{
  16266                         ret_tracking[0].short.register, ret_tracking[1].short.register,
  16267                     } }),
  16268                 };
  16269             }
  16270 
  16271             // Input params
  16272             for (param_types, result.args) |ty, *arg| {
  16273                 assert(ty.hasRuntimeBitsIgnoreComptime(mod));
  16274                 switch (resolved_cc) {
  16275                     .SysV => {},
  16276                     .Win64 => {
  16277                         param_int_reg_i = @max(param_int_reg_i, param_sse_reg_i);
  16278                         param_sse_reg_i = param_int_reg_i;
  16279                     },
  16280                     else => unreachable,
  16281                 }
  16282 
  16283                 var arg_mcv: [2]MCValue = undefined;
  16284                 var arg_mcv_i: usize = 0;
  16285 
  16286                 const classes = switch (resolved_cc) {
  16287                     .SysV => mem.sliceTo(&abi.classifySystemV(ty, mod, .arg), .none),
  16288                     .Win64 => &.{abi.classifyWindows(ty, mod)},
  16289                     else => unreachable,
  16290                 };
  16291                 for (classes) |class| switch (class) {
  16292                     .integer => {
  16293                         const param_int_regs = abi.getCAbiIntParamRegs(resolved_cc);
  16294                         if (param_int_reg_i >= param_int_regs.len) break;
  16295 
  16296                         const param_int_reg = registerAlias(
  16297                             abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i],
  16298                             @intCast(@min(ty.abiSize(mod), 8)),
  16299                         );
  16300                         param_int_reg_i += 1;
  16301 
  16302                         arg_mcv[arg_mcv_i] = .{ .register = param_int_reg };
  16303                         arg_mcv_i += 1;
  16304                     },
  16305                     .sse, .float, .float_combine => {
  16306                         const param_sse_regs = abi.getCAbiSseParamRegs(resolved_cc);
  16307                         if (param_sse_reg_i >= param_sse_regs.len) break;
  16308 
  16309                         const param_sse_reg = registerAlias(
  16310                             abi.getCAbiSseParamRegs(resolved_cc)[param_sse_reg_i],
  16311                             @intCast(ty.abiSize(mod)),
  16312                         );
  16313                         param_sse_reg_i += 1;
  16314 
  16315                         arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg };
  16316                         arg_mcv_i += 1;
  16317                     },
  16318                     .sseup => assert(arg_mcv[arg_mcv_i - 1].register.class() == .sse),
  16319                     .x87, .x87up, .complex_x87, .memory, .win_i128 => switch (resolved_cc) {
  16320                         .SysV => switch (class) {
  16321                             .x87, .x87up, .complex_x87, .memory => break,
  16322                             else => unreachable,
  16323                         },
  16324                         .Win64 => if (ty.abiSize(mod) > 8) {
  16325                             const param_int_reg =
  16326                                 abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i].to64();
  16327                             param_int_reg_i += 1;
  16328 
  16329                             arg_mcv[arg_mcv_i] = .{ .indirect = .{ .reg = param_int_reg } };
  16330                             arg_mcv_i += 1;
  16331                         } else break,
  16332                         else => unreachable,
  16333                     },
  16334                     .none => unreachable,
  16335                 } else {
  16336                     arg.* = switch (arg_mcv_i) {
  16337                         else => unreachable,
  16338                         1 => arg_mcv[0],
  16339                         2 => .{ .register_pair = .{ arg_mcv[0].register, arg_mcv[1].register } },
  16340                     };
  16341                     continue;
  16342                 }
  16343 
  16344                 const param_size: u31 = @intCast(ty.abiSize(mod));
  16345                 const param_align: u31 =
  16346                     @intCast(@max(ty.abiAlignment(mod).toByteUnitsOptional().?, 8));
  16347                 result.stack_byte_count =
  16348                     mem.alignForward(u31, result.stack_byte_count, param_align);
  16349                 arg.* = .{ .load_frame = .{
  16350                     .index = stack_frame_base,
  16351                     .off = result.stack_byte_count,
  16352                 } };
  16353                 result.stack_byte_count += param_size;
  16354             }
  16355             assert(param_int_reg_i <= 6);
  16356             result.gp_count = param_int_reg_i;
  16357             assert(param_sse_reg_i <= 16);
  16358             result.fp_count = param_sse_reg_i;
  16359         },
  16360         .Unspecified => {
  16361             result.stack_align = .@"16";
  16362 
  16363             // Return values
  16364             if (ret_ty.zigTypeTag(mod) == .NoReturn) {
  16365                 result.return_value = InstTracking.init(.unreach);
  16366             } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) {
  16367                 result.return_value = InstTracking.init(.none);
  16368             } else {
  16369                 const ret_reg = abi.getCAbiIntReturnRegs(resolved_cc)[0];
  16370                 const ret_ty_size: u31 = @intCast(ret_ty.abiSize(mod));
  16371                 if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) {
  16372                     const aliased_reg = registerAlias(ret_reg, ret_ty_size);
  16373                     result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
  16374                 } else {
  16375                     const ret_indirect_reg = abi.getCAbiIntParamRegs(resolved_cc)[0];
  16376                     result.return_value = .{
  16377                         .short = .{ .indirect = .{ .reg = ret_reg } },
  16378                         .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
  16379                     };
  16380                 }
  16381             }
  16382 
  16383             // Input params
  16384             for (param_types, result.args) |ty, *arg| {
  16385                 if (!ty.hasRuntimeBitsIgnoreComptime(mod)) {
  16386                     arg.* = .none;
  16387                     continue;
  16388                 }
  16389                 const param_size: u31 = @intCast(ty.abiSize(mod));
  16390                 const param_align: u31 = @intCast(ty.abiAlignment(mod).toByteUnitsOptional().?);
  16391                 result.stack_byte_count =
  16392                     mem.alignForward(u31, result.stack_byte_count, param_align);
  16393                 arg.* = .{ .load_frame = .{
  16394                     .index = stack_frame_base,
  16395                     .off = result.stack_byte_count,
  16396                 } };
  16397                 result.stack_byte_count += param_size;
  16398             }
  16399         },
  16400         else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}),
  16401     }
  16402 
  16403     result.stack_byte_count = @intCast(result.stack_align.forward(result.stack_byte_count));
  16404     return result;
  16405 }
  16406 
  16407 fn fail(self: *Self, comptime format: []const u8, args: anytype) InnerError {
  16408     @setCold(true);
  16409     assert(self.err_msg == null);
  16410     const gpa = self.gpa;
  16411     self.err_msg = try ErrorMsg.create(gpa, self.src_loc, format, args);
  16412     return error.CodegenFail;
  16413 }
  16414 
  16415 fn failSymbol(self: *Self, comptime format: []const u8, args: anytype) InnerError {
  16416     @setCold(true);
  16417     assert(self.err_msg == null);
  16418     const gpa = self.gpa;
  16419     self.err_msg = try ErrorMsg.create(gpa, self.src_loc, format, args);
  16420     return error.CodegenFail;
  16421 }
  16422 
  16423 fn parseRegName(name: []const u8) ?Register {
  16424     if (@hasDecl(Register, "parseRegName")) {
  16425         return Register.parseRegName(name);
  16426     }
  16427     return std.meta.stringToEnum(Register, name);
  16428 }
  16429 
  16430 /// Returns register wide enough to hold at least `size_bytes`.
  16431 fn registerAlias(reg: Register, size_bytes: u32) Register {
  16432     return switch (reg.class()) {
  16433         .general_purpose => if (size_bytes == 0)
  16434             unreachable // should be comptime-known
  16435         else if (size_bytes <= 1)
  16436             reg.to8()
  16437         else if (size_bytes <= 2)
  16438             reg.to16()
  16439         else if (size_bytes <= 4)
  16440             reg.to32()
  16441         else if (size_bytes <= 8)
  16442             reg.to64()
  16443         else
  16444             unreachable,
  16445         .segment => if (size_bytes <= 2)
  16446             reg
  16447         else
  16448             unreachable,
  16449         .x87 => if (size_bytes == 16)
  16450             reg
  16451         else
  16452             unreachable,
  16453         .mmx => if (size_bytes <= 8)
  16454             reg
  16455         else
  16456             unreachable,
  16457         .sse => if (size_bytes <= 16)
  16458             reg.to128()
  16459         else if (size_bytes <= 32)
  16460             reg.to256()
  16461         else
  16462             unreachable,
  16463     };
  16464 }
  16465 
  16466 fn memSize(self: *Self, ty: Type) Memory.Size {
  16467     const mod = self.bin_file.comp.module.?;
  16468     return switch (ty.zigTypeTag(mod)) {
  16469         .Float => Memory.Size.fromBitSize(ty.floatBits(self.target.*)),
  16470         else => Memory.Size.fromSize(@intCast(ty.abiSize(mod))),
  16471     };
  16472 }
  16473 
  16474 fn splitType(self: *Self, ty: Type) ![2]Type {
  16475     const mod = self.bin_file.comp.module.?;
  16476     const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
  16477     var parts: [2]Type = undefined;
  16478     if (classes.len == 2) for (&parts, classes, 0..) |*part, class, part_i| {
  16479         part.* = switch (class) {
  16480             .integer => switch (part_i) {
  16481                 0 => Type.u64,
  16482                 1 => part: {
  16483                     const elem_size = ty.abiAlignment(mod).minStrict(.@"8").toByteUnitsOptional().?;
  16484                     const elem_ty = try mod.intType(.unsigned, @intCast(elem_size * 8));
  16485                     break :part switch (@divExact(ty.abiSize(mod) - 8, elem_size)) {
  16486                         1 => elem_ty,
  16487                         else => |len| try mod.arrayType(.{ .len = len, .child = elem_ty.toIntern() }),
  16488                     };
  16489                 },
  16490                 else => unreachable,
  16491             },
  16492             .float => Type.f32,
  16493             .float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }),
  16494             .sse => Type.f64,
  16495             else => break,
  16496         };
  16497     } else if (parts[0].abiSize(mod) + parts[1].abiSize(mod) == ty.abiSize(mod)) return parts;
  16498     return self.fail("TODO implement splitType for {}", .{ty.fmt(mod)});
  16499 }
  16500 
  16501 /// Truncates the value in the register in place.
  16502 /// Clobbers any remaining bits.
  16503 fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
  16504     const mod = self.bin_file.comp.module.?;
  16505     const int_info = if (ty.isAbiInt(mod)) ty.intInfo(mod) else std.builtin.Type.Int{
  16506         .signedness = .unsigned,
  16507         .bits = @intCast(ty.bitSize(mod)),
  16508     };
  16509     const shift = math.cast(u6, 64 - int_info.bits % 64) orelse return;
  16510     try self.spillEflagsIfOccupied();
  16511     switch (int_info.signedness) {
  16512         .signed => {
  16513             try self.genShiftBinOpMir(
  16514                 .{ ._l, .sa },
  16515                 Type.isize,
  16516                 .{ .register = reg },
  16517                 .{ .immediate = shift },
  16518             );
  16519             try self.genShiftBinOpMir(
  16520                 .{ ._r, .sa },
  16521                 Type.isize,
  16522                 .{ .register = reg },
  16523                 .{ .immediate = shift },
  16524             );
  16525         },
  16526         .unsigned => {
  16527             const mask = ~@as(u64, 0) >> shift;
  16528             if (int_info.bits <= 32) {
  16529                 try self.genBinOpMir(
  16530                     .{ ._, .@"and" },
  16531                     Type.u32,
  16532                     .{ .register = reg },
  16533                     .{ .immediate = mask },
  16534                 );
  16535             } else {
  16536                 const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask });
  16537                 try self.genBinOpMir(
  16538                     .{ ._, .@"and" },
  16539                     Type.usize,
  16540                     .{ .register = reg },
  16541                     .{ .register = tmp_reg },
  16542                 );
  16543             }
  16544         },
  16545     }
  16546 }
  16547 
  16548 fn regBitSize(self: *Self, ty: Type) u64 {
  16549     const mod = self.bin_file.comp.module.?;
  16550     const abi_size = ty.abiSize(mod);
  16551     return switch (ty.zigTypeTag(mod)) {
  16552         else => switch (abi_size) {
  16553             1 => 8,
  16554             2 => 16,
  16555             3...4 => 32,
  16556             5...8 => 64,
  16557             else => unreachable,
  16558         },
  16559         .Float => switch (abi_size) {
  16560             1...16 => 128,
  16561             17...32 => 256,
  16562             else => unreachable,
  16563         },
  16564     };
  16565 }
  16566 
  16567 fn regExtraBits(self: *Self, ty: Type) u64 {
  16568     const mod = self.bin_file.comp.module.?;
  16569     return self.regBitSize(ty) - ty.bitSize(mod);
  16570 }
  16571 
  16572 fn hasFeature(self: *Self, feature: Target.x86.Feature) bool {
  16573     return Target.x86.featureSetHas(self.target.cpu.features, feature);
  16574 }
  16575 fn hasAnyFeatures(self: *Self, features: anytype) bool {
  16576     return Target.x86.featureSetHasAny(self.target.cpu.features, features);
  16577 }
  16578 fn hasAllFeatures(self: *Self, features: anytype) bool {
  16579     return Target.x86.featureSetHasAll(self.target.cpu.features, features);
  16580 }
  16581 
  16582 fn typeOf(self: *Self, inst: Air.Inst.Ref) Type {
  16583     const mod = self.bin_file.comp.module.?;
  16584     return self.air.typeOf(inst, &mod.intern_pool);
  16585 }
  16586 
  16587 fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type {
  16588     const mod = self.bin_file.comp.module.?;
  16589     return self.air.typeOfIndex(inst, &mod.intern_pool);
  16590 }
  16591 
  16592 fn intCompilerRtAbiName(int_bits: u32) u8 {
  16593     return switch (int_bits) {
  16594         1...32 => 's',
  16595         33...64 => 'd',
  16596         65...128 => 't',
  16597         else => unreachable,
  16598     };
  16599 }
  16600 
  16601 fn floatCompilerRtAbiName(float_bits: u32) u8 {
  16602     return switch (float_bits) {
  16603         16 => 'h',
  16604         32 => 's',
  16605         64 => 'd',
  16606         80 => 'x',
  16607         128 => 't',
  16608         else => unreachable,
  16609     };
  16610 }
  16611 
  16612 fn floatCompilerRtAbiType(self: *Self, ty: Type, other_ty: Type) Type {
  16613     if (ty.toIntern() == .f16_type and
  16614         (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and
  16615         self.target.isDarwin()) return Type.u16;
  16616     return ty;
  16617 }
  16618 
  16619 fn floatLibcAbiPrefix(ty: Type) []const u8 {
  16620     return switch (ty.toIntern()) {
  16621         .f16_type, .f80_type => "__",
  16622         .f32_type, .f64_type, .f128_type, .c_longdouble_type => "",
  16623         else => unreachable,
  16624     };
  16625 }
  16626 
  16627 fn floatLibcAbiSuffix(ty: Type) []const u8 {
  16628     return switch (ty.toIntern()) {
  16629         .f16_type => "h",
  16630         .f32_type => "f",
  16631         .f64_type => "",
  16632         .f80_type => "x",
  16633         .f128_type => "q",
  16634         .c_longdouble_type => "l",
  16635         else => unreachable,
  16636     };
  16637 }
  16638 
  16639 fn promoteInt(self: *Self, ty: Type) Type {
  16640     const mod = self.bin_file.comp.module.?;
  16641     const int_info: InternPool.Key.IntType = switch (ty.toIntern()) {
  16642         .bool_type => .{ .signedness = .unsigned, .bits = 1 },
  16643         else => if (ty.isAbiInt(mod)) ty.intInfo(mod) else return ty,
  16644     };
  16645     for ([_]Type{
  16646         Type.c_int,      Type.c_uint,
  16647         Type.c_long,     Type.c_ulong,
  16648         Type.c_longlong, Type.c_ulonglong,
  16649     }) |promote_ty| {
  16650         const promote_info = promote_ty.intInfo(mod);
  16651         if (int_info.signedness == .signed and promote_info.signedness == .unsigned) continue;
  16652         if (int_info.bits + @intFromBool(int_info.signedness == .unsigned and
  16653             promote_info.signedness == .signed) <= promote_info.bits) return promote_ty;
  16654     }
  16655     return ty;
  16656 }
  16657 
  16658 fn promoteVarArg(self: *Self, ty: Type) Type {
  16659     if (!ty.isRuntimeFloat()) return self.promoteInt(ty);
  16660     switch (ty.floatBits(self.target.*)) {
  16661         32, 64 => return Type.f64,
  16662         else => |float_bits| {
  16663             assert(float_bits == self.target.c_type_bit_size(.longdouble));
  16664             return Type.c_longdouble;
  16665         },
  16666     }
  16667 }