motiejus/zig

fork of https://codeberg.org/ziglang/zig
git clone https://git.jakstys.lt/motiejus/zig.git
Log | Tree | Refs | README | LICENSE

src/arch/x86_64/CodeGen.zig (1455266B) - Raw


      1 const std = @import("std");
      2 const assert = std.debug.assert;
      3 const codegen = @import("../../codegen.zig");
      4 const link = @import("../../link.zig");
      5 const log = std.log.scoped(.codegen);
      6 const tracking_log = std.log.scoped(.tracking);
      7 const verbose_tracking_log = std.log.scoped(.verbose_tracking);
      8 const wip_mir_log = std.log.scoped(.wip_mir);
      9 
     10 const Air = @import("../../Air.zig");
     11 const Allocator = std.mem.Allocator;
     12 const Emit = @import("Emit.zig");
     13 const Liveness = @import("../../Liveness.zig");
     14 const Lower = @import("Lower.zig");
     15 const Mir = @import("Mir.zig");
     16 const Zcu = @import("../../Zcu.zig");
     17 const Module = @import("../../Package/Module.zig");
     18 const InternPool = @import("../../InternPool.zig");
     19 const Type = @import("../../Type.zig");
     20 const Value = @import("../../Value.zig");
     21 
     22 const abi = @import("abi.zig");
     23 const bits = @import("bits.zig");
     24 const encoder = @import("encoder.zig");
     25 
     26 const Condition = bits.Condition;
     27 const Immediate = bits.Immediate;
     28 const Memory = bits.Memory;
     29 const Register = bits.Register;
     30 const RegisterManager = abi.RegisterManager;
     31 const RegisterLock = RegisterManager.RegisterLock;
     32 const FrameIndex = bits.FrameIndex;
     33 
     34 const InnerError = codegen.CodeGenError || error{OutOfRegisters};
     35 
     36 gpa: Allocator,
     37 pt: Zcu.PerThread,
     38 air: Air,
     39 liveness: Liveness,
     40 bin_file: *link.File,
     41 debug_output: link.File.DebugInfoOutput,
     42 target: *const std.Target,
     43 owner: Owner,
     44 inline_func: InternPool.Index,
     45 mod: *Module,
     46 arg_index: u32,
     47 args: []MCValue,
     48 va_info: union {
     49     sysv: struct {
     50         gp_count: u32,
     51         fp_count: u32,
     52         overflow_arg_area: bits.FrameAddr,
     53         reg_save_area: bits.FrameAddr,
     54     },
     55     win64: struct {},
     56 },
     57 ret_mcv: InstTracking,
     58 fn_type: Type,
     59 src_loc: Zcu.LazySrcLoc,
     60 
     61 eflags_inst: ?Air.Inst.Index = null,
     62 
     63 /// MIR Instructions
     64 mir_instructions: std.MultiArrayList(Mir.Inst) = .empty,
     65 /// MIR extra data
     66 mir_extra: std.ArrayListUnmanaged(u32) = .empty,
     67 mir_table: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
     68 
/// Source line of the ending curly; `end_di_column` below is its column.
     70 end_di_line: u32,
     71 end_di_column: u32,
     72 
/// Each entry is the index of a MIR instruction, not a byte offset into the emitted code.
/// NOTE(review): presumably these are the jumps to the function epilogue, patched once
/// the epilogue's position is known — confirm against the code that consumes this list.
     76 epilogue_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
     77 
     78 reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined,
     79 const_tracking: ConstTrackingMap = .empty,
     80 inst_tracking: InstTrackingMap = .empty,
     81 
     82 // Key is the block instruction
     83 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .empty,
     84 
     85 register_manager: RegisterManager = .{},
     86 
     87 /// Generation of the current scope, increments by 1 for every entered scope.
     88 scope_generation: u32 = 0,
     89 
     90 frame_allocs: std.MultiArrayList(FrameAlloc) = .empty,
     91 free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .empty,
     92 frame_locs: std.MultiArrayList(Mir.FrameLoc) = .empty,
     93 
     94 loops: std.AutoHashMapUnmanaged(Air.Inst.Index, struct {
     95     /// The state to restore before branching.
     96     state: State,
     97     /// The branch target.
     98     target: Mir.Inst.Index,
     99 }) = .empty,
    100 loop_switches: std.AutoHashMapUnmanaged(Air.Inst.Index, struct {
    101     start: u31,
    102     len: u11,
    103     min: Value,
    104     else_relocs: union(enum) {
    105         @"unreachable",
    106         forward: std.ArrayListUnmanaged(Mir.Inst.Index),
    107         backward: Mir.Inst.Index,
    108     },
    109 }) = .empty,
    110 
    111 next_temp_index: Temp.Index = @enumFromInt(0),
    112 temp_type: [Temp.Index.max]Type = undefined,
    113 
    114 const Owner = union(enum) {
    115     nav_index: InternPool.Nav.Index,
    116     lazy_sym: link.File.LazySymbol,
    117 
    118     fn getSymbolIndex(owner: Owner, ctx: *CodeGen) !u32 {
    119         const pt = ctx.pt;
    120         switch (owner) {
    121             .nav_index => |nav_index| if (ctx.bin_file.cast(.elf)) |elf_file| {
    122                 return elf_file.zigObjectPtr().?.getOrCreateMetadataForNav(pt.zcu, nav_index);
    123             } else if (ctx.bin_file.cast(.macho)) |macho_file| {
    124                 return macho_file.getZigObject().?.getOrCreateMetadataForNav(macho_file, nav_index);
    125             } else if (ctx.bin_file.cast(.coff)) |coff_file| {
    126                 const atom = try coff_file.getOrCreateAtomForNav(nav_index);
    127                 return coff_file.getAtom(atom).getSymbolIndex().?;
    128             } else if (ctx.bin_file.cast(.plan9)) |p9_file| {
    129                 return p9_file.seeNav(pt, nav_index);
    130             } else unreachable,
    131             .lazy_sym => |lazy_sym| if (ctx.bin_file.cast(.elf)) |elf_file| {
    132                 return elf_file.zigObjectPtr().?.getOrCreateMetadataForLazySymbol(elf_file, pt, lazy_sym) catch |err|
    133                     ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    134             } else if (ctx.bin_file.cast(.macho)) |macho_file| {
    135                 return macho_file.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_file, pt, lazy_sym) catch |err|
    136                     ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    137             } else if (ctx.bin_file.cast(.coff)) |coff_file| {
    138                 const atom = coff_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
    139                     return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    140                 return coff_file.getAtom(atom).getSymbolIndex().?;
    141             } else if (ctx.bin_file.cast(.plan9)) |p9_file| {
    142                 return p9_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
    143                     return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
    144             } else unreachable,
    145         }
    146     }
    147 };
    148 
/// Kind of mask held by a `register_mask` value.
/// NOTE(review): presumably `.sign` means only each element's sign bit is
/// significant and `.all` means every bit of the element is — confirm.
const MaskKind = enum(u1) { sign, all };
/// Describes a `register_mask` value: the mask kind, whether the mask is
/// inverted, and the scalar size of the elements it is stored as.
const MaskInfo = packed struct { kind: MaskKind, inverted: bool, scalar: Memory.Size };
    151 
    152 pub const MCValue = union(enum) {
    153     /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
    154     /// TODO Look into deleting this tag and using `dead` instead, since every use
    155     /// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
    156     none,
    157     /// Control flow will not allow this value to be observed.
    158     unreach,
    159     /// No more references to this value remain.
    160     /// The payload is the value of scope_generation at the point where the death occurred
    161     dead: u32,
    162     /// The value is undefined.
    163     undef,
    164     /// A pointer-sized integer that fits in a register.
    165     /// If the type is a pointer, this is the pointer address in virtual address space.
    166     immediate: u64,
    167     /// The value resides in the EFLAGS register.
    168     eflags: Condition,
    169     /// The value is in a register.
    170     register: Register,
    171     /// The value is split across two registers.
    172     register_pair: [2]Register,
    173     /// The value is split across three registers.
    174     register_triple: [3]Register,
    175     /// The value is split across four registers.
    176     register_quadruple: [4]Register,
    177     /// The value is a constant offset from the value in a register.
    178     register_offset: bits.RegisterOffset,
    179     /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register.
    180     register_overflow: struct { reg: Register, eflags: Condition },
    181     /// The value is a bool vector stored in a vector register with a different scalar type.
    182     register_mask: struct { reg: Register, info: MaskInfo },
    183     /// The value is in memory at a hard-coded address.
    184     /// If the type is a pointer, it means the pointer address is stored at this memory location.
    185     memory: u64,
    186     /// The value is in memory at an address not-yet-allocated by the linker.
    187     /// This traditionally corresponds to a relocation emitted in a relocatable object file.
    188     load_symbol: bits.SymbolOffset,
    189     /// The address of the memory location not-yet-allocated by the linker.
    190     lea_symbol: bits.SymbolOffset,
    191     /// The value is in memory at a constant offset from the address in a register.
    192     indirect: bits.RegisterOffset,
    193     /// The value is in memory.
    194     /// Payload is a symbol index.
    195     load_direct: u32,
    196     /// The value is a pointer to a value in memory.
    197     /// Payload is a symbol index.
    198     lea_direct: u32,
    199     /// The value is in memory referenced indirectly via GOT.
    200     /// Payload is a symbol index.
    201     load_got: u32,
    202     /// The value is a pointer to a value referenced indirectly via GOT.
    203     /// Payload is a symbol index.
    204     lea_got: u32,
    205     /// The value is a threadlocal variable.
    206     /// Payload is a symbol index.
    207     load_tlv: u32,
    208     /// The value is a pointer to a threadlocal variable.
    209     /// Payload is a symbol index.
    210     lea_tlv: u32,
    211     /// The value stored at an offset from a frame index
    212     /// Payload is a frame address.
    213     load_frame: bits.FrameAddr,
    214     /// The address of an offset from a frame index
    215     /// Payload is a frame address.
    216     lea_frame: bits.FrameAddr,
    217     /// Supports integer_per_element abi
    218     elementwise_regs_then_frame: packed struct { regs: u3, frame_off: i29, frame_index: FrameIndex },
    219     /// This indicates that we have already allocated a frame index for this instruction,
    220     /// but it has not been spilled there yet in the current control flow.
    221     /// Payload is a frame index.
    222     reserved_frame: FrameIndex,
    223     air_ref: Air.Inst.Ref,
    224 
    /// Reports whether `mcv` is classified as modifiable.
    ///
    /// Plain registers (single or multi-register) and the memory-backed tags
    /// are modifiable. A frame slot is modifiable only when its frame index is
    /// not a named one. Every other tag is not.
    fn isModifiable(mcv: MCValue) bool {
        switch (mcv) {
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .memory,
            .indirect,
            .load_symbol,
            .load_direct,
            .load_got,
            .load_tlv,
            => return true,

            .load_frame => |slot| return !slot.index.isNamed(),

            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register_offset,
            .register_overflow,
            .register_mask,
            .lea_symbol,
            .lea_direct,
            .lea_got,
            .lea_tlv,
            .lea_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => return false,
        }
    }
    259 
    /// Returns true for `.memory`, `.indirect` and `.load_frame`.
    /// Exists as a hack around linker relocation bugs.
    fn isBase(mcv: MCValue) bool {
        return mcv == .memory or mcv == .indirect or mcv == .load_frame;
    }
    267 
    /// Returns true for `.memory`, `.indirect`, `.load_frame` and `.load_symbol`;
    /// false for every other tag.
    fn isMemory(mcv: MCValue) bool {
        switch (mcv) {
            .memory, .indirect => return true,
            .load_frame, .load_symbol => return true,
            else => return false,
        }
    }
    274 
    /// Returns true only when `mcv` is an `.immediate`.
    fn isImmediate(mcv: MCValue) bool {
        return mcv == .immediate;
    }
    281 
    /// Returns true when `mcv` is exactly the contents of a register: either a
    /// `.register`, or a `.register_offset` whose offset is zero.
    fn isRegister(mcv: MCValue) bool {
        return switch (mcv) {
            .register => true,
            // A zero offset from a register is just that register. The prong is
            // a plain expression like its siblings; no nested `return` is needed
            // inside a `return switch`.
            .register_offset => |reg_off| reg_off.off == 0,
            else => false,
        };
    }
    289 
    /// Returns true when `mcv` is a `.register` or a `.register_offset`
    /// (with any offset).
    fn isRegisterOffset(mcv: MCValue) bool {
        return mcv == .register or mcv == .register_offset;
    }
    296 
    /// Returns the single register named by `mcv`, or null when the tag does
    /// not carry exactly one register (multi-register tags also yield null).
    fn getReg(mcv: MCValue) ?Register {
        return switch (mcv) {
            .register => |single| single,
            // All of these payloads expose their register as a `reg` field.
            inline .register_offset,
            .indirect,
            .register_overflow,
            .register_mask,
            => |payload| payload.reg,
            else => null,
        };
    }
    306 
    /// Returns every register referenced by `mcv` as a slice, empty when the
    /// tag references none.
    ///
    /// The slice points into `mcv` itself (payloads are captured by pointer),
    /// so it is only valid for as long as the pointed-to value is.
    fn getRegs(mcv: *const MCValue) []const Register {
        switch (mcv.*) {
            .register => |*single| return single[0..1],
            inline .register_pair,
            .register_triple,
            .register_quadruple,
            => |*multi| return multi,
            inline .register_offset,
            .indirect,
            .register_overflow,
            .register_mask,
            => |*payload| return (&payload.reg)[0..1],
            else => return &.{},
        }
    }
    322 
    /// Returns the EFLAGS condition carried by `mcv`: the payload of `.eflags`
    /// or the `eflags` field of `.register_overflow`. Null for any other tag.
    fn getCondition(mcv: MCValue) ?Condition {
        switch (mcv) {
            .eflags => |condition| return condition,
            .register_overflow => |overflow| return overflow.eflags,
            else => return null,
        }
    }
    330 
    /// Returns true for `.immediate`, `.register`, `.register_offset` and
    /// `.lea_frame`; false for every other tag.
    fn isAddress(mcv: MCValue) bool {
        switch (mcv) {
            .immediate => return true,
            .register, .register_offset => return true,
            .lea_frame => return true,
            else => return false,
        }
    }
    337 
    /// Returns the value describing the address of an in-memory `mcv`
    /// (the inverse of `deref`).
    ///
    /// Each `load_*` tag maps to its `lea_*` counterpart, `.memory` to an
    /// `.immediate`, and `.indirect` to the register itself (zero offset) or a
    /// `.register_offset`. Calling this on a value that is not in memory is
    /// `unreachable`.
    fn address(mcv: MCValue) MCValue {
        switch (mcv) {
            .memory => |addr| return .{ .immediate = addr },
            .indirect => |reg_off| return if (reg_off.off == 0)
                .{ .register = reg_off.reg }
            else
                .{ .register_offset = reg_off },
            .load_symbol => |sym_off| return .{ .lea_symbol = sym_off },
            .load_direct => |sym_index| return .{ .lea_direct = sym_index },
            .load_got => |sym_index| return .{ .lea_got = sym_index },
            .load_tlv => |sym_index| return .{ .lea_tlv = sym_index },
            .load_frame => |frame_addr| return .{ .lea_frame = frame_addr },

            // not in memory
            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .lea_symbol,
            .lea_direct,
            .lea_got,
            .lea_tlv,
            .lea_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
        }
    }
    374 
    /// Returns the value describing the memory that the address `mcv` points at
    /// (the inverse of `address`).
    ///
    /// Each `lea_*` tag maps to its `load_*` counterpart, `.immediate` to
    /// `.memory`, and `.register` / `.register_offset` to `.indirect`.
    /// Calling this on a value that is not dereferenceable is `unreachable`.
    fn deref(mcv: MCValue) MCValue {
        switch (mcv) {
            .immediate => |addr| return .{ .memory = addr },
            .register => |base| return .{ .indirect = .{ .reg = base } },
            .register_offset => |reg_off| return .{ .indirect = reg_off },
            .lea_symbol => |sym_off| return .{ .load_symbol = sym_off },
            .lea_direct => |sym_index| return .{ .load_direct = sym_index },
            .lea_got => |sym_index| return .{ .load_got = sym_index },
            .lea_tlv => |sym_index| return .{ .load_tlv = sym_index },
            .lea_frame => |frame_addr| return .{ .load_frame = frame_addr },

            // not dereferenceable
            .none,
            .unreach,
            .dead,
            .undef,
            .eflags,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_overflow,
            .register_mask,
            .memory,
            .indirect,
            .load_symbol,
            .load_direct,
            .load_got,
            .load_tlv,
            .load_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
        }
    }
    408 
    /// Returns `mcv` displaced by `off`.
    ///
    /// Only `.immediate` (wrapping add), `.register`, `.register_offset` and
    /// `.lea_frame` can absorb a non-zero offset. Most other tags are returned
    /// unchanged when `off` is zero and are `unreachable` otherwise. Tags with
    /// no value at all are `unreachable` regardless of `off`.
    fn offset(mcv: MCValue, off: i32) MCValue {
        switch (mcv) {
            .immediate => |imm| return .{ .immediate = @bitCast(@as(i64, @bitCast(imm)) +% off) },
            .register => |base| return .{ .register_offset = .{ .reg = base, .off = off } },
            .register_offset => |prev| return .{
                .register_offset = .{ .reg = prev.reg, .off = prev.off + off },
            },
            .lea_frame => |prev| return .{
                .lea_frame = .{ .index = prev.index, .off = prev.off + off },
            },

            // Not offsettable: only the identity offset is accepted.
            .eflags,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_overflow,
            .register_mask,
            .memory,
            .indirect,
            .load_symbol,
            .lea_symbol,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            => return if (off == 0) mcv else unreachable,

            // not valid
            .none,
            .unreach,
            .dead,
            .undef,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
        }
    }
    450 
    /// Builds the `Memory` operand that addresses `mcv`, combining it with the
    /// size, index, scale and displacement given in `mod_rm`.
    ///
    /// - `.memory`: based on `ds`. When the address (reinterpreted as `i64`)
    ///   fits in an `i32` it becomes part of the displacement; otherwise the
    ///   absolute `.off` form is used and `mod_rm` is not consulted.
    /// - `.indirect`: based on the register, aliased to pointer width.
    /// - `.load_frame`: based on the frame index.
    /// - `.load_symbol`: based on a relocation; the symbol offset must be zero.
    /// - `.air_ref`: resolved through `function.resolveInst` and retried.
    ///
    /// Every other tag is `unreachable`. Errors come only from resolving an
    /// `.air_ref`.
    fn mem(mcv: MCValue, function: *CodeGen, mod_rm: Memory.Mod.Rm) !Memory {
        switch (mcv) {
            .memory => |addr| {
                const small_addr = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse
                    return .{ .base = .{ .reg = .ds }, .mod = .{ .off = addr } };
                return .{
                    .base = .{ .reg = .ds },
                    .mod = .{ .rm = .{
                        .size = mod_rm.size,
                        .index = mod_rm.index,
                        .scale = mod_rm.scale,
                        .disp = small_addr + mod_rm.disp,
                    } },
                };
            },
            .indirect => |reg_off| return .{
                .base = .{ .reg = registerAlias(reg_off.reg, @divExact(function.target.ptrBitWidth(), 8)) },
                .mod = .{ .rm = .{
                    .size = mod_rm.size,
                    .index = mod_rm.index,
                    .scale = mod_rm.scale,
                    .disp = reg_off.off + mod_rm.disp,
                } },
            },
            .load_frame => |frame_addr| return .{
                .base = .{ .frame = frame_addr.index },
                .mod = .{ .rm = .{
                    .size = mod_rm.size,
                    .index = mod_rm.index,
                    .scale = mod_rm.scale,
                    .disp = frame_addr.off + mod_rm.disp,
                } },
            },
            .load_symbol => |sym_off| {
                assert(sym_off.off == 0);
                return .{
                    .base = .{ .reloc = sym_off.sym_index },
                    .mod = .{ .rm = .{
                        .size = mod_rm.size,
                        .index = mod_rm.index,
                        .scale = mod_rm.scale,
                        .disp = sym_off.off + mod_rm.disp,
                    } },
                };
            },
            .air_ref => |ref| {
                const resolved = try function.resolveInst(ref);
                return resolved.mem(function, mod_rm);
            },

            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .lea_symbol,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .lea_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            => unreachable,
        }
    }
    519 
    /// `std.fmt` hook: writes a short human-readable rendering of `mcv`.
    ///
    /// Memory-like values are wrapped in square brackets, addresses are not.
    /// Multi-register values are printed highest register first. The format
    /// string and options are ignored. Returns whatever error the writer
    /// produces.
    pub fn format(
        mcv: MCValue,
        comptime _: []const u8,
        _: std.fmt.FormatOptions,
        writer: anytype,
    ) @TypeOf(writer).Error!void {
        return switch (mcv) {
            .none, .unreach, .dead, .undef => writer.print("({s})", .{@tagName(mcv)}),
            .immediate => |imm| writer.print("0x{x}", .{imm}),
            .memory => |addr| writer.print("[ds:0x{x}]", .{addr}),
            inline .eflags, .register => |named| writer.print("{s}", .{@tagName(named)}),
            .register_pair => |regs| writer.print("{s}:{s}", .{ @tagName(regs[1]), @tagName(regs[0]) }),
            .register_triple => |regs| writer.print("{s}:{s}:{s}", .{
                @tagName(regs[2]), @tagName(regs[1]), @tagName(regs[0]),
            }),
            .register_quadruple => |regs| writer.print("{s}:{s}:{s}:{s}", .{
                @tagName(regs[3]), @tagName(regs[2]), @tagName(regs[1]), @tagName(regs[0]),
            }),
            .register_offset => |reg_off| writer.print("{s} + 0x{x}", .{ @tagName(reg_off.reg), reg_off.off }),
            .register_overflow => |overflow| writer.print("{s}:{s}", .{
                @tagName(overflow.eflags),
                @tagName(overflow.reg),
            }),
            .register_mask => |mask| writer.print("mask({s},{}):{c}{s}", .{
                @tagName(mask.info.kind),
                mask.info.scalar,
                @as(u8, if (mask.info.inverted) '!' else ' '),
                @tagName(mask.reg),
            }),
            .indirect => |reg_off| writer.print("[{s} + 0x{x}]", .{ @tagName(reg_off.reg), reg_off.off }),
            .load_symbol => |sym_off| writer.print("[sym:{} + 0x{x}]", .{ sym_off.sym_index, sym_off.off }),
            .lea_symbol => |sym_off| writer.print("sym:{} + 0x{x}", .{ sym_off.sym_index, sym_off.off }),
            .load_direct => |sym_index| writer.print("[direct:{d}]", .{sym_index}),
            .lea_direct => |sym_index| writer.print("direct:{d}", .{sym_index}),
            .load_got => |sym_index| writer.print("[got:{d}]", .{sym_index}),
            .lea_got => |sym_index| writer.print("got:{d}", .{sym_index}),
            .load_tlv => |sym_index| writer.print("[tlv:{d}]", .{sym_index}),
            .lea_tlv => |sym_index| writer.print("tlv:{d}", .{sym_index}),
            .load_frame => |frame_addr| writer.print("[{} + 0x{x}]", .{ frame_addr.index, frame_addr.off }),
            .lea_frame => |frame_addr| writer.print("{} + 0x{x}", .{ frame_addr.index, frame_addr.off }),
            .elementwise_regs_then_frame => |elems| writer.print("elementwise:{d}:[{} + 0x{x}]", .{
                elems.regs, elems.frame_index, elems.frame_off,
            }),
            .reserved_frame => |frame_index| writer.print("(dead:{})", .{frame_index}),
            .air_ref => |ref| writer.print("(air:0x{x})", .{@intFromEnum(ref)}),
        };
    }
    567 };
    568 
    569 const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking);
    570 const ConstTrackingMap = std.AutoArrayHashMapUnmanaged(InternPool.Index, InstTracking);
    571 const InstTracking = struct {
    572     long: MCValue,
    573     short: MCValue,
    574 
    /// Creates the tracking entry for a freshly produced `result`.
    ///
    /// `short` is always `result`. `long` is also `result` when it is one of
    /// the constant, symbol, memory or frame tags, and `.none` when `result` is
    /// held in registers, EFLAGS or a register-relative location. Bookkeeping
    /// tags (`.dead`, `.reserved_frame`, ...) are not valid results.
    fn init(result: MCValue) InstTracking {
        const long: MCValue = switch (result) {
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .indirect,
            => .none,

            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_symbol,
            .lea_symbol,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .lea_frame,
            => result,

            .dead,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
        };
        return .{ .long = long, .short = result };
    }
    610 
    /// Single register of the `short` location, if it has exactly one.
    fn getReg(self: InstTracking) ?Register {
        const current = self.short;
        return current.getReg();
    }
    614 
    /// Returns the registers referenced by the `short` location.
    /// Takes `self` by pointer because the returned slice points into
    /// `self.short`; it is only valid while `self` is.
    fn getRegs(self: *const InstTracking) []const Register {
        return self.short.getRegs();
    }
    618 
    /// EFLAGS condition carried by the `short` location, if any.
    fn getCondition(self: InstTracking) ?Condition {
        const current = self.short;
        return current.getCondition();
    }
    622 
    /// Copies the value from its `short` location into its `long` location,
    /// first allocating (or claiming a reserved) frame slot when there is none.
    ///
    /// Does nothing when `long` and `short` already agree. `long` must be
    /// `.none`, `.load_frame` or `.reserved_frame` on entry.
    fn spill(self: *InstTracking, cg: *CodeGen, inst: Air.Inst.Index) !void {
        const already_spilled = std.meta.eql(self.long, self.short);
        if (already_spilled) return;

        // Make sure `long` names a usable frame slot.
        switch (self.long) {
            .load_frame => {},
            .reserved_frame => |frame_index| self.long = .{ .load_frame = .{ .index = frame_index } },
            .none => self.long = try cg.allocRegOrMem(inst, false),
            else => unreachable,
        }

        tracking_log.debug("spill {} from {} to {}", .{ inst, self.short, self.long });
        const ty = cg.typeOfIndex(inst);
        try cg.genCopy(ty, self.long, self.short, .{});
    }
    635 
    /// Makes `short` refer to the `long` location again.
    ///
    /// A `.reserved_frame` in `long` is first turned back into a `.load_frame`
    /// on the same index. After that, `long` must be one of the tags that can
    /// serve as a long-term location; anything else is `unreachable`.
    fn reuseFrame(self: *InstTracking) void {
        switch (self.long) {
            .reserved_frame => |frame_index| self.long = .{ .load_frame = .{ .index = frame_index } },
            else => {},
        }
        switch (self.long) {
            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_symbol,
            .lea_symbol,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .lea_frame,
            => self.short = self.long,

            .dead,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .indirect,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
        }
    }
    674 
    /// Updates tracking to reflect a spill without copying anything: frees the
    /// `short` location through `function.freeValue`, then points `short` back
    /// at `long` via `reuseFrame`.
    fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
        // Release the short location before it is overwritten below.
        try function.freeValue(self.short);
        self.reuseFrame();
        tracking_log.debug("{} => {} (spilled)", .{ inst, self.* });
    }
    680 
    /// Asserts that `self` can be materialized as `target`.
    ///
    /// When `self.long` is a constant, symbol or memory tag, `target.long` must
    /// be identical to it. When `self.long` is a frame slot (`.load_frame` or
    /// `.reserved_frame`), `target.long` must be `.none` or a frame slot. Any
    /// other `self.long` is `unreachable`.
    fn verifyMaterialize(self: InstTracking, target: InstTracking) void {
        switch (self.long) {
            .load_frame,
            .reserved_frame,
            => assert(target.long == .none or
                target.long == .load_frame or
                target.long == .reserved_frame),

            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_symbol,
            .lea_symbol,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .lea_frame,
            => assert(std.meta.eql(self.long, target.long)),

            .dead,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .indirect,
            .elementwise_regs_then_frame,
            .air_ref,
            => unreachable,
        }
    }
    722 
    /// Checked variant of `materializeUnsafe`: verifies with `verifyMaterialize`
    /// that `self` is compatible with `target`, then emits the copies that move
    /// the value of `inst` into `target`'s locations. Does not update `self`.
    fn materialize(
        self: *InstTracking,
        function: *CodeGen,
        inst: Air.Inst.Index,
        target: InstTracking,
    ) !void {
        self.verifyMaterialize(target);
        try self.materializeUnsafe(function, inst, target);
    }
    732 
    /// Emits the copies that move the value of `inst` from `self.short` into
    /// `target`'s locations, without verifying compatibility first.
    ///
    /// The value is always copied to `target.short`. It is also copied to
    /// `target.long` first when the target has a frame slot and `self` has no
    /// stored copy yet (`long` is `.none` or `.reserved_frame`).
    fn materializeUnsafe(
        self: InstTracking,
        function: *CodeGen,
        inst: Air.Inst.Index,
        target: InstTracking,
    ) !void {
        const ty = function.typeOfIndex(inst);
        const needs_frame_store = switch (self.long) {
            .none, .reserved_frame => target.long == .load_frame,
            else => false,
        };
        if (needs_frame_store) {
            try function.genCopy(ty, target.long, self.short, .{});
        }
        try function.genCopy(ty, target.short, self.short, .{});
    }
    744 
    745     fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void {
                // Bookkeeping-only counterpart of `materialize`: updates `long`/`short` to match
                // `target` and logs the change; no `genCopy` call is made here.
    746         self.verifyMaterialize(target);
    747         // Don't clobber reserved frame indices
                // When the target has no `long`, a `.load_frame` is downgraded to
                // `.reserved_frame` with the same index and an existing `.reserved_frame` is
                // kept; in every other case `target.long` is taken as-is.
    748         self.long = if (target.long == .none) switch (self.long) {
    749             .load_frame => |addr| .{ .reserved_frame = addr.index },
    750             .reserved_frame => self.long,
    751             else => target.long,
    752         } else target.long;
    753         self.short = target.short;
    754         tracking_log.debug("{} => {} (materialize)", .{ inst, self.* });
    755     }
    756 
    757     fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
                // Only acts on a value whose `short` is `.dead` and whose recorded death
                // generation is not older than `scope_generation`: it then calls `reuseFrame`
                // and logs. Every other state is left untouched.
    758         switch (self.short) {
    759             .dead => |die_generation| if (die_generation >= scope_generation) {
    760                 self.reuseFrame();
    761                 tracking_log.debug("{} => {} (resurrect)", .{ inst, self.* });
    762             },
    763             else => {},
    764         }
    765     }
    766 
    767     fn die(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
                // Idempotent: an already-dead value is left alone.
    768         if (self.short == .dead) return;
                // Hand the current `short` value to `freeValue`, then record the death together
                // with the current scope generation (which `resurrect` later compares against).
    769         try function.freeValue(self.short);
    770         self.short = .{ .dead = function.scope_generation };
    771         tracking_log.debug("{} => {} (death)", .{ inst, self.* });
    772     }
    773 
    774     fn reuse(
    775         self: *InstTracking,
    776         function: *CodeGen,
    777         new_inst: ?Air.Inst.Index,
    778         old_inst: Air.Inst.Index,
    779     ) void {
                // Marks the value dead at the current scope generation without calling
                // `freeValue` (compare `die`). `new_inst` and `old_inst` are only used for the
                // log message.
    780         self.short = .{ .dead = function.scope_generation };
    781         tracking_log.debug("{?} => {} (reuse {})", .{ new_inst, self.*, old_inst });
    782     }
    783 
    784     fn liveOut(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) void {
                // For every register returned by `self.getRegs()`, makes sure the register
                // manager considers it free, adjusting the tracking of whichever instruction
                // currently occupies it.
    785         for (self.getRegs()) |reg| {
    786             if (function.register_manager.isRegFree(reg)) {
    787                 tracking_log.debug("{} => {} (live-out)", .{ inst, self.* });
    788                 continue;
    789             }
    790 
                    // The register is occupied: look up the occupying instruction and its tracking.
    791             const index = RegisterManager.indexOfRegIntoTracked(reg).?;
    792             const tracked_inst = function.register_manager.registers[index];
    793             const tracking = function.getResolvedInstValue(tracked_inst);
    794 
    795             // Disable death.
                    // Scan the occupant's registers: exactly one must match `reg` (by id), and at
                    // most one other register may remain; both conditions are asserted.
    796             var found_reg = false;
    797             var remaining_reg: Register = .none;
    798             for (tracking.getRegs()) |tracked_reg| if (tracked_reg.id() == reg.id()) {
    799                 assert(!found_reg);
    800                 found_reg = true;
    801             } else {
    802                 assert(remaining_reg == .none);
    803                 remaining_reg = tracked_reg;
    804             };
    805             assert(found_reg);
                    // The occupant either becomes dead (no register left) or keeps only the
                    // remaining register as its `short` location.
    806             tracking.short = switch (remaining_reg) {
    807                 .none => .{ .dead = function.scope_generation },
    808                 else => .{ .register = remaining_reg },
    809             };
    810 
    811             // Perform side-effects of freeValue manually.
    812             function.register_manager.freeReg(reg);
    813 
    814             tracking_log.debug("{} => {} (live-out {})", .{ inst, self.*, tracked_inst });
    815         }
    816     }
    817 
    818     pub fn format(
    819         tracking: InstTracking,
    820         comptime _: []const u8,
    821         _: std.fmt.FormatOptions,
    822         writer: anytype,
    823     ) @TypeOf(writer).Error!void {
                // Output shape: "|long| short" when the two locations differ, otherwise just
                // "short". The format string and options are ignored.
    824         if (!std.meta.eql(tracking.long, tracking.short)) try writer.print("|{}| ", .{tracking.long});
    825         try writer.print("{}", .{tracking.short});
    826     }
    827 };
    828 
    829 const FrameAlloc = struct {
            // Size/alignment record for one frame allocation, plus a reference count.
    830     abi_size: u31,
    831     spill_pad: u3,
    832     abi_align: InternPool.Alignment,
    833     ref_count: u16,
    834 
            /// Builds a `FrameAlloc` from an explicit size, optional pad (default 0) and
            /// alignment. `size` is narrowed with `@intCast`, so it must fit in `u31`.
            /// The reference count always starts at 0.
    835     fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: InternPool.Alignment }) FrameAlloc {
    836         return .{
    837             .abi_size = @intCast(alloc_abi.size),
    838             .spill_pad = alloc_abi.pad,
    839             .abi_align = alloc_abi.alignment,
    840             .ref_count = 0,
    841         };
    842     }
            /// Builds a `FrameAlloc` using exactly the ABI size and ABI alignment of `ty`
            /// (pad stays at its default of 0).
    843     fn initType(ty: Type, zcu: *Zcu) FrameAlloc {
    844         return init(.{
    845             .size = ty.abiSize(zcu),
    846             .alignment = ty.abiAlignment(zcu),
    847         });
    848     }
            /// Builds a `FrameAlloc` for a spill slot of `ty` with a rounded-up size.
    849     fn initSpill(ty: Type, zcu: *Zcu) FrameAlloc {
    850         const abi_size = ty.abiSize(zcu);
                // Sizes below 8 are rounded up to a power of two; larger sizes are rounded up
                // to a multiple of 8.
                // NOTE(review): `ceilPowerOfTwoAssert` presumably requires a nonzero input, so
                // this appears to assume `abi_size != 0` — confirm against callers.
    851         const spill_size = if (abi_size < 8)
    852             std.math.ceilPowerOfTwoAssert(u64, abi_size)
    853         else
    854             std.mem.alignForward(u64, abi_size, 8);
    855         return init(.{
    856             .size = spill_size,
                    // Number of bytes added by the rounding above; must fit in `u3`.
    857             .pad = @intCast(spill_size - abi_size),
                    // Combines the type's ABI alignment with min(spill_size, 8) via `maxStrict`.
    858             .alignment = ty.abiAlignment(zcu).maxStrict(
    859                 .fromNonzeroByteUnits(@min(spill_size, 8)),
    860             ),
    861         });
    862     }
    863 };
    864 
    865 const StackAllocation = struct {
            // Pairs an optional AIR instruction with a size; see the TODO on `size` below.
    866     inst: ?Air.Inst.Index,
    867     /// TODO do we need size? should be determined by inst.ty.abiSize(zcu)
    868     size: u32,
    869 };
    870 
    871 const BlockData = struct {
            // Per-block record: a growable list of MIR instruction indices (`relocs`,
            // initially empty) and a `State` value.
    872     relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
    873     state: State,
    874 
            /// Frees `relocs` with `gpa` and sets the whole struct to `undefined` so that
            /// later use is detectable in safe builds.
    875     fn deinit(self: *BlockData, gpa: Allocator) void {
    876         self.relocs.deinit(gpa);
    877         self.* = undefined;
    878     }
    879 };
    880 
    881 const CodeGen = @This(); // this file is itself the `CodeGen` struct; its fields are declared at file scope
    882 
    883 pub fn generate(
    884     bin_file: *link.File,
    885     pt: Zcu.PerThread,
    886     src_loc: Zcu.LazySrcLoc,
    887     func_index: InternPool.Index,
    888     air: Air,
    889     liveness: Liveness,
    890     code: *std.ArrayListUnmanaged(u8),
    891     debug_output: link.File.DebugInfoOutput,
    892 ) codegen.CodeGenError!void {
            // Entry point for one function: builds a `CodeGen` for `func_index`, runs `gen` to
            // produce MIR, then passes that MIR to `Emit` together with `code` and
            // `debug_output`. Failures are reported through `fail`/`failMsg` or propagated.
    893     const zcu = pt.zcu;
    894     const comp = zcu.comp;
    895     const gpa = zcu.gpa;
    896     const ip = &zcu.intern_pool;
    897     const func = zcu.funcInfo(func_index);
    898     const fn_type: Type = .fromInterned(func.ty);
            // Target and PIC settings come from the module that owns this function.
    899     const mod = zcu.navFileScope(func.owner_nav).mod;
    900 
    901     var function: CodeGen = .{
    902         .gpa = gpa,
    903         .pt = pt,
    904         .air = air,
    905         .liveness = liveness,
    906         .target = &mod.resolved_target.result,
    907         .mod = mod,
    908         .bin_file = bin_file,
    909         .debug_output = debug_output,
    910         .owner = .{ .nav_index = func.owner_nav },
    911         .inline_func = func_index,
    912         .arg_index = undefined,
    913         .args = undefined, // populated after `resolveCallingConventionValues`
    914         .va_info = undefined, // populated after `resolveCallingConventionValues`
    915         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
    916         .fn_type = fn_type,
    917         .src_loc = src_loc,
    918         .end_di_line = func.rbrace_line,
    919         .end_di_column = func.rbrace_column,
    920     };
            // Release all per-function containers on every exit path.
    921     defer {
    922         function.frame_allocs.deinit(gpa);
    923         function.free_frame_indices.deinit(gpa);
    924         function.frame_locs.deinit(gpa);
    925         function.loops.deinit(gpa);
    926         function.loop_switches.deinit(gpa);
    927         var block_it = function.blocks.valueIterator();
    928         while (block_it.next()) |block| block.deinit(gpa);
    929         function.blocks.deinit(gpa);
    930         function.inst_tracking.deinit(gpa);
    931         function.const_tracking.deinit(gpa);
    932         function.epilogue_relocs.deinit(gpa);
    933         function.mir_instructions.deinit(gpa);
    934         function.mir_extra.deinit(gpa);
    935         function.mir_table.deinit(gpa);
    936     }
            // Pre-register a `.none` tracking entry for each of the `Temp.Index.max` temp indices.
    937     try function.inst_tracking.ensureTotalCapacity(gpa, Temp.Index.max);
    938     for (0..Temp.Index.max) |temp_index| {
    939         const temp: Temp.Index = @enumFromInt(temp_index);
    940         function.inst_tracking.putAssumeCapacityNoClobber(temp.toIndex(), .init(.none));
    941     }
    942 
    943     wip_mir_log.debug("{}:", .{fmtNav(func.owner_nav, ip)});
    944 
            // Reserve the named frame indices; `stack_frame` and `call_frame` start out with
            // size 0 and alignment 1.
    945     try function.frame_allocs.resize(gpa, FrameIndex.named_count);
    946     function.frame_allocs.set(
    947         @intFromEnum(FrameIndex.stack_frame),
    948         .init(.{ .size = 0, .alignment = .@"1" }),
    949     );
    950     function.frame_allocs.set(
    951         @intFromEnum(FrameIndex.call_frame),
    952         .init(.{ .size = 0, .alignment = .@"1" }),
    953     );
    954 
            // Resolve where this function's own arguments and return value live.
    955     const fn_info = zcu.typeToFunc(fn_type).?;
    956     var call_info = function.resolveCallingConventionValues(fn_info, &.{}, .args_frame) catch |err| switch (err) {
    957         error.CodegenFail => return error.CodegenFail,
    958         else => |e| return e,
    959     };
    960     defer call_info.deinit(&function);
    961 
    962     function.args = call_info.args;
    963     function.ret_mcv = call_info.return_value;
            // The remaining named frame slots are sized from `usize` and from the resolved
            // calling-convention stack layout.
    964     function.frame_allocs.set(@intFromEnum(FrameIndex.ret_addr), .init(.{
    965         .size = Type.usize.abiSize(zcu),
    966         .alignment = Type.usize.abiAlignment(zcu).min(call_info.stack_align),
    967     }));
    968     function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), .init(.{
    969         .size = Type.usize.abiSize(zcu),
    970         .alignment = call_info.stack_align.min(
    971             .fromNonzeroByteUnits(function.target.stackAlignment()),
    972         ),
    973     }));
    974     function.frame_allocs.set(
    975         @intFromEnum(FrameIndex.args_frame),
    976         .init(.{
    977             .size = call_info.stack_byte_count,
    978             .alignment = call_info.stack_align,
    979         }),
    980     );
            // Only the sysv and win64 calling conventions get a defined `va_info`; for any
            // other convention it stays undefined.
    981     function.va_info = switch (fn_info.cc) {
    982         else => undefined,
    983         .x86_64_sysv => .{ .sysv = .{
    984             .gp_count = call_info.gp_count,
    985             .fp_count = call_info.fp_count,
    986             .overflow_arg_area = .{ .index = .args_frame, .off = call_info.stack_byte_count },
    987             .reg_save_area = undefined,
    988         } },
    989         .x86_64_win => .{ .win64 = .{} },
    990     };
    991 
            // Generate MIR. Running out of registers is turned into a reported failure.
    992     function.gen() catch |err| switch (err) {
    993         error.CodegenFail => return error.CodegenFail,
    994         error.OutOfRegisters => return function.fail("ran out of registers (Zig compiler bug)", .{}),
    995         else => |e| return e,
    996     };
    997 
            // Move the generated MIR out of `function` into a `Mir` value owned by this scope.
            // NOTE(review): `mir_instructions.toOwnedSlice()` is evaluated before the two
            // fallible `try ...toOwnedSlice(gpa)` calls and before `defer mir.deinit(gpa)` is
            // registered; if either `try` fails, the already-taken instruction slice may
            // leak — confirm.
    998     var mir: Mir = .{
    999         .instructions = function.mir_instructions.toOwnedSlice(),
   1000         .extra = try function.mir_extra.toOwnedSlice(gpa),
   1001         .table = try function.mir_table.toOwnedSlice(gpa),
   1002         .frame_locs = function.frame_locs.toOwnedSlice(),
   1003     };
   1004     defer mir.deinit(gpa);
   1005 
            // Configure the emitter with this function's calling convention, module PIC
            // setting and opening-brace source position.
   1006     var emit: Emit = .{
   1007         .air = function.air,
   1008         .lower = .{
   1009             .bin_file = bin_file,
   1010             .target = function.target,
   1011             .allocator = gpa,
   1012             .mir = mir,
   1013             .cc = fn_info.cc,
   1014             .src_loc = src_loc,
   1015             .output_mode = comp.config.output_mode,
   1016             .link_mode = comp.config.link_mode,
   1017             .pic = mod.pic,
   1018         },
   1019         .atom_index = function.owner.getSymbolIndex(&function) catch |err| switch (err) {
   1020             error.CodegenFail => return error.CodegenFail,
   1021             else => |e| return e,
   1022         },
   1023         .debug_output = debug_output,
   1024         .code = code,
   1025         .prev_di_loc = .{
   1026             .line = func.lbrace_line,
   1027             .column = func.lbrace_column,
                    // `is_stmt` is only defined when DWARF output is requested.
   1028             .is_stmt = switch (debug_output) {
   1029                 .dwarf => |dwarf| dwarf.dwarf.debug_line.header.default_is_stmt,
   1030                 .plan9 => undefined,
   1031                 .none => undefined,
   1032             },
   1033         },
   1034         .prev_di_pc = 0,
   1035     };
            // Lower/emit failures carry a prepared message in `emit.lower.err_msg`; every
            // other error is wrapped into a generic "emit MIR failed" message.
   1036     emit.emitMir() catch |err| switch (err) {
   1037         error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),
   1038 
   1039         error.InvalidInstruction, error.CannotEncode => |e| return function.fail("emit MIR failed: {s} (Zig compiler bug)", .{@errorName(e)}),
   1040         else => |e| return function.fail("emit MIR failed: {s}", .{@errorName(e)}),
   1041     };
   1042 }
   1043 
   1044 pub fn generateLazy(
   1045     bin_file: *link.File,
   1046     pt: Zcu.PerThread,
   1047     src_loc: Zcu.LazySrcLoc,
   1048     lazy_sym: link.File.LazySymbol,
   1049     code: *std.ArrayListUnmanaged(u8),
   1050     debug_output: link.File.DebugInfoOutput,
   1051 ) codegen.CodeGenError!void {
            // Counterpart of `generate` for a lazy symbol: runs `genLazy(lazy_sym)` and emits
            // the resulting MIR. There is no AIR, liveness or function type here, so those
            // fields are left undefined.
   1052     const comp = bin_file.comp;
   1053     const gpa = comp.gpa;
   1054     // This function is for generating global code, so we use the root module.
   1055     const mod = comp.root_mod;
   1056     var function: CodeGen = .{
   1057         .gpa = gpa,
   1058         .pt = pt,
   1059         .air = undefined,
   1060         .liveness = undefined,
   1061         .target = &mod.resolved_target.result,
   1062         .mod = mod,
   1063         .bin_file = bin_file,
   1064         .debug_output = debug_output,
   1065         .owner = .{ .lazy_sym = lazy_sym },
   1066         .inline_func = undefined,
   1067         .arg_index = undefined,
   1068         .args = undefined,
   1069         .va_info = undefined,
   1070         .ret_mcv = undefined,
   1071         .fn_type = undefined,
   1072         .src_loc = src_loc,
   1073         .end_di_line = undefined, // no debug info yet
   1074         .end_di_column = undefined, // no debug info yet
   1075     };
            // Unlike `generate`, only the three MIR lists are released here.
   1076     defer {
   1077         function.mir_instructions.deinit(gpa);
   1078         function.mir_extra.deinit(gpa);
   1079         function.mir_table.deinit(gpa);
   1080     }
   1081 
   1082     function.genLazy(lazy_sym) catch |err| switch (err) {
   1083         error.CodegenFail => return error.CodegenFail,
   1084         error.OutOfRegisters => return function.fail("ran out of registers (Zig compiler bug)", .{}),
   1085         else => |e| return e,
   1086     };
   1087 
            // Move the generated MIR out of `function` into a `Mir` value owned by this scope.
            // NOTE(review): as in `generate`, the infallible `mir_instructions.toOwnedSlice()`
            // runs before the two fallible `try` calls and before `defer mir.deinit(gpa)`;
            // a failure there may leak the instruction slice — confirm.
   1088     var mir: Mir = .{
   1089         .instructions = function.mir_instructions.toOwnedSlice(),
   1090         .extra = try function.mir_extra.toOwnedSlice(gpa),
   1091         .table = try function.mir_table.toOwnedSlice(gpa),
   1092         .frame_locs = function.frame_locs.toOwnedSlice(),
   1093     };
   1094     defer mir.deinit(gpa);
   1095 
            // The emitter is configured with the `.auto` calling convention and the root
            // module's PIC setting; the debug-info position fields stay undefined.
   1096     var emit: Emit = .{
   1097         .air = function.air,
   1098         .lower = .{
   1099             .bin_file = bin_file,
   1100             .target = function.target,
   1101             .allocator = gpa,
   1102             .mir = mir,
   1103             .cc = .auto,
   1104             .src_loc = src_loc,
   1105             .output_mode = comp.config.output_mode,
   1106             .link_mode = comp.config.link_mode,
   1107             .pic = mod.pic,
   1108         },
   1109         .atom_index = function.owner.getSymbolIndex(&function) catch |err| switch (err) {
   1110             error.CodegenFail => return error.CodegenFail,
   1111             else => |e| return e,
   1112         },
   1113         .debug_output = debug_output,
   1114         .code = code,
   1115         .prev_di_loc = undefined, // no debug info yet
   1116         .prev_di_pc = undefined, // no debug info yet
   1117     };
            // Lower/emit failures carry a prepared message; the remaining errors are mapped
            // to fixed messages (worded differently from the ones in `generate`).
   1118     emit.emitMir() catch |err| switch (err) {
   1119         error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),
   1120         error.InvalidInstruction => return function.fail("failed to find a viable x86 instruction (Zig compiler bug)", .{}),
   1121         error.CannotEncode => return function.fail("failed to encode x86 instruction (Zig compiler bug)", .{}),
   1122         else => |e| return function.fail("failed to emit MIR: {s}", .{@errorName(e)}),
   1123     };
   1124 }
   1125 
   1126 const FormatNavData = struct {
            // Payload for `formatNav`: the intern pool and the nav whose name is printed.
   1127     ip: *const InternPool,
   1128     nav_index: InternPool.Nav.Index,
   1129 };
   1130 fn formatNav(
   1131     data: FormatNavData,
   1132     comptime _: []const u8,
   1133     _: std.fmt.FormatOptions,
   1134     writer: anytype,
   1135 ) @TypeOf(writer).Error!void {
            // Prints the `fqn` of the nav looked up in the intern pool; the format string and
            // options are ignored.
   1136     try writer.print("{}", .{data.ip.getNav(data.nav_index).fqn.fmt(data.ip)});
   1137 }
   1138 fn fmtNav(nav_index: InternPool.Nav.Index, ip: *const InternPool) std.fmt.Formatter(formatNav) {
            // Wraps the arguments in a formatter so a nav can be passed directly to a
            // `{}` placeholder; the actual printing happens in `formatNav`.
   1139     return .{ .data = .{
   1140         .ip = ip,
   1141         .nav_index = nav_index,
   1142     } };
   1143 }
   1144 
   1145 const FormatAirData = struct {
            // Payload for `formatAir`: the code generator and the AIR instruction to dump.
   1146     self: *CodeGen,
   1147     inst: Air.Inst.Index,
   1148 };
   1149 fn formatAir(
   1150     data: FormatAirData,
   1151     comptime _: []const u8,
   1152     _: std.fmt.FormatOptions,
   1153     writer: anytype,
   1154 ) @TypeOf(writer).Error!void {
            // Delegates to `print_air.dumpInst` with this code generator's `pt`, `air` and
            // `liveness`.
            // NOTE(review): `writer` is used only for the return type and is never passed to
            // `dumpInst`, so the dump presumably goes to wherever `dumpInst` writes rather than
            // to `writer` — confirm.
   1155     @import("../../print_air.zig").dumpInst(
   1156         data.inst,
   1157         data.self.pt,
   1158         data.self.air,
   1159         data.self.liveness,
   1160     );
   1161 }
   1162 fn fmtAir(self: *CodeGen, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) {
            // Formatter wrapper around `formatAir` for use with a `{}` placeholder.
   1163     return .{ .data = .{ .self = self, .inst = inst } };
   1164 }
   1165 
   1166 const FormatWipMirData = struct {
            // Payload for `formatWipMir`: the code generator and the MIR instruction to print.
   1167     self: *CodeGen,
   1168     inst: Mir.Inst.Index,
   1169 };
   1170 fn formatWipMir(
   1171     data: FormatWipMirData,
   1172     comptime _: []const u8,
   1173     _: std.fmt.FormatOptions,
   1174     writer: anytype,
   1175 ) @TypeOf(writer).Error!void {
            // Prints the in-progress MIR instruction `data.inst` by lowering it with a temporary
            // `Lower` and printing every resulting instruction. If lowering yields nothing, the
            // instruction's `ops` tag is printed instead, followed by its operands for the
            // pseudo debug instructions listed at the bottom.
   1176     const comp = data.self.bin_file.comp;
            // NOTE(review): `.pic` below comes from the root module, whereas `generate` uses the
            // function's own module (`mod.pic`); the logged lowering could therefore differ
            // from the emitted one when the two modules disagree — confirm this is intended.
   1177     const mod = comp.root_mod;
   1178     var lower: Lower = .{
   1179         .bin_file = data.self.bin_file,
   1180         .target = data.self.target,
   1181         .allocator = data.self.gpa,
                // Views over the lists still being built; frame locations are left empty.
   1182         .mir = .{
   1183             .instructions = data.self.mir_instructions.slice(),
   1184             .extra = data.self.mir_extra.items,
   1185             .table = data.self.mir_table.items,
   1186             .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(),
   1187         },
   1188         .cc = .auto,
   1189         .src_loc = data.self.src_loc,
   1190         .output_mode = comp.config.output_mode,
   1191         .link_mode = comp.config.link_mode,
   1192         .pic = mod.pic,
   1193     };
            // `first` stays true only if lowering produced no instructions at all.
   1194     var first = true;
            // Lowering failures are not propagated: their message is written to `writer` and
            // the function returns normally.
   1195     for ((lower.lowerMir(data.inst) catch |err| switch (err) {
   1196         error.LowerFail => {
   1197             defer {
   1198                 lower.err_msg.?.deinit(data.self.gpa);
   1199                 lower.err_msg = null;
   1200             }
   1201             try writer.writeAll(lower.err_msg.?.msg);
   1202             return;
   1203         },
   1204         error.OutOfMemory, error.InvalidInstruction, error.CannotEncode => |e| {
   1205             try writer.writeAll(switch (e) {
   1206                 error.OutOfMemory => "Out of memory",
   1207                 error.InvalidInstruction => "CodeGen failed to find a viable instruction.",
   1208                 error.CannotEncode => "CodeGen failed to encode the instruction.",
   1209             });
   1210             return;
   1211         },
   1212         else => |e| return e,
   1213     }).insts) |lowered_inst| {
                // Every lowered instruction after the first starts on a new line that repeats
                // the log prefix.
   1214         if (!first) try writer.writeAll("\ndebug(wip_mir): ");
   1215         try writer.print("  | {}", .{lowered_inst});
   1216         first = false;
   1217     }
   1218     if (first) {
                // Nothing was lowered: print the `ops` tag, then operands per pseudo op.
   1219         const ip = &data.self.pt.zcu.intern_pool;
   1220         const mir_inst = lower.mir.instructions.get(data.inst);
   1221         try writer.print("  | .{s}", .{@tagName(mir_inst.ops)});
   1222         switch (mir_inst.ops) {
                    // Only the pseudo ops listed below are expected to reach this point.
   1223             else => unreachable,
                    // Ops without operands: the tag alone is the whole output.
   1224             .pseudo_dbg_prologue_end_none,
   1225             .pseudo_dbg_epilogue_begin_none,
   1226             .pseudo_dbg_enter_block_none,
   1227             .pseudo_dbg_leave_block_none,
   1228             .pseudo_dbg_var_args_none,
   1229             .pseudo_dead_none,
   1230             => {},
   1231             .pseudo_dbg_line_stmt_line_column, .pseudo_dbg_line_line_column => try writer.print(
   1232                 " {[line]d}, {[column]d}",
   1233                 mir_inst.data.line_column,
   1234             ),
   1235             .pseudo_dbg_enter_inline_func, .pseudo_dbg_leave_inline_func => try writer.print(" {}", .{
   1236                 ip.getNav(ip.indexToKey(mir_inst.data.func).func.owner_nav).name.fmt(ip),
   1237             }),
   1238             .pseudo_dbg_local_a => try writer.print(" {}", .{mir_inst.data.a.air_inst}),
                    // The `_s` variant reinterprets the stored `i` as a signed 32-bit value.
   1239             .pseudo_dbg_local_ai_s => try writer.print(" {}, {d}", .{
   1240                 mir_inst.data.ai.air_inst,
   1241                 @as(i32, @bitCast(mir_inst.data.ai.i)),
   1242             }),
   1243             .pseudo_dbg_local_ai_u => try writer.print(" {}, {d}", .{
   1244                 mir_inst.data.ai.air_inst,
   1245                 mir_inst.data.ai.i,
   1246             }),
                    // The `_64` variant uses `i` as an index into `extra` holding a `Mir.Imm64`.
   1247             .pseudo_dbg_local_ai_64 => try writer.print(" {}, {d}", .{
   1248                 mir_inst.data.ai.air_inst,
   1249                 lower.mir.extraData(Mir.Imm64, mir_inst.data.ai.i).data.decode(),
   1250             }),
                    // The remaining variants build a qword memory operand purely for printing.
   1251             .pseudo_dbg_local_as => {
   1252                 const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
   1253                     .base = .{ .reloc = mir_inst.data.as.sym_index },
   1254                 }) };
   1255                 try writer.print(" {}, {}", .{ mir_inst.data.as.air_inst, mem_op.fmt(.m) });
   1256             },
   1257             .pseudo_dbg_local_aso => {
   1258                 const sym_off = lower.mir.extraData(bits.SymbolOffset, mir_inst.data.ax.payload).data;
   1259                 const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
   1260                     .base = .{ .reloc = sym_off.sym_index },
   1261                     .disp = sym_off.off,
   1262                 }) };
   1263                 try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, mem_op.fmt(.m) });
   1264             },
   1265             .pseudo_dbg_local_aro => {
   1266                 const air_off = lower.mir.extraData(Mir.AirOffset, mir_inst.data.rx.payload).data;
   1267                 const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
   1268                     .base = .{ .reg = mir_inst.data.rx.r1 },
   1269                     .disp = air_off.off,
   1270                 }) };
   1271                 try writer.print(" {}, {}", .{ air_off.air_inst, mem_op.fmt(.m) });
   1272             },
   1273             .pseudo_dbg_local_af => {
   1274                 const frame_addr = lower.mir.extraData(bits.FrameAddr, mir_inst.data.ax.payload).data;
   1275                 const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
   1276                     .base = .{ .frame = frame_addr.index },
   1277                     .disp = frame_addr.off,
   1278                 }) };
   1279                 try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, mem_op.fmt(.m) });
   1280             },
   1281             .pseudo_dbg_local_am => {
   1282                 const mem_op: encoder.Instruction.Operand = .{
   1283                     .mem = lower.mir.extraData(Mir.Memory, mir_inst.data.ax.payload).data.decode(),
   1284                 };
   1285                 try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, mem_op.fmt(.m) });
   1286             },
   1287         }
   1288     }
   1289 }
   1290 fn fmtWipMir(self: *CodeGen, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) {
            // Formatter wrapper around `formatWipMir` for use with a `{}` placeholder.
   1291     return .{ .data = .{ .self = self, .inst = inst } };
   1292 }
   1293 
   1294 const FormatTrackingData = struct {
            // Payload for `formatTracking`: the code generator whose `inst_tracking` is printed.
   1295     self: *CodeGen,
   1296 };
   1297 fn formatTracking(
   1298     data: FormatTrackingData,
   1299     comptime _: []const u8,
   1300     _: std.fmt.FormatOptions,
   1301     writer: anytype,
   1302 ) @TypeOf(writer).Error!void {
            // Writes one "key = tracking" entry per element of `inst_tracking`, each preceded
            // by a newline, in the map's iteration order.
   1303     var it = data.self.inst_tracking.iterator();
   1304     while (it.next()) |entry| try writer.print("\n{} = {}", .{ entry.key_ptr.*, entry.value_ptr.* });
   1305 }
   1306 fn fmtTracking(self: *CodeGen) std.fmt.Formatter(formatTracking) {
            // Formatter wrapper around `formatTracking` for use with a `{}` placeholder.
   1307     return .{ .data = .{ .self = self } };
   1308 }
   1309 
   1310 fn addInst(self: *CodeGen, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
            // Appends `inst` to `mir_instructions` and returns the index it was stored at.
            // Capacity is reserved first, so the append itself cannot fail.
   1311     const gpa = self.gpa;
   1312     try self.mir_instructions.ensureUnusedCapacity(gpa, 1);
   1313     const result_index: Mir.Inst.Index = @intCast(self.mir_instructions.len);
   1314     self.mir_instructions.appendAssumeCapacity(inst);
            // Everything except `pseudo_dead_none` is echoed to the `wip_mir` debug log.
   1315     if (inst.ops != .pseudo_dead_none) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)});
   1316     return result_index;
   1317 }
   1318 
   1319 fn addExtra(self: *CodeGen, extra: anytype) Allocator.Error!u32 {
            // Reserves one `mir_extra` slot per field of `extra`, then stores the fields via
            // `addExtraAssumeCapacity` and returns the index of the first stored element.
   1320     const fields = std.meta.fields(@TypeOf(extra));
   1321     try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len);
   1322     return self.addExtraAssumeCapacity(extra);
   1323 }
   1324 
   1325 fn addExtraAssumeCapacity(self: *CodeGen, extra: anytype) u32 {
            // Appends every field of `extra` to `mir_extra`, one element per field and in
            // declaration order, without allocating. Returns the index of the first element.
   1326     const fields = std.meta.fields(@TypeOf(extra));
   1327     const result: u32 = @intCast(self.mir_extra.items.len);
   1328     inline for (fields) |field| {
                // Supported field types: `u32` is stored as-is, `i32` and `Mir.Memory.Info` are
                // bit-cast, `bits.FrameIndex` is stored by its enum integer value. Any other
                // field type is rejected at compile time.
   1329         self.mir_extra.appendAssumeCapacity(switch (field.type) {
   1330             u32 => @field(extra, field.name),
   1331             i32, Mir.Memory.Info => @bitCast(@field(extra, field.name)),
   1332             bits.FrameIndex => @intFromEnum(@field(extra, field.name)),
   1333             else => @compileError("bad field type: " ++ field.name ++ ": " ++ @typeName(field.type)),
   1334         });
   1335     }
   1336     return result;
   1337 }
   1338 
   1339 fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void {
            // Dispatches on the kinds of up to four operands to the matching `asm*` helper.
            // Operands are examined left to right and the first `.none` ends the list; operands
            // after it are not inspected. Any combination without a helper yields
            // `error.InvalidInstruction`.
   1340     return switch (ops[0]) {
   1341         .none => self.asmOpOnly(tag),
                // First operand is a register.
   1342         .reg => |reg0| switch (ops[1]) {
   1343             .none => self.asmRegister(tag, reg0),
   1344             .reg => |reg1| switch (ops[2]) {
   1345                 .none => self.asmRegisterRegister(tag, reg0, reg1),
   1346                 .reg => |reg2| switch (ops[3]) {
   1347                     .none => self.asmRegisterRegisterRegister(tag, reg0, reg1, reg2),
   1348                     .reg => |reg3| self.asmRegisterRegisterRegisterRegister(tag, reg0, reg1, reg2, reg3),
   1349                     .imm => |imm3| self.asmRegisterRegisterRegisterImmediate(tag, reg0, reg1, reg2, imm3),
   1350                     else => error.InvalidInstruction,
   1351                 },
   1352                 .mem => |mem2| switch (ops[3]) {
   1353                     .none => self.asmRegisterRegisterMemory(tag, reg0, reg1, mem2),
   1354                     .reg => |reg3| self.asmRegisterRegisterMemoryRegister(tag, reg0, reg1, mem2, reg3),
   1355                     .imm => |imm3| self.asmRegisterRegisterMemoryImmediate(tag, reg0, reg1, mem2, imm3),
   1356                     else => error.InvalidInstruction,
   1357                 },
   1358                 .imm => |imm2| switch (ops[3]) {
   1359                     .none => self.asmRegisterRegisterImmediate(tag, reg0, reg1, imm2),
   1360                     else => error.InvalidInstruction,
   1361                 },
   1362                 else => error.InvalidInstruction,
   1363             },
   1364             .mem => |mem1| switch (ops[2]) {
   1365                 .none => self.asmRegisterMemory(tag, reg0, mem1),
   1366                 .reg => |reg2| switch (ops[3]) {
   1367                     .none => self.asmRegisterMemoryRegister(tag, reg0, mem1, reg2),
   1368                     else => error.InvalidInstruction,
   1369                 },
   1370                 .imm => |imm2| switch (ops[3]) {
   1371                     .none => self.asmRegisterMemoryImmediate(tag, reg0, mem1, imm2),
   1372                     else => error.InvalidInstruction,
   1373                 },
   1374                 else => error.InvalidInstruction,
   1375             },
   1376             .imm => |imm1| switch (ops[2]) {
   1377                 .none => self.asmRegisterImmediate(tag, reg0, imm1),
   1378                 else => error.InvalidInstruction,
   1379             },
   1380             else => error.InvalidInstruction,
   1381         },
                // First operand is a memory operand.
   1382         .mem => |mem0| switch (ops[1]) {
   1383             .none => self.asmMemory(tag, mem0),
   1384             .reg => |reg1| switch (ops[2]) {
   1385                 .none => self.asmMemoryRegister(tag, mem0, reg1),
   1386                 .reg => |reg2| switch (ops[3]) {
   1387                     .none => self.asmMemoryRegisterRegister(tag, mem0, reg1, reg2),
   1388                     else => error.InvalidInstruction,
   1389                 },
   1390                 .imm => |imm2| switch (ops[3]) {
   1391                     .none => self.asmMemoryRegisterImmediate(tag, mem0, reg1, imm2),
   1392                     else => error.InvalidInstruction,
   1393                 },
   1394                 else => error.InvalidInstruction,
   1395             },
   1396             .imm => |imm1| switch (ops[2]) {
   1397                 .none => self.asmMemoryImmediate(tag, mem0, imm1),
   1398                 else => error.InvalidInstruction,
   1399             },
   1400             else => error.InvalidInstruction,
   1401         },
                // A leading immediate or instruction reference must be the only operand.
   1402         .imm => |imm0| switch (ops[1]) {
   1403             .none => self.asmImmediate(tag, imm0),
   1404             else => error.InvalidInstruction,
   1405         },
   1406         .inst => |inst0| switch (ops[1]) {
   1407             .none => self.asmReloc(tag, inst0),
   1408             else => error.InvalidInstruction,
   1409         },
   1410     };
   1411 }
   1412 
   1413 /// A `cc` of `.z_and_np` clobbers `reg2`!
        /// Emits a conditional register-to-register move of `reg2` into `reg1` under `cc`.
        /// When the target has the `cmov` feature a single MIR instruction is added;
        /// otherwise a conditional jump over a plain `mov` is emitted instead.
   1414 fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2: Register) !void {
   1415     if (self.hasFeature(.cmov)) _ = try self.addInst(.{
                // The compound conditions `.z_and_np` and `.nz_or_p` are emitted as dedicated
                // pseudo instructions; every other condition becomes a `cmov` whose condition
                // is encoded through `fixes`.
   1416         .tag = switch (cc) {
   1417             else => .cmov,
   1418             .z_and_np, .nz_or_p => .pseudo,
   1419         },
   1420         .ops = switch (cc) {
   1421             else => .rr,
   1422             .z_and_np => .pseudo_cmov_z_and_np_rr,
   1423             .nz_or_p => .pseudo_cmov_nz_or_p_rr,
   1424         },
   1425         .data = .{ .rr = .{
   1426             .fixes = switch (cc) {
   1427                 else => .fromCondition(cc),
   1428                 .z_and_np, .nz_or_p => ._,
   1429             },
   1430             .r1 = reg1,
   1431             .r2 = reg2,
   1432         } },
   1433     }) else {
                // No `cmov`: jump on the negated condition (target patched by `performReloc`
                // to just after the `mov`), so the `mov` only runs when `cc` holds.
   1434         const reloc = try self.asmJccReloc(cc.negate(), undefined);
   1435         try self.asmRegisterRegister(.{ ._, .mov }, reg1, reg2);
   1436         self.performReloc(reloc);
   1437     }
   1438 }
   1439 
   1440 /// A `cc` of `.z_and_np` is not supported by this encoding!
   1441 fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memory) !void {
   1442     if (self.hasFeature(.cmov)) _ = try self.addInst(.{
   1443         .tag = switch (cc) {
   1444             else => .cmov,
   1445             .z_and_np => unreachable,
   1446             .nz_or_p => .pseudo,
   1447         },
   1448         .ops = switch (cc) {
   1449             else => .rm,
   1450             .z_and_np => unreachable,
   1451             .nz_or_p => .pseudo_cmov_nz_or_p_rm,
   1452         },
   1453         .data = .{ .rx = .{
   1454             .fixes = switch (cc) {
   1455                 else => .fromCondition(cc),
   1456                 .z_and_np => unreachable,
   1457                 .nz_or_p => ._,
   1458             },
   1459             .r1 = reg,
   1460             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1461         } },
   1462     }) else {
   1463         const reloc = try self.asmJccReloc(cc.negate(), undefined);
   1464         try self.asmRegisterMemory(.{ ._, .mov }, reg, m);
   1465         self.performReloc(reloc);
   1466     }
   1467 }
   1468 
   1469 fn asmSetccRegister(self: *CodeGen, cc: Condition, reg: Register) !void {
   1470     _ = try self.addInst(.{
   1471         .tag = switch (cc) {
   1472             else => .set,
   1473             .z_and_np, .nz_or_p => .pseudo,
   1474         },
   1475         .ops = switch (cc) {
   1476             else => .r,
   1477             .z_and_np => .pseudo_set_z_and_np_r,
   1478             .nz_or_p => .pseudo_set_nz_or_p_r,
   1479         },
   1480         .data = switch (cc) {
   1481             else => .{ .r = .{
   1482                 .fixes = .fromCondition(cc),
   1483                 .r1 = reg,
   1484             } },
   1485             .z_and_np, .nz_or_p => .{ .rr = .{
   1486                 .r1 = reg,
   1487                 .r2 = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to8(),
   1488             } },
   1489         },
   1490     });
   1491 }
   1492 
   1493 fn asmSetccMemory(self: *CodeGen, cc: Condition, m: Memory) !void {
   1494     const payload = try self.addExtra(Mir.Memory.encode(m));
   1495     _ = try self.addInst(.{
   1496         .tag = switch (cc) {
   1497             else => .set,
   1498             .z_and_np, .nz_or_p => .pseudo,
   1499         },
   1500         .ops = switch (cc) {
   1501             else => .m,
   1502             .z_and_np => .pseudo_set_z_and_np_m,
   1503             .nz_or_p => .pseudo_set_nz_or_p_m,
   1504         },
   1505         .data = switch (cc) {
   1506             else => .{ .x = .{
   1507                 .fixes = .fromCondition(cc),
   1508                 .payload = payload,
   1509             } },
   1510             .z_and_np, .nz_or_p => .{ .rx = .{
   1511                 .r1 = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to8(),
   1512                 .payload = payload,
   1513             } },
   1514         },
   1515     });
   1516 }
   1517 
   1518 fn asmJmpReloc(self: *CodeGen, target: Mir.Inst.Index) !Mir.Inst.Index {
   1519     return self.addInst(.{
   1520         .tag = .jmp,
   1521         .ops = .inst,
   1522         .data = .{ .inst = .{
   1523             .inst = target,
   1524         } },
   1525     });
   1526 }
   1527 
   1528 fn asmJccReloc(self: *CodeGen, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Index {
   1529     return self.addInst(.{
   1530         .tag = switch (cc) {
   1531             else => .j,
   1532             .z_and_np, .nz_or_p => .pseudo,
   1533         },
   1534         .ops = switch (cc) {
   1535             else => .inst,
   1536             .z_and_np => .pseudo_j_z_and_np_inst,
   1537             .nz_or_p => .pseudo_j_nz_or_p_inst,
   1538         },
   1539         .data = .{ .inst = .{
   1540             .fixes = switch (cc) {
   1541                 else => .fromCondition(cc),
   1542                 .z_and_np, .nz_or_p => ._,
   1543             },
   1544             .inst = target,
   1545         } },
   1546     });
   1547 }
   1548 
   1549 fn asmReloc(self: *CodeGen, tag: Mir.Inst.FixedTag, target: Mir.Inst.Index) !void {
   1550     _ = try self.addInst(.{
   1551         .tag = tag[1],
   1552         .ops = .inst,
   1553         .data = .{ .inst = .{
   1554             .fixes = tag[0],
   1555             .inst = target,
   1556         } },
   1557     });
   1558 }
   1559 
   1560 fn asmPlaceholder(self: *CodeGen) !Mir.Inst.Index {
   1561     return self.addInst(.{
   1562         .tag = .pseudo,
   1563         .ops = .pseudo_dead_none,
   1564         .data = undefined,
   1565     });
   1566 }
   1567 
   1568 const MirTagAir = enum { dbg_local };
   1569 
   1570 fn asmAir(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index) !void {
   1571     _ = try self.addInst(.{
   1572         .tag = .pseudo,
   1573         .ops = switch (tag) {
   1574             .dbg_local => .pseudo_dbg_local_a,
   1575         },
   1576         .data = .{ .a = .{ .air_inst = inst } },
   1577     });
   1578 }
   1579 
   1580 fn asmAirImmediate(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, imm: Immediate) !void {
   1581     switch (imm) {
   1582         .signed => |s| _ = try self.addInst(.{
   1583             .tag = .pseudo,
   1584             .ops = switch (tag) {
   1585                 .dbg_local => .pseudo_dbg_local_ai_s,
   1586             },
   1587             .data = .{ .ai = .{
   1588                 .air_inst = inst,
   1589                 .i = @bitCast(s),
   1590             } },
   1591         }),
   1592         .unsigned => |u| _ = if (std.math.cast(u32, u)) |small| try self.addInst(.{
   1593             .tag = .pseudo,
   1594             .ops = switch (tag) {
   1595                 .dbg_local => .pseudo_dbg_local_ai_u,
   1596             },
   1597             .data = .{ .ai = .{
   1598                 .air_inst = inst,
   1599                 .i = small,
   1600             } },
   1601         }) else try self.addInst(.{
   1602             .tag = .pseudo,
   1603             .ops = switch (tag) {
   1604                 .dbg_local => .pseudo_dbg_local_ai_64,
   1605             },
   1606             .data = .{ .ai = .{
   1607                 .air_inst = inst,
   1608                 .i = try self.addExtra(Mir.Imm64.encode(u)),
   1609             } },
   1610         }),
   1611         .reloc => |sym_off| _ = if (sym_off.off == 0) try self.addInst(.{
   1612             .tag = .pseudo,
   1613             .ops = switch (tag) {
   1614                 .dbg_local => .pseudo_dbg_local_as,
   1615             },
   1616             .data = .{ .as = .{
   1617                 .air_inst = inst,
   1618                 .sym_index = sym_off.sym_index,
   1619             } },
   1620         }) else try self.addInst(.{
   1621             .tag = .pseudo,
   1622             .ops = switch (tag) {
   1623                 .dbg_local => .pseudo_dbg_local_aso,
   1624             },
   1625             .data = .{ .ax = .{
   1626                 .air_inst = inst,
   1627                 .payload = try self.addExtra(sym_off),
   1628             } },
   1629         }),
   1630     }
   1631 }
   1632 
   1633 fn asmAirRegisterImmediate(
   1634     self: *CodeGen,
   1635     tag: MirTagAir,
   1636     inst: Air.Inst.Index,
   1637     reg: Register,
   1638     imm: Immediate,
   1639 ) !void {
   1640     _ = try self.addInst(.{
   1641         .tag = .pseudo,
   1642         .ops = switch (tag) {
   1643             .dbg_local => .pseudo_dbg_local_aro,
   1644         },
   1645         .data = .{ .rx = .{
   1646             .r1 = reg,
   1647             .payload = try self.addExtra(Mir.AirOffset{
   1648                 .air_inst = inst,
   1649                 .off = imm.signed,
   1650             }),
   1651         } },
   1652     });
   1653 }
   1654 
   1655 fn asmAirFrameAddress(
   1656     self: *CodeGen,
   1657     tag: MirTagAir,
   1658     inst: Air.Inst.Index,
   1659     frame_addr: bits.FrameAddr,
   1660 ) !void {
   1661     _ = try self.addInst(.{
   1662         .tag = .pseudo,
   1663         .ops = switch (tag) {
   1664             .dbg_local => .pseudo_dbg_local_af,
   1665         },
   1666         .data = .{ .ax = .{
   1667             .air_inst = inst,
   1668             .payload = try self.addExtra(frame_addr),
   1669         } },
   1670     });
   1671 }
   1672 
   1673 fn asmAirMemory(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, m: Memory) !void {
   1674     _ = try self.addInst(.{
   1675         .tag = .pseudo,
   1676         .ops = switch (tag) {
   1677             .dbg_local => .pseudo_dbg_local_am,
   1678         },
   1679         .data = .{ .ax = .{
   1680             .air_inst = inst,
   1681             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1682         } },
   1683     });
   1684 }
   1685 
   1686 fn asmOpOnly(self: *CodeGen, tag: Mir.Inst.FixedTag) !void {
   1687     _ = try self.addInst(.{
   1688         .tag = tag[1],
   1689         .ops = .none,
   1690         .data = .{ .none = .{
   1691             .fixes = tag[0],
   1692         } },
   1693     });
   1694 }
   1695 
   1696 fn asmPseudo(self: *CodeGen, ops: Mir.Inst.Ops) !void {
   1697     assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and
   1698         std.mem.endsWith(u8, @tagName(ops), "_none"));
   1699     _ = try self.addInst(.{
   1700         .tag = .pseudo,
   1701         .ops = ops,
   1702         .data = undefined,
   1703     });
   1704 }
   1705 
   1706 fn asmPseudoRegister(self: *CodeGen, ops: Mir.Inst.Ops, reg: Register) !void {
   1707     assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and
   1708         std.mem.endsWith(u8, @tagName(ops), "_r"));
   1709     _ = try self.addInst(.{
   1710         .tag = .pseudo,
   1711         .ops = ops,
   1712         .data = .{ .r = .{ .r1 = reg } },
   1713     });
   1714 }
   1715 
   1716 fn asmPseudoImmediate(self: *CodeGen, ops: Mir.Inst.Ops, imm: Immediate) !void {
   1717     assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and
   1718         std.mem.endsWith(u8, @tagName(ops), "_i_s"));
   1719     _ = try self.addInst(.{
   1720         .tag = .pseudo,
   1721         .ops = ops,
   1722         .data = .{ .i = .{ .i = @bitCast(imm.signed) } },
   1723     });
   1724 }
   1725 
   1726 fn asmPseudoRegisterRegister(self: *CodeGen, ops: Mir.Inst.Ops, reg1: Register, reg2: Register) !void {
   1727     assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and
   1728         std.mem.endsWith(u8, @tagName(ops), "_rr"));
   1729     _ = try self.addInst(.{
   1730         .tag = .pseudo,
   1731         .ops = ops,
   1732         .data = .{ .rr = .{ .r1 = reg1, .r2 = reg2 } },
   1733     });
   1734 }
   1735 
   1736 fn asmPseudoRegisterImmediate(self: *CodeGen, ops: Mir.Inst.Ops, reg: Register, imm: Immediate) !void {
   1737     assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and
   1738         std.mem.endsWith(u8, @tagName(ops), "_ri_s"));
   1739     _ = try self.addInst(.{
   1740         .tag = .pseudo,
   1741         .ops = ops,
   1742         .data = .{ .ri = .{ .r1 = reg, .i = @bitCast(imm.signed) } },
   1743     });
   1744 }
   1745 
   1746 fn asmRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register) !void {
   1747     _ = try self.addInst(.{
   1748         .tag = tag[1],
   1749         .ops = .r,
   1750         .data = .{ .r = .{
   1751             .fixes = tag[0],
   1752             .r1 = reg,
   1753         } },
   1754     });
   1755 }
   1756 
   1757 fn asmImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate) !void {
   1758     _ = try self.addInst(.{
   1759         .tag = tag[1],
   1760         .ops = switch (imm) {
   1761             .signed => .i_s,
   1762             .unsigned => .i_u,
   1763             .reloc => .rel,
   1764         },
   1765         .data = switch (imm) {
   1766             .reloc => |sym_off| reloc: {
   1767                 assert(tag[0] == ._);
   1768                 break :reloc .{ .reloc = sym_off };
   1769             },
   1770             .signed, .unsigned => .{ .i = .{
   1771                 .fixes = tag[0],
   1772                 .i = switch (imm) {
   1773                     .signed => |s| @bitCast(s),
   1774                     .unsigned => |u| @intCast(u),
   1775                     .reloc => unreachable,
   1776                 },
   1777             } },
   1778         },
   1779     });
   1780 }
   1781 
   1782 fn asmRegisterRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void {
   1783     _ = try self.addInst(.{
   1784         .tag = tag[1],
   1785         .ops = .rr,
   1786         .data = .{ .rr = .{
   1787             .fixes = tag[0],
   1788             .r1 = reg1,
   1789             .r2 = reg2,
   1790         } },
   1791     });
   1792 }
   1793 
   1794 fn asmRegisterImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void {
   1795     const ops: Mir.Inst.Ops, const i: u32 = switch (imm) {
   1796         .signed => |s| .{ .ri_s, @bitCast(s) },
   1797         .unsigned => |u| if (std.math.cast(u32, u)) |small|
   1798             .{ .ri_u, small }
   1799         else
   1800             .{ .ri_64, try self.addExtra(Mir.Imm64.encode(imm.unsigned)) },
   1801         .reloc => unreachable,
   1802     };
   1803     _ = try self.addInst(.{
   1804         .tag = tag[1],
   1805         .ops = ops,
   1806         .data = .{ .ri = .{
   1807             .fixes = tag[0],
   1808             .r1 = reg,
   1809             .i = i,
   1810         } },
   1811     });
   1812 }
   1813 
   1814 fn asmRegisterRegisterRegister(
   1815     self: *CodeGen,
   1816     tag: Mir.Inst.FixedTag,
   1817     reg1: Register,
   1818     reg2: Register,
   1819     reg3: Register,
   1820 ) !void {
   1821     _ = try self.addInst(.{
   1822         .tag = tag[1],
   1823         .ops = .rrr,
   1824         .data = .{ .rrr = .{
   1825             .fixes = tag[0],
   1826             .r1 = reg1,
   1827             .r2 = reg2,
   1828             .r3 = reg3,
   1829         } },
   1830     });
   1831 }
   1832 
   1833 fn asmRegisterRegisterRegisterRegister(
   1834     self: *CodeGen,
   1835     tag: Mir.Inst.FixedTag,
   1836     reg1: Register,
   1837     reg2: Register,
   1838     reg3: Register,
   1839     reg4: Register,
   1840 ) !void {
   1841     _ = try self.addInst(.{
   1842         .tag = tag[1],
   1843         .ops = .rrrr,
   1844         .data = .{ .rrrr = .{
   1845             .fixes = tag[0],
   1846             .r1 = reg1,
   1847             .r2 = reg2,
   1848             .r3 = reg3,
   1849             .r4 = reg4,
   1850         } },
   1851     });
   1852 }
   1853 
   1854 fn asmRegisterRegisterRegisterImmediate(
   1855     self: *CodeGen,
   1856     tag: Mir.Inst.FixedTag,
   1857     reg1: Register,
   1858     reg2: Register,
   1859     reg3: Register,
   1860     imm: Immediate,
   1861 ) !void {
   1862     _ = try self.addInst(.{
   1863         .tag = tag[1],
   1864         .ops = .rrri,
   1865         .data = .{ .rrri = .{
   1866             .fixes = tag[0],
   1867             .r1 = reg1,
   1868             .r2 = reg2,
   1869             .r3 = reg3,
   1870             .i = switch (imm) {
   1871                 .signed => |s| @bitCast(@as(i8, @intCast(s))),
   1872                 .unsigned => |u| @intCast(u),
   1873                 .reloc => unreachable,
   1874             },
   1875         } },
   1876     });
   1877 }
   1878 
   1879 fn asmRegisterRegisterImmediate(
   1880     self: *CodeGen,
   1881     tag: Mir.Inst.FixedTag,
   1882     reg1: Register,
   1883     reg2: Register,
   1884     imm: Immediate,
   1885 ) !void {
   1886     _ = try self.addInst(.{
   1887         .tag = tag[1],
   1888         .ops = switch (imm) {
   1889             .signed => .rri_s,
   1890             .unsigned => .rri_u,
   1891             .reloc => unreachable,
   1892         },
   1893         .data = .{ .rri = .{
   1894             .fixes = tag[0],
   1895             .r1 = reg1,
   1896             .r2 = reg2,
   1897             .i = switch (imm) {
   1898                 .signed => |s| @bitCast(s),
   1899                 .unsigned => |u| @intCast(u),
   1900                 .reloc => unreachable,
   1901             },
   1902         } },
   1903     });
   1904 }
   1905 
   1906 fn asmRegisterRegisterMemory(
   1907     self: *CodeGen,
   1908     tag: Mir.Inst.FixedTag,
   1909     reg1: Register,
   1910     reg2: Register,
   1911     m: Memory,
   1912 ) !void {
   1913     _ = try self.addInst(.{
   1914         .tag = tag[1],
   1915         .ops = .rrm,
   1916         .data = .{ .rrx = .{
   1917             .fixes = tag[0],
   1918             .r1 = reg1,
   1919             .r2 = reg2,
   1920             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1921         } },
   1922     });
   1923 }
   1924 
   1925 fn asmRegisterRegisterMemoryRegister(
   1926     self: *CodeGen,
   1927     tag: Mir.Inst.FixedTag,
   1928     reg1: Register,
   1929     reg2: Register,
   1930     m: Memory,
   1931     reg3: Register,
   1932 ) !void {
   1933     _ = try self.addInst(.{
   1934         .tag = tag[1],
   1935         .ops = .rrmr,
   1936         .data = .{ .rrrx = .{
   1937             .fixes = tag[0],
   1938             .r1 = reg1,
   1939             .r2 = reg2,
   1940             .r3 = reg3,
   1941             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1942         } },
   1943     });
   1944 }
   1945 
   1946 fn asmMemory(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory) !void {
   1947     _ = try self.addInst(.{
   1948         .tag = tag[1],
   1949         .ops = .m,
   1950         .data = .{ .x = .{
   1951             .fixes = tag[0],
   1952             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1953         } },
   1954     });
   1955 }
   1956 
   1957 fn asmRegisterMemory(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void {
   1958     _ = try self.addInst(.{
   1959         .tag = tag[1],
   1960         .ops = .rm,
   1961         .data = .{ .rx = .{
   1962             .fixes = tag[0],
   1963             .r1 = reg,
   1964             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1965         } },
   1966     });
   1967 }
   1968 
   1969 fn asmRegisterMemoryRegister(
   1970     self: *CodeGen,
   1971     tag: Mir.Inst.FixedTag,
   1972     reg1: Register,
   1973     m: Memory,
   1974     reg2: Register,
   1975 ) !void {
   1976     _ = try self.addInst(.{
   1977         .tag = tag[1],
   1978         .ops = .rmr,
   1979         .data = .{ .rrx = .{
   1980             .fixes = tag[0],
   1981             .r1 = reg1,
   1982             .r2 = reg2,
   1983             .payload = try self.addExtra(Mir.Memory.encode(m)),
   1984         } },
   1985     });
   1986 }
   1987 
   1988 fn asmRegisterMemoryImmediate(
   1989     self: *CodeGen,
   1990     tag: Mir.Inst.FixedTag,
   1991     reg: Register,
   1992     m: Memory,
   1993     imm: Immediate,
   1994 ) !void {
   1995     if (switch (imm) {
   1996         .signed => |s| if (std.math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null,
   1997         .unsigned => |u| std.math.cast(u16, u),
   1998         .reloc => unreachable,
   1999     }) |small_imm| {
   2000         _ = try self.addInst(.{
   2001             .tag = tag[1],
   2002             .ops = .rmi,
   2003             .data = .{ .rix = .{
   2004                 .fixes = tag[0],
   2005                 .r1 = reg,
   2006                 .i = small_imm,
   2007                 .payload = try self.addExtra(Mir.Memory.encode(m)),
   2008             } },
   2009         });
   2010     } else {
   2011         const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
   2012             .signed => |s| @bitCast(s),
   2013             .unsigned => unreachable,
   2014             .reloc => unreachable,
   2015         } });
   2016         assert(payload + 1 == try self.addExtra(Mir.Memory.encode(m)));
   2017         _ = try self.addInst(.{
   2018             .tag = tag[1],
   2019             .ops = switch (imm) {
   2020                 .signed => .rmi_s,
   2021                 .unsigned => .rmi_u,
   2022                 .reloc => unreachable,
   2023             },
   2024             .data = .{ .rx = .{
   2025                 .fixes = tag[0],
   2026                 .r1 = reg,
   2027                 .payload = payload,
   2028             } },
   2029         });
   2030     }
   2031 }
   2032 
   2033 fn asmRegisterRegisterMemoryImmediate(
   2034     self: *CodeGen,
   2035     tag: Mir.Inst.FixedTag,
   2036     reg1: Register,
   2037     reg2: Register,
   2038     m: Memory,
   2039     imm: Immediate,
   2040 ) !void {
   2041     _ = try self.addInst(.{
   2042         .tag = tag[1],
   2043         .ops = .rrmi,
   2044         .data = .{ .rrix = .{
   2045             .fixes = tag[0],
   2046             .r1 = reg1,
   2047             .r2 = reg2,
   2048             .i = @intCast(imm.unsigned),
   2049             .payload = try self.addExtra(Mir.Memory.encode(m)),
   2050         } },
   2051     });
   2052 }
   2053 
   2054 fn asmMemoryRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void {
   2055     _ = try self.addInst(.{
   2056         .tag = tag[1],
   2057         .ops = .mr,
   2058         .data = .{ .rx = .{
   2059             .fixes = tag[0],
   2060             .r1 = reg,
   2061             .payload = try self.addExtra(Mir.Memory.encode(m)),
   2062         } },
   2063     });
   2064 }
   2065 
   2066 fn asmMemoryImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void {
   2067     const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
   2068         .signed => |s| @bitCast(s),
   2069         .unsigned => |u| @intCast(u),
   2070         .reloc => unreachable,
   2071     } });
   2072     assert(payload + 1 == try self.addExtra(Mir.Memory.encode(m)));
   2073     _ = try self.addInst(.{
   2074         .tag = tag[1],
   2075         .ops = switch (imm) {
   2076             .signed => .mi_s,
   2077             .unsigned => .mi_u,
   2078             .reloc => unreachable,
   2079         },
   2080         .data = .{ .x = .{
   2081             .fixes = tag[0],
   2082             .payload = payload,
   2083         } },
   2084     });
   2085 }
   2086 
   2087 fn asmMemoryRegisterRegister(
   2088     self: *CodeGen,
   2089     tag: Mir.Inst.FixedTag,
   2090     m: Memory,
   2091     reg1: Register,
   2092     reg2: Register,
   2093 ) !void {
   2094     _ = try self.addInst(.{
   2095         .tag = tag[1],
   2096         .ops = .mrr,
   2097         .data = .{ .rrx = .{
   2098             .fixes = tag[0],
   2099             .r1 = reg1,
   2100             .r2 = reg2,
   2101             .payload = try self.addExtra(Mir.Memory.encode(m)),
   2102         } },
   2103     });
   2104 }
   2105 
   2106 fn asmMemoryRegisterImmediate(
   2107     self: *CodeGen,
   2108     tag: Mir.Inst.FixedTag,
   2109     m: Memory,
   2110     reg: Register,
   2111     imm: Immediate,
   2112 ) !void {
   2113     _ = try self.addInst(.{
   2114         .tag = tag[1],
   2115         .ops = .mri,
   2116         .data = .{ .rix = .{
   2117             .fixes = tag[0],
   2118             .r1 = reg,
   2119             .i = @intCast(imm.unsigned),
   2120             .payload = try self.addExtra(Mir.Memory.encode(m)),
   2121         } },
   2122     });
   2123 }
   2124 
   2125 fn gen(self: *CodeGen) InnerError!void {
   2126     const pt = self.pt;
   2127     const zcu = pt.zcu;
   2128     const fn_info = zcu.typeToFunc(self.fn_type).?;
   2129     if (fn_info.cc != .naked) {
   2130         try self.asmRegister(.{ ._, .push }, .rbp);
   2131         try self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, .s(8));
   2132         try self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, .rbp, .s(0));
   2133         try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
   2134         try self.asmPseudoRegister(.pseudo_cfi_def_cfa_register_r, .rbp);
   2135         const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
   2136         const backpatch_frame_align = try self.asmPlaceholder();
   2137         const backpatch_frame_align_extra = try self.asmPlaceholder();
   2138         const backpatch_stack_alloc = try self.asmPlaceholder();
   2139         const backpatch_stack_alloc_extra = try self.asmPlaceholder();
   2140 
   2141         switch (self.ret_mcv.long) {
   2142             .none, .unreach => {},
   2143             .indirect => {
   2144                 // The address where to store the return value for the caller is in a
   2145                 // register which the callee is free to clobber. Therefore, we purposely
   2146                 // spill it to stack immediately.
   2147                 const frame_index = try self.allocFrameIndex(.initSpill(.usize, zcu));
   2148                 try self.genSetMem(
   2149                     .{ .frame = frame_index },
   2150                     0,
   2151                     .usize,
   2152                     self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off),
   2153                     .{},
   2154                 );
   2155                 self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } };
   2156                 tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index });
   2157             },
   2158             else => unreachable,
   2159         }
   2160 
   2161         if (fn_info.is_var_args) switch (fn_info.cc) {
   2162             .x86_64_sysv => {
   2163                 const info = &self.va_info.sysv;
   2164                 const reg_save_area_fi = try self.allocFrameIndex(.init(.{
   2165                     .size = abi.SysV.c_abi_int_param_regs.len * 8 +
   2166                         abi.SysV.c_abi_sse_param_regs.len * 16,
   2167                     .alignment = .@"16",
   2168                 }));
   2169                 info.reg_save_area = .{ .index = reg_save_area_fi };
   2170 
   2171                 for (abi.SysV.c_abi_int_param_regs[info.gp_count..], info.gp_count..) |reg, reg_i|
   2172                     try self.genSetMem(.{ .frame = reg_save_area_fi }, @intCast(reg_i * 8), .usize, .{ .register = reg }, .{});
   2173 
   2174                 try self.asmRegisterImmediate(.{ ._, .cmp }, .al, .u(info.fp_count));
   2175                 const skip_sse_reloc = try self.asmJccReloc(.na, undefined);
   2176 
   2177                 const vec_2_f64 = try pt.vectorType(.{ .len = 2, .child = .f64_type });
   2178                 for (abi.SysV.c_abi_sse_param_regs[info.fp_count..], info.fp_count..) |reg, reg_i|
   2179                     try self.genSetMem(
   2180                         .{ .frame = reg_save_area_fi },
   2181                         @intCast(abi.SysV.c_abi_int_param_regs.len * 8 + reg_i * 16),
   2182                         vec_2_f64,
   2183                         .{ .register = reg },
   2184                         .{},
   2185                     );
   2186 
   2187                 self.performReloc(skip_sse_reloc);
   2188             },
   2189             .x86_64_win => return self.fail("TODO implement gen var arg function for Win64", .{}),
   2190             else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}),
   2191         };
   2192 
   2193         try self.asmPseudo(.pseudo_dbg_prologue_end_none);
   2194 
   2195         try self.genBody(self.air.getMainBody());
   2196 
   2197         const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: {
   2198             const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1;
   2199             for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: {
   2200                 _ = self.mir_instructions.pop();
   2201                 break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index];
   2202             } else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc);
   2203 
   2204             try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
   2205             const backpatch_stack_dealloc = try self.asmPlaceholder();
   2206             const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
   2207             try self.asmRegister(.{ ._, .pop }, .rbp);
   2208             try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8));
   2209             try self.asmOpOnly(.{ ._, .ret });
   2210             break :epilogue .{
   2211                 .backpatch_stack_dealloc = backpatch_stack_dealloc,
   2212                 .backpatch_pop_callee_preserved_regs = backpatch_pop_callee_preserved_regs,
   2213             };
   2214         } else null;
   2215 
   2216         const frame_layout = try self.computeFrameLayout(fn_info.cc);
   2217         const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32);
   2218         const need_stack_adjust = frame_layout.stack_adjust > 0;
   2219         const need_save_reg = frame_layout.save_reg_list.count() > 0;
   2220         if (need_frame_align) {
   2221             const page_align = @as(u32, std.math.maxInt(u32)) << 12;
   2222             self.mir_instructions.set(backpatch_frame_align, .{
   2223                 .tag = .@"and",
   2224                 .ops = .ri_s,
   2225                 .data = .{ .ri = .{
   2226                     .r1 = .rsp,
   2227                     .i = @max(frame_layout.stack_mask, page_align),
   2228                 } },
   2229             });
   2230             if (frame_layout.stack_mask < page_align) {
   2231                 self.mir_instructions.set(backpatch_frame_align_extra, .{
   2232                     .tag = .pseudo,
   2233                     .ops = .pseudo_probe_align_ri_s,
   2234                     .data = .{ .ri = .{
   2235                         .r1 = .rsp,
   2236                         .i = ~frame_layout.stack_mask & page_align,
   2237                     } },
   2238                 });
   2239             }
   2240         }
   2241         if (need_stack_adjust) {
   2242             const page_size: u32 = 1 << 12;
   2243             if (frame_layout.stack_adjust <= page_size) {
   2244                 self.mir_instructions.set(backpatch_stack_alloc, .{
   2245                     .tag = .sub,
   2246                     .ops = .ri_s,
   2247                     .data = .{ .ri = .{
   2248                         .r1 = .rsp,
   2249                         .i = frame_layout.stack_adjust,
   2250                     } },
   2251                 });
   2252             } else if (frame_layout.stack_adjust <
   2253                 page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
   2254             {
   2255                 self.mir_instructions.set(backpatch_stack_alloc, .{
   2256                     .tag = .pseudo,
   2257                     .ops = .pseudo_probe_adjust_unrolled_ri_s,
   2258                     .data = .{ .ri = .{
   2259                         .r1 = .rsp,
   2260                         .i = frame_layout.stack_adjust,
   2261                     } },
   2262                 });
   2263             } else {
   2264                 const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc);
   2265                 self.mir_instructions.set(backpatch_stack_alloc, .{
   2266                     .tag = .pseudo,
   2267                     .ops = .pseudo_probe_adjust_setup_rri_s,
   2268                     .data = .{ .rri = .{
   2269                         .r1 = .rsp,
   2270                         .r2 = scratch_reg,
   2271                         .i = frame_layout.stack_adjust,
   2272                     } },
   2273                 });
   2274                 self.mir_instructions.set(backpatch_stack_alloc_extra, .{
   2275                     .tag = .pseudo,
   2276                     .ops = .pseudo_probe_adjust_loop_rr,
   2277                     .data = .{ .rr = .{
   2278                         .r1 = .rsp,
   2279                         .r2 = scratch_reg,
   2280                     } },
   2281                 });
   2282             }
   2283         }
   2284         if (epilogue) |e| if (need_frame_align or need_stack_adjust) {
   2285             self.mir_instructions.set(e.backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) {
   2286                 0 => .{
   2287                     .tag = .mov,
   2288                     .ops = .rr,
   2289                     .data = .{ .rr = .{
   2290                         .r1 = .rsp,
   2291                         .r2 = .rbp,
   2292                     } },
   2293                 },
   2294                 else => |disp| .{
   2295                     .tag = .lea,
   2296                     .ops = .rm,
   2297                     .data = .{ .rx = .{
   2298                         .r1 = .rsp,
   2299                         .payload = try self.addExtra(Mir.Memory.encode(.{
   2300                             .base = .{ .reg = .rbp },
   2301                             .mod = .{ .rm = .{
   2302                                 .size = .qword,
   2303                                 .disp = disp,
   2304                             } },
   2305                         })),
   2306                     } },
   2307                 },
   2308             });
   2309         };
   2310         if (need_save_reg) {
   2311             self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
   2312                 .tag = .pseudo,
   2313                 .ops = .pseudo_push_reg_list,
   2314                 .data = .{ .reg_list = frame_layout.save_reg_list },
   2315             });
   2316             if (epilogue) |e| self.mir_instructions.set(e.backpatch_pop_callee_preserved_regs, .{
   2317                 .tag = .pseudo,
   2318                 .ops = .pseudo_pop_reg_list,
   2319                 .data = .{ .reg_list = frame_layout.save_reg_list },
   2320             });
   2321         }
   2322     } else {
   2323         try self.asmPseudo(.pseudo_dbg_prologue_end_none);
   2324         try self.genBody(self.air.getMainBody());
   2325         try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
   2326     }
   2327 
   2328     // Drop them off at the rbrace.
   2329     _ = try self.addInst(.{
   2330         .tag = .pseudo,
   2331         .ops = .pseudo_dbg_line_stmt_line_column,
   2332         .data = .{ .line_column = .{
   2333             .line = self.end_di_line,
   2334             .column = self.end_di_column,
   2335         } },
   2336     });
   2337 }
   2338 
   2339 fn checkInvariantsAfterAirInst(self: *CodeGen) void {
   2340     assert(!self.register_manager.lockedRegsExist());
   2341 
   2342     if (std.debug.runtime_safety) {
   2343         // check consistency of tracked registers
   2344         var it = self.register_manager.free_registers.iterator(.{ .kind = .unset });
   2345         while (it.next()) |index| {
   2346             const tracked_inst = self.register_manager.registers[index];
   2347             const tracking = self.getResolvedInstValue(tracked_inst);
   2348             for (tracking.getRegs()) |reg| {
   2349                 if (RegisterManager.indexOfRegIntoTracked(reg).? == index) break;
   2350             } else unreachable; // tracked register not in use
   2351         }
   2352     }
   2353 }
   2354 
   2355 fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
   2356     try self.asmPseudo(.pseudo_dbg_enter_block_none);
   2357     try self.genBody(body);
   2358     try self.asmPseudo(.pseudo_dbg_leave_block_none);
   2359 }
   2360 
   2361 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
   2362     @setEvalBranchQuota(1_600);
   2363     const pt = cg.pt;
   2364     const zcu = pt.zcu;
   2365     const ip = &zcu.intern_pool;
   2366     const air_tags = cg.air.instructions.items(.tag);
   2367     const air_datas = cg.air.instructions.items(.data);
   2368     const use_old = cg.target.ofmt == .coff;
   2369 
   2370     cg.arg_index = 0;
   2371     for (body) |inst| switch (air_tags[@intFromEnum(inst)]) {
   2372         .arg => {
   2373             wip_mir_log.debug("{}", .{cg.fmtAir(inst)});
   2374             verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
   2375 
   2376             cg.reused_operands = .initEmpty();
   2377             try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
   2378 
   2379             try cg.airArg(inst);
   2380 
   2381             cg.resetTemps();
   2382             cg.checkInvariantsAfterAirInst();
   2383         },
   2384         else => break,
   2385     };
   2386 
   2387     if (cg.arg_index == 0) try cg.airDbgVarArgs();
   2388     cg.arg_index = 0;
   2389     for (body) |inst| {
   2390         if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) continue;
   2391         wip_mir_log.debug("{}", .{cg.fmtAir(inst)});
   2392         verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
   2393 
   2394         cg.reused_operands = .initEmpty();
   2395         try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
   2396         switch (air_tags[@intFromEnum(inst)]) {
   2397             // zig fmt: off
   2398             .add,
   2399             .add_wrap,
   2400             .sub,
   2401             .sub_wrap,
   2402             .min,
   2403             .max,
   2404             => |air_tag| try cg.airBinOp(inst, air_tag),
   2405 
   2406             .shr, .shr_exact => try cg.airShlShrBinOp(inst),
   2407             .shl, .shl_exact => try cg.airShlShrBinOp(inst),
   2408 
   2409             .mul             => try cg.airMulDivBinOp(inst),
   2410             .mul_wrap        => try cg.airMulDivBinOp(inst),
   2411             .rem             => try cg.airMulDivBinOp(inst),
   2412             .mod             => try cg.airMulDivBinOp(inst),
   2413 
   2414             .add_sat         => try cg.airAddSat(inst),
   2415             .sub_sat         => try cg.airSubSat(inst),
   2416             .mul_sat         => try cg.airMulSat(inst),
   2417             .shl_sat         => try cg.airShlSat(inst),
   2418 
   2419             .sin,
   2420             .cos,
   2421             .tan,
   2422             .exp,
   2423             .exp2,
   2424             .log,
   2425             .log2,
   2426             .log10,
   2427             .round,
   2428             => |air_tag| try cg.airUnaryMath(inst, air_tag),
   2429 
   2430             .floor       => try cg.airRound(inst, .{ .mode = .down, .precision = .inexact }),
   2431             .ceil        => try cg.airRound(inst, .{ .mode = .up, .precision = .inexact }),
   2432             .trunc_float => try cg.airRound(inst, .{ .mode = .zero, .precision = .inexact }),
   2433             .sqrt        => try cg.airSqrt(inst),
   2434             .neg         => try cg.airFloatSign(inst),
   2435 
   2436             .abs => try cg.airAbs(inst),
   2437 
   2438             .add_with_overflow => try cg.airAddSubWithOverflow(inst),
   2439             .sub_with_overflow => try cg.airAddSubWithOverflow(inst),
   2440             .mul_with_overflow => try cg.airMulWithOverflow(inst),
   2441             .shl_with_overflow => try cg.airShlWithOverflow(inst),
   2442 
   2443             .div_float, .div_trunc, .div_floor, .div_exact => try cg.airMulDivBinOp(inst),
   2444 
   2445             .cmp_lt_errors_len => try cg.airCmpLtErrorsLen(inst),
   2446 
   2447             .bitcast          => try cg.airBitCast(inst),
   2448             .fptrunc          => try cg.airFptrunc(inst),
   2449             .fpext            => try cg.airFpext(inst),
   2450             .intcast          => try cg.airIntCast(inst),
   2451             .trunc            => try cg.airTrunc(inst),
   2452             .is_non_null      => try cg.airIsNonNull(inst),
   2453             .is_null          => try cg.airIsNull(inst),
   2454             .is_non_err       => try cg.airIsNonErr(inst),
   2455             .is_err           => try cg.airIsErr(inst),
   2456             .float_from_int   => try cg.airFloatFromInt(inst),
   2457             .int_from_float   => try cg.airIntFromFloat(inst),
   2458             .cmpxchg_strong   => try cg.airCmpxchg(inst),
   2459             .cmpxchg_weak     => try cg.airCmpxchg(inst),
   2460             .atomic_rmw       => try cg.airAtomicRmw(inst),
   2461             .atomic_load      => try cg.airAtomicLoad(inst),
   2462             .memcpy           => try cg.airMemcpy(inst),
   2463             .memset           => try cg.airMemset(inst, false),
   2464             .memset_safe      => try cg.airMemset(inst, true),
   2465             .ctz              => try cg.airCtz(inst),
   2466             .popcount         => try cg.airPopCount(inst),
   2467             .byte_swap        => try cg.airByteSwap(inst),
   2468             .bit_reverse      => try cg.airBitReverse(inst),
   2469             .tag_name         => try cg.airTagName(inst),
   2470             .error_name       => try cg.airErrorName(inst),
   2471             .splat            => try cg.airSplat(inst),
   2472             .select           => try cg.airSelect(inst),
   2473             .shuffle          => try cg.airShuffle(inst),
   2474             .reduce           => try cg.airReduce(inst),
   2475             .aggregate_init   => try cg.airAggregateInit(inst),
   2476             .prefetch         => try cg.airPrefetch(inst),
   2477             .mul_add          => try cg.airMulAdd(inst),
   2478 
   2479             .atomic_store_unordered => try cg.airAtomicStore(inst, .unordered),
   2480             .atomic_store_monotonic => try cg.airAtomicStore(inst, .monotonic),
   2481             .atomic_store_release   => try cg.airAtomicStore(inst, .release),
   2482             .atomic_store_seq_cst   => try cg.airAtomicStore(inst, .seq_cst),
   2483 
   2484             .array_elem_val      => try cg.airArrayElemVal(inst),
   2485 
   2486             .optional_payload           => try cg.airOptionalPayload(inst),
   2487             .unwrap_errunion_err        => try cg.airUnwrapErrUnionErr(inst),
   2488             .unwrap_errunion_payload    => try cg.airUnwrapErrUnionPayload(inst),
   2489             .err_return_trace           => try cg.airErrReturnTrace(inst),
   2490             .set_err_return_trace       => try cg.airSetErrReturnTrace(inst),
   2491             .save_err_return_trace_index=> try cg.airSaveErrReturnTraceIndex(inst),
   2492 
   2493             .wrap_optional         => try cg.airWrapOptional(inst),
   2494             .wrap_errunion_payload => try cg.airWrapErrUnionPayload(inst),
   2495             .wrap_errunion_err     => try cg.airWrapErrUnionErr(inst),
   2496             // zig fmt: on
   2497 
   2498             .add_safe,
   2499             .sub_safe,
   2500             .mul_safe,
   2501             => return cg.fail("TODO implement safety_checked_instructions", .{}),
   2502             .add_optimized,
   2503             .sub_optimized,
   2504             .mul_optimized,
   2505             .div_float_optimized,
   2506             .div_trunc_optimized,
   2507             .div_floor_optimized,
   2508             .div_exact_optimized,
   2509             .rem_optimized,
   2510             .mod_optimized,
   2511             .neg_optimized,
   2512             .reduce_optimized,
   2513             .int_from_float_optimized,
   2514             => return cg.fail("TODO implement optimized float mode", .{}),
   2515 
   2516             .arg => try cg.airDbgArg(inst),
   2517             .ptr_add => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else {
   2518                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   2519                 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
   2520                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   2521                 try ops[0].toSlicePtr(cg);
   2522                 var res: [1]Temp = undefined;
   2523                 cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
   2524                     .patterns = &.{
   2525                         .{ .src = .{ .to_gpr, .simm32 } },
   2526                     },
   2527                     .dst_temps = .{.{ .rc = .general_purpose }},
   2528                     .each = .{ .once = &.{
   2529                         .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .add_src0_elem_size_times_src1), ._, ._ },
   2530                     } },
   2531                 }, .{
   2532                     .dst_constraints = .{.{ .elem_size_is = 1 }},
   2533                     .patterns = &.{
   2534                         .{ .src = .{ .to_gpr, .to_gpr } },
   2535                     },
   2536                     .dst_temps = .{.{ .rc = .general_purpose }},
   2537                     .each = .{ .once = &.{
   2538                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
   2539                     } },
   2540                 }, .{
   2541                     .dst_constraints = .{.{ .elem_size_is = 2 }},
   2542                     .patterns = &.{
   2543                         .{ .src = .{ .to_gpr, .to_gpr } },
   2544                     },
   2545                     .dst_temps = .{.{ .rc = .general_purpose }},
   2546                     .each = .{ .once = &.{
   2547                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ },
   2548                     } },
   2549                 }, .{
   2550                     .dst_constraints = .{.{ .elem_size_is = 2 + 1 }},
   2551                     .patterns = &.{
   2552                         .{ .src = .{ .to_gpr, .to_gpr } },
   2553                     },
   2554                     .dst_temps = .{.{ .rc = .general_purpose }},
   2555                     .each = .{ .once = &.{
   2556                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ },
   2557                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2558                     } },
   2559                 }, .{
   2560                     .dst_constraints = .{.{ .elem_size_is = 4 }},
   2561                     .patterns = &.{
   2562                         .{ .src = .{ .to_gpr, .to_gpr } },
   2563                     },
   2564                     .dst_temps = .{.{ .rc = .general_purpose }},
   2565                     .each = .{ .once = &.{
   2566                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ },
   2567                     } },
   2568                 }, .{
   2569                     .dst_constraints = .{.{ .elem_size_is = 4 + 1 }},
   2570                     .patterns = &.{
   2571                         .{ .src = .{ .to_gpr, .to_gpr } },
   2572                     },
   2573                     .dst_temps = .{.{ .ref = .src1 }},
   2574                     .each = .{ .once = &.{
   2575                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ },
   2576                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2577                     } },
   2578                 }, .{
   2579                     .required_features = .{ .@"64bit", null, null, null },
   2580                     .dst_constraints = .{.{ .elem_size_is = 8 }},
   2581                     .patterns = &.{
   2582                         .{ .src = .{ .to_gpr, .to_gpr } },
   2583                     },
   2584                     .dst_temps = .{.{ .rc = .general_purpose }},
   2585                     .each = .{ .once = &.{
   2586                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ },
   2587                     } },
   2588                 }, .{
   2589                     .required_features = .{ .@"64bit", null, null, null },
   2590                     .dst_constraints = .{.{ .elem_size_is = 8 + 1 }},
   2591                     .patterns = &.{
   2592                         .{ .src = .{ .to_gpr, .to_gpr } },
   2593                     },
   2594                     .dst_temps = .{.{ .ref = .src1 }},
   2595                     .each = .{ .once = &.{
   2596                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ },
   2597                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2598                     } },
   2599                 }, .{
   2600                     .dst_constraints = .{.po2_elem_size},
   2601                     .patterns = &.{
   2602                         .{ .src = .{ .to_gpr, .to_mut_gpr } },
   2603                     },
   2604                     .dst_temps = .{.{ .ref = .src1 }},
   2605                     .clobbers = .{ .eflags = true },
   2606                     .each = .{ .once = &.{
   2607                         .{ ._, ._l, .sh, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ },
   2608                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
   2609                     } },
   2610                 }, .{
   2611                     .patterns = &.{
   2612                         .{ .src = .{ .to_gpr, .to_gpr } },
   2613                     },
   2614                     .dst_temps = .{.{ .rc = .general_purpose }},
   2615                     .clobbers = .{ .eflags = true },
   2616                     .each = .{ .once = &.{
   2617                         .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .add_src0_elem_size), ._ },
   2618                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2619                     } },
   2620                 } }) catch |err| switch (err) {
   2621                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
   2622                         @tagName(air_tag),
   2623                         cg.typeOf(bin_op.lhs).fmt(pt),
   2624                         ops[0].tracking(cg),
   2625                         ops[1].tracking(cg),
   2626                     }),
   2627                     else => |e| return e,
   2628                 };
   2629                 for (ops) |op| for (res) |r| {
   2630                     if (op.index == r.index) break;
   2631                 } else try op.die(cg);
   2632                 try res[0].moveTo(inst, cg);
   2633             },
   2634             .ptr_sub => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else {
   2635                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   2636                 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
   2637                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   2638                 try ops[0].toSlicePtr(cg);
   2639                 var res: [1]Temp = undefined;
   2640                 cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
   2641                     .patterns = &.{
   2642                         .{ .src = .{ .to_gpr, .simm32 } },
   2643                     },
   2644                     .dst_temps = .{.{ .rc = .general_purpose }},
   2645                     .each = .{ .once = &.{
   2646                         .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .sub_src0_elem_size_times_src1), ._, ._ },
   2647                     } },
   2648                 }, .{
   2649                     .dst_constraints = .{.{ .elem_size_is = 1 }},
   2650                     .patterns = &.{
   2651                         .{ .src = .{ .to_gpr, .to_mut_gpr } },
   2652                     },
   2653                     .dst_temps = .{.{ .ref = .src1 }},
   2654                     .clobbers = .{ .eflags = true },
   2655                     .each = .{ .once = &.{
   2656                         .{ ._, ._, .neg, .src1p, ._, ._, ._ },
   2657                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
   2658                     } },
   2659                 }, .{
   2660                     .dst_constraints = .{.{ .elem_size_is = 2 }},
   2661                     .patterns = &.{
   2662                         .{ .src = .{ .to_gpr, .to_mut_gpr } },
   2663                     },
   2664                     .dst_temps = .{.{ .ref = .src1 }},
   2665                     .clobbers = .{ .eflags = true },
   2666                     .each = .{ .once = &.{
   2667                         .{ ._, ._, .neg, .src1p, ._, ._, ._ },
   2668                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ },
   2669                     } },
   2670                 }, .{
   2671                     .dst_constraints = .{.{ .elem_size_is = 2 + 1 }},
   2672                     .patterns = &.{
   2673                         .{ .src = .{ .to_gpr, .to_gpr } },
   2674                     },
   2675                     .dst_temps = .{.{ .rc = .general_purpose }},
   2676                     .clobbers = .{ .eflags = true },
   2677                     .each = .{ .once = &.{
   2678                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ },
   2679                         .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
   2680                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2681                     } },
   2682                 }, .{
   2683                     .dst_constraints = .{.{ .elem_size_is = 4 }},
   2684                     .patterns = &.{
   2685                         .{ .src = .{ .to_gpr, .to_mut_gpr } },
   2686                     },
   2687                     .dst_temps = .{.{ .ref = .src1 }},
   2688                     .clobbers = .{ .eflags = true },
   2689                     .each = .{ .once = &.{
   2690                         .{ ._, ._, .neg, .src1p, ._, ._, ._ },
   2691                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ },
   2692                     } },
   2693                 }, .{
   2694                     .dst_constraints = .{.{ .elem_size_is = 4 + 1 }},
   2695                     .patterns = &.{
   2696                         .{ .src = .{ .to_gpr, .to_gpr } },
   2697                     },
   2698                     .dst_temps = .{.{ .rc = .general_purpose }},
   2699                     .clobbers = .{ .eflags = true },
   2700                     .each = .{ .once = &.{
   2701                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ },
   2702                         .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
   2703                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2704                     } },
   2705                 }, .{
   2706                     .required_features = .{ .@"64bit", null, null, null },
   2707                     .dst_constraints = .{.{ .elem_size_is = 8 }},
   2708                     .patterns = &.{
   2709                         .{ .src = .{ .to_gpr, .to_mut_gpr } },
   2710                     },
   2711                     .dst_temps = .{.{ .ref = .src1 }},
   2712                     .clobbers = .{ .eflags = true },
   2713                     .each = .{ .once = &.{
   2714                         .{ ._, ._, .neg, .src1p, ._, ._, ._ },
   2715                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ },
   2716                     } },
   2717                 }, .{
   2718                     .required_features = .{ .@"64bit", null, null, null },
   2719                     .dst_constraints = .{.{ .elem_size_is = 8 + 1 }},
   2720                     .patterns = &.{
   2721                         .{ .src = .{ .to_gpr, .to_gpr } },
   2722                     },
   2723                     .dst_temps = .{.{ .rc = .general_purpose }},
   2724                     .clobbers = .{ .eflags = true },
   2725                     .each = .{ .once = &.{
   2726                         .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ },
   2727                         .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
   2728                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2729                     } },
   2730                 }, .{
   2731                     .dst_constraints = .{.po2_elem_size},
   2732                     .patterns = &.{
   2733                         .{ .src = .{ .to_gpr, .to_mut_gpr } },
   2734                     },
   2735                     .dst_temps = .{.{ .ref = .src1 }},
   2736                     .clobbers = .{ .eflags = true },
   2737                     .each = .{ .once = &.{
   2738                         .{ ._, ._l, .sa, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ },
   2739                         .{ ._, ._, .neg, .src1p, ._, ._, ._ },
   2740                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
   2741                     } },
   2742                 }, .{
   2743                     .patterns = &.{
   2744                         .{ .src = .{ .to_gpr, .to_gpr } },
   2745                     },
   2746                     .dst_temps = .{.{ .rc = .general_purpose }},
   2747                     .clobbers = .{ .eflags = true },
   2748                     .each = .{ .once = &.{
   2749                         .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .sub_src0_elem_size), ._ },
   2750                         .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
   2751                     } },
   2752                 } }) catch |err| switch (err) {
   2753                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
   2754                         @tagName(air_tag),
   2755                         cg.typeOf(bin_op.lhs).fmt(pt),
   2756                         ops[0].tracking(cg),
   2757                         ops[1].tracking(cg),
   2758                     }),
   2759                     else => |e| return e,
   2760                 };
   2761                 for (ops) |op| for (res) |r| {
   2762                     if (op.index == r.index) break;
   2763                 } else try op.die(cg);
   2764                 try res[0].moveTo(inst, cg);
   2765             },
   2766             .alloc => if (use_old) try cg.airAlloc(inst) else {
   2767                 const ty = air_datas[@intFromEnum(inst)].ty;
   2768                 var slot = try cg.tempInit(ty, .{ .lea_frame = .{
   2769                     .index = try cg.allocMemPtr(inst),
   2770                 } });
   2771                 try slot.moveTo(inst, cg);
   2772             },
   2773             .inferred_alloc, .inferred_alloc_comptime => unreachable,
   2774             .ret_ptr => if (use_old) try cg.airRetPtr(inst) else {
   2775                 const ty = air_datas[@intFromEnum(inst)].ty;
   2776                 var slot = switch (cg.ret_mcv.long) {
   2777                     else => unreachable,
   2778                     .none => try cg.tempInit(ty, .{ .lea_frame = .{
   2779                         .index = try cg.allocMemPtr(inst),
   2780                     } }),
   2781                     .load_frame => slot: {
   2782                         var slot = try cg.tempInit(ty, cg.ret_mcv.long);
   2783                         try slot.toOffset(cg.ret_mcv.short.indirect.off, cg);
   2784                         break :slot slot;
   2785                     },
   2786                 };
   2787                 try slot.moveTo(inst, cg);
   2788             },
   2789             .assembly => try cg.airAsm(inst),
   2790             .bit_and, .bit_or, .xor, .bool_and, .bool_or => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else {
   2791                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
   2792                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   2793                 var res: [1]Temp = undefined;
   2794                 cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) {
   2795                     else => unreachable,
   2796                     .bit_and, .bool_and => .@"and",
   2797                     .bit_or, .bool_or => .@"or",
   2798                     .xor => .xor,
   2799                 })) {
   2800                     else => unreachable,
   2801                     inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{
   2802                         .required_features = .{ .avx2, null, null, null },
   2803                         .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } },
   2804                         .patterns = &.{
   2805                             .{ .src = .{ .to_ymm, .mem } },
   2806                             .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   2807                             .{ .src = .{ .to_ymm, .to_ymm } },
   2808                         },
   2809                         .dst_temps = .{.{ .rc = .sse }},
   2810                         .each = .{ .once = &.{
   2811                             .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ },
   2812                         } },
   2813                     }, .{
   2814                         .required_features = .{ .avx, null, null, null },
   2815                         .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } },
   2816                         .patterns = &.{
   2817                             .{ .src = .{ .to_ymm, .mem } },
   2818                             .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   2819                             .{ .src = .{ .to_ymm, .to_ymm } },
   2820                         },
   2821                         .dst_temps = .{.{ .rc = .sse }},
   2822                         .each = .{ .once = &.{
   2823                             .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ },
   2824                         } },
   2825                     }, .{
   2826                         .required_features = .{ .avx, null, null, null },
   2827                         .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
   2828                         .patterns = &.{
   2829                             .{ .src = .{ .to_xmm, .mem } },
   2830                             .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
   2831                             .{ .src = .{ .to_xmm, .to_xmm } },
   2832                         },
   2833                         .dst_temps = .{.{ .rc = .sse }},
   2834                         .each = .{ .once = &.{
   2835                             .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ },
   2836                         } },
   2837                     }, .{
   2838                         .required_features = .{ .sse2, null, null, null },
   2839                         .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
   2840                         .patterns = &.{
   2841                             .{ .src = .{ .to_mut_xmm, .mem } },
   2842                             .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   2843                             .{ .src = .{ .to_mut_xmm, .to_xmm } },
   2844                         },
   2845                         .dst_temps = .{.{ .ref = .src0 }},
   2846                         .each = .{ .once = &.{
   2847                             .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ },
   2848                         } },
   2849                     }, .{
   2850                         .required_features = .{ .sse, null, null, null },
   2851                         .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
   2852                         .patterns = &.{
   2853                             .{ .src = .{ .to_mut_xmm, .mem } },
   2854                             .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   2855                             .{ .src = .{ .to_mut_xmm, .to_xmm } },
   2856                         },
   2857                         .dst_temps = .{.{ .ref = .src0 }},
   2858                         .each = .{ .once = &.{
   2859                             .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ },
   2860                         } },
   2861                     }, .{
   2862                         .required_features = .{ .mmx, null, null, null },
   2863                         .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } },
   2864                         .patterns = &.{
   2865                             .{ .src = .{ .to_mut_mm, .mem } },
   2866                             .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
   2867                             .{ .src = .{ .to_mut_mm, .to_mm } },
   2868                         },
   2869                         .dst_temps = .{.{ .ref = .src0 }},
   2870                         .each = .{ .once = &.{
   2871                             .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ },
   2872                         } },
   2873                     }, .{
   2874                         .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } },
   2875                         .patterns = &.{
   2876                             .{ .src = .{ .mut_mem, .imm8 } },
   2877                             .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
   2878                             .{ .src = .{ .to_mut_gpr, .imm8 } },
   2879                             .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2880                             .{ .src = .{ .mut_mem, .to_gpr } },
   2881                             .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   2882                             .{ .src = .{ .to_mut_gpr, .mem } },
   2883                             .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2884                             .{ .src = .{ .to_mut_gpr, .to_gpr } },
   2885                         },
   2886                         .dst_temps = .{.{ .ref = .src0 }},
   2887                         .clobbers = .{ .eflags = true },
   2888                         .each = .{ .once = &.{
   2889                             .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ },
   2890                         } },
   2891                     }, .{
   2892                         .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } },
   2893                         .patterns = &.{
   2894                             .{ .src = .{ .mut_mem, .imm16 } },
   2895                             .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
   2896                             .{ .src = .{ .to_mut_gpr, .imm16 } },
   2897                             .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2898                             .{ .src = .{ .mut_mem, .to_gpr } },
   2899                             .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   2900                             .{ .src = .{ .to_mut_gpr, .mem } },
   2901                             .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2902                             .{ .src = .{ .to_mut_gpr, .to_gpr } },
   2903                         },
   2904                         .dst_temps = .{.{ .ref = .src0 }},
   2905                         .clobbers = .{ .eflags = true },
   2906                         .each = .{ .once = &.{
   2907                             .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ },
   2908                         } },
   2909                     }, .{
   2910                         .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } },
   2911                         .patterns = &.{
   2912                             .{ .src = .{ .mut_mem, .imm32 } },
   2913                             .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
   2914                             .{ .src = .{ .to_mut_gpr, .imm32 } },
   2915                             .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2916                             .{ .src = .{ .mut_mem, .to_gpr } },
   2917                             .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   2918                             .{ .src = .{ .to_mut_gpr, .mem } },
   2919                             .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2920                             .{ .src = .{ .to_mut_gpr, .to_gpr } },
   2921                         },
   2922                         .dst_temps = .{.{ .ref = .src0 }},
   2923                         .clobbers = .{ .eflags = true },
   2924                         .each = .{ .once = &.{
   2925                             .{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ },
   2926                         } },
   2927                     }, .{
   2928                         .required_features = .{ .@"64bit", null, null, null },
   2929                         .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } },
   2930                         .patterns = &.{
   2931                             .{ .src = .{ .mut_mem, .simm32 } },
   2932                             .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
   2933                             .{ .src = .{ .to_mut_gpr, .simm32 } },
   2934                             .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2935                             .{ .src = .{ .mut_mem, .to_gpr } },
   2936                             .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   2937                             .{ .src = .{ .to_mut_gpr, .mem } },
   2938                             .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   2939                             .{ .src = .{ .to_mut_gpr, .to_gpr } },
   2940                         },
   2941                         .dst_temps = .{.{ .ref = .src0 }},
   2942                         .clobbers = .{ .eflags = true },
   2943                         .each = .{ .once = &.{
   2944                             .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ },
   2945                         } },
   2946                     }, .{
   2947                         .required_features = .{ .avx2, null, null, null },
   2948                         .src_constraints = .{
   2949                             .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
   2950                             .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
   2951                         },
   2952                         .patterns = &.{
   2953                             .{ .src = .{ .to_mem, .to_mem } },
   2954                         },
   2955                         .extra_temps = .{
   2956                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   2957                             .{ .kind = .{ .rc = .sse } },
   2958                             .unused,
   2959                             .unused,
   2960                             .unused,
   2961                             .unused,
   2962                         },
   2963                         .dst_temps = .{.mem},
   2964                         .clobbers = .{ .eflags = true },
   2965                         .each = .{ .once = &.{
   2966                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   2967                             .{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   2968                             .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
   2969                             .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
   2970                             .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   2971                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   2972                         } },
   2973                     }, .{
   2974                         .required_features = .{ .avx, null, null, null },
   2975                         .src_constraints = .{
   2976                             .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
   2977                             .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
   2978                         },
   2979                         .patterns = &.{
   2980                             .{ .src = .{ .to_mem, .to_mem } },
   2981                         },
   2982                         .extra_temps = .{
   2983                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   2984                             .{ .kind = .{ .rc = .sse } },
   2985                             .unused,
   2986                             .unused,
   2987                             .unused,
   2988                             .unused,
   2989                         },
   2990                         .dst_temps = .{.mem},
   2991                         .clobbers = .{ .eflags = true },
   2992                         .each = .{ .once = &.{
   2993                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   2994                             .{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   2995                             .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
   2996                             .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
   2997                             .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   2998                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   2999                         } },
   3000                     }, .{
   3001                         .required_features = .{ .avx, null, null, null },
   3002                         .src_constraints = .{
   3003                             .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
   3004                             .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
   3005                         },
   3006                         .patterns = &.{
   3007                             .{ .src = .{ .to_mem, .to_mem } },
   3008                         },
   3009                         .extra_temps = .{
   3010                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3011                             .{ .kind = .{ .rc = .sse } },
   3012                             .unused,
   3013                             .unused,
   3014                             .unused,
   3015                             .unused,
   3016                         },
   3017                         .dst_temps = .{.mem},
   3018                         .clobbers = .{ .eflags = true },
   3019                         .each = .{ .once = &.{
   3020                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   3021                             .{ .@"0:", .v_dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   3022                             .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
   3023                             .{ ._, .v_dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
   3024                             .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3025                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3026                         } },
   3027                     }, .{
   3028                         .required_features = .{ .sse2, null, null, null },
   3029                         .src_constraints = .{
   3030                             .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
   3031                             .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
   3032                         },
   3033                         .patterns = &.{
   3034                             .{ .src = .{ .to_mem, .to_mem } },
   3035                         },
   3036                         .extra_temps = .{
   3037                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3038                             .{ .kind = .{ .rc = .sse } },
   3039                             .unused,
   3040                             .unused,
   3041                             .unused,
   3042                             .unused,
   3043                         },
   3044                         .dst_temps = .{.mem},
   3045                         .clobbers = .{ .eflags = true },
   3046                         .each = .{ .once = &.{
   3047                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   3048                             .{ .@"0:", ._dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   3049                             .{ ._, .p_, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   3050                             .{ ._, ._dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
   3051                             .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3052                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3053                         } },
   3054                     }, .{
   3055                         .required_features = .{ .sse, null, null, null },
   3056                         .src_constraints = .{
   3057                             .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
   3058                             .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
   3059                         },
   3060                         .patterns = &.{
   3061                             .{ .src = .{ .to_mem, .to_mem } },
   3062                         },
   3063                         .extra_temps = .{
   3064                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3065                             .{ .kind = .{ .rc = .sse } },
   3066                             .unused,
   3067                             .unused,
   3068                             .unused,
   3069                             .unused,
   3070                         },
   3071                         .dst_temps = .{.mem},
   3072                         .clobbers = .{ .eflags = true },
   3073                         .each = .{ .once = &.{
   3074                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   3075                             .{ .@"0:", ._ps, .movu, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   3076                             .{ ._, ._ps, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   3077                             .{ ._, ._ps, .movu, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
   3078                             .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3079                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3080                         } },
   3081                     }, .{
   3082                         .required_features = .{ .mmx, null, null, null },
   3083                         .src_constraints = .{
   3084                             .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
   3085                             .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
   3086                         },
   3087                         .patterns = &.{
   3088                             .{ .src = .{ .to_mem, .to_mem } },
   3089                         },
   3090                         .extra_temps = .{
   3091                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3092                             .{ .kind = .{ .rc = .mmx } },
   3093                             .unused,
   3094                             .unused,
   3095                             .unused,
   3096                             .unused,
   3097                         },
   3098                         .dst_temps = .{.mem},
   3099                         .clobbers = .{ .eflags = true },
   3100                         .each = .{ .once = &.{
   3101                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   3102                             .{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   3103                             .{ ._, .p_, mir_tag, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   3104                             .{ ._, ._q, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
   3105                             .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3106                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3107                         } },
   3108                     }, .{
   3109                         .src_constraints = .{
   3110                             .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
   3111                             .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
   3112                         },
   3113                         .patterns = &.{
   3114                             .{ .src = .{ .to_mem, .to_mem } },
   3115                         },
   3116                         .extra_temps = .{
   3117                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3118                             .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3119                             .unused,
   3120                             .unused,
   3121                             .unused,
   3122                             .unused,
   3123                         },
   3124                         .dst_temps = .{.mem},
   3125                         .clobbers = .{ .eflags = true },
   3126                         .each = .{ .once = &.{
   3127                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   3128                             .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
   3129                             .{ ._, ._, mir_tag, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
   3130                             .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
   3131                             .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
   3132                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3133                         } },
   3134                     } },
   3135                 }) catch |err| switch (err) {
   3136                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
   3137                         @tagName(air_tag),
   3138                         cg.typeOf(bin_op.lhs).fmt(pt),
   3139                         ops[0].tracking(cg),
   3140                         ops[1].tracking(cg),
   3141                     }),
   3142                     else => |e| return e,
   3143                 };
   3144                 for (ops) |op| for (res) |r| {
   3145                     if (op.index == r.index) break;
   3146                 } else try op.die(cg);
   3147                 try res[0].moveTo(inst, cg);
   3148             },
   3149             .not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else {
   3150                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   3151                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   3152                 var res: [1]Temp = undefined;
   3153                 cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{
   3154                     .src_constraints = .{ .{ .signed_or_exact_int = .byte }, .any },
   3155                     .patterns = &.{
   3156                         .{ .src = .{ .mut_mem, .none } },
   3157                         .{ .src = .{ .to_mut_gpr, .none } },
   3158                     },
   3159                     .dst_temps = .{.{ .ref = .src0 }},
   3160                     .each = .{ .once = &.{
   3161                         .{ ._, ._, .not, .dst0b, ._, ._, ._ },
   3162                     } },
   3163                 }, .{
   3164                     .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
   3165                     .patterns = &.{
   3166                         .{ .src = .{ .mut_mem, .none } },
   3167                         .{ .src = .{ .to_mut_gpr, .none } },
   3168                     },
   3169                     .dst_temps = .{.{ .ref = .src0 }},
   3170                     .clobbers = .{ .eflags = true },
   3171                     .each = .{ .once = &.{
   3172                         .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_umax), ._, ._ },
   3173                     } },
   3174                 }, .{
   3175                     .src_constraints = .{ .{ .signed_or_exact_int = .word }, .any },
   3176                     .patterns = &.{
   3177                         .{ .src = .{ .mut_mem, .none } },
   3178                         .{ .src = .{ .to_mut_gpr, .none } },
   3179                     },
   3180                     .dst_temps = .{.{ .ref = .src0 }},
   3181                     .each = .{ .once = &.{
   3182                         .{ ._, ._, .not, .dst0w, ._, ._, ._ },
   3183                     } },
   3184                 }, .{
   3185                     .src_constraints = .{ .{ .unsigned_int = .word }, .any },
   3186                     .patterns = &.{
   3187                         .{ .src = .{ .mut_mem, .none } },
   3188                         .{ .src = .{ .to_mut_gpr, .none } },
   3189                     },
   3190                     .dst_temps = .{.{ .ref = .src0 }},
   3191                     .clobbers = .{ .eflags = true },
   3192                     .each = .{ .once = &.{
   3193                         .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_umax), ._, ._ },
   3194                     } },
   3195                 }, .{
   3196                     .src_constraints = .{ .{ .signed_or_exact_int = .dword }, .any },
   3197                     .patterns = &.{
   3198                         .{ .src = .{ .mut_mem, .none } },
   3199                         .{ .src = .{ .to_mut_gpr, .none } },
   3200                     },
   3201                     .dst_temps = .{.{ .ref = .src0 }},
   3202                     .each = .{ .once = &.{
   3203                         .{ ._, ._, .not, .dst0d, ._, ._, ._ },
   3204                     } },
   3205                 }, .{
   3206                     .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
   3207                     .patterns = &.{
   3208                         .{ .src = .{ .mut_mem, .none } },
   3209                         .{ .src = .{ .to_mut_gpr, .none } },
   3210                     },
   3211                     .dst_temps = .{.{ .ref = .src0 }},
   3212                     .clobbers = .{ .eflags = true },
   3213                     .each = .{ .once = &.{
   3214                         .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_umax), ._, ._ },
   3215                     } },
   3216                 }, .{
   3217                     .required_features = .{ .@"64bit", null, null, null },
   3218                     .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any },
   3219                     .patterns = &.{
   3220                         .{ .src = .{ .mut_mem, .none } },
   3221                         .{ .src = .{ .to_mut_gpr, .none } },
   3222                     },
   3223                     .dst_temps = .{.{ .ref = .src0 }},
   3224                     .each = .{ .once = &.{
   3225                         .{ ._, ._, .not, .dst0q, ._, ._, ._ },
   3226                     } },
   3227                 }, .{
   3228                     .required_features = .{ .@"64bit", null, null, null },
   3229                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   3230                     .patterns = &.{
   3231                         .{ .src = .{ .mem, .none } },
   3232                         .{ .src = .{ .to_gpr, .none } },
   3233                     },
   3234                     .dst_temps = .{.{ .rc = .general_purpose }},
   3235                     .each = .{ .once = &.{
   3236                         .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
   3237                         .{ ._, ._, .xor, .dst0q, .src0q, ._, ._ },
   3238                     } },
   3239                 }, .{
   3240                     .required_features = .{ .mmx, null, null, null },
   3241                     .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any },
   3242                     .patterns = &.{
   3243                         .{ .src = .{ .mem, .none } },
   3244                         .{ .src = .{ .to_mm, .none } },
   3245                     },
   3246                     .dst_temps = .{.{ .rc = .mmx }},
   3247                     .each = .{ .once = &.{
   3248                         .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ },
   3249                         .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ },
   3250                     } },
   3251                 }, .{
   3252                     .required_features = .{ .mmx, null, null, null },
   3253                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   3254                     .patterns = &.{
   3255                         .{ .src = .{ .to_mut_mm, .none } },
   3256                     },
   3257                     .extra_temps = .{
   3258                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3259                         .{ .kind = .{ .umax_mem = .src0 } },
   3260                         .unused,
   3261                         .unused,
   3262                         .unused,
   3263                         .unused,
   3264                     },
   3265                     .dst_temps = .{.{ .ref = .src0 }},
   3266                     .each = .{ .once = &.{
   3267                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3268                         .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ },
   3269                     } },
   3270                 }, .{
   3271                     .required_features = .{ .avx, null, null, null },
   3272                     .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any },
   3273                     .patterns = &.{
   3274                         .{ .src = .{ .mem, .none } },
   3275                         .{ .src = .{ .to_xmm, .none } },
   3276                     },
   3277                     .dst_temps = .{.{ .rc = .sse }},
   3278                     .each = .{ .once = &.{
   3279                         .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ },
   3280                         .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ },
   3281                     } },
   3282                 }, .{
   3283                     .required_features = .{ .avx, null, null, null },
   3284                     .src_constraints = .{ .{ .unsigned_int = .xword }, .any },
   3285                     .patterns = &.{
   3286                         .{ .src = .{ .to_xmm, .none } },
   3287                     },
   3288                     .extra_temps = .{
   3289                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3290                         .{ .kind = .{ .umax_mem = .src0 } },
   3291                         .unused,
   3292                         .unused,
   3293                         .unused,
   3294                         .unused,
   3295                     },
   3296                     .dst_temps = .{.{ .rc = .sse }},
   3297                     .each = .{ .once = &.{
   3298                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3299                         .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
   3300                     } },
   3301                 }, .{
   3302                     .required_features = .{ .sse2, null, null, null },
   3303                     .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any },
   3304                     .patterns = &.{
   3305                         .{ .src = .{ .mem, .none } },
   3306                         .{ .src = .{ .to_xmm, .none } },
   3307                     },
   3308                     .dst_temps = .{.{ .rc = .sse }},
   3309                     .each = .{ .once = &.{
   3310                         .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ },
   3311                         .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ },
   3312                     } },
   3313                 }, .{
   3314                     .required_features = .{ .sse2, null, null, null },
   3315                     .src_constraints = .{ .{ .unsigned_int = .xword }, .any },
   3316                     .patterns = &.{
   3317                         .{ .src = .{ .to_mut_xmm, .none } },
   3318                     },
   3319                     .extra_temps = .{
   3320                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3321                         .{ .kind = .{ .umax_mem = .src0 } },
   3322                         .unused,
   3323                         .unused,
   3324                         .unused,
   3325                         .unused,
   3326                     },
   3327                     .dst_temps = .{.{ .ref = .src0 }},
   3328                     .each = .{ .once = &.{
   3329                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3330                         .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
   3331                     } },
   3332                 }, .{
   3333                     .required_features = .{ .sse, null, null, null },
                            // Entry gated on sse only; src0 must satisfy `.int = .xword`, src1 is unconstrained.
   3334                     .src_constraints = .{ .{ .int = .xword }, .any },
   3335                     .patterns = &.{
   3336                         .{ .src = .{ .to_mut_xmm, .none } },
   3337                     },
                            // tmp0: general-purpose usize temp. tmp1: a `.umax_mem` temp tied to src0
                            // (presumably an in-memory mask holding src0's maximum value; confirm at the temp-kind definition).
   3338                     .extra_temps = .{
   3339                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3340                         .{ .kind = .{ .umax_mem = .src0 } },
   3341                         .unused,
   3342                         .unused,
   3343                         .unused,
   3344                         .unused,
   3345                     },
                            // The result reuses src0's location.
   3346                     .dst_temps = .{.{ .ref = .src0 }},
   3347                     .each = .{ .once = &.{
                                // Same shape as an integer XOR against the mask, but using the `_ps` form of xor.
   3348                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3349                         .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
   3350                     } },
   3351                 }, .{
   3352                     .required_features = .{ .avx2, null, null, null },
                            // Entry gated on avx2; src0 must satisfy `.signed_or_exact_int = .yword`.
   3353                     .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any },
                            // Two accepted source placements: left in memory, or moved to a ymm register.
   3354                     .patterns = &.{
   3355                         .{ .src = .{ .mem, .none } },
   3356                         .{ .src = .{ .to_ymm, .none } },
   3357                     },
                            // The result goes in a fresh sse-class register.
   3358                     .dst_temps = .{.{ .rc = .sse }},
   3359                     .each = .{ .once = &.{
                                // Compare dst0 with itself for equality (presumably leaving all bits set),
                                // then XOR that with the source into dst0.
   3360                         .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ },
   3361                         .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ },
   3362                     } },
   3363                 }, .{
   3364                     .required_features = .{ .avx2, null, null, null },
                            // Entry gated on avx2; src0 must satisfy `.unsigned_int = .yword` and is moved to a ymm register.
   3365                     .src_constraints = .{ .{ .unsigned_int = .yword }, .any },
   3366                     .patterns = &.{
   3367                         .{ .src = .{ .to_ymm, .none } },
   3368                     },
                            // tmp0: general-purpose usize temp. tmp1: a `.umax_mem` temp tied to src0
                            // (presumably an in-memory mask holding src0's maximum value; confirm at the temp-kind definition).
   3369                     .extra_temps = .{
   3370                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3371                         .{ .kind = .{ .umax_mem = .src0 } },
   3372                         .unused,
   3373                         .unused,
   3374                         .unused,
   3375                         .unused,
   3376                     },
                            // The result goes in a fresh sse-class register.
   3377                     .dst_temps = .{.{ .rc = .sse }},
   3378                     .each = .{ .once = &.{
                                // Load the address of tmp1 into tmp0, then dst0 = src0 XOR the yword at that address.
   3379                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3380                         .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
   3381                     } },
   3382                 }, .{
   3383                     .required_features = .{ .avx, null, null, null },
                            // Entry gated on avx (no avx2 requirement); src0 must satisfy `.signed_or_exact_int = .yword`.
   3384                     .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any },
                            // Two accepted source placements: left in memory, or moved to a ymm register.
   3385                     .patterns = &.{
   3386                         .{ .src = .{ .mem, .none } },
   3387                         .{ .src = .{ .to_ymm, .none } },
   3388                     },
                            // The result goes in a fresh sse-class register.
   3389                     .dst_temps = .{.{ .rc = .sse }},
   3390                     .each = .{ .once = &.{
                                // `_pd` compare of dst0 with itself using predicate immediate 0b01111
                                // (presumably an always-true predicate that sets every bit; confirm against the CMPPD predicate table),
                                // followed by a `_pd` XOR with the source.
   3391                         .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) },
   3392                         .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ },
   3393                     } },
   3394                 }, .{
   3395                     .required_features = .{ .avx, null, null, null },
                            // Entry gated on avx; src0 must satisfy `.unsigned_int = .yword` and is moved to a ymm register.
   3396                     .src_constraints = .{ .{ .unsigned_int = .yword }, .any },
   3397                     .patterns = &.{
   3398                         .{ .src = .{ .to_ymm, .none } },
   3399                     },
                            // tmp0: general-purpose usize temp. tmp1: a `.umax_mem` temp tied to src0
                            // (presumably an in-memory mask holding src0's maximum value; confirm at the temp-kind definition).
   3400                     .extra_temps = .{
   3401                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3402                         .{ .kind = .{ .umax_mem = .src0 } },
   3403                         .unused,
   3404                         .unused,
   3405                         .unused,
   3406                         .unused,
   3407                     },
                            // The result goes in a fresh sse-class register.
   3408                     .dst_temps = .{.{ .rc = .sse }},
   3409                     .each = .{ .once = &.{
                                // Load the address of tmp1 into tmp0, then dst0 = src0 XOR the yword at that address (`_pd` form).
   3410                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3411                         .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
   3412                     } },
   3413                 }, .{
   3414                     .required_features = .{ .avx2, null, null, null },
                            // Entry gated on avx2; src0 matches `.signed_or_exact_remainder_int` with of = yword, is = xword.
   3415                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3416                     .patterns = &.{
   3417                         .{ .src = .{ .to_mem, .none } },
   3418                     },
                            // tmp0: general-purpose isize loop index. tmp1: sse register holding the XOR mask. tmp2: sse scratch.
   3419                     .extra_temps = .{
   3420                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3421                         .{ .kind = .{ .rc = .sse } },
   3422                         .{ .kind = .{ .rc = .sse } },
   3423                         .unused,
   3424                         .unused,
   3425                         .unused,
   3426                     },
   3427                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3428                     .clobbers = .{ .eflags = true },
   3429                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3430                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Build the mask by comparing tmp1 with itself.
   3431                         .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ },
                                // Loop (label 0): XOR one yword of the source with the mask, store it with the `v_dqu` move,
                                // advance tmp0 by 32, and jump back to label 0 on the no-carry condition.
   3432                         .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ },
   3433                         .{ ._, .v_dqu, .mov, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ },
   3434                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   3435                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: one final xword addressed with `.add_size` and displacement -16, stored with the `v_dqa` move.
   3436                         .{ .@"0:", .vp_, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ },
   3437                         .{ ._, .v_dqa, .mov, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ },
   3438                     } },
   3439                 }, .{
   3440                     .required_features = .{ .avx2, null, null, null },
                            // Entry gated on avx2; src0 matches `.signed_or_exact_remainder_int` with of = yword, is = yword.
   3441                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3442                     .patterns = &.{
   3443                         .{ .src = .{ .to_mem, .none } },
   3444                     },
                            // tmp0: general-purpose isize loop index. tmp1: sse register holding the XOR mask. tmp2: sse scratch.
   3445                     .extra_temps = .{
   3446                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3447                         .{ .kind = .{ .rc = .sse } },
   3448                         .{ .kind = .{ .rc = .sse } },
   3449                         .unused,
   3450                         .unused,
   3451                         .unused,
   3452                     },
   3453                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3454                     .clobbers = .{ .eflags = true },
   3455                     .each = .{ .once = &.{
                                // tmp0 is seeded from src0's size (`.sa(.src0, .sub_size)`; presumably -size, confirm).
   3456                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                                // Build the mask by comparing tmp1 with itself.
   3457                         .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ },
                                // Loop (label 0): XOR one yword of the source with the mask, store it to the destination,
                                // advance tmp0 by 32, and jump back on the no-carry condition. There is no tail step.
   3458                         .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ },
   3459                         .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ },
   3460                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   3461                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3462                     } },
   3463                 }, .{
   3464                     .required_features = .{ .avx, null, null, null },
                            // Entry gated on avx (no avx2 requirement); src0 matches `.signed_or_exact_remainder_int`
                            // with of = yword, is = xword.
   3465                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3466                     .patterns = &.{
   3467                         .{ .src = .{ .to_mem, .none } },
   3468                     },
                            // tmp0: general-purpose isize loop index. tmp1: sse register holding the XOR mask. tmp2: sse scratch.
   3469                     .extra_temps = .{
   3470                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3471                         .{ .kind = .{ .rc = .sse } },
   3472                         .{ .kind = .{ .rc = .sse } },
   3473                         .unused,
   3474                         .unused,
   3475                         .unused,
   3476                     },
   3477                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3478                     .clobbers = .{ .eflags = true },
   3479                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3480                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Build the mask with a `_pd` self-compare using predicate immediate 0b01111
                                // (presumably always-true; confirm against the CMPPD predicate table).
   3481                         .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) },
                                // Loop (label 0): `_pd` XOR of one source yword with the mask, `movu` store,
                                // advance tmp0 by 32, and jump back on the no-carry condition.
   3482                         .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ },
   3483                         .{ ._, .v_pd, .movu, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ },
   3484                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   3485                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: one final xword addressed with `.add_size` and displacement -16, stored with `mova`.
   3486                         .{ .@"0:", .v_pd, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ },
   3487                         .{ ._, .v_pd, .mova, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ },
   3488                     } },
   3489                 }, .{
   3490                     .required_features = .{ .avx, null, null, null },
                            // Entry gated on avx (no avx2 requirement); src0 matches `.signed_or_exact_remainder_int`
                            // with of = yword, is = yword.
   3491                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3492                     .patterns = &.{
   3493                         .{ .src = .{ .to_mem, .none } },
   3494                     },
                            // tmp0: general-purpose isize loop index. tmp1: sse register holding the XOR mask. tmp2: sse scratch.
   3495                     .extra_temps = .{
   3496                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3497                         .{ .kind = .{ .rc = .sse } },
   3498                         .{ .kind = .{ .rc = .sse } },
   3499                         .unused,
   3500                         .unused,
   3501                         .unused,
   3502                     },
   3503                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3504                     .clobbers = .{ .eflags = true },
   3505                     .each = .{ .once = &.{
                                // tmp0 is seeded from src0's size (`.sa(.src0, .sub_size)`; presumably -size, confirm).
   3506                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                                // Build the mask with a `_pd` self-compare using predicate immediate 0b01111
                                // (presumably always-true; confirm against the CMPPD predicate table).
   3507                         .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) },
                                // Loop (label 0): `_pd` XOR of one source yword with the mask, `movu` store,
                                // advance tmp0 by 32, and jump back on the no-carry condition. There is no tail step.
   3508                         .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ },
   3509                         .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ },
   3510                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   3511                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3512                     } },
   3513                 }, .{
   3514                     .required_features = .{ .avx, null, null, null },
                            // Entry gated on avx; src0 matches `.signed_or_exact_remainder_int` with of = xword, is = xword.
   3515                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3516                     .patterns = &.{
   3517                         .{ .src = .{ .to_mem, .none } },
   3518                     },
                            // tmp0: general-purpose isize loop index. tmp1: sse register holding the XOR mask. tmp2: sse scratch.
   3519                     .extra_temps = .{
   3520                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3521                         .{ .kind = .{ .rc = .sse } },
   3522                         .{ .kind = .{ .rc = .sse } },
   3523                         .unused,
   3524                         .unused,
   3525                         .unused,
   3526                     },
   3527                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3528                     .clobbers = .{ .eflags = true },
   3529                     .each = .{ .once = &.{
                                // tmp0 is seeded from src0's size (`.sa(.src0, .sub_size)`; presumably -size, confirm).
   3530                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                                // Build the mask by comparing tmp1 with itself (integer `vp_q` compare).
   3531                         .{ ._, .vp_q, .cmpeq, .tmp1x, .tmp1x, .tmp1x, ._ },
                                // Loop (label 0): XOR one source xword with the mask. The fixes must be `.vp_`, the packed-integer
                                // VEX form used by every other integer XOR in this table; a bare `.v_` prefix on `.xor`
                                // does not name an existing instruction.
   3532                         .{ .@"0:", .vp_, .xor, .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._ },
                                // Store the result, advance tmp0 by 16, and jump back on the no-carry condition.
   3533                         .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ },
   3534                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3535                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3536                     } },
   3537                 }, .{
   3538                     .required_features = .{ .sse2, null, null, null },
                            // Entry gated on sse2; src0 matches `.signed_or_exact_remainder_int` with of = xword, is = xword.
   3539                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3540                     .patterns = &.{
   3541                         .{ .src = .{ .to_mem, .none } },
   3542                     },
                            // tmp0: general-purpose isize loop index. tmp1: sse register holding the XOR mask. tmp2: sse scratch.
   3543                     .extra_temps = .{
   3544                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3545                         .{ .kind = .{ .rc = .sse } },
   3546                         .{ .kind = .{ .rc = .sse } },
   3547                         .unused,
   3548                         .unused,
   3549                         .unused,
   3550                     },
   3551                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3552                     .clobbers = .{ .eflags = true },
   3553                     .each = .{ .once = &.{
                                // tmp0 is seeded from src0's size (`.sa(.src0, .sub_size)`; presumably -size, confirm).
   3554                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                                // Build the mask by comparing tmp1 with itself (two-operand form).
   3555                         .{ ._, .p_d, .cmpeq, .tmp1x, .tmp1x, ._, ._ },
                                // Loop (label 0): load one source xword into tmp2, XOR it with the mask in place,
                                // store it to the destination, advance tmp0 by 16, and jump back on the no-carry condition.
   3556                         .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   3557                         .{ ._, .p_, .xor, .tmp2x, .tmp1x, ._, ._ },
   3558                         .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ },
   3559                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3560                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3561                     } },
   3562                 }, .{
   3563                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.signed_or_exact_remainder_int` with of = xword, is = xword.
   3564                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
                            // Only matches a source that is already mutable memory.
   3565                     .patterns = &.{
   3566                         .{ .src = .{ .mut_mem, .none } },
   3567                     },
                            // tmp0: general-purpose isize loop index.
   3568                     .extra_temps = .{
   3569                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3570                         .unused,
   3571                         .unused,
   3572                         .unused,
   3573                         .unused,
   3574                         .unused,
   3575                     },
                            // The result reuses src0's location, so the NOTs below operate in place.
   3576                     .dst_temps = .{.{ .ref = .src0 }},
                            // The `add` that drives the loop modifies flags.
   3577                     .clobbers = .{ .eflags = true },
   3578                     .each = .{ .once = &.{
                                // tmp0 is seeded from src0's size (`.sa(.src0, .sub_size)`; presumably -size, confirm).
   3579                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                                // Loop (label 0): NOT two adjacent qwords (displacements 0 and 8), advance tmp0 by 16,
                                // and jump back on the no-carry condition.
   3580                         .{ .@"0:", ._, .not, .memia(.dst0q, .tmp0, .add_size), ._, ._, ._ },
   3581                         .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, 8), ._, ._, ._ },
   3582                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3583                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3584                     } },
   3585                 }, .{
   3586                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.signed_or_exact_remainder_int` with of = xword, is = xword.
   3587                     .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3588                     .patterns = &.{
   3589                         .{ .src = .{ .to_mem, .none } },
   3590                     },
                            // tmp0: general-purpose isize loop index. tmp1: general-purpose u64 scratch.
   3591                     .extra_temps = .{
   3592                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3593                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3594                         .unused,
   3595                         .unused,
   3596                         .unused,
   3597                         .unused,
   3598                     },
   3599                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3600                     .clobbers = .{ .eflags = true },
   3601                     .each = .{ .once = &.{
                                // tmp0 is seeded from src0's size (`.sa(.src0, .sub_size)`; presumably -size, confirm).
   3602                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                                // Loop (label 0): load one source qword, NOT it in the register, store it to the destination,
                                // advance tmp0 by 8, and jump back on the no-carry condition.
   3603                         .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   3604                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3605                         .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
   3606                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3607                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3608                     } },
   3609                 }, .{
   3610                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.exact_remainder_int` with of = xword, is = dword.
   3611                     .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any },
                            // Only matches a source that is already mutable memory.
   3612                     .patterns = &.{
   3613                         .{ .src = .{ .mut_mem, .none } },
   3614                     },
                            // tmp0: general-purpose isize loop index.
   3615                     .extra_temps = .{
   3616                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3617                         .unused,
   3618                         .unused,
   3619                         .unused,
   3620                         .unused,
   3621                         .unused,
   3622                     },
                            // The result reuses src0's location, so the NOTs below operate in place.
   3623                     .dst_temps = .{.{ .ref = .src0 }},
                            // The `add` that drives the loop modifies flags.
   3624                     .clobbers = .{ .eflags = true },
   3625                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3626                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): NOT two adjacent qwords (displacements -16 and -16 + 8), advance tmp0 by 16,
                                // and jump back on the no-carry condition.
   3627                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
   3628                         .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
   3629                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3630                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: NOT only the dword at displacement -16 from `.add_size`; the bytes after it are left untouched.
   3631                         .{ ._, ._, .not, .memad(.dst0d, .add_size, -16), ._, ._, ._ },
   3632                     } },
   3633                 }, .{
   3634                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.exact_remainder_int` with of = xword, is = dword.
   3635                     .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3636                     .patterns = &.{
   3637                         .{ .src = .{ .to_mem, .none } },
   3638                     },
                            // tmp0: general-purpose isize loop index (reused as dword scratch in the tail). tmp1: u64 scratch.
   3639                     .extra_temps = .{
   3640                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3641                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3642                         .unused,
   3643                         .unused,
   3644                         .unused,
   3645                         .unused,
   3646                     },
   3647                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3648                     .clobbers = .{ .eflags = true },
   3649                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3650                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): load one source qword, NOT it, store it to the destination,
                                // advance tmp0 by 8, and jump back on the no-carry condition.
   3651                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ },
   3652                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3653                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ },
   3654                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3655                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: invert the dword at displacement -16 via tmp0d and store it.
   3656                         .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ },
   3657                         .{ ._, ._, .not, .tmp0d, ._, ._, ._ },
   3658                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ },
                                // Write 0 to the next dword and the following qword of the destination.
   3659                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ },
   3660                         .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ },
   3661                     } },
   3662                 }, .{
   3663                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.exact_remainder_int` with of = qword, is = qword.
   3664                     .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any },
                            // Only matches a source that is already mutable memory.
   3665                     .patterns = &.{
   3666                         .{ .src = .{ .mut_mem, .none } },
   3667                     },
                            // tmp0: general-purpose isize loop index.
   3668                     .extra_temps = .{
   3669                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3670                         .unused,
   3671                         .unused,
   3672                         .unused,
   3673                         .unused,
   3674                         .unused,
   3675                     },
                            // The result reuses src0's location, so the NOTs below operate in place.
   3676                     .dst_temps = .{.{ .ref = .src0 }},
                            // The `add` that drives the loop modifies flags.
   3677                     .clobbers = .{ .eflags = true },
   3678                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3679                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): NOT two adjacent qwords (displacements -16 and -16 + 8), advance tmp0 by 16,
                                // and jump back on the no-carry condition.
   3680                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
   3681                         .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
   3682                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3683                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: NOT the single qword at displacement -16 from `.add_size`.
                                // NOTE(review): the 16-byte stride suggests the operand's byte size is a multiple of 16; confirm.
   3684                         .{ ._, ._, .not, .memad(.dst0q, .add_size, -16), ._, ._, ._ },
   3685                     } },
   3686                 }, .{
   3687                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.exact_remainder_int` with of = qword, is = qword.
   3688                     .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3689                     .patterns = &.{
   3690                         .{ .src = .{ .to_mem, .none } },
   3691                     },
                            // tmp0: general-purpose isize loop index. tmp1: general-purpose u64 scratch.
   3692                     .extra_temps = .{
   3693                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3694                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3695                         .unused,
   3696                         .unused,
   3697                         .unused,
   3698                         .unused,
   3699                     },
   3700                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3701                     .clobbers = .{ .eflags = true },
   3702                     .each = .{ .once = &.{
                                // tmp0 is seeded from 8 and src0's size (`.sia(8, .src0, .sub_size)`; presumably 8 - size, confirm).
   3703                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): load one source qword, NOT it, store it to the destination,
                                // advance tmp0 by 8, and jump back on the no-carry condition.
   3704                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
   3705                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3706                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
   3707                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3708                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: write 0 to the destination qword at displacement -8 from `.add_size`.
   3709                         .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
   3710                     } },
   3711                 }, .{
   3712                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.exact_remainder_int` with of = dword, is = dword.
   3713                     .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any },
                            // Only matches a source that is already mutable memory.
   3714                     .patterns = &.{
   3715                         .{ .src = .{ .mut_mem, .none } },
   3716                     },
                            // tmp0: general-purpose isize loop index.
   3717                     .extra_temps = .{
   3718                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3719                         .unused,
   3720                         .unused,
   3721                         .unused,
   3722                         .unused,
   3723                         .unused,
   3724                     },
                            // The result reuses src0's location, so the NOTs below operate in place.
   3725                     .dst_temps = .{.{ .ref = .src0 }},
                            // The `add` that drives the loop modifies flags.
   3726                     .clobbers = .{ .eflags = true },
   3727                     .each = .{ .once = &.{
                                // tmp0 is seeded from 8 and src0's size (`.sia(8, .src0, .sub_size)`; presumably 8 - size, confirm).
   3728                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): NOT one qword per iteration, advance tmp0 by 8,
                                // and jump back on the no-carry condition.
   3729                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ },
   3730                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3731                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: NOT only the dword at displacement -8 from `.add_size`.
   3732                         .{ ._, ._, .not, .memad(.dst0d, .add_size, -8), ._, ._, ._ },
   3733                     } },
   3734                 }, .{
   3735                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.exact_remainder_int` with of = dword, is = dword.
   3736                     .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3737                     .patterns = &.{
   3738                         .{ .src = .{ .to_mem, .none } },
   3739                     },
                            // tmp0: general-purpose isize loop index (reused as dword scratch in the tail). tmp1: u64 scratch.
   3740                     .extra_temps = .{
   3741                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3742                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3743                         .unused,
   3744                         .unused,
   3745                         .unused,
   3746                         .unused,
   3747                     },
   3748                     .dst_temps = .{.mem},
                            // The `add` that drives the loop modifies flags.
   3749                     .clobbers = .{ .eflags = true },
   3750                     .each = .{ .once = &.{
                                // tmp0 is seeded from 8 and src0's size (`.sia(8, .src0, .sub_size)`; presumably 8 - size, confirm).
   3751                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): load one source qword, NOT it, store it to the destination,
                                // advance tmp0 by 8, and jump back on the no-carry condition.
   3752                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
   3753                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3754                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
   3755                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3756                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: invert the dword at displacement -8 via tmp0d, store it,
                                // then write 0 to the dword that follows it in the destination.
   3757                         .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ },
   3758                         .{ ._, ._, .not, .tmp0d, ._, ._, ._ },
   3759                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ },
   3760                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ },
   3761                     } },
   3762                 }, .{
   3763                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.remainder_int` with of = xword, is = dword.
   3764                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any },
                            // Only matches a source that is already mutable memory.
   3765                     .patterns = &.{
   3766                         .{ .src = .{ .mut_mem, .none } },
   3767                     },
                            // tmp0: general-purpose isize loop index.
   3768                     .extra_temps = .{
   3769                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3770                         .unused,
   3771                         .unused,
   3772                         .unused,
   3773                         .unused,
   3774                         .unused,
   3775                     },
                            // The result reuses src0's location, so the operations below modify it in place.
   3776                     .dst_temps = .{.{ .ref = .src0 }},
                            // The `add` and `xor` below modify flags.
   3777                     .clobbers = .{ .eflags = true },
   3778                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3779                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): NOT two adjacent qwords (displacements -16 and -16 + 8), advance tmp0 by 16,
                                // and jump back on the no-carry condition.
   3780                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
   3781                         .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
   3782                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3783                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: XOR the dword at displacement -16 with an immediate derived from src0 (`.add_umax`;
                                // presumably a mask of only the valid remainder bits, confirm) rather than a full NOT.
   3784                         .{ ._, ._, .xor, .memad(.dst0d, .add_size, -16), .sa(.src0, .add_umax), ._, ._ },
   3785                     } },
   3786                 }, .{
   3787                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.remainder_int` with of = xword, is = dword.
   3788                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3789                     .patterns = &.{
   3790                         .{ .src = .{ .to_mem, .none } },
   3791                     },
                            // tmp0: general-purpose isize loop index (reused as dword scratch in the tail). tmp1: u64 scratch.
   3792                     .extra_temps = .{
   3793                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3794                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3795                         .unused,
   3796                         .unused,
   3797                         .unused,
   3798                         .unused,
   3799                     },
   3800                     .dst_temps = .{.mem},
                            // The `add` and `xor` below modify flags.
   3801                     .clobbers = .{ .eflags = true },
   3802                     .each = .{ .once = &.{
                                // tmp0 is seeded from 16 and src0's size (`.sia(16, .src0, .sub_size)`; presumably 16 - size, confirm).
   3803                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): load one source qword, NOT it, store it to the destination,
                                // advance tmp0 by 8, and jump back on the no-carry condition.
   3804                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ },
   3805                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3806                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ },
   3807                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3808                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: load the dword at displacement -16, XOR it with an immediate derived from src0
                                // (`.add_umax`; presumably a mask of only the valid remainder bits, confirm), and store it.
   3809                         .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ },
   3810                         .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ },
   3811                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ },
                                // Write 0 to the next dword and the following qword of the destination.
   3812                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ },
   3813                         .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ },
   3814                     } },
   3815                 }, .{
   3816                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.remainder_int` with of = qword, is = dword.
   3817                     .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any },
                            // Only matches a source that is already mutable memory.
   3818                     .patterns = &.{
   3819                         .{ .src = .{ .mut_mem, .none } },
   3820                     },
                            // tmp0: general-purpose isize loop index.
   3821                     .extra_temps = .{
   3822                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3823                         .unused,
   3824                         .unused,
   3825                         .unused,
   3826                         .unused,
   3827                         .unused,
   3828                     },
                            // The result reuses src0's location, so the operations below modify it in place.
   3829                     .dst_temps = .{.{ .ref = .src0 }},
                            // The `add` and `xor` below modify flags.
   3830                     .clobbers = .{ .eflags = true },
   3831                     .each = .{ .once = &.{
                                // tmp0 is seeded from 8 and src0's size (`.sia(8, .src0, .sub_size)`; presumably 8 - size, confirm).
   3832                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): NOT one qword per iteration, advance tmp0 by 8,
                                // and jump back on the no-carry condition.
   3833                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ },
   3834                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3835                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: XOR the dword at displacement -8 with an immediate derived from src0 (`.add_umax`;
                                // presumably a mask of only the valid remainder bits, confirm) rather than a full NOT.
   3836                         .{ ._, ._, .xor, .memad(.dst0d, .add_size, -8), .sa(.src0, .add_umax), ._, ._ },
   3837                     } },
   3838                 }, .{
   3839                     .required_features = .{ .@"64bit", null, null, null },
                            // Entry gated on 64bit; src0 matches `.remainder_int` with of = qword, is = dword.
   3840                     .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any },
                            // The source is forced into memory; the result is a separate memory temp.
   3841                     .patterns = &.{
   3842                         .{ .src = .{ .to_mem, .none } },
   3843                     },
                            // tmp0: general-purpose isize loop index (reused as dword scratch in the tail). tmp1: u64 scratch.
   3844                     .extra_temps = .{
   3845                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3846                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3847                         .unused,
   3848                         .unused,
   3849                         .unused,
   3850                         .unused,
   3851                     },
   3852                     .dst_temps = .{.mem},
                            // The `add` and `xor` below modify flags.
   3853                     .clobbers = .{ .eflags = true },
   3854                     .each = .{ .once = &.{
                                // tmp0 is seeded from 8 and src0's size (`.sia(8, .src0, .sub_size)`; presumably 8 - size, confirm).
   3855                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
                                // Loop (label 0): load one source qword, NOT it, store it to the destination,
                                // advance tmp0 by 8, and jump back on the no-carry condition.
   3856                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
   3857                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3858                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
   3859                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3860                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                // Tail: load the dword at displacement -8, XOR it with an immediate derived from src0
                                // (`.add_umax`; presumably a mask of only the valid remainder bits, confirm), store it,
                                // then write 0 to the dword that follows it in the destination.
   3861                         .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ },
   3862                         .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ },
   3863                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ },
   3864                         .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ },
   3865                     } },
   3866                 }, .{
   3867                     .required_features = .{ .@"64bit", null, null, null },
   3868                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   3869                     .patterns = &.{
   3870                         .{ .src = .{ .mut_mem, .none } },
   3871                     },
   3872                     .extra_temps = .{
   3873                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3874                         .unused,
   3875                         .unused,
   3876                         .unused,
   3877                         .unused,
   3878                         .unused,
   3879                     },
   3880                     .dst_temps = .{.{ .ref = .src0 }},
   3881                     .clobbers = .{ .eflags = true },
   3882                     .each = .{ .once = &.{
   3883                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
   3884                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ },
   3885                         .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ },
   3886                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   3887                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3888                         .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
   3889                         .{ ._, ._, .xor, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
   3890                     } },
   3891                 }, .{
   3892                     .required_features = .{ .@"64bit", null, null, null },
   3893                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   3894                     .patterns = &.{
   3895                         .{ .src = .{ .to_mem, .none } },
   3896                     },
   3897                     .extra_temps = .{
   3898                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3899                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3900                         .unused,
   3901                         .unused,
   3902                         .unused,
   3903                         .unused,
   3904                     },
   3905                     .dst_temps = .{.mem},
   3906                     .clobbers = .{ .eflags = true },
   3907                     .each = .{ .once = &.{
   3908                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
   3909                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ },
   3910                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3911                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ },
   3912                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3913                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3914                         .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
   3915                         .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
   3916                         .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
   3917                         .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
   3918                     } },
   3919                 }, .{
   3920                     .required_features = .{ .@"64bit", null, null, null },
   3921                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   3922                     .patterns = &.{
   3923                         .{ .src = .{ .mut_mem, .none } },
   3924                     },
   3925                     .extra_temps = .{
   3926                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3927                         .unused,
   3928                         .unused,
   3929                         .unused,
   3930                         .unused,
   3931                         .unused,
   3932                     },
   3933                     .dst_temps = .{.{ .ref = .src0 }},
   3934                     .clobbers = .{ .eflags = true },
   3935                     .each = .{ .once = &.{
   3936                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
   3937                         .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ },
   3938                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3939                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3940                         .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
   3941                         .{ ._, ._, .xor, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
   3942                     } },
   3943                 }, .{
   3944                     .required_features = .{ .@"64bit", null, null, null },
   3945                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   3946                     .patterns = &.{
   3947                         .{ .src = .{ .to_mem, .none } },
   3948                     },
   3949                     .extra_temps = .{
   3950                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   3951                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   3952                         .unused,
   3953                         .unused,
   3954                         .unused,
   3955                         .unused,
   3956                     },
   3957                     .dst_temps = .{.mem},
   3958                     .clobbers = .{ .eflags = true },
   3959                     .each = .{ .once = &.{
   3960                         .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ },
   3961                         .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ },
   3962                         .{ ._, ._, .not, .tmp1q, ._, ._, ._ },
   3963                         .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ },
   3964                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   3965                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   3966                         .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
   3967                         .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ },
   3968                         .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
   3969                     } },
   3970                 }, .{
   3971                     .required_features = .{ .mmx, null, null, null },
   3972                     .src_constraints = .{ .{ .signed_int_or_full_vec = .qword }, .any },
   3973                     .patterns = &.{
   3974                         .{ .src = .{ .mem, .none } },
   3975                         .{ .src = .{ .to_mm, .none } },
   3976                     },
   3977                     .dst_temps = .{.{ .rc = .mmx }},
   3978                     .each = .{ .once = &.{
   3979                         .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ },
   3980                         .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ },
   3981                     } },
   3982                 }, .{
   3983                     .required_features = .{ .mmx, null, null, null },
   3984                     .src_constraints = .{ .{ .unsigned_int_vec = .qword }, .any },
   3985                     .patterns = &.{
   3986                         .{ .src = .{ .to_mut_mm, .none } },
   3987                     },
   3988                     .extra_temps = .{
   3989                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   3990                         .{ .kind = .{ .umax_mem = .src0 } },
   3991                         .unused,
   3992                         .unused,
   3993                         .unused,
   3994                         .unused,
   3995                     },
   3996                     .dst_temps = .{.{ .ref = .src0 }},
   3997                     .each = .{ .once = &.{
   3998                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   3999                         .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ },
   4000                     } },
   4001                 }, .{
   4002                     .required_features = .{ .avx, null, null, null },
   4003                     .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any },
   4004                     .patterns = &.{
   4005                         .{ .src = .{ .mem, .none } },
   4006                         .{ .src = .{ .to_xmm, .none } },
   4007                     },
   4008                     .dst_temps = .{.{ .rc = .sse }},
   4009                     .each = .{ .once = &.{
   4010                         .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ },
   4011                         .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ },
   4012                     } },
   4013                 }, .{
   4014                     .required_features = .{ .avx, null, null, null },
   4015                     .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any },
   4016                     .patterns = &.{
   4017                         .{ .src = .{ .to_xmm, .none } },
   4018                     },
   4019                     .extra_temps = .{
   4020                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4021                         .{ .kind = .{ .umax_mem = .src0 } },
   4022                         .unused,
   4023                         .unused,
   4024                         .unused,
   4025                         .unused,
   4026                     },
   4027                     .dst_temps = .{.{ .rc = .sse }},
   4028                     .each = .{ .once = &.{
   4029                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   4030                         .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
   4031                     } },
   4032                 }, .{
   4033                     .required_features = .{ .sse2, null, null, null },
   4034                     .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any },
   4035                     .patterns = &.{
   4036                         .{ .src = .{ .mem, .none } },
   4037                         .{ .src = .{ .to_xmm, .none } },
   4038                     },
   4039                     .dst_temps = .{.{ .rc = .sse }},
   4040                     .each = .{ .once = &.{
   4041                         .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ },
   4042                         .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ },
   4043                     } },
   4044                 }, .{
   4045                     .required_features = .{ .sse2, null, null, null },
   4046                     .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any },
   4047                     .patterns = &.{
   4048                         .{ .src = .{ .to_mut_xmm, .none } },
   4049                     },
   4050                     .extra_temps = .{
   4051                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4052                         .{ .kind = .{ .umax_mem = .src0 } },
   4053                         .unused,
   4054                         .unused,
   4055                         .unused,
   4056                         .unused,
   4057                     },
   4058                     .dst_temps = .{.{ .ref = .src0 }},
   4059                     .each = .{ .once = &.{
   4060                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   4061                         .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
   4062                     } },
   4063                 }, .{
   4064                     .required_features = .{ .sse, null, null, null },
   4065                     .src_constraints = .{ .{ .vec = .xword }, .any },
   4066                     .patterns = &.{
   4067                         .{ .src = .{ .to_mut_xmm, .none } },
   4068                     },
   4069                     .extra_temps = .{
   4070                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4071                         .{ .kind = .{ .umax_mem = .src0 } },
   4072                         .unused,
   4073                         .unused,
   4074                         .unused,
   4075                         .unused,
   4076                     },
   4077                     .dst_temps = .{.{ .ref = .src0 }},
   4078                     .each = .{ .once = &.{
   4079                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   4080                         .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ },
   4081                     } },
   4082                 }, .{
   4083                     .required_features = .{ .avx2, null, null, null },
   4084                     .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any },
   4085                     .patterns = &.{
   4086                         .{ .src = .{ .mem, .none } },
   4087                         .{ .src = .{ .to_ymm, .none } },
   4088                     },
   4089                     .dst_temps = .{.{ .rc = .sse }},
   4090                     .each = .{ .once = &.{
   4091                         .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ },
   4092                         .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ },
   4093                     } },
   4094                 }, .{
   4095                     .required_features = .{ .avx2, null, null, null },
   4096                     .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any },
   4097                     .patterns = &.{
   4098                         .{ .src = .{ .to_ymm, .none } },
   4099                     },
   4100                     .extra_temps = .{
   4101                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4102                         .{ .kind = .{ .umax_mem = .src0 } },
   4103                         .unused,
   4104                         .unused,
   4105                         .unused,
   4106                         .unused,
   4107                     },
   4108                     .dst_temps = .{.{ .rc = .sse }},
   4109                     .each = .{ .once = &.{
   4110                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   4111                         .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
   4112                     } },
   4113                 }, .{
   4114                     .required_features = .{ .avx, null, null, null },
   4115                     .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any },
   4116                     .patterns = &.{
   4117                         .{ .src = .{ .mem, .none } },
   4118                         .{ .src = .{ .to_ymm, .none } },
   4119                     },
   4120                     .dst_temps = .{.{ .rc = .sse }},
   4121                     .each = .{ .once = &.{
   4122                         .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) },
   4123                         .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ },
   4124                     } },
   4125                 }, .{
   4126                     .required_features = .{ .avx, null, null, null },
   4127                     .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any },
   4128                     .patterns = &.{
   4129                         .{ .src = .{ .to_ymm, .none } },
   4130                     },
   4131                     .extra_temps = .{
   4132                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4133                         .{ .kind = .{ .umax_mem = .src0 } },
   4134                         .unused,
   4135                         .unused,
   4136                         .unused,
   4137                         .unused,
   4138                     },
   4139                     .dst_temps = .{.{ .rc = .sse }},
   4140                     .each = .{ .once = &.{
   4141                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
   4142                         .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
   4143                     } },
   4144                 }, .{
   4145                     .required_features = .{ .@"64bit", null, null, null },
   4146                     .patterns = &.{
   4147                         .{ .src = .{ .to_mem, .none } },
   4148                     },
   4149                     .extra_temps = .{
   4150                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   4151                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4152                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   4153                         .{ .kind = .{ .umax_mem = .src0 } },
   4154                         .unused,
   4155                         .unused,
   4156                     },
   4157                     .dst_temps = .{.mem},
   4158                     .each = .{ .once = &.{
   4159                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ },
   4160                         .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ },
   4161                         .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_src0_size), ._, ._ },
   4162                         .{ ._, ._, .xor, .tmp2q, .leaia(.qword, .tmp1, .tmp0, .add_src0_size), ._, ._ },
   4163                         .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_src0_size), .tmp2q, ._, ._ },
   4164                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   4165                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   4166                     } },
   4167                 }, .{
   4168                     .patterns = &.{
   4169                         .{ .src = .{ .to_mem, .none } },
   4170                     },
   4171                     .extra_temps = .{
   4172                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   4173                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   4174                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4175                         .{ .kind = .{ .umax_mem = .src0 } },
   4176                         .unused,
   4177                         .unused,
   4178                     },
   4179                     .dst_temps = .{.mem},
   4180                     .each = .{ .once = &.{
   4181                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ },
   4182                         .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ },
   4183                         .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_src0_size), ._, ._ },
   4184                         .{ ._, ._, .xor, .tmp2d, .leaia(.dword, .tmp1, .tmp0, .add_src0_size), ._, ._ },
   4185                         .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_src0_size), .tmp2d, ._, ._ },
   4186                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
   4187                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   4188                     } },
   4189                 } }) catch |err| switch (err) {
   4190                     error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{
   4191                         @tagName(air_tag),
   4192                         cg.typeOf(ty_op.operand).fmt(pt),
   4193                         ops[0].tracking(cg),
   4194                     }),
   4195                     else => |e| return e,
   4196                 };
   4197                 for (ops) |op| for (res) |r| {
   4198                     if (op.index == r.index) break;
   4199                 } else try op.die(cg);
   4200                 try res[0].moveTo(inst, cg);
   4201             },
   4202 
   4203             .block => if (use_old) try cg.airBlock(inst) else {
                        // Lowers an AIR block. With `use_old` set this prong defers to `cg.airBlock`;
                        // otherwise the `Air.Block` payload is decoded here and the body is lowered
                        // between the enter/leave-block debug pseudo instructions.
   4204                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   4205                 const extra = cg.air.extraData(Air.Block, ty_pl.payload);
   4206                 try cg.asmPseudo(.pseudo_dbg_enter_block_none);
                        // The body is the `body_len` entries of `cg.air.extra` starting at `extra.end`.
   4207                 try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len]));
   4208                 try cg.asmPseudo(.pseudo_dbg_leave_block_none);
   4209             },
   4210             .loop => if (use_old) try cg.airLoop(inst) else {
                        // Lowers an AIR loop. With `use_old` set this prong defers to `cg.airLoop`;
                        // otherwise it registers the loop in `cg.loops` and generates the body.
   4211                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   4212                 const extra = cg.air.extraData(Air.Block, ty_pl.payload);
   4213                 cg.scope_generation += 1;
                        // Record the state saved at loop entry together with the current length of
                        // `cg.mir_instructions`; the `.repeat` prong reads both back via `cg.loops`.
                        // NOTE(review): `.target` is presumably the MIR index of the first
                        // instruction of the loop body - confirm against `asmJmpReloc`.
   4214                 try cg.loops.putNoClobber(cg.gpa, inst, .{
   4215                     .state = try cg.saveState(),
   4216                     .target = @intCast(cg.mir_instructions.len),
   4217                 });
                        // On leaving this prong, drop the entry again and assert it was still there.
   4218                 defer assert(cg.loops.remove(inst));
                        // The body is the `body_len` entries of `cg.air.extra` starting at `extra.end`.
   4219                 try cg.genBodyBlock(@ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len]));
   4220             },
   4221             .repeat => if (use_old) try cg.airRepeat(inst) else {
                        // Lowers an AIR repeat. With `use_old` set this prong defers to
                        // `cg.airRepeat`; otherwise it restores the state stored for the loop
                        // named by `repeat.loop_inst` and passes `loop.target` to `asmJmpReloc`.
   4222                 const repeat = air_datas[@intFromEnum(inst)].repeat;
                        // `.?` asserts that the loop has an entry in `cg.loops` (the `.loop` prong
                        // inserts one before generating its body).
   4223                 const loop = cg.loops.get(repeat.loop_inst).?;
                        // NOTE(review): the option names suggest that fix-up instructions are
                        // emitted while the tracking itself is left untouched - confirm against
                        // `restoreState`.
   4224                 try cg.restoreState(loop.state, &.{}, .{
   4225                     .emit_instructions = true,
   4226                     .update_tracking = false,
   4227                     .resurrect = false,
   4228                     .close_scope = true,
   4229                 });
                        // The value returned by `asmJmpReloc` is deliberately discarded.
   4230                 _ = try cg.asmJmpReloc(loop.target);
   4231             },
   4232             .br => try cg.airBr(inst),
                    // `.trap` and `.breakpoint` each pass a single operand-less mnemonic to
                    // `asmOpOnly`: `ud2` for a trap and `int3` for a breakpoint.
   4233             .trap => try cg.asmOpOnly(.{ ._, .ud2 }),
   4234             .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }),
   4235             .ret_addr => if (use_old) try cg.airRetAddr(inst) else {
                        // Produces the return address as a `usize` result. With `use_old` set this
                        // prong defers to `cg.airRetAddr`; otherwise the value starts out as a load
                        // from the `.ret_addr` frame index.
   4236                 var slot = try cg.tempInit(.usize, .{ .load_frame = .{
   4237                     .index = .ret_addr,
   4238                 } });
                        // Call `toRegClass` repeatedly until it returns false.
                        // NOTE(review): presumably this leaves the value in a general purpose
                        // register - confirm against `Temp.toRegClass`.
   4239                 while (try slot.toRegClass(true, .general_purpose, cg)) {}
   4240                 try slot.moveTo(inst, cg);
   4241             },
   4242             .frame_addr => if (use_old) try cg.airFrameAddress(inst) else {
                        // Produces the frame address as a `usize` result. With `use_old` set this
                        // prong defers to `cg.airFrameAddress`; otherwise the result is described
                        // as the address (`lea_frame`) of the `.base_ptr` frame index and then
                        // assigned to `inst`.
   4243                 var slot = try cg.tempInit(.usize, .{ .lea_frame = .{
   4244                     .index = .base_ptr,
   4245                 } });
   4246                 try slot.moveTo(inst, cg);
   4247             },
   4248             .call => try cg.airCall(inst, .auto, .{ .safety = true }),
                    // The remaining call flavors go through the same `airCall` helper and differ
                    // only in the modifier argument; every one of them passes `.safety = true`.
   4249             .call_always_tail => try cg.airCall(inst, .always_tail, .{ .safety = true }),
   4250             .call_never_tail => try cg.airCall(inst, .never_tail, .{ .safety = true }),
   4251             .call_never_inline => try cg.airCall(inst, .never_inline, .{ .safety = true }),
   4252 
   4253             .clz => |air_tag| if (use_old) try cg.airClz(inst) else {
   4254                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   4255                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   4256                 var res: [1]Temp = undefined;
   4257                 cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{
   4258                     .required_features = .{ .slow_incdec, null, null, null },
   4259                     .src_constraints = .{ .{ .exact_signed_int = 1 }, .any },
   4260                     .patterns = &.{
   4261                         .{ .src = .{ .mut_mem, .none } },
   4262                         .{ .src = .{ .to_mut_gpr, .none } },
   4263                     },
   4264                     .dst_temps = .{.{ .ref = .src0 }},
   4265                     .clobbers = .{ .eflags = true },
   4266                     .each = .{ .once = &.{
   4267                         .{ ._, ._, .add, .dst0b, .si(1), ._, ._ },
   4268                     } },
   4269                 }, .{
   4270                     .src_constraints = .{ .{ .exact_signed_int = 1 }, .any },
   4271                     .patterns = &.{
   4272                         .{ .src = .{ .mut_mem, .none } },
   4273                         .{ .src = .{ .to_mut_gpr, .none } },
   4274                     },
   4275                     .dst_temps = .{.{ .ref = .src0 }},
   4276                     .clobbers = .{ .eflags = true },
   4277                     .each = .{ .once = &.{
   4278                         .{ ._, ._, .inc, .dst0b, ._, ._, ._ },
   4279                     } },
   4280                 }, .{
   4281                     .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any },
   4282                     .patterns = &.{
   4283                         .{ .src = .{ .mut_mem, .none } },
   4284                         .{ .src = .{ .to_mut_gpr, .none } },
   4285                     },
   4286                     .dst_temps = .{.{ .ref = .src0 }},
   4287                     .clobbers = .{ .eflags = true },
   4288                     .each = .{ .once = &.{
   4289                         .{ ._, ._, .xor, .dst0b, .si(1), ._, ._ },
   4290                     } },
   4291                 }, .{
   4292                     .required_features = .{ .lzcnt, null, null, null },
   4293                     .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any },
   4294                     .patterns = &.{
   4295                         .{ .src = .{ .mem, .none } },
   4296                         .{ .src = .{ .to_gpr, .none } },
   4297                     },
   4298                     .dst_temps = .{.{ .rc = .general_purpose }},
   4299                     .clobbers = .{ .eflags = true },
   4300                     .each = .{ .once = &.{
   4301                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4302                         .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ },
   4303                         .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   4304                     } },
   4305                 }, .{
   4306                     .required_features = .{ .lzcnt, null, null, null },
   4307                     .src_constraints = .{ .{ .signed_int = .byte }, .any },
   4308                     .patterns = &.{
   4309                         .{ .src = .{ .mem, .none } },
   4310                         .{ .src = .{ .to_gpr, .none } },
   4311                     },
   4312                     .dst_temps = .{.{ .rc = .general_purpose }},
   4313                     .clobbers = .{ .eflags = true },
   4314                     .each = .{ .once = &.{
   4315                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4316                         .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
   4317                         .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ },
   4318                         .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   4319                     } },
   4320                 }, .{
   4321                     .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
   4322                     .src_constraints = .{ .{ .exact_int = 16 }, .any },
   4323                     .patterns = &.{
   4324                         .{ .src = .{ .to_mut_gpr, .none } },
   4325                     },
   4326                     .dst_temps = .{.{ .ref = .src0 }},
   4327                     .clobbers = .{ .eflags = true },
   4328                     .each = .{ .once = &.{
   4329                         .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
   4330                     } },
   4331                 }, .{
   4332                     .required_features = .{ .lzcnt, null, null, null },
   4333                     .src_constraints = .{ .{ .exact_int = 16 }, .any },
   4334                     .patterns = &.{
   4335                         .{ .src = .{ .mem, .none } },
   4336                         .{ .src = .{ .to_gpr, .none } },
   4337                     },
   4338                     .dst_temps = .{.{ .rc = .general_purpose }},
   4339                     .clobbers = .{ .eflags = true },
   4340                     .each = .{ .once = &.{
   4341                         .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
   4342                     } },
   4343                 }, .{
   4344                     .required_features = .{ .lzcnt, null, null, null },
   4345                     .src_constraints = .{ .{ .signed_int = .word }, .any },
   4346                     .patterns = &.{
   4347                         .{ .src = .{ .to_mut_gpr, .none } },
   4348                     },
   4349                     .dst_temps = .{.{ .ref = .src0 }},
   4350                     .clobbers = .{ .eflags = true },
   4351                     .each = .{ .once = &.{
   4352                         .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
   4353                         .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
   4354                         .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ },
   4355                     } },
   4356                 }, .{
   4357                     .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
   4358                     .src_constraints = .{ .{ .unsigned_int = .word }, .any },
   4359                     .patterns = &.{
   4360                         .{ .src = .{ .to_mut_gpr, .none } },
   4361                     },
   4362                     .dst_temps = .{.{ .ref = .src0 }},
   4363                     .clobbers = .{ .eflags = true },
   4364                     .each = .{ .once = &.{
   4365                         .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
   4366                         .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ },
   4367                     } },
   4368                 }, .{
   4369                     .required_features = .{ .lzcnt, null, null, null },
   4370                     .src_constraints = .{ .{ .unsigned_int = .word }, .any },
   4371                     .patterns = &.{
   4372                         .{ .src = .{ .mem, .none } },
   4373                         .{ .src = .{ .to_gpr, .none } },
   4374                     },
   4375                     .dst_temps = .{.{ .rc = .general_purpose }},
   4376                     .clobbers = .{ .eflags = true },
   4377                     .each = .{ .once = &.{
   4378                         .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ },
   4379                         .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ },
   4380                     } },
   4381                 }, .{
   4382                     .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
   4383                     .src_constraints = .{ .{ .exact_int = 32 }, .any },
   4384                     .patterns = &.{
   4385                         .{ .src = .{ .to_mut_gpr, .none } },
   4386                     },
   4387                     .dst_temps = .{.{ .ref = .src0 }},
   4388                     .clobbers = .{ .eflags = true },
   4389                     .each = .{ .once = &.{
   4390                         .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
   4391                     } },
   4392                 }, .{
   4393                     .required_features = .{ .lzcnt, null, null, null },
   4394                     .src_constraints = .{ .{ .exact_int = 32 }, .any },
   4395                     .patterns = &.{
   4396                         .{ .src = .{ .mem, .none } },
   4397                         .{ .src = .{ .to_gpr, .none } },
   4398                     },
   4399                     .dst_temps = .{.{ .rc = .general_purpose }},
   4400                     .clobbers = .{ .eflags = true },
   4401                     .each = .{ .once = &.{
   4402                         .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
   4403                     } },
   4404                 }, .{
   4405                     .required_features = .{ .lzcnt, null, null, null },
   4406                     .src_constraints = .{ .{ .signed_int = .dword }, .any },
   4407                     .patterns = &.{
   4408                         .{ .src = .{ .to_mut_gpr, .none } },
   4409                     },
   4410                     .dst_temps = .{.{ .ref = .src0 }},
   4411                     .clobbers = .{ .eflags = true },
   4412                     .each = .{ .once = &.{
   4413                         .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
   4414                         .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
   4415                         .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   4416                     } },
   4417                 }, .{
   4418                     .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null },
   4419                     .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
   4420                     .patterns = &.{
   4421                         .{ .src = .{ .to_mut_gpr, .none } },
   4422                     },
   4423                     .dst_temps = .{.{ .ref = .src0 }},
   4424                     .clobbers = .{ .eflags = true },
   4425                     .each = .{ .once = &.{
   4426                         .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
   4427                         .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   4428                     } },
   4429                 }, .{
   4430                     .required_features = .{ .lzcnt, null, null, null },
   4431                     .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
   4432                     .patterns = &.{
   4433                         .{ .src = .{ .mem, .none } },
   4434                         .{ .src = .{ .to_gpr, .none } },
   4435                     },
   4436                     .dst_temps = .{.{ .rc = .general_purpose }},
   4437                     .clobbers = .{ .eflags = true },
   4438                     .each = .{ .once = &.{
   4439                         .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ },
   4440                         .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   4441                     } },
   4442                 }, .{
   4443                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   4444                     .src_constraints = .{ .{ .exact_int = 64 }, .any },
   4445                     .patterns = &.{
   4446                         .{ .src = .{ .to_mut_gpr, .none } },
   4447                     },
   4448                     .dst_temps = .{.{ .ref = .src0 }},
   4449                     .clobbers = .{ .eflags = true },
   4450                     .each = .{ .once = &.{
   4451                         .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
   4452                     } },
   4453                 }, .{
   4454                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   4455                     .src_constraints = .{ .{ .exact_int = 64 }, .any },
   4456                     .patterns = &.{
   4457                         .{ .src = .{ .mem, .none } },
   4458                         .{ .src = .{ .to_gpr, .none } },
   4459                     },
   4460                     .dst_temps = .{.{ .rc = .general_purpose }},
   4461                     .clobbers = .{ .eflags = true },
   4462                     .each = .{ .once = &.{
   4463                         .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
   4464                     } },
   4465                 }, .{
   4466                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   4467                     .src_constraints = .{ .{ .signed_int = .qword }, .any },
   4468                     .patterns = &.{
   4469                         .{ .src = .{ .mem, .none } },
   4470                         .{ .src = .{ .to_gpr, .none } },
   4471                     },
   4472                     .dst_temps = .{.{ .rc = .general_purpose }},
   4473                     .clobbers = .{ .eflags = true },
   4474                     .each = .{ .once = &.{
   4475                         .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
   4476                         .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ },
   4477                         .{ ._, ._, .lzcnt, .dst0q, .dst0q, ._, ._ },
   4478                         .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ },
   4479                     } },
   4480                 }, .{
   4481                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   4482                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   4483                     .patterns = &.{
   4484                         .{ .src = .{ .to_mut_gpr, .none } },
   4485                     },
   4486                     .dst_temps = .{.{ .ref = .src0 }},
   4487                     .clobbers = .{ .eflags = true },
   4488                     .each = .{ .once = &.{
   4489                         .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
   4490                         .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ },
   4491                     } },
   4492                 }, .{
   4493                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   4494                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   4495                     .patterns = &.{
   4496                         .{ .src = .{ .mem, .none } },
   4497                         .{ .src = .{ .to_gpr, .none } },
   4498                     },
   4499                     .dst_temps = .{.{ .rc = .general_purpose }},
   4500                     .clobbers = .{ .eflags = true },
   4501                     .each = .{ .once = &.{
   4502                         .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ },
   4503                         .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ },
   4504                     } },
   4505                 }, .{
   4506                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4507                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any },
   4508                     .patterns = &.{
   4509                         .{ .src = .{ .mem, .none } },
   4510                         .{ .src = .{ .to_gpr, .none } },
   4511                     },
   4512                     .extra_temps = .{
   4513                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4514                         .unused,
   4515                         .unused,
   4516                         .unused,
   4517                         .unused,
   4518                         .unused,
   4519                     },
   4520                     .dst_temps = .{.{ .rc = .general_purpose }},
   4521                     .clobbers = .{ .eflags = true },
   4522                     .each = .{ .once = &.{
   4523                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4524                         .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
   4525                         .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4526                         .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ },
   4527                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4528                     } },
   4529                 }, .{
   4530                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4531                     .src_constraints = .{ .{ .signed_po2_int = .byte }, .any },
   4532                     .patterns = &.{
   4533                         .{ .src = .{ .mem, .none } },
   4534                         .{ .src = .{ .to_gpr, .none } },
   4535                     },
   4536                     .extra_temps = .{
   4537                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4538                         .unused,
   4539                         .unused,
   4540                         .unused,
   4541                         .unused,
   4542                         .unused,
   4543                     },
   4544                     .dst_temps = .{.{ .rc = .general_purpose }},
   4545                     .clobbers = .{ .eflags = true },
   4546                     .each = .{ .once = &.{
   4547                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4548                         .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
   4549                         .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
   4550                         .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4551                         .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ },
   4552                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4553                     } },
   4554                 }, .{
   4555                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4556                     .src_constraints = .{ .{ .signed_int = .byte }, .any },
   4557                     .patterns = &.{
   4558                         .{ .src = .{ .mem, .none } },
   4559                         .{ .src = .{ .to_gpr, .none } },
   4560                     },
   4561                     .extra_temps = .{
   4562                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4563                         .unused,
   4564                         .unused,
   4565                         .unused,
   4566                         .unused,
   4567                         .unused,
   4568                     },
   4569                     .dst_temps = .{.{ .rc = .general_purpose }},
   4570                     .clobbers = .{ .eflags = true },
   4571                     .each = .{ .once = &.{
   4572                         .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
   4573                         .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ },
   4574                         .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
   4575                         .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
   4576                         .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ },
   4577                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4578                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   4579                     } },
   4580                 }, .{
   4581                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4582                     .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
   4583                     .patterns = &.{
   4584                         .{ .src = .{ .mem, .none } },
   4585                         .{ .src = .{ .to_gpr, .none } },
   4586                     },
   4587                     .extra_temps = .{
   4588                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4589                         .unused,
   4590                         .unused,
   4591                         .unused,
   4592                         .unused,
   4593                         .unused,
   4594                     },
   4595                     .dst_temps = .{.{ .rc = .general_purpose }},
   4596                     .clobbers = .{ .eflags = true },
   4597                     .each = .{ .once = &.{
   4598                         .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
   4599                         .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
   4600                         .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
   4601                         .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ },
   4602                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4603                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   4604                     } },
   4605                 }, .{
   4606                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4607                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any },
   4608                     .patterns = &.{
   4609                         .{ .src = .{ .mem, .none } },
   4610                         .{ .src = .{ .to_gpr, .none } },
   4611                     },
   4612                     .dst_temps = .{.{ .rc = .general_purpose }},
   4613                     .clobbers = .{ .eflags = true },
   4614                     .each = .{ .once = &.{
   4615                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4616                         .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
   4617                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   4618                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4619                         .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4620                     } },
   4621                 }, .{
   4622                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4623                     .src_constraints = .{ .{ .signed_po2_int = .byte }, .any },
   4624                     .patterns = &.{
   4625                         .{ .src = .{ .mem, .none } },
   4626                         .{ .src = .{ .to_gpr, .none } },
   4627                     },
   4628                     .dst_temps = .{.{ .rc = .general_purpose }},
   4629                     .clobbers = .{ .eflags = true },
   4630                     .each = .{ .once = &.{
   4631                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4632                         .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
   4633                         .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ },
   4634                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   4635                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4636                         .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4637                     } },
   4638                 }, .{
   4639                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4640                     .src_constraints = .{ .{ .signed_int = .byte }, .any },
   4641                     .patterns = &.{
   4642                         .{ .src = .{ .mem, .none } },
   4643                         .{ .src = .{ .to_gpr, .none } },
   4644                     },
   4645                     .extra_temps = .{
   4646                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4647                         .unused,
   4648                         .unused,
   4649                         .unused,
   4650                         .unused,
   4651                         .unused,
   4652                     },
   4653                     .dst_temps = .{.{ .rc = .general_purpose }},
   4654                     .clobbers = .{ .eflags = true },
   4655                     .each = .{ .once = &.{
   4656                         .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
   4657                         .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ },
   4658                         .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
   4659                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   4660                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   4661                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   4662                         .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ },
   4663                     } },
   4664                 }, .{
   4665                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4666                     .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
   4667                     .patterns = &.{
   4668                         .{ .src = .{ .mem, .none } },
   4669                         .{ .src = .{ .to_gpr, .none } },
   4670                     },
   4671                     .extra_temps = .{
   4672                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4673                         .unused,
   4674                         .unused,
   4675                         .unused,
   4676                         .unused,
   4677                         .unused,
   4678                     },
   4679                     .dst_temps = .{.{ .rc = .general_purpose }},
   4680                     .clobbers = .{ .eflags = true },
   4681                     .each = .{ .once = &.{
   4682                         .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
   4683                         .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ },
   4684                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   4685                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   4686                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   4687                         .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ },
   4688                     } },
   4689                 }, .{
   4690                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any },
   4691                     .patterns = &.{
   4692                         .{ .src = .{ .mem, .none } },
   4693                         .{ .src = .{ .to_gpr, .none } },
   4694                     },
   4695                     .extra_temps = .{
   4696                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4697                         .unused,
   4698                         .unused,
   4699                         .unused,
   4700                         .unused,
   4701                         .unused,
   4702                     },
   4703                     .dst_temps = .{.{ .rc = .general_purpose }},
   4704                     .clobbers = .{ .eflags = true },
   4705                     .each = .{ .once = &.{
   4706                         .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
   4707                         .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4708                         .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ },
   4709                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4710                     } },
   4711                 }, .{
   4712                     .src_constraints = .{ .{ .signed_po2_int = .byte }, .any },
   4713                     .patterns = &.{
   4714                         .{ .src = .{ .mem, .none } },
   4715                         .{ .src = .{ .to_gpr, .none } },
   4716                     },
   4717                     .extra_temps = .{
   4718                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4719                         .unused,
   4720                         .unused,
   4721                         .unused,
   4722                         .unused,
   4723                         .unused,
   4724                     },
   4725                     .dst_temps = .{.{ .rc = .general_purpose }},
   4726                     .clobbers = .{ .eflags = true },
   4727                     .each = .{ .once = &.{
   4728                         .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ },
   4729                         .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ },
   4730                         .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4731                         .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ },
   4732                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4733                     } },
   4734                 }, .{
   4735                     .src_constraints = .{ .{ .signed_int = .byte }, .any },
   4736                     .patterns = &.{
   4737                         .{ .src = .{ .mem, .none } },
   4738                         .{ .src = .{ .to_gpr, .none } },
   4739                     },
   4740                     .extra_temps = .{
   4741                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4742                         .unused,
   4743                         .unused,
   4744                         .unused,
   4745                         .unused,
   4746                         .unused,
   4747                     },
   4748                     .dst_temps = .{.{ .rc = .general_purpose }},
   4749                     .clobbers = .{ .eflags = true },
   4750                     .each = .{ .once = &.{
   4751                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4752                         .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ },
   4753                         .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
   4754                         .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ },
   4755                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4756                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   4757                     } },
   4758                 }, .{
   4759                     .src_constraints = .{ .{ .unsigned_int = .byte }, .any },
   4760                     .patterns = &.{
   4761                         .{ .src = .{ .mem, .none } },
   4762                         .{ .src = .{ .to_gpr, .none } },
   4763                     },
   4764                     .extra_temps = .{
   4765                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   4766                         .unused,
   4767                         .unused,
   4768                         .unused,
   4769                         .unused,
   4770                         .unused,
   4771                     },
   4772                     .dst_temps = .{.{ .rc = .general_purpose }},
   4773                     .clobbers = .{ .eflags = true },
   4774                     .each = .{ .once = &.{
   4775                         .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ },
   4776                         .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
   4777                         .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ },
   4778                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4779                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   4780                     } },
   4781                 }, .{
   4782                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4783                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any },
   4784                     .patterns = &.{
   4785                         .{ .src = .{ .to_mut_gpr, .none } },
   4786                     },
   4787                     .dst_temps = .{.{ .rc = .general_purpose }},
   4788                     .clobbers = .{ .eflags = true },
   4789                     .each = .{ .once = &.{
   4790                         .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
   4791                         .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4792                         .{ ._, ._nz, .cmov, .dst0w, .src0w, ._, ._ },
   4793                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4794                     } },
   4795                 }, .{
   4796                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4797                     .src_constraints = .{ .{ .signed_int = .word }, .any },
   4798                     .patterns = &.{
   4799                         .{ .src = .{ .to_mut_gpr, .none } },
   4800                     },
   4801                     .dst_temps = .{.{ .rc = .general_purpose }},
   4802                     .clobbers = .{ .eflags = true },
   4803                     .each = .{ .once = &.{
   4804                         .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
   4805                         .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
   4806                         .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ },
   4807                         .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ },
   4808                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4809                         .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
   4810                     } },
   4811                 }, .{
   4812                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4813                     .src_constraints = .{ .{ .unsigned_int = .word }, .any },
   4814                     .patterns = &.{
   4815                         .{ .src = .{ .to_mut_gpr, .none } },
   4816                     },
   4817                     .dst_temps = .{.{ .rc = .general_purpose }},
   4818                     .clobbers = .{ .eflags = true },
   4819                     .each = .{ .once = &.{
   4820                         .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
   4821                         .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ },
   4822                         .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ },
   4823                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4824                         .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
   4825                     } },
   4826                 }, .{
   4827                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4828                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any },
   4829                     .patterns = &.{
   4830                         .{ .src = .{ .to_mut_gpr, .none } },
   4831                     },
   4832                     .dst_temps = .{.{ .ref = .src0 }},
   4833                     .clobbers = .{ .eflags = true },
   4834                     .each = .{ .once = &.{
   4835                         .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ },
   4836                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   4837                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4838                         .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4839                     } },
   4840                 }, .{
   4841                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4842                     .src_constraints = .{ .{ .signed_int = .word }, .any },
   4843                     .patterns = &.{
   4844                         .{ .src = .{ .to_mut_gpr, .none } },
   4845                     },
   4846                     .dst_temps = .{.{ .rc = .general_purpose }},
   4847                     .clobbers = .{ .eflags = true },
   4848                     .each = .{ .once = &.{
   4849                         .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
   4850                         .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
   4851                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   4852                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   4853                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   4854                         .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
   4855                     } },
   4856                 }, .{
   4857                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4858                     .src_constraints = .{ .{ .unsigned_int = .word }, .any },
   4859                     .patterns = &.{
   4860                         .{ .src = .{ .to_mut_gpr, .none } },
   4861                     },
   4862                     .dst_temps = .{.{ .rc = .general_purpose }},
   4863                     .clobbers = .{ .eflags = true },
   4864                     .each = .{ .once = &.{
   4865                         .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ },
   4866                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   4867                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   4868                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   4869                         .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
   4870                     } },
   4871                 }, .{
   4872                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any },
   4873                     .patterns = &.{
   4874                         .{ .src = .{ .mem, .none } },
   4875                         .{ .src = .{ .to_gpr, .none } },
   4876                     },
   4877                     .dst_temps = .{.{ .rc = .general_purpose }},
   4878                     .clobbers = .{ .eflags = true },
   4879                     .each = .{ .once = &.{
   4880                         .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4881                         .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ },
   4882                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4883                     } },
   4884                 }, .{
                            // Word-sized `.signed_int` source, pattern `.to_mut_gpr`; one u16
                            // general-purpose temp, eflags declared clobbered.
                            // src0w is first and-ed in place with `.sa(.src0, .add_umax)`. tmp0w is seeded
                            // with 0xff, receives the `._r`/`.bs` scan of src0w, and tmp0b is subtracted
                            // from `.sia(-1, .src0, .add_bit_size)` held in dst0b.
                            // NOTE(review): presumably the `and` clears bits above the integer's width and
                            // the 0xff seed is what stays in tmp0 for a zero source (yielding bit_size
                            // after the byte-wide subtraction) - confirm against the helper definitions.
   4885                     .src_constraints = .{ .{ .signed_int = .word }, .any },
   4886                     .patterns = &.{
   4887                         .{ .src = .{ .to_mut_gpr, .none } },
   4888                     },
   4889                     .extra_temps = .{
   4890                         .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
   4891                         .unused,
   4892                         .unused,
   4893                         .unused,
   4894                         .unused,
   4895                         .unused,
   4896                     },
   4897                     .dst_temps = .{.{ .rc = .general_purpose }},
   4898                     .clobbers = .{ .eflags = true },
   4899                     .each = .{ .once = &.{
   4900                         .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ },
   4901                         .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ },
   4902                         .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ },
   4903                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4904                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   4905                     } },
   4906                 }, .{
                            // Word-sized `.unsigned_int` source taken from memory or a GPR; one u16
                            // general-purpose temp, eflags declared clobbered. The source is never written.
                            // tmp0w is seeded with 0xff, receives the `._r`/`.bs` scan of src0w, and tmp0b
                            // is subtracted from `.sia(-1, .src0, .add_bit_size)` held in dst0b.
                            // NOTE(review): presumably computes (bit_size - 1) - BSR(src), with the 0xff
                            // seed left in tmp0 for a zero source so the byte-wide subtraction yields
                            // bit_size - confirm against the helper definitions outside this view.
   4907                     .src_constraints = .{ .{ .unsigned_int = .word }, .any },
   4908                     .patterns = &.{
   4909                         .{ .src = .{ .mem, .none } },
   4910                         .{ .src = .{ .to_gpr, .none } },
   4911                     },
   4912                     .extra_temps = .{
   4913                         .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
   4914                         .unused,
   4915                         .unused,
   4916                         .unused,
   4917                         .unused,
   4918                         .unused,
   4919                     },
   4920                     .dst_temps = .{.{ .rc = .general_purpose }},
   4921                     .clobbers = .{ .eflags = true },
   4922                     .each = .{ .once = &.{
   4923                         .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ },
   4924                         .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ },
   4925                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4926                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   4927                     } },
   4928                 }, .{
                            // Dword-sized `.unsigned_po2_or_exact_int` source, pattern `.to_mut_gpr`;
                            // requires `.cmov` and `.bsf_bsr_0_clobbers_result`.
                            // src0d is scanned in place (`._r`/`.bs`), dst0d is then loaded with
                            // `.sia(-1, .src0, .add_2_bit_size)`, the scan result is moved over it only on
                            // `._nz`, and dst0b is xor-ed with `.sia(-1, .src0, .add_bit_size)`.
                            // NOTE(review): the fallback is loaded after the scan, presumably because the
                            // scan output cannot be trusted for a zero source on these targets, and the
                            // `mov` is assumed to leave the scan's flags intact - confirm.
   4929                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4930                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any },
   4931                     .patterns = &.{
   4932                         .{ .src = .{ .to_mut_gpr, .none } },
   4933                     },
   4934                     .dst_temps = .{.{ .rc = .general_purpose }},
   4935                     .clobbers = .{ .eflags = true },
   4936                     .each = .{ .once = &.{
   4937                         .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
   4938                         .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4939                         .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ },
   4940                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4941                     } },
   4942                 }, .{
                            // Dword-sized `.signed_int` source, pattern `.to_mut_gpr`; requires `.cmov`
                            // and `.bsf_bsr_0_clobbers_result`. No extra temp: src0d doubles as scratch.
                            // src0d is and-ed with `.sa(.src0, .add_umax)` and scanned in place
                            // (`._r`/`.bs`); dst0d is loaded with 0xff and moved into src0d on `._z`;
                            // dst0b then becomes `.sia(-1, .src0, .add_bit_size)` minus src0b.
                            // NOTE(review): presumably (bit_size - 1) - BSR(masked src), with 0xff
                            // substituted for the scan result when the masked source is zero so the
                            // byte-wide subtraction yields bit_size - confirm.
   4943                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4944                     .src_constraints = .{ .{ .signed_int = .dword }, .any },
   4945                     .patterns = &.{
   4946                         .{ .src = .{ .to_mut_gpr, .none } },
   4947                     },
   4948                     .dst_temps = .{.{ .rc = .general_purpose }},
   4949                     .clobbers = .{ .eflags = true },
   4950                     .each = .{ .once = &.{
   4951                         .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
   4952                         .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
   4953                         .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
   4954                         .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ },
   4955                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4956                         .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
   4957                     } },
   4958                 }, .{
                            // Dword-sized `.unsigned_int` source, pattern `.to_mut_gpr`; requires `.cmov`
                            // and `.bsf_bsr_0_clobbers_result`. No extra temp: src0d doubles as scratch.
                            // src0d is scanned in place (`._r`/`.bs`); dst0d is loaded with 0xff and moved
                            // into src0d on `._z`; dst0b then becomes `.sia(-1, .src0, .add_bit_size)`
                            // minus src0b.
                            // NOTE(review): presumably (bit_size - 1) - BSR(src), with 0xff substituted for
                            // the scan result when the source is zero so the byte-wide subtraction yields
                            // bit_size - confirm against the helper definitions outside this view.
   4959                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
   4960                     .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
   4961                     .patterns = &.{
   4962                         .{ .src = .{ .to_mut_gpr, .none } },
   4963                     },
   4964                     .dst_temps = .{.{ .rc = .general_purpose }},
   4965                     .clobbers = .{ .eflags = true },
   4966                     .each = .{ .once = &.{
   4967                         .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
   4968                         .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
   4969                         .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ },
   4970                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4971                         .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
   4972                     } },
   4973                 }, .{
                            // Dword-sized `.unsigned_po2_or_exact_int` source, pattern `.to_mut_gpr`;
                            // requires `.bsf_bsr_0_clobbers_result` but not `.cmov`, so it branches.
                            // The destination is declared as `.ref = .src0`. After the `._r`/`.bs` scan,
                            // a `._nz` jump skips to the `0:` label; otherwise dst0b is first loaded with
                            // `.sia(-1, .src0, .add_2_bit_size)`. Both paths end in the xor of dst0b with
                            // `.sia(-1, .src0, .add_bit_size)`.
                            // NOTE(review): presumably the not-taken path handles a zero source, whose
                            // scan output is unusable on these targets - confirm.
   4974                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4975                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any },
   4976                     .patterns = &.{
   4977                         .{ .src = .{ .to_mut_gpr, .none } },
   4978                     },
   4979                     .dst_temps = .{.{ .ref = .src0 }},
   4980                     .clobbers = .{ .eflags = true },
   4981                     .each = .{ .once = &.{
   4982                         .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ },
   4983                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   4984                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   4985                         .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   4986                     } },
   4987                 }, .{
                            // Dword-sized `.signed_int` source, pattern `.to_mut_gpr`; requires
                            // `.bsf_bsr_0_clobbers_result` but not `.cmov`, so it branches.
                            // src0d is and-ed with `.sa(.src0, .add_umax)` and scanned in place
                            // (`._r`/`.bs`); dst0b is loaded with `.sa(.src0, .add_bit_size)`; a `._z` jump
                            // to `0f` bypasses the `._c`/`.st` + `sbb dst0b, src0b` pair.
                            // NOTE(review): no `0:` label is attached to any instruction in this sequence;
                            // presumably a dangling forward label binds to the end of the sequence.
                            // The tail then presumably computes bit_size - BSR - 1 via carry-in - confirm.
   4988                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   4989                     .src_constraints = .{ .{ .signed_int = .dword }, .any },
   4990                     .patterns = &.{
   4991                         .{ .src = .{ .to_mut_gpr, .none } },
   4992                     },
   4993                     .dst_temps = .{.{ .rc = .general_purpose }},
   4994                     .clobbers = .{ .eflags = true },
   4995                     .each = .{ .once = &.{
   4996                         .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
   4997                         .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
   4998                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   4999                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   5000                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   5001                         .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
   5002                     } },
   5003                 }, .{
                            // Dword-sized `.unsigned_int` source, pattern `.to_mut_gpr`; requires
                            // `.bsf_bsr_0_clobbers_result` but not `.cmov`, so it branches.
                            // src0d is scanned in place (`._r`/`.bs`); dst0b is loaded with
                            // `.sa(.src0, .add_bit_size)`; a `._z` jump to `0f` bypasses the
                            // `._c`/`.st` + `sbb dst0b, src0b` pair.
                            // NOTE(review): no `0:` label is attached to any instruction in this sequence;
                            // presumably a dangling forward label binds to the end of the sequence.
                            // The tail then presumably computes bit_size - BSR - 1 via carry-in - confirm.
   5004                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
   5005                     .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
   5006                     .patterns = &.{
   5007                         .{ .src = .{ .to_mut_gpr, .none } },
   5008                     },
   5009                     .dst_temps = .{.{ .rc = .general_purpose }},
   5010                     .clobbers = .{ .eflags = true },
   5011                     .each = .{ .once = &.{
   5012                         .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ },
   5013                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   5014                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   5015                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   5016                         .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
   5017                     } },
   5018                 }, .{
                            // Dword-sized `.unsigned_po2_or_exact_int` source taken from memory or a GPR;
                            // no feature requirement, no extra temp, eflags declared clobbered.
                            // dst0d is preloaded with `.sia(-1, .src0, .add_2_bit_size)`, the `._r`/`.bs`
                            // scan of src0d then writes dst0d, and dst0b is finally xor-ed with
                            // `.sia(-1, .src0, .add_bit_size)`.
                            // NOTE(review): reads as (bit_size - 1) ^ BSR(src), with the preload presumably
                            // surviving the scan when src is zero (this entry lacks the
                            // `.bsf_bsr_0_clobbers_result` requirement) - confirm.
   5019                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any },
   5020                     .patterns = &.{
   5021                         .{ .src = .{ .mem, .none } },
   5022                         .{ .src = .{ .to_gpr, .none } },
   5023                     },
   5024                     .dst_temps = .{.{ .rc = .general_purpose }},
   5025                     .clobbers = .{ .eflags = true },
   5026                     .each = .{ .once = &.{
   5027                         .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   5028                         .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ },
   5029                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5030                     } },
   5031                 }, .{
                            // Dword-sized `.signed_int` source, pattern `.to_mut_gpr`; no feature
                            // requirement; one u32 general-purpose temp, eflags declared clobbered.
                            // src0d is first and-ed in place with `.sa(.src0, .add_umax)`. tmp0d is seeded
                            // with 0xff, receives the `._r`/`.bs` scan of src0d, and tmp0b is subtracted
                            // from `.sia(-1, .src0, .add_bit_size)` held in dst0b.
                            // NOTE(review): presumably the `and` clears bits above the integer's width and
                            // the 0xff seed is what stays in tmp0 for a zero source (yielding bit_size
                            // after the byte-wide subtraction) - confirm against the helper definitions.
   5032                     .src_constraints = .{ .{ .signed_int = .dword }, .any },
   5033                     .patterns = &.{
   5034                         .{ .src = .{ .to_mut_gpr, .none } },
   5035                     },
   5036                     .extra_temps = .{
   5037                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5038                         .unused,
   5039                         .unused,
   5040                         .unused,
   5041                         .unused,
   5042                         .unused,
   5043                     },
   5044                     .dst_temps = .{.{ .rc = .general_purpose }},
   5045                     .clobbers = .{ .eflags = true },
   5046                     .each = .{ .once = &.{
   5047                         .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ },
   5048                         .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
   5049                         .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ },
   5050                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5051                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   5052                     } },
   5053                 }, .{
                            // Dword-sized `.unsigned_int` source taken from memory or a GPR; no feature
                            // requirement; one u32 general-purpose temp. The source is never written.
                            // tmp0d is seeded with 0xff, receives the `._r`/`.bs` scan of src0d, and tmp0b
                            // is subtracted from `.sia(-1, .src0, .add_bit_size)` held in dst0b.
                            // NOTE(review): presumably computes (bit_size - 1) - BSR(src), with the 0xff
                            // seed left in tmp0 for a zero source so the byte-wide subtraction yields
                            // bit_size - confirm against the helper definitions outside this view.
   5054                     .src_constraints = .{ .{ .unsigned_int = .dword }, .any },
   5055                     .patterns = &.{
   5056                         .{ .src = .{ .mem, .none } },
   5057                         .{ .src = .{ .to_gpr, .none } },
   5058                     },
   5059                     .extra_temps = .{
   5060                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5061                         .unused,
   5062                         .unused,
   5063                         .unused,
   5064                         .unused,
   5065                         .unused,
   5066                     },
   5067                     .dst_temps = .{.{ .rc = .general_purpose }},
   5068                     .clobbers = .{ .eflags = true },
   5069                     .each = .{ .once = &.{
   5070                         .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
   5071                         .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ },
   5072                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5073                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   5074                     } },
   5075                 }, .{
                            // Qword-sized `.unsigned_po2_or_exact_int` source, pattern `.to_mut_gpr`;
                            // requires `.@"64bit"`, `.cmov` and `.bsf_bsr_0_clobbers_result`.
                            // src0q is scanned in place (`._r`/`.bs`), dst0d is then loaded with
                            // `.sia(-1, .src0, .add_2_bit_size)`, the dword scan result is moved over it
                            // only on `._nz`, and dst0b is xor-ed with `.sia(-1, .src0, .add_bit_size)`.
                            // NOTE(review): only the scan is 64-bit wide; the later steps use d/b views,
                            // presumably because the bit index always fits in a byte - confirm.
   5076                     .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
   5077                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any },
   5078                     .patterns = &.{
   5079                         .{ .src = .{ .to_mut_gpr, .none } },
   5080                     },
   5081                     .dst_temps = .{.{ .rc = .general_purpose }},
   5082                     .clobbers = .{ .eflags = true },
   5083                     .each = .{ .once = &.{
   5084                         .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ },
   5085                         .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   5086                         .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ },
   5087                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5088                     } },
   5089                 }, .{
                            // Qword-sized `.signed_int` source taken from memory or a GPR; requires
                            // `.@"64bit"`, `.cmov` and `.bsf_bsr_0_clobbers_result`; one u64 temp.
                            // tmp0q is loaded with `.ua(.src0, .add_umax)` and and-ed with src0q, so the
                            // source itself is never written. tmp0q is scanned in place (`._r`/`.bs`);
                            // dst0d is loaded with 0xff and moved into tmp0d on `._z`; dst0b then becomes
                            // `.sia(-1, .src0, .add_bit_size)` minus tmp0b.
                            // NOTE(review): the mask is materialized in a register first, presumably
                            // because a 64-bit mask cannot be encoded as an `and` immediate - confirm.
   5090                     .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
   5091                     .src_constraints = .{ .{ .signed_int = .qword }, .any },
   5092                     .patterns = &.{
   5093                         .{ .src = .{ .mem, .none } },
   5094                         .{ .src = .{ .to_gpr, .none } },
   5095                     },
   5096                     .extra_temps = .{
   5097                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5098                         .unused,
   5099                         .unused,
   5100                         .unused,
   5101                         .unused,
   5102                         .unused,
   5103                     },
   5104                     .dst_temps = .{.{ .rc = .general_purpose }},
   5105                     .clobbers = .{ .eflags = true },
   5106                     .each = .{ .once = &.{
   5107                         .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
   5108                         .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ },
   5109                         .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ },
   5110                         .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
   5111                         .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ },
   5112                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5113                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   5114                     } },
   5115                 }, .{
                            // Qword-sized `.unsigned_int` source, pattern `.to_mut_gpr`; requires
                            // `.@"64bit"`, `.cmov` and `.bsf_bsr_0_clobbers_result`. No extra temp.
                            // src0q is scanned in place (`._r`/`.bs`); dst0d is loaded with 0xff and moved
                            // into src0d on `._z`; dst0b then becomes `.sia(-1, .src0, .add_bit_size)`
                            // minus src0b.
                            // NOTE(review): presumably (bit_size - 1) - BSR(src), with 0xff substituted for
                            // the scan result when the source is zero so the byte-wide subtraction yields
                            // bit_size - confirm against the helper definitions outside this view.
   5116                     .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
   5117                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   5118                     .patterns = &.{
   5119                         .{ .src = .{ .to_mut_gpr, .none } },
   5120                     },
   5121                     .dst_temps = .{.{ .rc = .general_purpose }},
   5122                     .clobbers = .{ .eflags = true },
   5123                     .each = .{ .once = &.{
   5124                         .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ },
   5125                         .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ },
   5126                         .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ },
   5127                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5128                         .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ },
   5129                     } },
   5130                 }, .{
                            // Qword-sized `.unsigned_po2_or_exact_int` source, pattern `.to_mut_gpr`;
                            // requires `.@"64bit"` and `.bsf_bsr_0_clobbers_result` but not `.cmov`.
                            // The destination is declared as `.ref = .src0`. After the `._r`/`.bs` scan,
                            // a `._nz` jump skips to the `0:` label; otherwise dst0b is first loaded with
                            // `.sia(-1, .src0, .add_2_bit_size)`. Both paths end in the xor of dst0b with
                            // `.sia(-1, .src0, .add_bit_size)`.
                            // NOTE(review): presumably the not-taken path handles a zero source, whose
                            // scan output is unusable on these targets - confirm.
   5131                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
   5132                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any },
   5133                     .patterns = &.{
   5134                         .{ .src = .{ .to_mut_gpr, .none } },
   5135                     },
   5136                     .dst_temps = .{.{ .ref = .src0 }},
   5137                     .clobbers = .{ .eflags = true },
   5138                     .each = .{ .once = &.{
   5139                         .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ },
   5140                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5141                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   5142                         .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5143                     } },
   5144                 }, .{
                            // Qword-sized `.signed_int` source taken from memory or a GPR; requires
                            // `.@"64bit"` and `.bsf_bsr_0_clobbers_result` but not `.cmov`; one u64 temp.
                            // tmp0q is loaded with `.ua(.src0, .add_umax)`, and-ed with src0q, and scanned
                            // in place (`._r`/`.bs`); dst0b is loaded with `.sa(.src0, .add_bit_size)`; a
                            // `._z` jump to `0f` bypasses the `._c`/`.st` + `sbb dst0b, tmp0b` pair.
                            // NOTE(review): no `0:` label is attached to any instruction in this sequence;
                            // presumably a dangling forward label binds to the end of the sequence.
                            // The tail then presumably computes bit_size - BSR - 1 via carry-in - confirm.
   5145                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
   5146                     .src_constraints = .{ .{ .signed_int = .qword }, .any },
   5147                     .patterns = &.{
   5148                         .{ .src = .{ .mem, .none } },
   5149                         .{ .src = .{ .to_gpr, .none } },
   5150                     },
   5151                     .extra_temps = .{
   5152                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5153                         .unused,
   5154                         .unused,
   5155                         .unused,
   5156                         .unused,
   5157                         .unused,
   5158                     },
   5159                     .dst_temps = .{.{ .rc = .general_purpose }},
   5160                     .clobbers = .{ .eflags = true },
   5161                     .each = .{ .once = &.{
   5162                         .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
   5163                         .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ },
   5164                         .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ },
   5165                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   5166                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   5167                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   5168                         .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ },
   5169                     } },
   5170                 }, .{
                            // Qword-sized `.unsigned_int` source, pattern `.to_mut_gpr`; requires
                            // `.@"64bit"` and `.bsf_bsr_0_clobbers_result` but not `.cmov`. No extra temp.
                            // src0q is scanned in place (`._r`/`.bs`); dst0b is loaded with
                            // `.sa(.src0, .add_bit_size)`; a `._z` jump to `0f` bypasses the
                            // `._c`/`.st` + `sbb dst0b, src0b` pair.
                            // NOTE(review): no `0:` label is attached to any instruction in this sequence;
                            // presumably a dangling forward label binds to the end of the sequence.
                            // The tail then presumably computes bit_size - BSR - 1 via carry-in - confirm.
   5171                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
   5172                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   5173                     .patterns = &.{
   5174                         .{ .src = .{ .to_mut_gpr, .none } },
   5175                     },
   5176                     .dst_temps = .{.{ .rc = .general_purpose }},
   5177                     .clobbers = .{ .eflags = true },
   5178                     .each = .{ .once = &.{
   5179                         .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ },
   5180                         .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ },
   5181                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   5182                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   5183                         .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ },
   5184                     } },
   5185                 }, .{
                            // Qword-sized `.unsigned_po2_or_exact_int` source taken from memory or a GPR;
                            // requires only `.@"64bit"`; no extra temp, eflags declared clobbered.
                            // dst0d is preloaded with `.sia(-1, .src0, .add_2_bit_size)`, the `._r`/`.bs`
                            // scan of src0q then writes dst0q, and dst0b is finally xor-ed with
                            // `.sia(-1, .src0, .add_bit_size)`.
                            // NOTE(review): reads as (bit_size - 1) ^ BSR(src), with the preload presumably
                            // surviving the scan when src is zero (this entry lacks the
                            // `.bsf_bsr_0_clobbers_result` requirement) - confirm.
   5186                     .required_features = .{ .@"64bit", null, null, null },
   5187                     .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any },
   5188                     .patterns = &.{
   5189                         .{ .src = .{ .mem, .none } },
   5190                         .{ .src = .{ .to_gpr, .none } },
   5191                     },
   5192                     .dst_temps = .{.{ .rc = .general_purpose }},
   5193                     .clobbers = .{ .eflags = true },
   5194                     .each = .{ .once = &.{
   5195                         .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ },
   5196                         .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ },
   5197                         .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5198                     } },
   5199                 }, .{
                            // Qword-sized `.signed_int` source taken from memory or a GPR; requires only
                            // `.@"64bit"`; one u64 general-purpose temp. The source is never written.
                            // dst0q is used as scratch for the masked value: it is loaded with
                            // `.ua(.src0, .add_umax)` and and-ed with src0q. tmp0d is seeded with 0xff,
                            // tmp0q receives the `._r`/`.bs` scan of dst0q, and tmp0b is subtracted from
                            // `.sia(-1, .src0, .add_bit_size)` written to dst0b.
                            // NOTE(review): presumably the 0xff seed stays in tmp0 for a zero masked value
                            // so the byte-wide subtraction yields bit_size - confirm.
   5200                     .required_features = .{ .@"64bit", null, null, null },
   5201                     .src_constraints = .{ .{ .signed_int = .qword }, .any },
   5202                     .patterns = &.{
   5203                         .{ .src = .{ .mem, .none } },
   5204                         .{ .src = .{ .to_gpr, .none } },
   5205                     },
   5206                     .extra_temps = .{
   5207                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5208                         .unused,
   5209                         .unused,
   5210                         .unused,
   5211                         .unused,
   5212                         .unused,
   5213                     },
   5214                     .dst_temps = .{.{ .rc = .general_purpose }},
   5215                     .clobbers = .{ .eflags = true },
   5216                     .each = .{ .once = &.{
   5217                         .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
   5218                         .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ },
   5219                         .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
   5220                         .{ ._, ._r, .bs, .tmp0q, .dst0q, ._, ._ },
   5221                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5222                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   5223                     } },
   5224                 }, .{
                            // Qword-sized `.unsigned_int` source taken from memory or a GPR; requires only
                            // `.@"64bit"`; one u64 general-purpose temp. The source is never written.
                            // tmp0d is seeded with 0xff, tmp0q receives the `._r`/`.bs` scan of src0q, and
                            // tmp0b is subtracted from `.sia(-1, .src0, .add_bit_size)` held in dst0b.
                            // NOTE(review): presumably computes (bit_size - 1) - BSR(src), with the 0xff
                            // seed left in tmp0 for a zero source so the byte-wide subtraction yields
                            // bit_size - confirm against the helper definitions outside this view.
   5225                     .required_features = .{ .@"64bit", null, null, null },
   5226                     .src_constraints = .{ .{ .unsigned_int = .qword }, .any },
   5227                     .patterns = &.{
   5228                         .{ .src = .{ .mem, .none } },
   5229                         .{ .src = .{ .to_gpr, .none } },
   5230                     },
   5231                     .extra_temps = .{
   5232                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5233                         .unused,
   5234                         .unused,
   5235                         .unused,
   5236                         .unused,
   5237                         .unused,
   5238                     },
   5239                     .dst_temps = .{.{ .rc = .general_purpose }},
   5240                     .clobbers = .{ .eflags = true },
   5241                     .each = .{ .once = &.{
   5242                         .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ },
   5243                         .{ ._, ._r, .bs, .tmp0q, .src0q, ._, ._ },
   5244                         .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5245                         .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ },
   5246                     } },
   5247                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .qword`; requires `.@"64bit"`, `.false_deps_lzcnt_tzcnt`
                            // and `.lzcnt`; one u32 temp used as the index in `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-16, .src0, .add_size)`. Each pass zeroes dst0d, runs
                            // lzcnt on the indexed qword and leaves for the second `0:` on `._nc`;
                            // otherwise tmp0d drops by 8 and the loop repeats while that subtraction does
                            // not carry, after which tmp0d is zeroed. tmp0d is then negated and combined
                            // with dst0 by the lea (scale `.@"8"`, `.add_src0_bit_size`, displacement -64).
                            // NOTE(review): presumably walks qword limbs from the most significant one
                            // downward, and the xor only breaks lzcnt's false output dependency - confirm.
   5248                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   5249                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5250                     .patterns = &.{
   5251                         .{ .src = .{ .to_mem, .none } },
   5252                     },
   5253                     .extra_temps = .{
   5254                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5255                         .unused,
   5256                         .unused,
   5257                         .unused,
   5258                         .unused,
   5259                         .unused,
   5260                     },
   5261                     .dst_temps = .{.{ .rc = .general_purpose }},
   5262                     .clobbers = .{ .eflags = true },
   5263                     .each = .{ .once = &.{
   5264                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5265                         .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
   5266                         .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5267                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5268                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5269                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5270                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5271                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5272                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5273                     } },
   5274                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .qword`; requires `.@"64bit"` and `.lzcnt`; one u32 temp
                            // used as the index in `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-16, .src0, .add_size)`. Each pass runs lzcnt on the
                            // indexed qword and leaves for the second `0:` on `._nc`; otherwise tmp0d
                            // drops by 8 and the loop repeats while that subtraction does not carry, after
                            // which tmp0d is zeroed. tmp0d is then negated and combined with dst0 by the
                            // lea (scale `.@"8"`, `.add_src0_bit_size`, displacement -64).
                            // NOTE(review): presumably walks qword limbs from the most significant one
                            // downward and stops at the first non-zero limb - confirm.
   5275                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   5276                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5277                     .patterns = &.{
   5278                         .{ .src = .{ .to_mem, .none } },
   5279                     },
   5280                     .extra_temps = .{
   5281                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5282                         .unused,
   5283                         .unused,
   5284                         .unused,
   5285                         .unused,
   5286                         .unused,
   5287                     },
   5288                     .dst_temps = .{.{ .rc = .general_purpose }},
   5289                     .clobbers = .{ .eflags = true },
   5290                     .each = .{ .once = &.{
   5291                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5292                         .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5293                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5294                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5295                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5296                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5297                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5298                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5299                     } },
   5300                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .qword`; requires `.@"64bit"` and
                            // `.bsf_bsr_0_clobbers_result`; one u32 temp indexes `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-16, .src0, .add_size)`. Each pass zeroes dst0d, scans
                            // the indexed qword (`._r`/`.bs`) into dst0q and leaves for the second `0:` on
                            // `._nz`; otherwise tmp0d drops by 8 and the loop repeats while that
                            // subtraction does not carry. On fallthrough dst0d is set to -1 and tmp0d to 0.
                            // The lea (scale `.@"8"`, `.sub_src0_bit_size`, displacement 1) combines dst0
                            // and tmp0, and the result is negated.
                            // NOTE(review): presumably yields bit_size - 1 - (8 * offset + BSR), with the
                            // -1 fallback producing bit_size for an all-zero source - confirm.
   5301                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
   5302                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5303                     .patterns = &.{
   5304                         .{ .src = .{ .to_mem, .none } },
   5305                     },
   5306                     .extra_temps = .{
   5307                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5308                         .unused,
   5309                         .unused,
   5310                         .unused,
   5311                         .unused,
   5312                         .unused,
   5313                     },
   5314                     .dst_temps = .{.{ .rc = .general_purpose }},
   5315                     .clobbers = .{ .eflags = true },
   5316                     .each = .{ .once = &.{
   5317                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5318                         .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
   5319                         .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5320                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5321                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5322                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5323                         .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ },
   5324                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5325                         .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
   5326                         .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
   5327                     } },
   5328                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .qword`; requires only `.@"64bit"`; one u32 temp
                            // indexes `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-16, .src0, .add_size)`. Each pass sets dst0d to -1,
                            // scans the indexed qword (`._r`/`.bs`) into dst0q and leaves for the second
                            // `0:` on `._nz`; otherwise tmp0d drops by 8 and the loop repeats while that
                            // subtraction does not carry, after which tmp0d is zeroed. The lea (scale
                            // `.@"8"`, `.sub_src0_bit_size`, displacement 1) combines dst0 and tmp0, and
                            // the result is negated.
                            // NOTE(review): presumably relies on the scan leaving the -1 preload in dst0
                            // for a zero limb, so an all-zero source produces bit_size - confirm.
   5329                     .required_features = .{ .@"64bit", null, null, null },
   5330                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5331                     .patterns = &.{
   5332                         .{ .src = .{ .to_mem, .none } },
   5333                     },
   5334                     .extra_temps = .{
   5335                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5336                         .unused,
   5337                         .unused,
   5338                         .unused,
   5339                         .unused,
   5340                         .unused,
   5341                     },
   5342                     .dst_temps = .{.{ .rc = .general_purpose }},
   5343                     .clobbers = .{ .eflags = true },
   5344                     .each = .{ .once = &.{
   5345                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5346                         .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ },
   5347                         .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5348                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5349                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5350                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5351                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5352                         .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
   5353                         .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
   5354                     } },
   5355                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .xword`; requires `.@"64bit"`, `.false_deps_lzcnt_tzcnt`
                            // and `.lzcnt`; one u32 temp used as the index in `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-8, .src0, .add_size)`. Each pass zeroes dst0d, runs
                            // lzcnt on the indexed qword and leaves for the second `0:` on `._nc`;
                            // otherwise tmp0d drops by 8 and the loop repeats while that subtraction does
                            // not carry, after which tmp0d is zeroed. tmp0d is then negated and combined
                            // with dst0 by the lea (scale `.@"8"`, `.add_src0_bit_size`, displacement -64).
                            // NOTE(review): presumably walks qword limbs from the most significant one
                            // downward, and the xor only breaks lzcnt's false output dependency - confirm.
   5356                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   5357                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5358                     .patterns = &.{
   5359                         .{ .src = .{ .to_mem, .none } },
   5360                     },
   5361                     .extra_temps = .{
   5362                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5363                         .unused,
   5364                         .unused,
   5365                         .unused,
   5366                         .unused,
   5367                         .unused,
   5368                     },
   5369                     .dst_temps = .{.{ .rc = .general_purpose }},
   5370                     .clobbers = .{ .eflags = true },
   5371                     .each = .{ .once = &.{
   5372                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5373                         .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
   5374                         .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5375                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5376                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5377                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5378                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5379                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5380                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5381                     } },
   5382                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .xword`; requires `.@"64bit"` and `.lzcnt`; one u32 temp
                            // used as the index in `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-8, .src0, .add_size)`. Each pass runs lzcnt on the
                            // indexed qword and leaves for the second `0:` on `._nc`; otherwise tmp0d
                            // drops by 8 and the loop repeats while that subtraction does not carry, after
                            // which tmp0d is zeroed. tmp0d is then negated and combined with dst0 by the
                            // lea (scale `.@"8"`, `.add_src0_bit_size`, displacement -64).
                            // NOTE(review): presumably walks qword limbs from the most significant one
                            // downward and stops at the first non-zero limb - confirm.
   5383                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   5384                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5385                     .patterns = &.{
   5386                         .{ .src = .{ .to_mem, .none } },
   5387                     },
   5388                     .extra_temps = .{
   5389                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5390                         .unused,
   5391                         .unused,
   5392                         .unused,
   5393                         .unused,
   5394                         .unused,
   5395                     },
   5396                     .dst_temps = .{.{ .rc = .general_purpose }},
   5397                     .clobbers = .{ .eflags = true },
   5398                     .each = .{ .once = &.{
   5399                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5400                         .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5401                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5402                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5403                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5404                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5405                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5406                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5407                     } },
   5408                 }, .{
                            // In-memory (`.to_mem`) source matching `.unsigned_or_exact_remainder_int` with
                            // `.of = .xword, .is = .xword`; requires `.@"64bit"` and
                            // `.bsf_bsr_0_clobbers_result`; one u32 temp indexes `.memi(.src0q, .tmp0)`.
                            // tmp0d starts at `.sia(-8, .src0, .add_size)`. Each pass zeroes dst0d, scans
                            // the indexed qword (`._r`/`.bs`) into dst0q and leaves for the second `0:` on
                            // `._nz`; otherwise tmp0d drops by 8 and the loop repeats while that
                            // subtraction does not carry. On fallthrough dst0d is set to -1 and tmp0d to 0.
                            // The lea (scale `.@"8"`, `.sub_src0_bit_size`, displacement 1) combines dst0
                            // and tmp0, and the result is negated.
                            // NOTE(review): presumably yields bit_size - 1 - (8 * offset + BSR), with the
                            // -1 fallback producing bit_size for an all-zero source - confirm.
   5409                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
   5410                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5411                     .patterns = &.{
   5412                         .{ .src = .{ .to_mem, .none } },
   5413                     },
   5414                     .extra_temps = .{
   5415                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5416                         .unused,
   5417                         .unused,
   5418                         .unused,
   5419                         .unused,
   5420                         .unused,
   5421                     },
   5422                     .dst_temps = .{.{ .rc = .general_purpose }},
   5423                     .clobbers = .{ .eflags = true },
   5424                     .each = .{ .once = &.{
   5425                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5426                         .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
   5427                         .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5428                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5429                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5430                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5431                         .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ },
   5432                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5433                         .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
   5434                         .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
   5435                     } },
   5436                 }, .{
   5437                     .required_features = .{ .@"64bit", null, null, null },
   5438                     .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5439                     .patterns = &.{
   5440                         .{ .src = .{ .to_mem, .none } },
   5441                     },
   5442                     .extra_temps = .{
   5443                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5444                         .unused,
   5445                         .unused,
   5446                         .unused,
   5447                         .unused,
   5448                         .unused,
   5449                     },
   5450                     .dst_temps = .{.{ .rc = .general_purpose }},
   5451                     .clobbers = .{ .eflags = true },
   5452                     .each = .{ .once = &.{
   5453                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5454                         .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ },
   5455                         .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5456                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5457                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5458                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5459                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5460                         .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
   5461                         .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
   5462                     } },
   5463                 }, .{
   5464                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   5465                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5466                     .patterns = &.{
   5467                         .{ .src = .{ .to_mem, .none } },
   5468                     },
   5469                     .extra_temps = .{
   5470                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5471                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5472                         .unused,
   5473                         .unused,
   5474                         .unused,
   5475                         .unused,
   5476                     },
   5477                     .dst_temps = .{.{ .rc = .general_purpose }},
   5478                     .clobbers = .{ .eflags = true },
   5479                     .each = .{ .once = &.{
   5480                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5481                         .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   5482                         .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
   5483                         .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
   5484                         .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
   5485                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5486                         .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
   5487                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5488                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5489                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5490                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5491                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5492                     } },
   5493                 }, .{
   5494                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   5495                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5496                     .patterns = &.{
   5497                         .{ .src = .{ .to_mem, .none } },
   5498                     },
   5499                     .extra_temps = .{
   5500                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5501                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5502                         .unused,
   5503                         .unused,
   5504                         .unused,
   5505                         .unused,
   5506                     },
   5507                     .dst_temps = .{.{ .rc = .general_purpose }},
   5508                     .clobbers = .{ .eflags = true },
   5509                     .each = .{ .once = &.{
   5510                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5511                         .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   5512                         .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
   5513                         .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
   5514                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5515                         .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
   5516                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5517                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5518                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5519                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5520                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5521                     } },
   5522                 }, .{
   5523                     .required_features = .{ .@"64bit", null, null, null },
   5524                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   5525                     .patterns = &.{
   5526                         .{ .src = .{ .to_mem, .none } },
   5527                     },
   5528                     .extra_temps = .{
   5529                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5530                         .unused,
   5531                         .unused,
   5532                         .unused,
   5533                         .unused,
   5534                         .unused,
   5535                     },
   5536                     .dst_temps = .{.{ .rc = .general_purpose }},
   5537                     .clobbers = .{ .eflags = true },
   5538                     .each = .{ .once = &.{
   5539                         .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ },
   5540                         .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
   5541                         .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5542                         .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ },
   5543                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5544                         .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ },
   5545                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5546                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5547                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5548                         .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
   5549                         .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
   5550                     } },
   5551                 }, .{
   5552                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   5553                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5554                     .patterns = &.{
   5555                         .{ .src = .{ .to_mem, .none } },
   5556                     },
   5557                     .extra_temps = .{
   5558                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5559                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5560                         .unused,
   5561                         .unused,
   5562                         .unused,
   5563                         .unused,
   5564                     },
   5565                     .dst_temps = .{.{ .rc = .general_purpose }},
   5566                     .clobbers = .{ .eflags = true },
   5567                     .each = .{ .once = &.{
   5568                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5569                         .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   5570                         .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ },
   5571                         .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
   5572                         .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
   5573                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5574                         .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
   5575                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5576                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5577                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5578                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5579                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5580                     } },
   5581                 }, .{
   5582                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   5583                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5584                     .patterns = &.{
   5585                         .{ .src = .{ .to_mem, .none } },
   5586                     },
   5587                     .extra_temps = .{
   5588                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5589                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5590                         .unused,
   5591                         .unused,
   5592                         .unused,
   5593                         .unused,
   5594                     },
   5595                     .dst_temps = .{.{ .rc = .general_purpose }},
   5596                     .clobbers = .{ .eflags = true },
   5597                     .each = .{ .once = &.{
   5598                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5599                         .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   5600                         .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
   5601                         .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ },
   5602                         .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
   5603                         .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ },
   5604                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5605                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5606                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5607                         .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ },
   5608                         .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ },
   5609                     } },
   5610                 }, .{
   5611                     .required_features = .{ .@"64bit", null, null, null },
   5612                     .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   5613                     .patterns = &.{
   5614                         .{ .src = .{ .to_mem, .none } },
   5615                     },
   5616                     .extra_temps = .{
   5617                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5618                         .unused,
   5619                         .unused,
   5620                         .unused,
   5621                         .unused,
   5622                         .unused,
   5623                     },
   5624                     .dst_temps = .{.{ .rc = .general_purpose }},
   5625                     .clobbers = .{ .eflags = true },
   5626                     .each = .{ .once = &.{
   5627                         .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
   5628                         .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ },
   5629                         .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ },
   5630                         .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ },
   5631                         .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
   5632                         .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ },
   5633                         .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
   5634                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5635                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   5636                         .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ },
   5637                         .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
   5638                     } },
   5639                 }, .{
   5640                     .required_features = .{ .lzcnt, .slow_incdec, null, null },
   5641                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5642                     .patterns = &.{
   5643                         .{ .src = .{ .to_mem, .none } },
   5644                     },
   5645                     .extra_temps = .{
   5646                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5647                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5648                         .unused,
   5649                         .unused,
   5650                         .unused,
   5651                         .unused,
   5652                     },
   5653                     .dst_temps = .{.mem},
   5654                     .clobbers = .{ .eflags = true },
   5655                     .each = .{ .once = &.{
   5656                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5657                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
   5658                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5659                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
   5660                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   5661                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5662                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5663                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5664                     } },
   5665                 }, .{
   5666                     .required_features = .{ .lzcnt, null, null, null },
   5667                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5668                     .patterns = &.{
   5669                         .{ .src = .{ .to_mem, .none } },
   5670                     },
   5671                     .extra_temps = .{
   5672                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5673                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5674                         .unused,
   5675                         .unused,
   5676                         .unused,
   5677                         .unused,
   5678                     },
   5679                     .dst_temps = .{.mem},
   5680                     .clobbers = .{ .eflags = true },
   5681                     .each = .{ .once = &.{
   5682                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5683                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
   5684                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5685                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
   5686                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   5687                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5688                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   5689                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   5690                     } },
   5691                 }, .{
   5692                     .required_features = .{ .lzcnt, .slow_incdec, null, null },
   5693                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   5694                     .patterns = &.{
   5695                         .{ .src = .{ .to_mem, .none } },
   5696                     },
   5697                     .extra_temps = .{
   5698                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5699                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5700                         .unused,
   5701                         .unused,
   5702                         .unused,
   5703                         .unused,
   5704                     },
   5705                     .dst_temps = .{.mem},
   5706                     .clobbers = .{ .eflags = true },
   5707                     .each = .{ .once = &.{
   5708                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5709                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
   5710                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5711                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
   5712                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   5713                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5714                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5715                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5716                     } },
   5717                 }, .{
   5718                     .required_features = .{ .lzcnt, null, null, null },
   5719                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   5720                     .patterns = &.{
   5721                         .{ .src = .{ .to_mem, .none } },
   5722                     },
   5723                     .extra_temps = .{
   5724                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5725                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5726                         .unused,
   5727                         .unused,
   5728                         .unused,
   5729                         .unused,
   5730                     },
   5731                     .dst_temps = .{.mem},
   5732                     .clobbers = .{ .eflags = true },
   5733                     .each = .{ .once = &.{
   5734                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5735                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
   5736                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5737                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
   5738                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   5739                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5740                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   5741                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   5742                     } },
   5743                 }, .{
   5744                     .required_features = .{ .lzcnt, .slow_incdec, null, null },
   5745                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   5746                     .patterns = &.{
   5747                         .{ .src = .{ .to_mem, .none } },
   5748                     },
   5749                     .extra_temps = .{
   5750                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5751                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5752                         .unused,
   5753                         .unused,
   5754                         .unused,
   5755                         .unused,
   5756                     },
   5757                     .dst_temps = .{.mem},
   5758                     .clobbers = .{ .eflags = true },
   5759                     .each = .{ .once = &.{
   5760                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5761                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
   5762                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5763                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
   5764                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   5765                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5766                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5767                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5768                     } },
   5769                 }, .{
   5770                     .required_features = .{ .lzcnt, null, null, null },
   5771                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   5772                     .patterns = &.{
   5773                         .{ .src = .{ .to_mem, .none } },
   5774                     },
   5775                     .extra_temps = .{
   5776                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5777                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5778                         .unused,
   5779                         .unused,
   5780                         .unused,
   5781                         .unused,
   5782                     },
   5783                     .dst_temps = .{.mem},
   5784                     .clobbers = .{ .eflags = true },
   5785                     .each = .{ .once = &.{
   5786                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5787                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
   5788                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5789                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
   5790                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
   5791                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5792                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   5793                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   5794                     } },
   5795                 }, .{
   5796                     .required_features = .{ .@"64bit", .lzcnt, .slow_incdec, null },
   5797                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   5798                     .patterns = &.{
   5799                         .{ .src = .{ .to_mem, .none } },
   5800                     },
   5801                     .extra_temps = .{
   5802                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5803                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5804                         .unused,
   5805                         .unused,
   5806                         .unused,
   5807                         .unused,
   5808                     },
   5809                     .dst_temps = .{.mem},
   5810                     .clobbers = .{ .eflags = true },
   5811                     .each = .{ .once = &.{
   5812                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5813                         .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   5814                         .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   5815                         .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ },
   5816                         .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ },
   5817                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5818                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5819                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5820                     } },
   5821                 }, .{
   5822                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   5823                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   5824                     .patterns = &.{
   5825                         .{ .src = .{ .to_mem, .none } },
   5826                     },
   5827                     .extra_temps = .{
   5828                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5829                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   5830                         .unused,
   5831                         .unused,
   5832                         .unused,
   5833                         .unused,
   5834                     },
   5835                     .dst_temps = .{.mem},
   5836                     .clobbers = .{ .eflags = true },
   5837                     .each = .{ .once = &.{
   5838                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5839                         .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   5840                         .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   5841                         .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ },
   5842                         .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ },
   5843                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   5844                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   5845                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   5846                     } },
   5847                 }, .{
   5848                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null },
   5849                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5850                     .patterns = &.{
   5851                         .{ .src = .{ .to_mem, .none } },
   5852                     },
   5853                     .extra_temps = .{
   5854                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5855                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5856                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5857                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   5858                         .unused,
   5859                         .unused,
   5860                     },
   5861                     .dst_temps = .{.mem},
   5862                     .clobbers = .{ .eflags = true },
   5863                     .each = .{ .once = &.{
   5864                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5865                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
   5866                         .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
   5867                         .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
   5868                         .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
   5869                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
   5870                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5871                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   5872                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   5873                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5874                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5875                     } },
   5876                 }, .{
   5877                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
                            // Per-element loop over byte-sized integers in memory. Each destination byte
                            // receives (bit_size - 1) - scan(elem), which reads as a leading-zero count
                            // assuming `._r, .bs` is BSR (confirm against the mnemonic tables). A cmov
                            // patches in 0xff for a zero element so the byte subtract wraps to bit_size.
   5878                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5879                     .patterns = &.{
   5880                         .{ .src = .{ .to_mem, .none } },
   5881                     },
   5882                     .extra_temps = .{
   5883                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5884                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5885                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5886                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   5887                         .unused,
   5888                         .unused,
   5889                     },
   5890                     .dst_temps = .{.mem},
   5891                     .clobbers = .{ .eflags = true },
   5892                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   5893                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // tmp1: stand-in for a zero element, loaded once outside the loop.
   5894                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
   5895                         .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   5896                         .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
   5897                         .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
                                // ZF from the scan means the masked element was zero: use the stand-in.
   5898                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
                                // tmp3 = (presumably bit_size - 1) - tmp2, computed at byte width.
   5899                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5900                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   5901                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   5902                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   5903                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   5904                     } },
   5905                 }, .{
   5906                     .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null },
                            // Per-element loop over byte-sized integers in memory, with no cmov requirement.
                            // Stores bit_size for a zero element, otherwise bit_size - scan(elem) - 1,
                            // which reads as a leading-zero count assuming `._r, .bs` is BSR (confirm).
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   5907                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5908                     .patterns = &.{
   5909                         .{ .src = .{ .to_mem, .none } },
   5910                     },
   5911                     .extra_temps = .{
   5912                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5913                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5914                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5915                         .unused,
   5916                         .unused,
   5917                         .unused,
   5918                     },
   5919                     .dst_temps = .{.mem},
   5920                     .clobbers = .{ .eflags = true },
   5921                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   5922                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5923                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   5924                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5925                         .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
                                // Preload the zero-element answer; the jz that follows still tests the scan's ZF.
   5926                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   5927                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                                // Carry set, then sbb: tmp2 = bit_size - tmp1 - 1.
   5928                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   5929                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   5930                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   5931                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5932                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5933                     } },
   5934                 }, .{
   5935                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
                            // Per-element loop over byte-sized integers in memory, with no cmov requirement.
                            // Stores bit_size for a zero element, otherwise bit_size - scan(elem) - 1,
                            // which reads as a leading-zero count assuming `._r, .bs` is BSR (confirm).
   5936                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5937                     .patterns = &.{
   5938                         .{ .src = .{ .to_mem, .none } },
   5939                     },
   5940                     .extra_temps = .{
   5941                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5942                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5943                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5944                         .unused,
   5945                         .unused,
   5946                         .unused,
   5947                     },
   5948                     .dst_temps = .{.mem},
   5949                     .clobbers = .{ .eflags = true },
   5950                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   5951                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5952                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   5953                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   5954                         .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
                                // Preload the zero-element answer; the jz that follows still tests the scan's ZF.
   5955                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   5956                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                                // Carry set, then sbb: tmp2 = bit_size - tmp1 - 1.
   5957                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   5958                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   5959                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   5960                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   5961                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   5962                     } },
   5963                 }, .{
   5964                     .required_features = .{ .slow_incdec, null, null, null },
                            // Per-element loop over byte-sized integers in memory. Each destination byte
                            // receives (bit_size - 1) - scan(elem), which reads as a leading-zero count
                            // assuming `._r, .bs` is BSR (confirm). This entry does not require
                            // bsf_bsr_0_clobbers_result: the scan destination is preloaded with 0xff and
                            // is relied on to survive a zero element, so the subtract wraps to bit_size.
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   5965                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
   5966                     .patterns = &.{
   5967                         .{ .src = .{ .to_mem, .none } },
   5968                     },
   5969                     .extra_temps = .{
   5970                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5971                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5972                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5973                         .unused,
   5974                         .unused,
   5975                         .unused,
   5976                     },
   5977                     .dst_temps = .{.mem},
   5978                     .clobbers = .{ .eflags = true },
   5979                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   5980                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   5981                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   5982                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
                                // Reloaded every iteration because the scan overwrites tmp2 for nonzero input.
   5983                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   5984                         .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
                                // tmp1 is reused as the result: (presumably bit_size - 1) - tmp2 at byte width.
   5985                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   5986                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   5987                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   5988                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   5989                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   5990                     } },
   5991                 }, .{
   5992                     .src_constraints = .{ .{ .scalar_int = .byte }, .any },
                            // Entry with no feature requirements for byte-sized integers in memory.
                            // Each destination byte receives (bit_size - 1) - scan(elem), which reads
                            // as a leading-zero count assuming `._r, .bs` is BSR (confirm). The scan
                            // destination is preloaded with 0xff and is relied on to survive a zero
                            // element, so the byte subtract wraps to bit_size.
   5993                     .patterns = &.{
   5994                         .{ .src = .{ .to_mem, .none } },
   5995                     },
   5996                     .extra_temps = .{
   5997                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   5998                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   5999                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6000                         .unused,
   6001                         .unused,
   6002                         .unused,
   6003                     },
   6004                     .dst_temps = .{.mem},
   6005                     .clobbers = .{ .eflags = true },
   6006                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6007                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6008                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6009                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
                                // Reloaded every iteration because the scan overwrites tmp2 for nonzero input.
   6010                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6011                         .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
                                // tmp1 is reused as the result: (presumably bit_size - 1) - tmp2 at byte width.
   6012                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6013                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6014                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6015                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6016                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6017                     } },
   6018                 }, .{
   6019                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null },
                            // Per-element loop over word-sized integers in memory. One result byte is
                            // stored per element: (bit_size - 1) - scan(elem), which reads as a
                            // leading-zero count assuming `._r, .bs` is BSR (confirm). A cmov patches in
                            // 0xff for a zero element so the byte subtract wraps to bit_size.
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6020                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   6021                     .patterns = &.{
   6022                         .{ .src = .{ .to_mem, .none } },
   6023                     },
   6024                     .extra_temps = .{
   6025                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6026                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6027                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6028                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   6029                         .unused,
   6030                         .unused,
   6031                     },
   6032                     .dst_temps = .{.mem},
   6033                     .clobbers = .{ .eflags = true },
   6034                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6035                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // tmp1: stand-in for a zero element, loaded once outside the loop.
   6036                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
                                // Source is indexed with scale 2; the destination below is indexed bytewise.
   6037                         .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6038                         .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
   6039                         .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
                                // ZF from the scan means the masked element was zero: use the stand-in.
   6040                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
                                // tmp3 = (presumably bit_size - 1) - tmp2, computed at byte width.
   6041                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6042                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   6043                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6044                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6045                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6046                     } },
   6047                 }, .{
   6048                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
                            // Per-element loop over word-sized integers in memory. One result byte is
                            // stored per element: (bit_size - 1) - scan(elem), which reads as a
                            // leading-zero count assuming `._r, .bs` is BSR (confirm). A cmov patches in
                            // 0xff for a zero element so the byte subtract wraps to bit_size.
   6049                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   6050                     .patterns = &.{
   6051                         .{ .src = .{ .to_mem, .none } },
   6052                     },
   6053                     .extra_temps = .{
   6054                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6055                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6056                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6057                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   6058                         .unused,
   6059                         .unused,
   6060                     },
   6061                     .dst_temps = .{.mem},
   6062                     .clobbers = .{ .eflags = true },
   6063                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6064                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // tmp1: stand-in for a zero element, loaded once outside the loop.
   6065                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
                                // Source is indexed with scale 2; the destination below is indexed bytewise.
   6066                         .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6067                         .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
   6068                         .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
                                // ZF from the scan means the masked element was zero: use the stand-in.
   6069                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
                                // tmp3 = (presumably bit_size - 1) - tmp2, computed at byte width.
   6070                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6071                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   6072                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6073                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6074                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6075                     } },
   6076                 }, .{
   6077                     .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null },
                            // Per-element loop over word-sized integers in memory, with no cmov requirement.
                            // Stores one byte per element: bit_size for a zero element, otherwise
                            // bit_size - scan(elem) - 1, which reads as a leading-zero count assuming
                            // `._r, .bs` is BSR (confirm).
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6078                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   6079                     .patterns = &.{
   6080                         .{ .src = .{ .to_mem, .none } },
   6081                     },
   6082                     .extra_temps = .{
   6083                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6084                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6085                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6086                         .unused,
   6087                         .unused,
   6088                         .unused,
   6089                     },
   6090                     .dst_temps = .{.mem},
   6091                     .clobbers = .{ .eflags = true },
   6092                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6093                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 2; the destination below is indexed bytewise.
   6094                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6095                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   6096                         .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
                                // Preload the zero-element answer; the jz that follows still tests the scan's ZF.
   6097                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   6098                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                                // Carry set, then sbb: tmp2 = bit_size - tmp1 - 1.
   6099                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   6100                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   6101                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6102                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6103                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6104                     } },
   6105                 }, .{
   6106                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
                            // Per-element loop over word-sized integers in memory, with no cmov requirement.
                            // Stores one byte per element: bit_size for a zero element, otherwise
                            // bit_size - scan(elem) - 1, which reads as a leading-zero count assuming
                            // `._r, .bs` is BSR (confirm).
   6107                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   6108                     .patterns = &.{
   6109                         .{ .src = .{ .to_mem, .none } },
   6110                     },
   6111                     .extra_temps = .{
   6112                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6113                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6114                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6115                         .unused,
   6116                         .unused,
   6117                         .unused,
   6118                     },
   6119                     .dst_temps = .{.mem},
   6120                     .clobbers = .{ .eflags = true },
   6121                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6122                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 2; the destination below is indexed bytewise.
   6123                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6124                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   6125                         .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
                                // Preload the zero-element answer; the jz that follows still tests the scan's ZF.
   6126                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   6127                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                                // Carry set, then sbb: tmp2 = bit_size - tmp1 - 1.
   6128                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   6129                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   6130                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6131                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6132                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6133                     } },
   6134                 }, .{
   6135                     .required_features = .{ .slow_incdec, null, null, null },
                            // Per-element loop over word-sized integers in memory. One result byte is
                            // stored per element: (bit_size - 1) - scan(elem), which reads as a
                            // leading-zero count assuming `._r, .bs` is BSR (confirm). This entry does
                            // not require bsf_bsr_0_clobbers_result: the scan destination is preloaded
                            // with 0xff and is relied on to survive a zero element, so the subtract
                            // wraps to bit_size.
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6136                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
   6137                     .patterns = &.{
   6138                         .{ .src = .{ .to_mem, .none } },
   6139                     },
   6140                     .extra_temps = .{
   6141                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6142                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6143                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6144                         .unused,
   6145                         .unused,
   6146                         .unused,
   6147                     },
   6148                     .dst_temps = .{.mem},
   6149                     .clobbers = .{ .eflags = true },
   6150                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6151                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 2; the destination below is indexed bytewise.
   6152                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6153                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
                                // Reloaded every iteration because the scan overwrites tmp2 for nonzero input.
   6154                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6155                         .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
                                // tmp1 is reused as the result: (presumably bit_size - 1) - tmp2 at byte width.
   6156                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6157                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6158                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6159                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6160                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6161                     } },
   6162                 }, .{
   6163                     .src_constraints = .{ .{ .scalar_int = .word }, .any },
                            // Entry with no feature requirements for word-sized integers in memory.
                            // One result byte is stored per element: (bit_size - 1) - scan(elem), which
                            // reads as a leading-zero count assuming `._r, .bs` is BSR (confirm). The
                            // scan destination is preloaded with 0xff and is relied on to survive a
                            // zero element, so the byte subtract wraps to bit_size.
   6164                     .patterns = &.{
   6165                         .{ .src = .{ .to_mem, .none } },
   6166                     },
   6167                     .extra_temps = .{
   6168                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6169                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6170                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6171                         .unused,
   6172                         .unused,
   6173                         .unused,
   6174                     },
   6175                     .dst_temps = .{.mem},
   6176                     .clobbers = .{ .eflags = true },
   6177                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6178                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 2; the destination below is indexed bytewise.
   6179                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6180                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
                                // Reloaded every iteration because the scan overwrites tmp2 for nonzero input.
   6181                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6182                         .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
                                // tmp1 is reused as the result: (presumably bit_size - 1) - tmp2 at byte width.
   6183                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6184                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6185                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6186                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6187                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6188                     } },
   6189                 }, .{
   6190                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null },
                            // Per-element loop over dword-sized integers in memory. One result byte is
                            // stored per element: (bit_size - 1) - scan(elem), which reads as a
                            // leading-zero count assuming `._r, .bs` is BSR (confirm). A cmov patches in
                            // 0xff for a zero element so the byte subtract wraps to bit_size.
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6191                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   6192                     .patterns = &.{
   6193                         .{ .src = .{ .to_mem, .none } },
   6194                     },
   6195                     .extra_temps = .{
   6196                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6197                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6198                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6199                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   6200                         .unused,
   6201                         .unused,
   6202                     },
   6203                     .dst_temps = .{.mem},
   6204                     .clobbers = .{ .eflags = true },
   6205                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6206                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // tmp1: stand-in for a zero element, loaded once outside the loop.
   6207                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
                                // Source is indexed with scale 4; the destination below is indexed bytewise.
   6208                         .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6209                         .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
   6210                         .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
                                // ZF from the scan means the masked element was zero: use the stand-in.
   6211                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
                                // tmp3 = (presumably bit_size - 1) - tmp2, computed at byte width.
   6212                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6213                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   6214                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6215                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6216                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6217                     } },
   6218                 }, .{
   6219                     .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null },
                            // Per-element loop over dword-sized integers in memory. One result byte is
                            // stored per element: (bit_size - 1) - scan(elem), which reads as a
                            // leading-zero count assuming `._r, .bs` is BSR (confirm). A cmov patches in
                            // 0xff for a zero element so the byte subtract wraps to bit_size.
   6220                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   6221                     .patterns = &.{
   6222                         .{ .src = .{ .to_mem, .none } },
   6223                     },
   6224                     .extra_temps = .{
   6225                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6226                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6227                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6228                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   6229                         .unused,
   6230                         .unused,
   6231                     },
   6232                     .dst_temps = .{.mem},
   6233                     .clobbers = .{ .eflags = true },
   6234                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6235                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // tmp1: stand-in for a zero element, loaded once outside the loop.
   6236                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
                                // Source is indexed with scale 4; the destination below is indexed bytewise.
   6237                         .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6238                         .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ },
   6239                         .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ },
                                // ZF from the scan means the masked element was zero: use the stand-in.
   6240                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
                                // tmp3 = (presumably bit_size - 1) - tmp2, computed at byte width.
   6241                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6242                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   6243                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6244                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6245                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6246                     } },
   6247                 }, .{
   6248                     .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null },
                            // Per-element loop over dword-sized integers in memory, with no cmov requirement.
                            // Stores one byte per element: bit_size for a zero element, otherwise
                            // bit_size - scan(elem) - 1, which reads as a leading-zero count assuming
                            // `._r, .bs` is BSR (confirm).
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6249                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   6250                     .patterns = &.{
   6251                         .{ .src = .{ .to_mem, .none } },
   6252                     },
   6253                     .extra_temps = .{
   6254                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6255                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6256                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6257                         .unused,
   6258                         .unused,
   6259                         .unused,
   6260                     },
   6261                     .dst_temps = .{.mem},
   6262                     .clobbers = .{ .eflags = true },
   6263                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6264                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 4; the destination below is indexed bytewise.
   6265                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6266                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   6267                         .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
                                // Preload the zero-element answer; the jz that follows still tests the scan's ZF.
   6268                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   6269                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                                // Carry set, then sbb: tmp2 = bit_size - tmp1 - 1.
   6270                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   6271                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   6272                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6273                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6274                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6275                     } },
   6276                 }, .{
   6277                     .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null },
                            // Per-element loop over dword-sized integers in memory, with no cmov requirement.
                            // Stores one byte per element: bit_size for a zero element, otherwise
                            // bit_size - scan(elem) - 1, which reads as a leading-zero count assuming
                            // `._r, .bs` is BSR (confirm).
   6278                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   6279                     .patterns = &.{
   6280                         .{ .src = .{ .to_mem, .none } },
   6281                     },
   6282                     .extra_temps = .{
   6283                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6284                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6285                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6286                         .unused,
   6287                         .unused,
   6288                         .unused,
   6289                     },
   6290                     .dst_temps = .{.mem},
   6291                     .clobbers = .{ .eflags = true },
   6292                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6293                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 4; the destination below is indexed bytewise.
   6294                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6295                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
   6296                         .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ },
                                // Preload the zero-element answer; the jz that follows still tests the scan's ZF.
   6297                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   6298                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                                // Carry set, then sbb: tmp2 = bit_size - tmp1 - 1.
   6299                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   6300                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   6301                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6302                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6303                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6304                     } },
   6305                 }, .{
   6306                     .required_features = .{ .slow_incdec, null, null, null },
                            // Per-element loop over dword-sized integers in memory. One result byte is
                            // stored per element: (bit_size - 1) - scan(elem), which reads as a
                            // leading-zero count assuming `._r, .bs` is BSR (confirm). This entry does
                            // not require bsf_bsr_0_clobbers_result: the scan destination is preloaded
                            // with 0xff and is relied on to survive a zero element, so the subtract
                            // wraps to bit_size.
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6307                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
   6308                     .patterns = &.{
   6309                         .{ .src = .{ .to_mem, .none } },
   6310                     },
   6311                     .extra_temps = .{
   6312                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6313                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6314                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6315                         .unused,
   6316                         .unused,
   6317                         .unused,
   6318                     },
   6319                     .dst_temps = .{.mem},
   6320                     .clobbers = .{ .eflags = true },
   6321                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6322                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 4; the destination below is indexed bytewise.
   6323                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6324                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
                                // Reloaded every iteration because the scan overwrites tmp2 for nonzero input.
   6325                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6326                         .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
                                // tmp1 is reused as the result: (presumably bit_size - 1) - tmp2 at byte width.
   6327                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6328                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6329                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6330                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6331                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6332                     } },
   6333                 }, .{
   6334                     .src_constraints = .{ .{ .scalar_int = .dword }, .any },
                            // Entry with no feature requirements for dword-sized integers in memory.
                            // One result byte is stored per element: (bit_size - 1) - scan(elem), which
                            // reads as a leading-zero count assuming `._r, .bs` is BSR (confirm). The
                            // scan destination is preloaded with 0xff and is relied on to survive a
                            // zero element, so the byte subtract wraps to bit_size.
   6335                     .patterns = &.{
   6336                         .{ .src = .{ .to_mem, .none } },
   6337                     },
   6338                     .extra_temps = .{
   6339                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6340                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6341                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6342                         .unused,
   6343                         .unused,
   6344                         .unused,
   6345                     },
   6346                     .dst_temps = .{.mem},
   6347                     .clobbers = .{ .eflags = true },
   6348                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6349                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // Source is indexed with scale 4; the destination below is indexed bytewise.
   6350                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ },
                                // Mask with add_umax (presumably the element type's unsigned max - TODO confirm).
   6351                         .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ },
                                // Reloaded every iteration because the scan overwrites tmp2 for nonzero input.
   6352                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6353                         .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ },
                                // tmp1 is reused as the result: (presumably bit_size - 1) - tmp2 at byte width.
   6354                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6355                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6356                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
                                // Step the index and loop until it reaches zero (jnz on the inc result).
   6357                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6358                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6359                     } },
   6360                 }, .{
   6361                     .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec },
                            // Per-element loop over qword-sized integers in memory, using 64-bit
                            // registers. One result byte is stored per element: (bit_size - 1) -
                            // scan(elem), which reads as a leading-zero count assuming `._r, .bs` is BSR
                            // (confirm). A cmov patches in 0xff for a zero element so the byte subtract
                            // wraps to bit_size.
                            // slow_incdec form: the index steps with add/jnc rather than inc/jnz.
   6362                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   6363                     .patterns = &.{
   6364                         .{ .src = .{ .to_mem, .none } },
   6365                     },
   6366                     .extra_temps = .{
   6367                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6368                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6369                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6370                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   6371                         .unused,
   6372                         .unused,
   6373                     },
   6374                     .dst_temps = .{.mem},
   6375                     .clobbers = .{ .eflags = true },
   6376                     .each = .{ .once = &.{
                                // tmp0: running index, presumably starting at -len and counting up to zero.
   6377                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
                                // tmp1: stand-in for a zero element, loaded once outside the loop.
   6378                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
                                // The mask (add_umax, presumably the type's unsigned max) is loaded into the
                                // register first and the element is anded in from memory (scale 8).
   6379                         .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ },
   6380                         .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   6381                         .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ },
                                // ZF from the scan means the masked element was zero: use the stand-in.
                                // The cmov and the later subtract operate on the low 32/8 bits of tmp2.
   6382                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
                                // tmp3 = (presumably bit_size - 1) - tmp2, computed at byte width.
   6383                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6384                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   6385                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
                                // Loop until the add carries out, i.e. the index has wrapped to zero.
   6386                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6387                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6388                     } },
   6389                 }, .{
   6390                     .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null },
   6391                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   6392                     .patterns = &.{
   6393                         .{ .src = .{ .to_mem, .none } },
   6394                     },
   6395                     .extra_temps = .{
   6396                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6397                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6398                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6399                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   6400                         .unused,
   6401                         .unused,
   6402                     },
   6403                     .dst_temps = .{.mem},
   6404                     .clobbers = .{ .eflags = true },
   6405                     .each = .{ .once = &.{
   6406                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6407                         .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ },
   6408                         .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ },
   6409                         .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   6410                         .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ },
   6411                         .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ },
   6412                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6413                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
   6414                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6415                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6416                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6417                     } },
   6418                 }, .{
   6419                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, .slow_incdec, null },
   6420                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   6421                     .patterns = &.{
   6422                         .{ .src = .{ .to_mem, .none } },
   6423                     },
   6424                     .extra_temps = .{
   6425                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6426                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6427                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6428                         .unused,
   6429                         .unused,
   6430                         .unused,
   6431                     },
   6432                     .dst_temps = .{.mem},
   6433                     .clobbers = .{ .eflags = true },
   6434                     .each = .{ .once = &.{
   6435                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6436                         .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   6437                         .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   6438                         .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ },
   6439                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   6440                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
   6441                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   6442                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   6443                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
   6444                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6445                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6446                     } },
   6447                 }, .{
   6448                     .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null },
   6449                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   6450                     .patterns = &.{
   6451                         .{ .src = .{ .to_mem, .none } },
   6452                     },
   6453                     .extra_temps = .{
   6454                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6455                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6456                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6457                         .unused,
   6458                         .unused,
   6459                         .unused,
   6460                     },
   6461                     .dst_temps = .{.mem},
   6462                     .clobbers = .{ .eflags = true },
   6463                     .each = .{ .once = &.{
   6464                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6465                         .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   6466                         .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   6467                         .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ },
   6468                         .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ },
   6469                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
   6470                         .{ ._, ._c, .st, ._, ._, ._, ._ },
   6471                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
   6472                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
   6473                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6474                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6475                     } },
   6476                 }, .{
   6477                     .required_features = .{ .@"64bit", .slow_incdec, null, null },
   6478                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   6479                     .patterns = &.{
   6480                         .{ .src = .{ .to_mem, .none } },
   6481                     },
   6482                     .extra_temps = .{
   6483                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6484                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6485                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6486                         .unused,
   6487                         .unused,
   6488                         .unused,
   6489                     },
   6490                     .dst_temps = .{.mem},
   6491                     .clobbers = .{ .eflags = true },
   6492                     .each = .{ .once = &.{
   6493                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6494                         .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   6495                         .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   6496                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6497                         .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ },
   6498                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6499                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6500                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   6501                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6502                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6503                     } },
   6504                 }, .{
   6505                     .required_features = .{ .@"64bit", null, null, null },
   6506                     .src_constraints = .{ .{ .scalar_int = .qword }, .any },
   6507                     .patterns = &.{
   6508                         .{ .src = .{ .to_mem, .none } },
   6509                     },
   6510                     .extra_temps = .{
   6511                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6512                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6513                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6514                         .unused,
   6515                         .unused,
   6516                         .unused,
   6517                     },
   6518                     .dst_temps = .{.mem},
   6519                     .clobbers = .{ .eflags = true },
   6520                     .each = .{ .once = &.{
   6521                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6522                         .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
   6523                         .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ },
   6524                         .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ },
   6525                         .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ },
   6526                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
   6527                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
   6528                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
   6529                         .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
   6530                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
   6531                     } },
   6532                 }, .{
   6533                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   6534                     .dst_constraints = .{.{ .scalar_int = .byte }},
   6535                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   6536                     .patterns = &.{
   6537                         .{ .src = .{ .to_mem, .none } },
   6538                     },
   6539                     .extra_temps = .{
   6540                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6541                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6542                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6543                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6544                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6545                         .unused,
   6546                     },
   6547                     .dst_temps = .{.mem},
   6548                     .clobbers = .{ .eflags = true },
   6549                     .each = .{ .once = &.{
   6550                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6551                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6552                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
   6553                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6554                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6555                         .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
   6556                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6557                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6558                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6559                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6560                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6561                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6562                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6563                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6564                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6565                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6566                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6567                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6568                     } },
   6569                 }, .{
   6570                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   6571                     .dst_constraints = .{.{ .scalar_int = .byte }},
   6572                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   6573                     .patterns = &.{
   6574                         .{ .src = .{ .to_mem, .none } },
   6575                     },
   6576                     .extra_temps = .{
   6577                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6578                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6579                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6580                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6581                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6582                         .unused,
   6583                     },
   6584                     .dst_temps = .{.mem},
   6585                     .clobbers = .{ .eflags = true },
   6586                     .each = .{ .once = &.{
   6587                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6588                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6589                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
   6590                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6591                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6592                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6593                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6594                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6595                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6596                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6597                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6598                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6599                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6600                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6601                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6602                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6603                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6604                     } },
   6605                 }, .{
   6606                     .required_features = .{ .@"64bit", null, null, null },
   6607                     .dst_constraints = .{.{ .scalar_int = .byte }},
   6608                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   6609                     .patterns = &.{
   6610                         .{ .src = .{ .to_mem, .none } },
   6611                     },
   6612                     .extra_temps = .{
   6613                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6614                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6615                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6616                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6617                         .unused,
   6618                         .unused,
   6619                     },
   6620                     .dst_temps = .{.mem},
   6621                     .clobbers = .{ .eflags = true },
   6622                     .each = .{ .once = &.{
   6623                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6624                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6625                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
   6626                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6627                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6628                         .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
   6629                         .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   6630                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6631                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6632                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6633                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6634                         .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
   6635                         .{ ._, ._, .neg, .tmp3b, ._, ._, ._ },
   6636                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6637                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6638                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6639                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6640                     } },
   6641                 }, .{
   6642                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   6643                     .dst_constraints = .{.{ .scalar_int = .byte }},
   6644                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   6645                     .patterns = &.{
   6646                         .{ .src = .{ .to_mem, .none } },
   6647                     },
   6648                     .extra_temps = .{
   6649                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6650                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6651                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6652                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6653                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6654                         .unused,
   6655                     },
   6656                     .dst_temps = .{.mem},
   6657                     .clobbers = .{ .eflags = true },
   6658                     .each = .{ .once = &.{
   6659                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6660                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6661                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
   6662                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6663                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6664                         .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
   6665                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6666                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6667                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6668                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6669                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6670                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6671                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6672                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6673                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6674                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6675                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6676                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6677                     } },
   6678                 }, .{
   6679                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   6680                     .dst_constraints = .{.{ .scalar_int = .byte }},
   6681                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   6682                     .patterns = &.{
   6683                         .{ .src = .{ .to_mem, .none } },
   6684                     },
   6685                     .extra_temps = .{
   6686                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6687                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6688                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6689                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6690                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6691                         .unused,
   6692                     },
   6693                     .dst_temps = .{.mem},
   6694                     .clobbers = .{ .eflags = true },
   6695                     .each = .{ .once = &.{
   6696                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6697                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6698                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
   6699                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6700                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6701                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6702                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6703                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6704                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6705                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6706                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6707                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6708                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6709                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6710                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6711                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6712                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6713                     } },
   6714                 }, .{
   6715                     .required_features = .{ .@"64bit", null, null, null },
   6716                     .dst_constraints = .{.{ .scalar_int = .byte }},
   6717                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   6718                     .patterns = &.{
   6719                         .{ .src = .{ .to_mem, .none } },
   6720                     },
   6721                     .extra_temps = .{
   6722                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6723                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6724                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6725                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6726                         .unused,
   6727                         .unused,
   6728                     },
   6729                     .dst_temps = .{.mem},
   6730                     .clobbers = .{ .eflags = true },
   6731                     .each = .{ .once = &.{
   6732                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6733                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6734                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
   6735                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6736                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6737                         .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
   6738                         .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   6739                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6740                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6741                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6742                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6743                         .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
   6744                         .{ ._, ._, .neg, .tmp3b, ._, ._, ._ },
   6745                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
   6746                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6747                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6748                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6749                     } },
   6750                 }, .{
   6751                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   6752                     .dst_constraints = .{.{ .scalar_int = .word }},
   6753                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   6754                     .patterns = &.{
   6755                         .{ .src = .{ .to_mem, .none } },
   6756                     },
   6757                     .extra_temps = .{
   6758                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6759                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6760                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6761                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6762                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6763                         .unused,
   6764                     },
   6765                     .dst_temps = .{.mem},
   6766                     .clobbers = .{ .eflags = true },
   6767                     .each = .{ .once = &.{
   6768                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6769                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6770                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
   6771                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6772                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6773                         .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
   6774                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6775                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6776                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6777                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6778                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6779                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6780                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6781                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6782                         .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
   6783                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6784                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6785                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6786                     } },
   6787                 }, .{
   6788                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   6789                     .dst_constraints = .{.{ .scalar_int = .word }},
   6790                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   6791                     .patterns = &.{
   6792                         .{ .src = .{ .to_mem, .none } },
   6793                     },
   6794                     .extra_temps = .{
   6795                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6796                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6797                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6798                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6799                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6800                         .unused,
   6801                     },
   6802                     .dst_temps = .{.mem},
   6803                     .clobbers = .{ .eflags = true },
   6804                     .each = .{ .once = &.{
   6805                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6806                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6807                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
   6808                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6809                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6810                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6811                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6812                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6813                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6814                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6815                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6816                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6817                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6818                         .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
   6819                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6820                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6821                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6822                     } },
   6823                 }, .{
   6824                     .required_features = .{ .@"64bit", null, null, null },
   6825                     .dst_constraints = .{.{ .scalar_int = .word }},
   6826                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any },
   6827                     .patterns = &.{
   6828                         .{ .src = .{ .to_mem, .none } },
   6829                     },
   6830                     .extra_temps = .{
   6831                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6832                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6833                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6834                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6835                         .unused,
   6836                         .unused,
   6837                     },
   6838                     .dst_temps = .{.mem},
   6839                     .clobbers = .{ .eflags = true },
   6840                     .each = .{ .once = &.{
   6841                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6842                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6843                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ },
   6844                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6845                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6846                         .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
   6847                         .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   6848                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6849                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6850                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6851                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6852                         .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
   6853                         .{ ._, ._, .neg, .tmp3d, ._, ._, ._ },
   6854                         .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
   6855                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6856                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6857                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6858                     } },
   6859                 }, .{
   6860                     .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null },
   6861                     .dst_constraints = .{.{ .scalar_int = .word }},
   6862                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   6863                     .patterns = &.{
   6864                         .{ .src = .{ .to_mem, .none } },
   6865                     },
   6866                     .extra_temps = .{
   6867                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6868                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6869                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6870                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6871                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6872                         .unused,
   6873                     },
   6874                     .dst_temps = .{.mem},
   6875                     .clobbers = .{ .eflags = true },
   6876                     .each = .{ .once = &.{
   6877                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6878                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6879                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
   6880                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6881                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6882                         .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
   6883                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6884                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6885                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6886                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6887                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6888                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6889                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6890                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6891                         .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
   6892                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6893                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6894                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6895                     } },
   6896                 }, .{
   6897                     .required_features = .{ .@"64bit", .lzcnt, null, null },
   6898                     .dst_constraints = .{.{ .scalar_int = .word }},
   6899                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   6900                     .patterns = &.{
   6901                         .{ .src = .{ .to_mem, .none } },
   6902                     },
   6903                     .extra_temps = .{
   6904                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6905                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6906                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6907                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6908                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6909                         .unused,
   6910                     },
   6911                     .dst_temps = .{.mem},
   6912                     .clobbers = .{ .eflags = true },
   6913                     .each = .{ .once = &.{
   6914                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6915                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6916                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
   6917                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6918                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6919                         .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ },
   6920                         .{ ._, ._nc, .j, .@"1f", ._, ._, ._ },
   6921                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6922                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6923                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6924                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6925                         .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ },
   6926                         .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ },
   6927                         .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
   6928                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6929                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6930                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6931                     } },
   6932                 }, .{
   6933                     .required_features = .{ .@"64bit", null, null, null },
   6934                     .dst_constraints = .{.{ .scalar_int = .word }},
   6935                     .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any },
   6936                     .patterns = &.{
   6937                         .{ .src = .{ .to_mem, .none } },
   6938                     },
   6939                     .extra_temps = .{
   6940                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   6941                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   6942                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   6943                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   6944                         .unused,
   6945                         .unused,
   6946                     },
   6947                     .dst_temps = .{.mem},
   6948                     .clobbers = .{ .eflags = true },
   6949                     .each = .{ .once = &.{
   6950                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ },
   6951                         .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ },
   6952                         .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ },
   6953                         .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ },
   6954                         .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ },
   6955                         .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ },
   6956                         .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   6957                         .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ },
   6958                         .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ },
   6959                         .{ ._, ._nc, .j, .@"1b", ._, ._, ._ },
   6960                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   6961                         .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ },
   6962                         .{ ._, ._, .neg, .tmp3d, ._, ._, ._ },
   6963                         .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ },
   6964                         .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ },
   6965                         .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   6966                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   6967                     } },
   6968                 } }) catch |err| switch (err) {
   6969                     error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{
   6970                         @tagName(air_tag),
   6971                         cg.typeOf(ty_op.operand).fmt(pt),
   6972                         ops[0].tracking(cg),
   6973                     }),
   6974                     else => |e| return e,
   6975                 };
   6976                 for (ops) |op| for (res) |r| {
   6977                     if (op.index == r.index) break;
   6978                 } else try op.die(cg);
   6979                 try res[0].moveTo(inst, cg);
   6980             },
   6981 
   6982             .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: {
   6983                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   6984                 const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
   6985                 switch (extra.compareOperator()) {
   6986                     .eq, .neq => {},
   6987                     else => break :fallback try cg.airCmpVector(inst),
   6988                 }
   6989                 var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs });
   6990                 var res: [1]Temp = undefined;
   6991                 switch (extra.compareOperator()) {
   6992                     .lt => unreachable,
   6993                     .lte => unreachable,
   6994                     .eq, .neq => |cmp_op| cg.select(&res, &.{ty_pl.ty.toType()}, &ops, switch (@as(Condition, switch (cmp_op) {
   6995                         else => unreachable,
   6996                         .eq => .e,
   6997                         .neq => .ne,
   6998                     })) {
   6999                         else => unreachable,
   7000                         inline .e, .ne => |cc| comptime &.{ .{
   7001                             .required_features = .{ .avx2, null, null, null },
   7002                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   7003                             .patterns = &.{
   7004                                 .{ .src = .{ .to_ymm, .mem } },
   7005                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   7006                                 .{ .src = .{ .to_ymm, .to_ymm } },
   7007                             },
   7008                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7009                                 .kind = .all,
   7010                                 .inverted = switch (cc) {
   7011                                     else => unreachable,
   7012                                     .e => false,
   7013                                     .ne => true,
   7014                                 },
   7015                                 .scalar = .byte,
   7016                             } } }},
   7017                             .each = .{ .once = &.{
   7018                                 .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ },
   7019                             } },
   7020                         }, .{
   7021                             .required_features = .{ .avx2, null, null, null },
   7022                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   7023                             .patterns = &.{
   7024                                 .{ .src = .{ .to_ymm, .mem } },
   7025                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   7026                                 .{ .src = .{ .to_ymm, .to_ymm } },
   7027                             },
   7028                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7029                                 .kind = .all,
   7030                                 .inverted = switch (cc) {
   7031                                     else => unreachable,
   7032                                     .e => false,
   7033                                     .ne => true,
   7034                                 },
   7035                                 .scalar = .word,
   7036                             } } }},
   7037                             .each = .{ .once = &.{
   7038                                 .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ },
   7039                             } },
   7040                         }, .{
   7041                             .required_features = .{ .avx2, null, null, null },
   7042                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   7043                             .patterns = &.{
   7044                                 .{ .src = .{ .to_ymm, .mem } },
   7045                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   7046                                 .{ .src = .{ .to_ymm, .to_ymm } },
   7047                             },
   7048                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7049                                 .kind = .all,
   7050                                 .inverted = switch (cc) {
   7051                                     else => unreachable,
   7052                                     .e => false,
   7053                                     .ne => true,
   7054                                 },
   7055                                 .scalar = .dword,
   7056                             } } }},
   7057                             .each = .{ .once = &.{
   7058                                 .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ },
   7059                             } },
   7060                         }, .{
   7061                             .required_features = .{ .avx2, null, null, null },
   7062                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   7063                             .patterns = &.{
   7064                                 .{ .src = .{ .to_ymm, .mem } },
   7065                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   7066                                 .{ .src = .{ .to_ymm, .to_ymm } },
   7067                             },
   7068                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7069                                 .kind = .all,
   7070                                 .inverted = switch (cc) {
   7071                                     else => unreachable,
   7072                                     .e => false,
   7073                                     .ne => true,
   7074                                 },
   7075                                 .scalar = .qword,
   7076                             } } }},
   7077                             .each = .{ .once = &.{
   7078                                 .{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ },
   7079                             } },
   7080                         }, .{
   7081                             .required_features = .{ .avx, null, null, null },
   7082                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   7083                             .patterns = &.{
   7084                                 .{ .src = .{ .to_xmm, .mem } },
   7085                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
   7086                                 .{ .src = .{ .to_xmm, .to_xmm } },
   7087                             },
   7088                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7089                                 .kind = .all,
   7090                                 .inverted = switch (cc) {
   7091                                     else => unreachable,
   7092                                     .e => false,
   7093                                     .ne => true,
   7094                                 },
   7095                                 .scalar = .byte,
   7096                             } } }},
   7097                             .each = .{ .once = &.{
   7098                                 .{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ },
   7099                             } },
   7100                         }, .{
   7101                             .required_features = .{ .avx, null, null, null },
   7102                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   7103                             .patterns = &.{
   7104                                 .{ .src = .{ .to_xmm, .mem } },
   7105                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
   7106                                 .{ .src = .{ .to_xmm, .to_xmm } },
   7107                             },
   7108                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7109                                 .kind = .all,
   7110                                 .inverted = switch (cc) {
   7111                                     else => unreachable,
   7112                                     .e => false,
   7113                                     .ne => true,
   7114                                 },
   7115                                 .scalar = .word,
   7116                             } } }},
   7117                             .each = .{ .once = &.{
   7118                                 .{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ },
   7119                             } },
   7120                         }, .{
   7121                             .required_features = .{ .avx, null, null, null },
   7122                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   7123                             .patterns = &.{
   7124                                 .{ .src = .{ .to_xmm, .mem } },
   7125                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
   7126                                 .{ .src = .{ .to_xmm, .to_xmm } },
   7127                             },
   7128                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7129                                 .kind = .all,
   7130                                 .inverted = switch (cc) {
   7131                                     else => unreachable,
   7132                                     .e => false,
   7133                                     .ne => true,
   7134                                 },
   7135                                 .scalar = .dword,
   7136                             } } }},
   7137                             .each = .{ .once = &.{
   7138                                 .{ ._, .vp_d, .cmpeq, .dst0x, .src0x, .src1x, ._ },
   7139                             } },
   7140                         }, .{
   7141                             .required_features = .{ .avx, null, null, null },
   7142                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   7143                             .patterns = &.{
   7144                                 .{ .src = .{ .to_xmm, .mem } },
   7145                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
   7146                                 .{ .src = .{ .to_xmm, .to_xmm } },
   7147                             },
   7148                             .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
   7149                                 .kind = .all,
   7150                                 .inverted = switch (cc) {
   7151                                     else => unreachable,
   7152                                     .e => false,
   7153                                     .ne => true,
   7154                                 },
   7155                                 .scalar = .qword,
   7156                             } } }},
   7157                             .each = .{ .once = &.{
   7158                                 .{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ },
   7159                             } },
   7160                         }, .{
   7161                             .required_features = .{ .sse2, null, null, null },
   7162                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   7163                             .patterns = &.{
   7164                                 .{ .src = .{ .to_mut_xmm, .mem } },
   7165                                 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   7166                                 .{ .src = .{ .to_mut_xmm, .to_xmm } },
   7167                             },
   7168                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7169                                 .kind = .all,
   7170                                 .inverted = switch (cc) {
   7171                                     else => unreachable,
   7172                                     .e => false,
   7173                                     .ne => true,
   7174                                 },
   7175                                 .scalar = .byte,
   7176                             } } }},
   7177                             .each = .{ .once = &.{
   7178                                 .{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ },
   7179                             } },
   7180                         }, .{
   7181                             .required_features = .{ .sse2, null, null, null },
   7182                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   7183                             .patterns = &.{
   7184                                 .{ .src = .{ .to_mut_xmm, .mem } },
   7185                                 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   7186                                 .{ .src = .{ .to_mut_xmm, .to_xmm } },
   7187                             },
   7188                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7189                                 .kind = .all,
   7190                                 .inverted = switch (cc) {
   7191                                     else => unreachable,
   7192                                     .e => false,
   7193                                     .ne => true,
   7194                                 },
   7195                                 .scalar = .word,
   7196                             } } }},
   7197                             .each = .{ .once = &.{
   7198                                 .{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ },
   7199                             } },
   7200                         }, .{
   7201                             .required_features = .{ .sse2, null, null, null },
   7202                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   7203                             .patterns = &.{
   7204                                 .{ .src = .{ .to_mut_xmm, .mem } },
   7205                                 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   7206                                 .{ .src = .{ .to_mut_xmm, .to_xmm } },
   7207                             },
   7208                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7209                                 .kind = .all,
   7210                                 .inverted = switch (cc) {
   7211                                     else => unreachable,
   7212                                     .e => false,
   7213                                     .ne => true,
   7214                                 },
   7215                                 .scalar = .dword,
   7216                             } } }},
   7217                             .each = .{ .once = &.{
   7218                                 .{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ },
   7219                             } },
   7220                         }, .{
   7221                             .required_features = .{ .sse4_1, null, null, null },
   7222                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   7223                             .patterns = &.{
   7224                                 .{ .src = .{ .to_mut_xmm, .mem } },
   7225                                 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   7226                                 .{ .src = .{ .to_mut_xmm, .to_xmm } },
   7227                             },
   7228                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7229                                 .kind = .all,
   7230                                 .inverted = switch (cc) {
   7231                                     else => unreachable,
   7232                                     .e => false,
   7233                                     .ne => true,
   7234                                 },
   7235                                 .scalar = .qword,
   7236                             } } }},
   7237                             .each = .{ .once = &.{
   7238                                 .{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ },
   7239                             } },
   7240                         }, .{
   7241                             .required_features = .{ .mmx, null, null, null },
   7242                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   7243                             .patterns = &.{
   7244                                 .{ .src = .{ .to_mut_mm, .mem } },
   7245                                 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
   7246                                 .{ .src = .{ .to_mut_mm, .to_mm } },
   7247                             },
   7248                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7249                                 .kind = .all,
   7250                                 .inverted = switch (cc) {
   7251                                     else => unreachable,
   7252                                     .e => false,
   7253                                     .ne => true,
   7254                                 },
   7255                                 .scalar = .byte,
   7256                             } } }},
   7257                             .each = .{ .once = &.{
   7258                                 .{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ },
   7259                             } },
   7260                         }, .{
   7261                             .required_features = .{ .mmx, null, null, null },
   7262                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   7263                             .patterns = &.{
   7264                                 .{ .src = .{ .to_mut_mm, .mem } },
   7265                                 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
   7266                                 .{ .src = .{ .to_mut_mm, .to_mm } },
   7267                             },
   7268                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7269                                 .kind = .all,
   7270                                 .inverted = switch (cc) {
   7271                                     else => unreachable,
   7272                                     .e => false,
   7273                                     .ne => true,
   7274                                 },
   7275                                 .scalar = .word,
   7276                             } } }},
   7277                             .each = .{ .once = &.{
   7278                                 .{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ },
   7279                             } },
   7280                         }, .{
                                    // Entry for `.scalar_int = .dword` operands, gated on the `.mmx` feature.
                                    // dst0 is a mask that reuses src0 (`.ref_mask` with `.ref = .src0`), so every
                                    // pattern asks for one operand in a mutable mm register; the pattern that lists
                                    // the operands the other way round carries `.commute = .{ 0, 1 }`.
   7281                             .required_features = .{ .mmx, null, null, null },
   7282                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   7283                             .patterns = &.{
   7284                                 .{ .src = .{ .to_mut_mm, .mem } },
   7285                                 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
   7286                                 .{ .src = .{ .to_mut_mm, .to_mm } },
   7287                             },
   7288                             .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{
   7289                                 .kind = .all,
                                        // Only `.e` and `.ne` are handled. `.ne` is expressed by flagging the mask as
                                        // inverted; the instruction list below does not depend on `cc`.
   7290                                 .inverted = switch (cc) {
   7291                                     else => unreachable,
   7292                                     .e => false,
   7293                                     .ne => true,
   7294                                 },
   7295                                 .scalar = .dword,
   7296                             } } }},
                                    // One instruction on the qword views of dst0/src1. `.p_d, .cmpeq` presumably
                                    // encodes PCMPEQD -- confirm against the Mir tag tables.
   7297                             .each = .{ .once = &.{
   7298                                 .{ ._, .p_d, .cmpeq, .dst0q, .src1q, ._, ._ },
   7299                             } },
   7300                         }, .{
                                    // Entry for `.bool_vec = .byte` operands, handled with ordinary integer
                                    // instructions on the byte views. dst0 is src0 itself (`.ref = .src0`), so each
                                    // pattern pairs a mutable operand (memory or GPR) with an imm8, register or
                                    // memory operand; the `.commute` variants cover the swapped operand order.
   7301                             .src_constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } },
   7302                             .patterns = &.{
   7303                                 .{ .src = .{ .mut_mem, .imm8 } },
   7304                                 .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
   7305                                 .{ .src = .{ .to_mut_gpr, .imm8 } },
   7306                                 .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7307                                 .{ .src = .{ .mut_mem, .to_gpr } },
   7308                                 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   7309                                 .{ .src = .{ .to_mut_gpr, .mem } },
   7310                                 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7311                                 .{ .src = .{ .to_mut_gpr, .to_gpr } },
   7312                             },
   7313                             .dst_temps = .{.{ .ref = .src0 }},
   7314                             .clobbers = .{ .eflags = true },
                                    // `.ne` is a plain xor of the operands. `.e` follows the xor with `not`, so a
                                    // result bit ends up set exactly where the two input bits agree. Any other
                                    // `cc` is `unreachable`.
   7315                             .each = .{ .once = switch (cc) {
   7316                                 else => unreachable,
   7317                                 .e => &.{
   7318                                     .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
   7319                                     .{ ._, ._, .not, .dst0b, ._, ._, ._ },
   7320                                 },
   7321                                 .ne => &.{
   7322                                     .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ },
   7323                                 },
   7324                             } },
   7325                         }, .{
                                    // Entry for `.bool_vec = .word` operands: same shape as the byte-sized entry,
                                    // but using imm16 immediates and the word views of dst0/src1.
                                    // dst0 is src0 itself (`.ref = .src0`), hence the mutable-operand patterns.
   7326                             .src_constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } },
   7327                             .patterns = &.{
   7328                                 .{ .src = .{ .mut_mem, .imm16 } },
   7329                                 .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
   7330                                 .{ .src = .{ .to_mut_gpr, .imm16 } },
   7331                                 .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7332                                 .{ .src = .{ .mut_mem, .to_gpr } },
   7333                                 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   7334                                 .{ .src = .{ .to_mut_gpr, .mem } },
   7335                                 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7336                                 .{ .src = .{ .to_mut_gpr, .to_gpr } },
   7337                             },
   7338                             .dst_temps = .{.{ .ref = .src0 }},
   7339                             .clobbers = .{ .eflags = true },
                                    // `.ne`: xor only. `.e`: xor followed by `not`, i.e. bits set where the
                                    // inputs agree. Any other `cc` is `unreachable`.
   7340                             .each = .{ .once = switch (cc) {
   7341                                 else => unreachable,
   7342                                 .e => &.{
   7343                                     .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
   7344                                     .{ ._, ._, .not, .dst0w, ._, ._, ._ },
   7345                                 },
   7346                                 .ne => &.{
   7347                                     .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ },
   7348                                 },
   7349                             } },
   7350                         }, .{
                                    // Entry for `.bool_vec = .dword` operands: same shape as the narrower bool-vec
                                    // entries, using imm32 immediates and the dword views of dst0/src1.
                                    // dst0 is src0 itself (`.ref = .src0`), hence the mutable-operand patterns.
   7351                             .src_constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } },
   7352                             .patterns = &.{
   7353                                 .{ .src = .{ .mut_mem, .imm32 } },
   7354                                 .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
   7355                                 .{ .src = .{ .to_mut_gpr, .imm32 } },
   7356                                 .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7357                                 .{ .src = .{ .mut_mem, .to_gpr } },
   7358                                 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   7359                                 .{ .src = .{ .to_mut_gpr, .mem } },
   7360                                 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7361                                 .{ .src = .{ .to_mut_gpr, .to_gpr } },
   7362                             },
   7363                             .dst_temps = .{.{ .ref = .src0 }},
   7364                             .clobbers = .{ .eflags = true },
                                    // `.ne`: xor only. `.e`: xor followed by `not`, i.e. bits set where the
                                    // inputs agree. Any other `cc` is `unreachable`.
   7365                             .each = .{ .once = switch (cc) {
   7366                                 else => unreachable,
   7367                                 .e => &.{
   7368                                     .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
   7369                                     .{ ._, ._, .not, .dst0d, ._, ._, ._ },
   7370                                 },
   7371                                 .ne => &.{
   7372                                     .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ },
   7373                                 },
   7374                             } },
   7375                         }, .{
                                    // Entry for `.bool_vec = .qword` operands. Unlike the narrower bool-vec entries
                                    // it is gated on the `.@"64bit"` feature, and immediate operands are limited to
                                    // `.simm32`. dst0 is src0 itself (`.ref = .src0`), hence the mutable-operand
                                    // patterns and their `.commute` counterparts.
   7376                             .required_features = .{ .@"64bit", null, null, null },
   7377                             .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } },
   7378                             .patterns = &.{
   7379                                 .{ .src = .{ .mut_mem, .simm32 } },
   7380                                 .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
   7381                                 .{ .src = .{ .to_mut_gpr, .simm32 } },
   7382                                 .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7383                                 .{ .src = .{ .mut_mem, .to_gpr } },
   7384                                 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } },
   7385                                 .{ .src = .{ .to_mut_gpr, .mem } },
   7386                                 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
   7387                                 .{ .src = .{ .to_mut_gpr, .to_gpr } },
   7388                             },
   7389                             .dst_temps = .{.{ .ref = .src0 }},
   7390                             .clobbers = .{ .eflags = true },
                                    // `.ne`: xor only. `.e`: xor followed by `not`, i.e. bits set where the
                                    // inputs agree. Any other `cc` is `unreachable`.
   7391                             .each = .{ .once = switch (cc) {
   7392                                 else => unreachable,
   7393                                 .e => &.{
   7394                                     .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
   7395                                     .{ ._, ._, .not, .dst0q, ._, ._, ._ },
   7396                                 },
   7397                                 .ne => &.{
   7398                                     .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ },
   7399                                 },
   7400                             } },
   7401                         }, .{
                                    // Entry for `.any_bool_vec` operands, with both sources and the result in
                                    // memory. It has no feature requirement and processes the operands in a loop,
                                    // one pointer-sized (`p` view) chunk per iteration.
   7402                             .src_constraints = .{ .any_bool_vec, .any_bool_vec },
   7403                             .patterns = &.{
   7404                                 .{ .src = .{ .to_mem, .to_mem } },
   7405                             },
                                    // tmp0 (isize) indexes src0/src1/dst0 via `.memia`; tmp1 (usize) holds the chunk
                                    // being combined.
   7406                             .extra_temps = .{
   7407                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7408                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   7409                                 .unused,
   7410                                 .unused,
   7411                                 .unused,
   7412                                 .unused,
   7413                             },
   7414                             .dst_temps = .{.mem},
   7415                             .clobbers = .{ .eflags = true },
                                    // Loop shape (both cases): tmp0 is initialised from `.sa(.src0, .sub_size)` and
                                    // advanced by `.sa(.tmp1, .add_size)` each iteration; the `._nc` jump returns to
                                    // `0:` until that add carries. Presumably tmp0 runs from -size(src0) up to 0 in
                                    // steps of size(tmp1) -- TODO confirm the `.sa` semantics.
                                    // Body: load a chunk of src0, xor it with the same chunk of src1, (`.e` only)
                                    // invert it, and store it to the same offset in dst0.
   7416                             .each = .{ .once = switch (cc) {
   7417                                 else => unreachable,
   7418                                 .e => &.{
   7419                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7420                                     .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
   7421                                     .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
   7422                                     .{ ._, ._, .not, .tmp1p, ._, ._, ._ },
   7423                                     .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
   7424                                     .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
   7425                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7426                                 },
   7427                                 .ne => &.{
   7428                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7429                                     .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
   7430                                     .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
   7431                                     .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ },
   7432                                     .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
   7433                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7434                                 },
   7435                             } },
   7436                         }, .{
                                    // AVX2 entry for `.scalar_int = .byte` operands held in memory; the result is
                                    // written to memory as packed mask bits.
   7437                             .required_features = .{ .avx2, null, null, null },
   7438                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   7439                             .patterns = &.{
   7440                                 .{ .src = .{ .to_mem, .to_mem } },
   7441                             },
                                    // tmp0: source index (isize); tmp1: destination byte offset (u32);
                                    // tmp2: mask scratch (u32); tmp3: SSE-class vector register.
   7442                             .extra_temps = .{
   7443                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7444                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   7445                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   7446                                 .{ .kind = .{ .rc = .sse } },
   7447                                 .unused,
   7448                                 .unused,
   7449                             },
   7450                             .dst_temps = .{.mem},
   7451                             .clobbers = .{ .eflags = true },
                                    // Per iteration: load a y-sized chunk of src0, compare it for equality with
                                    // the same chunk of src1 (`.vp_b, .cmpeq`), extract the per-byte mask into
                                    // tmp2d (`.vp_b, .movmsk`), store it as a dword at dst0[tmp1], then advance
                                    // tmp1 by 4 and tmp0 by 32. The loop repeats until the add to tmp0 carries.
                                    // `.ne` differs only by inverting tmp2d before the store.
   7452                             .each = .{ .once = switch (cc) {
   7453                                 else => unreachable,
   7454                                 .e => &.{
   7455                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7456                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7457                                     .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7458                                     .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
   7459                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
   7460                                     .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ },
   7461                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   7462                                     .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   7463                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7464                                 },
   7465                                 .ne => &.{
   7466                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7467                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7468                                     .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7469                                     .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
   7470                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
   7471                                     .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
   7472                                     .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ },
   7473                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   7474                                     .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   7475                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7476                                 },
   7477                             } },
   7478                         }, .{
                                    // AVX2 entry for `.scalar_int = .word` operands held in memory; the result is
                                    // written to memory as packed mask bits, 16 bits (one word) per iteration.
   7479                             .required_features = .{ .avx2, null, null, null },
   7480                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   7481                             .patterns = &.{
   7482                                 .{ .src = .{ .to_mem, .to_mem } },
   7483                             },
                                    // tmp0: source index (isize); tmp1: destination byte offset (u32);
                                    // tmp2: mask scratch (u16); tmp3: SSE-class vector register.
   7484                             .extra_temps = .{
   7485                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7486                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   7487                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
   7488                                 .{ .kind = .{ .rc = .sse } },
   7489                                 .unused,
   7490                                 .unused,
   7491                             },
   7492                             .dst_temps = .{.mem},
   7493                             .clobbers = .{ .eflags = true },
                                    // Per iteration: load a y-sized chunk of src0, compare words for equality
                                    // with src1, narrow the word results to bytes (`.vp_b, .ackssw`), extract the
                                    // byte mask into tmp2d, and store 16 result bits at dst0[tmp1]; then advance
                                    // tmp1 by 2 and tmp0 by 32 until the add to tmp0 carries.
                                    //
                                    // The 256-bit pack works on each 128-bit lane separately, so with both inputs
                                    // equal to tmp3 the 32 mask bits are laid out as
                                    //   [0..8) words 0..7, [8..16) words 0..7 again,
                                    //   [16..24) words 8..15, [24..32) words 8..15 again.
                                    // Shifting right by 8 therefore leaves words 0..7 in bits [0..8) and words
                                    // 8..15 in bits [8..16), which is what the word store needs. Without the
                                    // shift the low word would hold words 0..7 twice and drop words 8..15.
                                    // The shift changes flags, but eflags is already clobbered and the later
                                    // `add` sets the carry that `._nc` tests.
                                    // `.ne` additionally inverts tmp2d before the shift.
   7494                             .each = .{ .once = switch (cc) {
   7495                                 else => unreachable,
   7496                                 .e => &.{
   7497                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7498                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7499                                     .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7500                                     .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
   7501                                     .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
   7502                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
                                            .{ ._, ._r, .sh, .tmp2d, .si(8), ._, ._ },
   7503                                     .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
   7504                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   7505                                     .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   7506                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7507                                 },
   7508                                 .ne => &.{
   7509                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7510                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7511                                     .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7512                                     .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
   7513                                     .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ },
   7514                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ },
   7515                                     .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
                                            .{ ._, ._r, .sh, .tmp2d, .si(8), ._, ._ },
   7516                                     .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
   7517                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   7518                                     .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   7519                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7520                                 },
   7521                             } },
   7522                         }, .{
                                    // AVX2 entry for `.scalar_int = .dword` operands held in memory; the result is
                                    // written to memory as packed mask bits, one byte per iteration.
   7523                             .required_features = .{ .avx2, null, null, null },
   7524                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   7525                             .patterns = &.{
   7526                                 .{ .src = .{ .to_mem, .to_mem } },
   7527                             },
                                    // tmp0: source index (isize); tmp1: destination byte offset (u32);
                                    // tmp2: mask scratch (u8); tmp3: SSE-class vector register.
   7528                             .extra_temps = .{
   7529                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7530                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   7531                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   7532                                 .{ .kind = .{ .rc = .sse } },
   7533                                 .unused,
   7534                                 .unused,
   7535                             },
   7536                             .dst_temps = .{.mem},
   7537                             .clobbers = .{ .eflags = true },
                                    // Per iteration: load a y-sized chunk of src0, compare dwords for equality
                                    // with src1 (`.vp_d, .cmpeq`), extract one mask bit per dword with
                                    // `.v_ps, .movmsk`, store the low byte at dst0[tmp1], then advance tmp1 by 1
                                    // and tmp0 by 32 until the add to tmp0 carries.
                                    // `.ne` differs only by inverting tmp2b before the store.
   7538                             .each = .{ .once = switch (cc) {
   7539                                 else => unreachable,
   7540                                 .e => &.{
   7541                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7542                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7543                                     .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7544                                     .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
   7545                                     .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
   7546                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
   7547                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
   7548                                     .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   7549                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7550                                 },
   7551                                 .ne => &.{
   7552                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7553                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7554                                     .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7555                                     .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ },
   7556                                     .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ },
   7557                                     .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
   7558                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
   7559                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
   7560                                     .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   7561                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7562                                 },
   7563                             } },
   7564                         }, .{
                                    // AVX2 entry for `.scalar_int = .qword` operands held in memory; the result is
                                    // written to memory as packed mask bits. Each iteration yields only 4 bits, so
                                    // they are accumulated in a byte and flushed once the byte is full.
   7565                             .required_features = .{ .avx2, null, null, null },
   7566                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   7567                             .patterns = &.{
   7568                                 .{ .src = .{ .to_mem, .to_mem } },
   7569                             },
                                    // tmp0: source index (isize); tmp1: count of result bits produced so far, pinned
                                    // to `.rcx` (its byte view is the rotate count below -- presumably because a
                                    // variable rotate takes its count from CL; confirm); tmp2: accumulator byte;
                                    // tmp3: scratch for the fresh mask bits and for the destination index;
                                    // tmp4: SSE-class vector register.
   7570                             .extra_temps = .{
   7571                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7572                                 .{ .type = .u32, .kind = .{ .reg = .rcx } },
   7573                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   7574                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   7575                                 .{ .kind = .{ .rc = .sse } },
   7576                                 .unused,
   7577                             },
   7578                             .dst_temps = .{.mem},
   7579                             .clobbers = .{ .eflags = true },
                                    // Sequence (both cases):
                                    //   1. Compare a y-sized chunk and extract the mask into tmp3d
                                    //      (`.v_pd, .movmsk`); `.ne` flips the 4 fresh bits with `xor 0b1111`.
                                    //   2. Rotate tmp3b left by tmp1b and or it into tmp2b, then add 4 to tmp1.
                                    //   3. If `tmp1 & 0b111` is zero the accumulator is full: store tmp2b at
                                    //      dst0[(tmp1 >> 3) - 1] and clear it. Otherwise skip to `1:`.
                                    //   4. `1:` advance tmp0 by 32 and loop to `0:` until the add carries.
                                    //   5. After the loop, if `tmp1 & 0b111` is non-zero, store the pending
                                    //      partial byte at dst0[tmp1 >> 3].
                                    // NOTE(review): the `0f` target of the final `._z` jump is not defined in this
                                    // list; presumably it resolves to the end of the sequence -- confirm.
   7580                             .each = .{ .once = switch (cc) {
   7581                                 else => unreachable,
   7582                                 .e => &.{
   7583                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7584                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7585                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7586                                     .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7587                                     .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ },
   7588                                     .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
   7589                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
   7590                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
   7591                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   7592                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7593                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   7594                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7595                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7596                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
   7597                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7598                                     .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ },
   7599                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7600                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7601                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   7602                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7603                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7604                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
   7605                                 },
   7606                                 .ne => &.{
   7607                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7608                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7609                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7610                                     .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   7611                                     .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ },
   7612                                     .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ },
   7613                                     .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ },
   7614                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
   7615                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
   7616                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   7617                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7618                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   7619                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7620                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7621                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
   7622                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7623                                     .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ },
   7624                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7625                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7626                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   7627                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7628                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7629                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
   7630                                 },
   7631                             } },
   7632                         }, .{
                                    // AVX entry for `.scalar_int = .byte` operands held in memory. It has the same
                                    // structure as the AVX2 byte entry earlier in this table, but works on x-sized
                                    // chunks: 16 source bytes and one result word per iteration.
   7633                             .required_features = .{ .avx, null, null, null },
   7634                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   7635                             .patterns = &.{
   7636                                 .{ .src = .{ .to_mem, .to_mem } },
   7637                             },
                                    // tmp0: source index (isize); tmp1: destination byte offset (u32);
                                    // tmp2: mask scratch (u16); tmp3: SSE-class vector register.
   7638                             .extra_temps = .{
   7639                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7640                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   7641                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
   7642                                 .{ .kind = .{ .rc = .sse } },
   7643                                 .unused,
   7644                                 .unused,
   7645                             },
   7646                             .dst_temps = .{.mem},
   7647                             .clobbers = .{ .eflags = true },
                                    // Per iteration: load, compare bytes for equality, extract the byte mask into
                                    // tmp2d, store its low word at dst0[tmp1], then advance tmp1 by 2 and tmp0 by
                                    // 16 until the add to tmp0 carries.
                                    // `.ne` differs only by inverting tmp2d before the store.
   7648                             .each = .{ .once = switch (cc) {
   7649                                 else => unreachable,
   7650                                 .e => &.{
   7651                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7652                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7653                                     .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7654                                     .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
   7655                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
   7656                                     .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
   7657                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   7658                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   7659                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7660                                 },
   7661                                 .ne => &.{
   7662                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7663                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7664                                     .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7665                                     .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
   7666                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
   7667                                     .{ ._, ._, .not, .tmp2d, ._, ._, ._ },
   7668                                     .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
   7669                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   7670                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   7671                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7672                                 },
   7673                             } },
   7674                         }, .{
                                    // AVX entry for `.scalar_int = .word` operands held in memory, working on
                                    // x-sized chunks: 16 source bytes (8 words) and one result byte per iteration.
   7675                             .required_features = .{ .avx, null, null, null },
   7676                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   7677                             .patterns = &.{
   7678                                 .{ .src = .{ .to_mem, .to_mem } },
   7679                             },
                                    // tmp0: source index (isize); tmp1: destination byte offset (u32);
                                    // tmp2: mask scratch (u8); tmp3: SSE-class vector register.
   7680                             .extra_temps = .{
   7681                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   7682                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   7683                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   7684                                 .{ .kind = .{ .rc = .sse } },
   7685                                 .unused,
   7686                                 .unused,
   7687                             },
   7688                             .dst_temps = .{.mem},
   7689                             .clobbers = .{ .eflags = true },
                                    // Per iteration: load, compare words for equality, narrow the word results to
                                    // bytes by packing tmp3 with itself (`.vp_b, .ackssw`), extract the byte mask
                                    // into tmp2d, and store only its low byte at dst0[tmp1]. Then advance tmp1 by
                                    // 1 and tmp0 by 16 until the add to tmp0 carries.
                                    // `.ne` differs only by inverting tmp2b before the store.
   7690                             .each = .{ .once = switch (cc) {
   7691                                 else => unreachable,
   7692                                 .e => &.{
   7693                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7694                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7695                                     .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7696                                     .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
   7697                                     .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
   7698                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
   7699                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
   7700                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
   7701                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   7702                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7703                                 },
   7704                                 .ne => &.{
   7705                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7706                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7707                                     .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7708                                     .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ },
   7709                                     .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ },
   7710                                     .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
   7711                                     .{ ._, ._, .not, .tmp2b, ._, ._, ._ },
   7712                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
   7713                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
   7714                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   7715                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7716                                 },
   7717                             } },
   7718                         }, .{
   7719                             .required_features = .{ .avx, null, null, null }, // AVX entry: per-element ==/!= of two operands, one result bit per element stored to dst
   7720                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, // both sources must have dword integer elements
   7721                             .patterns = &.{
   7722                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   7723                             },
   7724                             .extra_temps = .{
   7725                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   7726                                 .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: result bit index; pinned to rcx, presumably because the rotate below takes its count from tmp1b (cl)
   7727                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: result byte currently being assembled
   7728                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: per-chunk mask, later reused as the dst byte index
   7729                                 .{ .kind = .{ .rc = .sse } }, // tmp4: vector scratch register
   7730                                 .unused,
   7731                             },
   7732                             .dst_temps = .{.mem}, // the result lives in memory
   7733                             .clobbers = .{ .eflags = true }, // xor/test/add/shift/rotate below overwrite eflags
   7734                             .each = .{ .once = switch (cc) {
   7735                                 else => unreachable, // only .e and .ne have a sequence in this entry
   7736                                 .e => &.{
   7737                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   7738                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (bit index)
   7739                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // tmp2 = 0 (pending result byte)
   7740                                     .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: unaligned 16-byte load from src0 at the current offset
   7741                                     .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, // dword-wise equality with src1; equal lanes become all-ones
   7742                                     .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, // gather the 4 lane sign bits into tmp3
   7743                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate left by the bit index: tmp1 advances in steps of 4, so the bits land in the low or high nibble
   7744                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge them into the pending byte
   7745                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, // bit index += 4 (one per lane)
   7746                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // is the bit index on a byte boundary?
   7747                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: byte not complete yet, skip the store
   7748                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7749                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = bit index / 8
   7750                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the completed byte; the -1 compensates for tmp1 already pointing past it
   7751                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // start a fresh pending byte
   7752                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // 1: advance to the next 16-byte chunk; sets carry once the offset wraps
   7753                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   7754                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // after the loop: is a partial byte still pending?
   7755                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // no: jump forward to label 0 - presumably the end of the sequence, its placement is not visible here
   7756                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7757                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = index of the partial byte
   7758                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // flush the partial byte
   7759                                 },
   7760                                 .ne => &.{ // same loop as .e, with the lane mask inverted before it is merged
   7761                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7762                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7763                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7764                                     .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7765                                     .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
   7766                                     .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
   7767                                     .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, // flip the 4 lane bits so a set bit means "not equal"
   7768                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
   7769                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
   7770                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   7771                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7772                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   7773                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7774                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7775                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
   7776                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7777                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
   7778                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7779                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7780                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   7781                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7782                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7783                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
   7784                                 },
   7785                             } },
   7786                         }, .{
   7787                             .required_features = .{ .avx, null, null, null }, // AVX entry: per-element ==/!= of two operands, one result bit per element stored to dst
   7788                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, // both sources must have qword integer elements
   7789                             .patterns = &.{
   7790                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   7791                             },
   7792                             .extra_temps = .{
   7793                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   7794                                 .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: result bit index; pinned to rcx, presumably because the rotate below takes its count from tmp1b (cl)
   7795                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: result byte currently being assembled
   7796                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: per-chunk mask, later reused as the dst byte index
   7797                                 .{ .kind = .{ .rc = .sse } }, // tmp4: vector scratch register
   7798                                 .unused,
   7799                             },
   7800                             .dst_temps = .{.mem}, // the result lives in memory
   7801                             .clobbers = .{ .eflags = true }, // xor/test/add/shift/rotate below overwrite eflags
   7802                             .each = .{ .once = switch (cc) {
   7803                                 else => unreachable, // only .e and .ne have a sequence in this entry
   7804                                 .e => &.{
   7805                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   7806                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (bit index)
   7807                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // tmp2 = 0 (pending result byte)
   7808                                     .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: unaligned 16-byte load from src0 at the current offset
   7809                                     .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, // qword-wise equality with src1; equal lanes become all-ones
   7810                                     .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, // gather the 2 lane sign bits into tmp3
   7811                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate left by the bit index: tmp1 advances in steps of 2, so the pair lands in one of four 2-bit slots
   7812                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge them into the pending byte
   7813                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, // bit index += 2 (one per lane)
   7814                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // is the bit index on a byte boundary?
   7815                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: byte not complete yet, skip the store
   7816                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7817                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = bit index / 8
   7818                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the completed byte; the -1 compensates for tmp1 already pointing past it
   7819                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // start a fresh pending byte
   7820                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // 1: advance to the next 16-byte chunk; sets carry once the offset wraps
   7821                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   7822                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // after the loop: is a partial byte still pending?
   7823                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // no: jump forward to label 0 - presumably the end of the sequence, its placement is not visible here
   7824                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7825                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = index of the partial byte
   7826                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // flush the partial byte
   7827                                 },
   7828                                 .ne => &.{ // same loop as .e, with the lane mask inverted before it is merged
   7829                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7830                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7831                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7832                                     .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7833                                     .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ },
   7834                                     .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
   7835                                     .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, // flip the 2 lane bits so a set bit means "not equal"
   7836                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
   7837                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
   7838                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   7839                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7840                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   7841                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7842                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7843                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
   7844                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7845                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
   7846                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7847                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7848                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   7849                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7850                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7851                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
   7852                                 },
   7853                             } },
   7854                         }, .{
   7855                             .required_features = .{ .sse2, null, null, null }, // SSE2 entry: per-element ==/!= of two operands, one result bit per element stored to dst
   7856                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, // both sources must have byte integer elements
   7857                             .patterns = &.{
   7858                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   7859                             },
   7860                             .extra_temps = .{
   7861                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   7862                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: byte offset into dst
   7863                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp2: 16-bit mask for the current chunk
   7864                                 .{ .kind = .{ .rc = .sse } }, // tmp3: vector scratch register
   7865                                 .unused,
   7866                                 .unused,
   7867                             },
   7868                             .dst_temps = .{.mem}, // the result lives in memory
   7869                             .clobbers = .{ .eflags = true }, // xor/add below overwrite eflags
   7870                             .each = .{ .once = switch (cc) {
   7871                                 else => unreachable, // only .e and .ne have a sequence in this entry
   7872                                 .e => &.{
   7873                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   7874                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (dst offset)
   7875                                     .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: unaligned 16-byte load from src0 at the current offset
   7876                                     .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // byte-wise equality with src1; equal lanes become all-ones
   7877                                     .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, // one mask bit per byte lane: 16 result bits
   7878                                     .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, // store those 16 bits as one word of dst
   7879                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, // dst offset += 2 bytes
   7880                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance to the next 16-byte chunk; sets carry once the offset wraps
   7881                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   7882                                 },
   7883                                 .ne => &.{ // same loop as .e, with the mask inverted before the store
   7884                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7885                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7886                                     .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7887                                     .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   7888                                     .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
   7889                                     .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, // invert so a set bit means "not equal"; only the low word is stored
   7890                                     .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ },
   7891                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   7892                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   7893                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7894                                 },
   7895                             } },
   7896                         }, .{
   7897                             .required_features = .{ .sse2, null, null, null }, // SSE2 entry: per-element ==/!= of two operands, one result bit per element stored to dst
   7898                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, // both sources must have word integer elements
   7899                             .patterns = &.{
   7900                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   7901                             },
   7902                             .extra_temps = .{
   7903                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   7904                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: byte offset into dst
   7905                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: 8-bit mask for the current chunk
   7906                                 .{ .kind = .{ .rc = .sse } }, // tmp3: vector scratch register
   7907                                 .unused,
   7908                                 .unused,
   7909                             },
   7910                             .dst_temps = .{.mem}, // the result lives in memory
   7911                             .clobbers = .{ .eflags = true }, // xor/add below overwrite eflags
   7912                             .each = .{ .once = switch (cc) {
   7913                                 else => unreachable, // only .e and .ne have a sequence in this entry
   7914                                 .e => &.{
   7915                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   7916                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (dst offset)
   7917                                     .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: unaligned 16-byte load from src0 at the current offset
   7918                                     .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // word-wise equality with src1; equal lanes become all-ones
   7919                                     .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, // narrow the 8 word results to bytes; signed saturation keeps 0 and -1 intact
   7920                                     .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, // one mask bit per byte lane; the low 8 bits hold the 8 word results
   7921                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, // store those 8 bits as one byte of dst
   7922                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // dst offset += 1 byte
   7923                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance to the next 16-byte chunk; sets carry once the offset wraps
   7924                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   7925                                 },
   7926                                 .ne => &.{ // same loop as .e, with the mask inverted before the store
   7927                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7928                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7929                                     .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7930                                     .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   7931                                     .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ },
   7932                                     .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ },
   7933                                     .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, // invert the low byte so a set bit means "not equal"
   7934                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
   7935                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
   7936                                     .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   7937                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   7938                                 },
   7939                             } },
   7940                         }, .{
   7941                             .required_features = .{ .sse2, null, null, null }, // SSE2 entry: per-element ==/!= of two operands, one result bit per element stored to dst
   7942                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, // both sources must have dword integer elements
   7943                             .patterns = &.{
   7944                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   7945                             },
   7946                             .extra_temps = .{
   7947                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   7948                                 .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: result bit index; pinned to rcx, presumably because the rotate below takes its count from tmp1b (cl)
   7949                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: result byte currently being assembled
   7950                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: per-chunk mask, later reused as the dst byte index
   7951                                 .{ .kind = .{ .rc = .sse } }, // tmp4: vector scratch register
   7952                                 .unused,
   7953                             },
   7954                             .dst_temps = .{.mem}, // the result lives in memory
   7955                             .clobbers = .{ .eflags = true }, // xor/test/add/shift/rotate below overwrite eflags
   7956                             .each = .{ .once = switch (cc) {
   7957                                 else => unreachable, // only .e and .ne have a sequence in this entry
   7958                                 .e => &.{
   7959                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   7960                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (bit index)
   7961                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // tmp2 = 0 (pending result byte)
   7962                                     .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: unaligned 16-byte load from src0 at the current offset
   7963                                     .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // dword-wise equality with src1; equal lanes become all-ones
   7964                                     .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, // gather the 4 lane sign bits into tmp3
   7965                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate left by the bit index: tmp1 advances in steps of 4, so the bits land in the low or high nibble
   7966                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge them into the pending byte
   7967                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, // bit index += 4 (one per lane)
   7968                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // is the bit index on a byte boundary?
   7969                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: byte not complete yet, skip the store
   7970                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7971                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = bit index / 8
   7972                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the completed byte; the -1 compensates for tmp1 already pointing past it
   7973                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // start a fresh pending byte
   7974                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // 1: advance to the next 16-byte chunk; sets carry once the offset wraps
   7975                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   7976                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // after the loop: is a partial byte still pending?
   7977                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // no: jump forward to label 0 - presumably the end of the sequence, its placement is not visible here
   7978                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7979                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = index of the partial byte
   7980                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // flush the partial byte
   7981                                 },
   7982                                 .ne => &.{ // same loop as .e, with the lane mask inverted before it is merged
   7983                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   7984                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   7985                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7986                                     .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   7987                                     .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   7988                                     .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ },
   7989                                     .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, // flip the 4 lane bits so a set bit means "not equal"
   7990                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
   7991                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
   7992                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   7993                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   7994                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   7995                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   7996                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   7997                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
   7998                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   7999                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
   8000                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8001                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8002                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8003                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8004                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   8005                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
   8006                                 },
   8007                             } },
   8008                         }, .{
   8009                             .required_features = .{ .sse4_1, null, null, null }, // SSE4.1 entry (needed for the qword compare below): per-element ==/!= of two operands, one result bit per element stored to dst
   8010                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, // both sources must have qword integer elements
   8011                             .patterns = &.{
   8012                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   8013                             },
   8014                             .extra_temps = .{
   8015                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   8016                                 .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: result bit index; pinned to rcx, presumably because the rotate below takes its count from tmp1b (cl)
   8017                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: result byte currently being assembled
   8018                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: per-chunk mask, later reused as the dst byte index
   8019                                 .{ .kind = .{ .rc = .sse } }, // tmp4: vector scratch register
   8020                                 .unused,
   8021                             },
   8022                             .dst_temps = .{.mem}, // the result lives in memory
   8023                             .clobbers = .{ .eflags = true }, // xor/test/add/shift/rotate below overwrite eflags
   8024                             .each = .{ .once = switch (cc) {
   8025                                 else => unreachable, // only .e and .ne have a sequence in this entry
   8026                                 .e => &.{
   8027                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   8028                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (bit index)
   8029                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // tmp2 = 0 (pending result byte)
   8030                                     .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: unaligned 16-byte load from src0 at the current offset
   8031                                     .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // qword-wise equality with src1; equal lanes become all-ones
   8032                                     .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, // gather the 2 lane sign bits into tmp3
   8033                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate left by the bit index: tmp1 advances in steps of 2, so the pair lands in one of four 2-bit slots
   8034                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge them into the pending byte
   8035                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, // bit index += 2 (one per lane)
   8036                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // is the bit index on a byte boundary?
   8037                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: byte not complete yet, skip the store
   8038                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8039                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = bit index / 8
   8040                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the completed byte; the -1 compensates for tmp1 already pointing past it
   8041                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, // start a fresh pending byte
   8042                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // 1: advance to the next 16-byte chunk; sets carry once the offset wraps
   8043                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   8044                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // after the loop: is a partial byte still pending?
   8045                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // no: jump forward to label 0 - presumably the end of the sequence, its placement is not visible here
   8046                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8047                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, // tmp3 = index of the partial byte
   8048                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // flush the partial byte
   8049                                 },
   8050                                 .ne => &.{ // same loop as .e, with the lane mask inverted before it is merged
   8051                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8052                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8053                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8054                                     .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   8055                                     .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   8056                                     .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ },
   8057                                     .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, // flip the 2 lane bits so a set bit means "not equal"
   8058                                     .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
   8059                                     .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
   8060                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   8061                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8062                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   8063                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8064                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   8065                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
   8066                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8067                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
   8068                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8069                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8070                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8071                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8072                                     .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   8073                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
   8074                                 },
   8075                             } },
   8076                         }, .{
   8077                             .required_features = .{ .sse, .mmx, null, null }, // entry requiring both SSE and MMX: per-element ==/!= of two operands, one result bit per element stored to dst
   8078                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, // both sources must have byte integer elements
   8079                             .patterns = &.{
   8080                                 .{ .src = .{ .to_mem, .to_mem } }, // both sources use the .to_mem pattern; the loop below reads them through memory operands
   8081                             },
   8082                             .extra_temps = .{
   8083                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
   8084                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: byte offset into dst
   8085                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: 8-bit mask for the current chunk
   8086                                 .{ .kind = .{ .rc = .mmx } }, // tmp3: MMX scratch register
   8087                                 .unused,
   8088                                 .unused,
   8089                             },
   8090                             .dst_temps = .{.mem}, // the result lives in memory
   8091                             .clobbers = .{ .eflags = true }, // xor/add below overwrite eflags
   8092                             .each = .{ .once = switch (cc) {
   8093                                 else => unreachable, // only .e and .ne have a sequence in this entry
   8094                                 .e => &.{
   8095                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = start offset; presumably minus the size of src0 - TODO confirm `.sa(.., .sub_size)`
   8096                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (dst offset)
   8097                                     .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, // 0: 8-byte load from src0 at the current offset
   8098                                     .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, // byte-wise equality with src1; equal lanes become all-ones
   8099                                     .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, // one mask bit per byte lane: 8 result bits
   8100                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, // store those 8 bits as one byte of dst
   8101                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // dst offset += 1 byte
   8102                                     .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // advance to the next 8-byte chunk; sets carry once the offset wraps
   8103                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more input remains (assuming tmp0 started negative)
   8104                                 },
   8105                                 .ne => &.{ // same loop as .e, with the mask inverted before the store
   8106                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8107                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8108                                     .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8109                                     .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8110                                     .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ },
   8111                                     .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, // invert the low byte so a set bit means "not equal"
   8112                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
   8113                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
   8114                                     .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   8115                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8116                                 },
   8117                             } },
   8118                         }, .{
   8119                             .required_features = .{ .sse, .mmx, null, null },
   8120                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   8121                             .patterns = &.{
   8122                                 .{ .src = .{ .to_mem, .to_mem } },
   8123                             },
   8124                             .extra_temps = .{ // tmp0..tmp5 referenced by this entry's `.each` sequences
   8125                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: offset used to index src0/src1, advanced by 8 per iteration
   8126                                 .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: running bit position; `.tmp1b` is the count of the `ro` (rotate) steps, and x86 only accepts a register count in cl
   8127                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: byte accumulator that is OR-ed into and then stored to dst0
   8128                                 .{ .kind = .{ .rc = .mmx } }, // tmp3: zeroed MMX register, second operand of the pack step
   8129                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: scratch for the movmsk result and the dst0 byte index
   8130                                 .{ .kind = .{ .rc = .mmx } }, // tmp5: MMX working register for the load/compare/pack
   8131                             },
   8132                             .dst_temps = .{.mem},
   8133                             .clobbers = .{ .eflags = true },
   8134                             .each = .{ .once = switch (cc) {
   8135                                 else => unreachable,
   8136                                 .e => &.{
   8137                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8138                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8139                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8140                                     .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
   8141                                     .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8142                                     .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8143                                     .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
   8144                                     .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
   8145                                     .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
   8146                                     .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
   8147                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   8148                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8149                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   8150                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8151                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8152                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
   8153                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8154                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
   8155                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8156                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8157                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8158                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8159                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8160                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
   8161                                 },
   8162                                 .ne => &.{
   8163                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8164                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8165                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8166                                     .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
   8167                                     .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8168                                     .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8169                                     .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
   8170                                     .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
   8171                                     .{ ._, ._, .xor, .tmp4b, .si(0b1111), ._, ._ },
   8172                                     .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
   8173                                     .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
   8174                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ },
   8175                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8176                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   8177                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8178                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8179                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
   8180                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8181                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
   8182                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8183                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8184                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8185                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8186                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8187                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
   8188                                 },
   8189                             } },
   8190                         }, .{
   8191                             .required_features = .{ .sse, .mmx, null, null },
   8192                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   8193                             .patterns = &.{
   8194                                 .{ .src = .{ .to_mem, .to_mem } },
   8195                             },
   8196                             .extra_temps = .{
   8197                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8198                                 .{ .type = .u32, .kind = .{ .reg = .rcx } },
   8199                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   8200                                 .{ .kind = .{ .rc = .mmx } },
   8201                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   8202                                 .{ .kind = .{ .rc = .mmx } },
   8203                             },
   8204                             .dst_temps = .{.mem},
   8205                             .clobbers = .{ .eflags = true },
   8206                             .each = .{ .once = switch (cc) {
   8207                                 else => unreachable,
   8208                                 .e => &.{
   8209                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8210                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8211                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8212                                     .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
   8213                                     .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8214                                     .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8215                                     .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ },
   8216                                     .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
   8217                                     .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
   8218                                     .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
   8219                                     .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
   8220                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   8221                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8222                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   8223                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8224                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8225                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
   8226                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8227                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
   8228                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8229                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8230                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8231                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8232                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8233                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
   8234                                 },
   8235                                 .ne => &.{
   8236                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8237                                     .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8238                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8239                                     .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ },
   8240                                     .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8241                                     .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8242                                     .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ },
   8243                                     .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ },
   8244                                     .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ },
   8245                                     .{ ._, ._, .xor, .tmp4b, .si(0b11), ._, ._ },
   8246                                     .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ },
   8247                                     .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ },
   8248                                     .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ },
   8249                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8250                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   8251                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8252                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8253                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
   8254                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
   8255                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
   8256                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8257                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
   8258                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8259                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
   8260                                     .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
   8261                                     .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
   8262                                 },
   8263                             } },
   8264                         }, .{
   8265                             .dst_constraints = .{.{ .bool_vec = .byte }},
   8266                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   8267                             .patterns = &.{
   8268                                 .{ .src = .{ .to_mem, .to_mem } },
   8269                             },
   8270                             .extra_temps = .{
   8271                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8272                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8273                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   8274                                 .unused,
   8275                                 .unused,
   8276                                 .unused,
   8277                             },
   8278                             .dst_temps = .{.{ .rc = .general_purpose }},
   8279                             .clobbers = .{ .eflags = true },
   8280                             .each = .{ .once = &.{
   8281                                 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
   8282                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8283                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8284                                 .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
   8285                                 .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
   8286                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8287                                 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
   8288                                 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
   8289                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8290                                 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   8291                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8292                             } },
   8293                         }, .{
   8294                             .dst_constraints = .{.{ .bool_vec = .byte }},
   8295                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   8296                             .patterns = &.{
   8297                                 .{ .src = .{ .to_mem, .to_mem } },
   8298                             },
   8299                             .extra_temps = .{
   8300                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8301                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8302                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
   8303                                 .unused,
   8304                                 .unused,
   8305                                 .unused,
   8306                             },
   8307                             .dst_temps = .{.{ .rc = .general_purpose }},
   8308                             .clobbers = .{ .eflags = true },
   8309                             .each = .{ .once = &.{ // builds a byte-sized bool vector, one bit per pair of word elements
   8310                                 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, // clear the result byte
   8311                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // presumably starts the offset at -size so the carry-out below ends the loop; confirm against `.sub_size`
   8312                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, // bit index (cl) = 0
   8313                                 .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
   8314                                 .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
   8315                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, // only the low byte of tmp2 holds the 0/1 result
   8316                                 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, // byte-wide shift: bits above the low byte of tmp2 are stale and must not be used
   8317                                 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, // byte-wide merge, matching the byte-wide clear of dst0 above
   8318                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8319                                 .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one word element
   8320                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the offset add carries
   8321                             } },
   8322                         }, .{
   8323                             .dst_constraints = .{.{ .bool_vec = .byte }},
   8324                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   8325                             .patterns = &.{
   8326                                 .{ .src = .{ .to_mem, .to_mem } },
   8327                             },
   8328                             .extra_temps = .{
   8329                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8330                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8331                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8332                                 .unused,
   8333                                 .unused,
   8334                                 .unused,
   8335                             },
   8336                             .dst_temps = .{.{ .rc = .general_purpose }},
   8337                             .clobbers = .{ .eflags = true },
   8338                             .each = .{ .once = &.{
   8339                                 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
   8340                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8341                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8342                                 .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
   8343                                 .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
   8344                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8345                                 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
   8346                                 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
   8347                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8348                                 .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
   8349                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8350                             } },
   8351                         }, .{
   8352                             .required_features = .{ .@"64bit", null, null, null },
   8353                             .dst_constraints = .{.{ .bool_vec = .byte }},
   8354                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   8355                             .patterns = &.{
   8356                                 .{ .src = .{ .to_mem, .to_mem } },
   8357                             },
   8358                             .extra_temps = .{
   8359                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8360                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8361                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   8362                                 .unused,
   8363                                 .unused,
   8364                                 .unused,
   8365                             },
   8366                             .dst_temps = .{.{ .rc = .general_purpose }},
   8367                             .clobbers = .{ .eflags = true },
   8368                             .each = .{ .once = &.{ // builds a byte-sized bool vector, one bit per pair of qword elements
   8369                                 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, // clear the result byte
   8370                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // presumably starts the offset at -size so the carry-out below ends the loop; confirm against `.sub_size`
   8371                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, // bit index (cl) = 0
   8372                                 .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8373                                 .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8374                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8375                                 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, // move the 0/1 result to its bit position
   8376                                 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
   8377                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8378                                 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // advance by one qword element (8 bytes), matching the `q`-sized loads above
   8379                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the offset add carries
   8380                             } },
   8381                         }, .{
   8382                             .dst_constraints = .{.{ .bool_vec = .byte }},
   8383                             .patterns = &.{
   8384                                 .{ .src = .{ .to_mem, .to_mem } },
   8385                             },
   8386                             .extra_temps = .{
   8387                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8388                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8389                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8390                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8391                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8392                                 .unused,
   8393                             },
   8394                             .dst_temps = .{.{ .rc = .general_purpose }},
   8395                             .clobbers = .{ .eflags = true },
   8396                             .each = .{ .once = &.{ // fallback without src constraints: compares elements limb by limb into a byte-sized bool vector
   8397                                 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, // clear the result byte
   8398                                 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, // offset into src0/src1 = 0
   8399                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, // element/bit index (cl) = 0
   8400                                 .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, // per-element limb counter (presumably limbs per element; confirm against `.add_elem_limbs`)
   8401                                 .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // tmp3 accumulates the OR of all limb differences
   8402                                 .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
   8403                                 .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, // nonzero iff the two limbs differ
   8404                                 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
   8405                                 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, // step to the next limb
   8406                                 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
   8407                                 .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, // loop while limbs remain; a borrow-based jump would never be taken here because the counter is nonzero before the sub
   8408                                 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, // ZF set iff every limb matched
   8409                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8410                                 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, // move the 0/1 result to its bit position
   8411                                 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
   8412                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8413                                 .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ },
   8414                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, // next element while index is below the compared-against length
   8415                             } },
   8416                         }, .{
   8417                             .dst_constraints = .{.{ .bool_vec = .dword }},
   8418                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   8419                             .patterns = &.{
   8420                                 .{ .src = .{ .to_mem, .to_mem } },
   8421                             },
   8422                             .extra_temps = .{
   8423                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8424                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8425                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8426                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   8427                                 .unused,
   8428                                 .unused,
   8429                             },
   8430                             .dst_temps = .{.{ .rc = .general_purpose }},
   8431                             .clobbers = .{ .eflags = true },
   8432                             .each = .{ .once = &.{
   8433                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8434                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8435                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8436                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8437                                 .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
   8438                                 .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
   8439                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8440                                 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
   8441                                 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
   8442                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8443                                 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   8444                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8445                             } },
   8446                         }, .{
   8447                             .dst_constraints = .{.{ .bool_vec = .dword }},
   8448                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   8449                             .patterns = &.{
   8450                                 .{ .src = .{ .to_mem, .to_mem } },
   8451                             },
   8452                             .extra_temps = .{
   8453                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8454                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8455                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8456                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
   8457                                 .unused,
   8458                                 .unused,
   8459                             },
   8460                             .dst_temps = .{.{ .rc = .general_purpose }},
   8461                             .clobbers = .{ .eflags = true },
   8462                             .each = .{ .once = &.{
   8463                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8464                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8465                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8466                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8467                                 .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
   8468                                 .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
   8469                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8470                                 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
   8471                                 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
   8472                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8473                                 .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
   8474                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8475                             } },
   8476                         }, .{
   8477                             .dst_constraints = .{.{ .bool_vec = .dword }},
   8478                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   8479                             .patterns = &.{
   8480                                 .{ .src = .{ .to_mem, .to_mem } },
   8481                             },
   8482                             .extra_temps = .{
   8483                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8484                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8485                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8486                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8487                                 .unused,
   8488                                 .unused,
   8489                             },
   8490                             .dst_temps = .{.{ .rc = .general_purpose }},
   8491                             .clobbers = .{ .eflags = true },
   8492                             .each = .{ .once = &.{
   8493                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8494                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8495                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8496                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8497                                 .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
   8498                                 .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
   8499                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8500                                 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
   8501                                 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
   8502                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8503                                 .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
   8504                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8505                             } },
   8506                         }, .{
   8507                             .required_features = .{ .@"64bit", null, null, null }, // this entry uses 64-bit registers (tmp3q)
   8508                             .dst_constraints = .{.{ .bool_vec = .dword }}, // result bits are accumulated in dst0d
   8509                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   8510                             .patterns = &.{
   8511                                 .{ .src = .{ .to_mem, .to_mem } }, // both operands are read element by element from memory
   8512                             },
   8513                             .extra_temps = .{
   8514                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset of the current element
   8515                                 .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp1: bit index, must be cl to serve as shift count
   8516                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: 0/1 result of one comparison
   8517                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp3: lhs element being compared
   8518                                 .unused,
   8519                                 .unused,
   8520                             },
   8521                             .dst_temps = .{.{ .rc = .general_purpose }},
   8522                             .clobbers = .{ .eflags = true },
   8523                             .each = .{ .once = &.{
   8524                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // start with no result bits set
   8525                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // presumably -size(src0); counts up towards zero
   8526                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, // bit index = 0
   8527                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // clear so that setcc below leaves exactly 0 or 1
   8528                                 .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   8529                                 .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8530                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8531                                 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, // move the result bit to this element's position
   8532                                 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
   8533                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8534                                 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // advance by one qword element: the stride must equal the 8-byte element size
   8535                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the offset add carries
   8536                             } },
   8537                         }, .{
   8538                             .dst_constraints = .{.{ .bool_vec = .dword }},
   8539                             .patterns = &.{
   8540                                 .{ .src = .{ .to_mem, .to_mem } },
   8541                             },
   8542                             .extra_temps = .{
   8543                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8544                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8545                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8546                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8547                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8548                                 .unused,
   8549                             },
   8550                             .dst_temps = .{.{ .rc = .general_purpose }},
   8551                             .clobbers = .{ .eflags = true },
   8552                             .each = .{ .once = &.{
   8553                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8554                                 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   8555                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8556                                 .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
   8557                                 .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
   8558                                 .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
   8559                                 .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
   8560                                 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
   8561                                 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
   8562                                 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
   8563                                 .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
   8564                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8565                                 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
   8566                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8567                                 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
   8568                                 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
   8569                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8570                                 .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ },
   8571                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
   8572                             } },
   8573                         }, .{
   8574                             .required_features = .{ .@"64bit", null, null, null },
   8575                             .dst_constraints = .{.{ .bool_vec = .qword }},
   8576                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   8577                             .patterns = &.{
   8578                                 .{ .src = .{ .to_mem, .to_mem } },
   8579                             },
   8580                             .extra_temps = .{
   8581                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8582                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8583                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   8584                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   8585                                 .unused,
   8586                                 .unused,
   8587                             },
   8588                             .dst_temps = .{.{ .rc = .general_purpose }},
   8589                             .clobbers = .{ .eflags = true },
   8590                             .each = .{ .once = &.{
   8591                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8592                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8593                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8594                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8595                                 .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
   8596                                 .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
   8597                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8598                                 .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
   8599                                 .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
   8600                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8601                                 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
   8602                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8603                             } },
   8604                         }, .{
   8605                             .required_features = .{ .@"64bit", null, null, null }, // this entry uses 64-bit registers (tmp2q, dst0q)
   8606                             .dst_constraints = .{.{ .bool_vec = .qword }}, // result bits are accumulated in dst0q
   8607                             .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
   8608                             .patterns = &.{
   8609                                 .{ .src = .{ .to_mem, .to_mem } }, // both operands are read element by element from memory
   8610                             },
   8611                             .extra_temps = .{
   8612                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset of the current element
   8613                                 .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp1: bit index, must be cl to serve as shift count
   8614                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: 0/1 result of one comparison
   8615                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp3: lhs element being compared
   8616                                 .unused,
   8617                                 .unused,
   8618                             },
   8619                             .dst_temps = .{.{ .rc = .general_purpose }},
   8620                             .clobbers = .{ .eflags = true },
   8621                             .each = .{ .once = &.{
   8622                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // 32-bit xor; on x86-64 this zeroes the whole 64-bit register
   8623                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // presumably -size(src0); counts up towards zero
                                        .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, // bit index = 0; without this the first shift uses a stale cl
   8624                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // clear so that setcc below leaves exactly 0 or 1
   8625                                 .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
   8626                                 .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
   8627                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8628                                 .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, // move the result bit to this element's position
   8629                                 .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
   8630                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8631                                 .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one word element (2 bytes)
   8632                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the offset add carries
   8633                             } },
   8634                         }, .{
   8635                             .required_features = .{ .@"64bit", null, null, null },
   8636                             .dst_constraints = .{.{ .bool_vec = .qword }},
   8637                             .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
   8638                             .patterns = &.{
   8639                                 .{ .src = .{ .to_mem, .to_mem } },
   8640                             },
   8641                             .extra_temps = .{
   8642                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8643                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8644                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
   8645                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   8646                                 .unused,
   8647                                 .unused,
   8648                             },
   8649                             .dst_temps = .{.{ .rc = .general_purpose }},
   8650                             .clobbers = .{ .eflags = true },
   8651                             .each = .{ .once = &.{
   8652                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8653                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8654                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8655                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8656                                 .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
   8657                                 .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
   8658                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8659                                 .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
   8660                                 .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
   8661                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8662                                 .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
   8663                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8664                             } },
   8665                         }, .{
   8666                             .required_features = .{ .@"64bit", null, null, null }, // this entry uses 64-bit registers throughout
   8667                             .dst_constraints = .{.{ .bool_vec = .qword }}, // result bits are accumulated in dst0q
   8668                             .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
   8669                             .patterns = &.{
   8670                                 .{ .src = .{ .to_mem, .to_mem } }, // both operands are read element by element from memory
   8671                             },
   8672                             .extra_temps = .{
   8673                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset of the current element
   8674                                 .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp1: bit index, must be cl to serve as shift count
   8675                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: 0/1 result of one comparison
   8676                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp3: lhs element being compared
   8677                                 .unused,
   8678                                 .unused,
   8679                             },
   8680                             .dst_temps = .{.{ .rc = .general_purpose }},
   8681                             .clobbers = .{ .eflags = true },
   8682                             .each = .{ .once = &.{
   8683                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // 32-bit xor; on x86-64 this zeroes the whole 64-bit register
   8684                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // presumably -size(src0); counts up towards zero
   8685                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, // bit index = 0
   8686                                 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // clear so that setcc below leaves exactly 0 or 1
   8687                                 .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, // load into tmp3, not tmp2: setcc only writes the low byte
   8688                                 .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   8689                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8690                                 .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, // move the result bit to this element's position
   8691                                 .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
   8692                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8693                                 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // advance by one qword element (8 bytes)
   8694                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the offset add carries
   8695                             } },
   8696                         }, .{
   8697                             .required_features = .{ .@"64bit", null, null, null },
   8698                             .dst_constraints = .{.{ .bool_vec = .qword }},
   8699                             .patterns = &.{
   8700                                 .{ .src = .{ .to_mem, .to_mem } },
   8701                             },
   8702                             .extra_temps = .{
   8703                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8704                                 .{ .type = .u8, .kind = .{ .reg = .cl } },
   8705                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8706                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8707                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8708                                 .unused,
   8709                             },
   8710                             .dst_temps = .{.{ .rc = .general_purpose }},
   8711                             .clobbers = .{ .eflags = true },
   8712                             .each = .{ .once = &.{
   8713                                 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
   8714                                 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
   8715                                 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8716                                 .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
   8717                                 .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
   8718                                 .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
   8719                                 .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
   8720                                 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
   8721                                 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
   8722                                 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
   8723                                 .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
   8724                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8725                                 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
   8726                                 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
   8727                                 .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
   8728                                 .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
   8729                                 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
   8730                                 .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ },
   8731                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
   8732                             } },
   8733                         }, .{
   8734                             .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } },
   8735                             .patterns = &.{
   8736                                 .{ .src = .{ .to_mem, .to_mem } },
   8737                             },
   8738                             .extra_temps = .{
   8739                                 .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8740                                 .{ .type = .u32, .kind = .{ .reg = .ecx } },
   8741                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8742                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8743                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
   8744                                 .unused,
   8745                             },
   8746                             .dst_temps = .{.mem},
   8747                             .clobbers = .{ .eflags = true },
   8748                             .each = .{ .once = &.{
   8749                                 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8750                                 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
   8751                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8752                                 .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
   8753                                 .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
   8754                                 .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
   8755                                 .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ },
   8756                                 .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ },
   8757                                 .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ },
   8758                                 .{ ._, ._, .add, .tmp1d, .si(1), ._, ._ },
   8759                                 .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ },
   8760                                 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
   8761                                 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8762                                 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   8763                                 .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ },
   8764                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
   8765                                 .{ .@"1:", ._, .add, .tmp0p, .si(1), ._, ._ },
   8766                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8767                                 .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ },
   8768                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
   8769                                 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
   8770                                 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
   8771                                 .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ },
   8772                             } },
   8773                         } },
   8774                     }) catch |err| switch (err) {
   8775                         error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
   8776                             @tagName(air_tag),
   8777                             cg.typeOf(extra.lhs).fmt(pt),
   8778                             ops[0].tracking(cg),
   8779                             ops[1].tracking(cg),
   8780                         }),
   8781                         else => |e| return e,
   8782                     },
   8783                     .gte => unreachable,
   8784                     .gt => unreachable,
   8785                 }
   8786                 for (ops) |op| for (res) |r| {
   8787                     if (op.index == r.index) break;
   8788                 } else try op.die(cg);
   8789                 try res[0].moveTo(inst, cg);
   8790             },
   8791 
   8792             .cmp_lt,
   8793             .cmp_lt_optimized,
   8794             .cmp_lte,
   8795             .cmp_lte_optimized,
   8796             .cmp_gte,
   8797             .cmp_gte_optimized,
   8798             .cmp_gt,
   8799             .cmp_gt_optimized,
   8800             => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
   8801                 else => unreachable,
   8802                 .cmp_lt, .cmp_lt_optimized => .lt,
   8803                 .cmp_lte, .cmp_lte_optimized => .lte,
   8804                 .cmp_gte, .cmp_gte_optimized => .gte,
   8805                 .cmp_gt, .cmp_gt_optimized => .gt,
   8806             }) else fallback: {
   8807                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
   8808                 const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
   8809                 if (scalar_ty.isRuntimeFloat()) break :fallback try cg.airCmp(inst, switch (air_tag) {
   8810                     else => unreachable,
   8811                     .cmp_lt, .cmp_lt_optimized => .lt,
   8812                     .cmp_lte, .cmp_lte_optimized => .lte,
   8813                     .cmp_gte, .cmp_gte_optimized => .gte,
   8814                     .cmp_gt, .cmp_gt_optimized => .gt,
   8815                 });
   8816                 const signedness = if (scalar_ty.isAbiInt(zcu))
   8817                     scalar_ty.intInfo(zcu).signedness
   8818                 else
   8819                     .unsigned;
   8820                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   8821                 var res: [1]Temp = undefined;
   8822                 cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (signedness) {
   8823                     .signed => switch (air_tag) {
   8824                         else => unreachable,
   8825                         .cmp_lt, .cmp_lt_optimized => .l,
   8826                         .cmp_lte, .cmp_lte_optimized => .le,
   8827                         .cmp_gte, .cmp_gte_optimized => .ge,
   8828                         .cmp_gt, .cmp_gt_optimized => .g,
   8829                     },
   8830                     .unsigned => switch (air_tag) {
   8831                         else => unreachable,
   8832                         .cmp_lt, .cmp_lt_optimized => .b,
   8833                         .cmp_lte, .cmp_lte_optimized => .be,
   8834                         .cmp_gte, .cmp_gte_optimized => .ae,
   8835                         .cmp_gt, .cmp_gt_optimized => .a,
   8836                     },
   8837                 })) {
   8838                     else => unreachable,
   8839                     inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{
   8840                         .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
   8841                         .patterns = &.{
   8842                             .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
   8843                             .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
   8844                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   8845                         },
   8846                         .dst_temps = .{.{ .cc = cc.commute() }},
   8847                         .clobbers = .{ .eflags = true },
   8848                         .each = .{ .once = &.{
   8849                             .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
   8850                         } },
   8851                     }, .{
   8852                         .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
   8853                         .patterns = &.{
   8854                             .{ .src = .{ .mem, .imm8 } },
   8855                             .{ .src = .{ .to_gpr, .imm8 } },
   8856                             .{ .src = .{ .to_gpr, .mem } },
   8857                             .{ .src = .{ .to_gpr, .to_gpr } },
   8858                         },
   8859                         .dst_temps = .{.{ .cc = cc }},
   8860                         .clobbers = .{ .eflags = true },
   8861                         .each = .{ .once = &.{
   8862                             .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
   8863                         } },
   8864                     }, .{
   8865                         .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
   8866                         .patterns = &.{
   8867                             .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
   8868                             .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
   8869                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   8870                         },
   8871                         .dst_temps = .{.{ .cc = cc.commute() }},
   8872                         .clobbers = .{ .eflags = true },
   8873                         .each = .{ .once = &.{
   8874                             .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
   8875                         } },
   8876                     }, .{
   8877                         .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
   8878                         .patterns = &.{
   8879                             .{ .src = .{ .mem, .imm16 } },
   8880                             .{ .src = .{ .to_gpr, .imm16 } },
   8881                             .{ .src = .{ .to_gpr, .mem } },
   8882                             .{ .src = .{ .to_gpr, .to_gpr } },
   8883                         },
   8884                         .dst_temps = .{.{ .cc = cc }},
   8885                         .clobbers = .{ .eflags = true },
   8886                         .each = .{ .once = &.{
   8887                             .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
   8888                         } },
   8889                     }, .{
   8890                         .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
   8891                         .patterns = &.{
   8892                             .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
   8893                             .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
   8894                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   8895                         },
   8896                         .dst_temps = .{.{ .cc = cc.commute() }},
   8897                         .clobbers = .{ .eflags = true },
   8898                         .each = .{ .once = &.{
   8899                             .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
   8900                         } },
   8901                     }, .{
   8902                         .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
   8903                         .patterns = &.{
   8904                             .{ .src = .{ .mem, .imm32 } },
   8905                             .{ .src = .{ .to_gpr, .imm32 } },
   8906                             .{ .src = .{ .to_gpr, .mem } },
   8907                             .{ .src = .{ .to_gpr, .to_gpr } },
   8908                         },
   8909                         .dst_temps = .{.{ .cc = cc }},
   8910                         .clobbers = .{ .eflags = true },
   8911                         .each = .{ .once = &.{
   8912                             .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
   8913                         } },
   8914                     }, .{
   8915                         .required_features = .{ .@"64bit", null, null, null },
   8916                         .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
   8917                         .patterns = &.{
   8918                             .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
   8919                             .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
   8920                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   8921                         },
   8922                         .dst_temps = .{.{ .cc = cc.commute() }},
   8923                         .clobbers = .{ .eflags = true },
   8924                         .each = .{ .once = &.{
   8925                             .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
   8926                         } },
   8927                     }, .{
   8928                         .required_features = .{ .@"64bit", null, null, null },
   8929                         .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
   8930                         .patterns = &.{
   8931                             .{ .src = .{ .mem, .simm32 } },
   8932                             .{ .src = .{ .to_gpr, .simm32 } },
   8933                             .{ .src = .{ .to_gpr, .mem } },
   8934                             .{ .src = .{ .to_gpr, .to_gpr } },
   8935                         },
   8936                         .dst_temps = .{.{ .cc = cc }},
   8937                         .clobbers = .{ .eflags = true },
   8938                         .each = .{ .once = &.{
   8939                             .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
   8940                         } },
   8941                     }, .{
   8942                         .src_constraints = .{ .any_int, .any_int },
   8943                         .patterns = &.{
   8944                             .{ .src = .{ .to_mem, .to_mem }, .commute = switch (cc) {
   8945                                 else => unreachable,
   8946                                 .l, .ge, .b, .ae => .{ 0, 0 },
   8947                                 .le, .g, .be, .a => .{ 0, 1 },
   8948                             } },
   8949                         },
   8950                         .extra_temps = .{
   8951                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   8952                             .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   8953                             .unused,
   8954                             .unused,
   8955                             .unused,
   8956                             .unused,
   8957                         },
   8958                         .dst_temps = .{.{ .rc = .general_purpose }},
   8959                         .clobbers = .{ .eflags = true },
   8960                         .each = .{ .once = &.{
   8961                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   8962                             .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
   8963                             .{ .@"0:", ._r, .sh, .tmp1b, .si(1), ._, ._ },
   8964                             .{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
   8965                             .{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
   8966                             .{ ._, ._c, .set, .tmp1b, ._, ._, ._ },
   8967                             .{ ._, .fromCondition(switch (cc) {
   8968                                 else => unreachable,
   8969                                 .l, .ge, .b, .ae => cc,
   8970                                 .le, .g, .be, .a => cc.commute(),
   8971                             }), .set, .dst0b, ._, ._, ._ },
   8972                             .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
   8973                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   8974                         } },
   8975                     } },
   8976                 }) catch |err| switch (err) {
   8977                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
   8978                         @tagName(air_tag),
   8979                         cg.typeOf(bin_op.lhs).fmt(pt),
   8980                         ops[0].tracking(cg),
   8981                         ops[1].tracking(cg),
   8982                     }),
   8983                     else => |e| return e,
   8984                 };
   8985                 for (ops) |op| for (res) |r| {
   8986                     if (op.index == r.index) break;
   8987                 } else try op.die(cg);
   8988                 try res[0].moveTo(inst, cg);
   8989             },
   8990             .cmp_eq,
   8991             .cmp_eq_optimized,
   8992             .cmp_neq,
   8993             .cmp_neq_optimized,
                    // Equality / inequality comparisons. With `use_old` set, the instruction is
                    // handed to `airCmp` as `.eq` or `.neq`; otherwise the `fallback` block picks
                    // an instruction sequence from a comptime pattern table through `cg.select`.
   8994             => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
   8995                 else => unreachable,
   8996                 .cmp_eq, .cmp_eq_optimized => .eq,
   8997                 .cmp_neq, .cmp_neq_optimized => .neq,
   8998             }) else fallback: {
   8999                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
   9000                 const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
                        // Runtime-float and optional scalar types bypass the table and still go
                        // through `airCmp`.
   9001                 if (scalar_ty.isRuntimeFloat() or ip.isOptionalType(scalar_ty.toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) {
   9002                     else => unreachable,
   9003                     .cmp_eq, .cmp_eq_optimized => .eq,
   9004                     .cmp_neq, .cmp_neq_optimized => .neq,
   9005                 });
   9006                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   9007                 var res: [1]Temp = undefined;
                        // One `bool` result. The table is instantiated at comptime for the condition
                        // `.e` (eq tags) or `.ne` (neq tags); every entry below reports its result as
                        // that condition code (`.dst_temps = .cc`) and declares eflags clobbered.
                        // NOTE(review): entries look like they are tried in the order written
                        // (feature/size-specific first, unconstrained loops last) — confirm in `select`.
   9008                 cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (air_tag) {
   9009                     else => unreachable,
   9010                     .cmp_eq, .cmp_eq_optimized => .e,
   9011                     .cmp_neq, .cmp_neq_optimized => .ne,
   9012                 })) {
   9013                     else => unreachable,
   9014                     inline .e, .ne => |cc| comptime &.{ .{
                                // `yword` ints with `avx2`: xor both operands into a scratch sse-class
                                // temp, then test that temp against itself (presumably vpxor + vptest).
   9015                         .required_features = .{ .avx2, null, null, null },
   9016                         .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
   9017                         .patterns = &.{
   9018                             .{ .src = .{ .to_ymm, .mem } },
   9019                             .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   9020                             .{ .src = .{ .to_ymm, .to_ymm } },
   9021                         },
   9022                         .extra_temps = .{
   9023                             .{ .kind = .{ .rc = .sse } },
   9024                             .unused,
   9025                             .unused,
   9026                             .unused,
   9027                             .unused,
   9028                             .unused,
   9029                         },
   9030                         .dst_temps = .{.{ .cc = cc }},
   9031                         .clobbers = .{ .eflags = true },
   9032                         .each = .{ .once = &.{
   9033                             .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ },
   9034                             .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
   9035                         } },
   9036                     }, .{
                                // `yword` ints with only `avx`: same shape, but the xor uses the `v_pd`
                                // form instead of `vp_`.
   9037                         .required_features = .{ .avx, null, null, null },
   9038                         .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
   9039                         .patterns = &.{
   9040                             .{ .src = .{ .to_ymm, .mem } },
   9041                             .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
   9042                             .{ .src = .{ .to_ymm, .to_ymm } },
   9043                         },
   9044                         .extra_temps = .{
   9045                             .{ .kind = .{ .rc = .sse } },
   9046                             .unused,
   9047                             .unused,
   9048                             .unused,
   9049                             .unused,
   9050                             .unused,
   9051                         },
   9052                         .dst_temps = .{.{ .cc = cc }},
   9053                         .clobbers = .{ .eflags = true },
   9054                         .each = .{ .once = &.{
   9055                             .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ },
   9056                             .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
   9057                         } },
   9058                     }, .{
                                // `xword` ints with `avx`: the same xor-then-test pair on `x`-sized operands.
   9059                         .required_features = .{ .avx, null, null, null },
   9060                         .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
   9061                         .patterns = &.{
   9062                             .{ .src = .{ .to_xmm, .mem } },
   9063                             .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
   9064                             .{ .src = .{ .to_xmm, .to_xmm } },
   9065                         },
   9066                         .extra_temps = .{
   9067                             .{ .kind = .{ .rc = .sse } },
   9068                             .unused,
   9069                             .unused,
   9070                             .unused,
   9071                             .unused,
   9072                             .unused,
   9073                         },
   9074                         .dst_temps = .{.{ .cc = cc }},
   9075                         .clobbers = .{ .eflags = true },
   9076                         .each = .{ .once = &.{
   9077                             .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ },
   9078                             .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ },
   9079                         } },
   9080                     }, .{
                                // `xword` ints with `sse4_1`: no scratch temp; the xor names src0 as its
                                // first operand, which is presumably why the patterns ask for `to_mut_xmm`.
   9081                         .required_features = .{ .sse4_1, null, null, null },
   9082                         .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
   9083                         .patterns = &.{
   9084                             .{ .src = .{ .to_mut_xmm, .mem } },
   9085                             .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   9086                             .{ .src = .{ .to_mut_xmm, .to_xmm } },
   9087                         },
   9088                         .dst_temps = .{.{ .cc = cc }},
   9089                         .clobbers = .{ .eflags = true },
   9090                         .each = .{ .once = &.{
   9091                             .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
   9092                             .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ },
   9093                         } },
   9094                     }, .{
                                // `xword` ints with only `sse2`: clear tmp1, xor the operands into src0,
                                // byte-compare tmp1 with src0, extract the mask into tmp0d, and xor it
                                // with 0xffff. NOTE(review): presumably the final xor is zero exactly when
                                // all 16 mask bits are set, i.e. the operands were equal — confirm.
   9095                         .required_features = .{ .sse2, null, null, null },
   9096                         .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
   9097                         .patterns = &.{
   9098                             .{ .src = .{ .to_mut_xmm, .mem } },
   9099                             .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
   9100                             .{ .src = .{ .to_mut_xmm, .to_xmm } },
   9101                         },
   9102                         .extra_temps = .{
   9103                             .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   9104                             .{ .kind = .{ .rc = .sse } },
   9105                             .unused,
   9106                             .unused,
   9107                             .unused,
   9108                             .unused,
   9109                         },
   9110                         .dst_temps = .{.{ .cc = cc }},
   9111                         .clobbers = .{ .eflags = true },
   9112                         .each = .{ .once = &.{
   9113                             .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
   9114                             .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
   9115                             .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ },
   9116                             .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
   9117                             .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ },
   9118                         } },
   9119                     }, .{
                                // `qword` ints held in mmx-class registers (`sse` and `mmx` both required):
                                // the same five-step sequence on `q`-sized operands with a 0xff constant.
   9120                         .required_features = .{ .sse, .mmx, null, null },
   9121                         .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
   9122                         .patterns = &.{
   9123                             .{ .src = .{ .to_mut_mm, .mem } },
   9124                             .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
   9125                             .{ .src = .{ .to_mut_mm, .to_mm } },
   9126                         },
   9127                         .extra_temps = .{
   9128                             .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
   9129                             .{ .kind = .{ .rc = .mmx } },
   9130                             .unused,
   9131                             .unused,
   9132                             .unused,
   9133                             .unused,
   9134                         },
   9135                         .dst_temps = .{.{ .cc = cc }},
   9136                         .clobbers = .{ .eflags = true },
   9137                         .each = .{ .once = &.{
   9138                             .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
   9139                             .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
   9140                             .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
   9141                             .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
   9142                             .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ },
   9143                         } },
   9144                     }, .{
                                // `byte` ints: a single `cmp`. Patterns that list the immediate or the
                                // memory operand first carry `.commute = .{ 0, 1 }`; `cc` is used unchanged
                                // for them as well.
   9145                         .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
   9146                         .patterns = &.{
   9147                             .{ .src = .{ .mem, .imm8 } },
   9148                             .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
   9149                             .{ .src = .{ .to_gpr, .imm8 } },
   9150                             .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
   9151                             .{ .src = .{ .to_gpr, .mem } },
   9152                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   9153                             .{ .src = .{ .to_gpr, .to_gpr } },
   9154                         },
   9155                         .dst_temps = .{.{ .cc = cc }},
   9156                         .clobbers = .{ .eflags = true },
   9157                         .each = .{ .once = &.{
   9158                             .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
   9159                         } },
   9160                     }, .{
                                // `word` ints: same pattern set with `imm16` and a `w`-sized `cmp`.
   9161                         .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
   9162                         .patterns = &.{
   9163                             .{ .src = .{ .mem, .imm16 } },
   9164                             .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
   9165                             .{ .src = .{ .to_gpr, .imm16 } },
   9166                             .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
   9167                             .{ .src = .{ .to_gpr, .mem } },
   9168                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   9169                             .{ .src = .{ .to_gpr, .to_gpr } },
   9170                         },
   9171                         .dst_temps = .{.{ .cc = cc }},
   9172                         .clobbers = .{ .eflags = true },
   9173                         .each = .{ .once = &.{
   9174                             .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
   9175                         } },
   9176                     }, .{
                                // `dword` ints: same pattern set with `imm32` and a `d`-sized `cmp`.
   9177                         .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
   9178                         .patterns = &.{
   9179                             .{ .src = .{ .mem, .imm32 } },
   9180                             .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
   9181                             .{ .src = .{ .to_gpr, .imm32 } },
   9182                             .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
   9183                             .{ .src = .{ .to_gpr, .mem } },
   9184                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   9185                             .{ .src = .{ .to_gpr, .to_gpr } },
   9186                         },
   9187                         .dst_temps = .{.{ .cc = cc }},
   9188                         .clobbers = .{ .eflags = true },
   9189                         .each = .{ .once = &.{
   9190                             .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
   9191                         } },
   9192                     }, .{
                                // `qword` ints, only with the `64bit` feature; immediates are restricted
                                // to `simm32`.
   9193                         .required_features = .{ .@"64bit", null, null, null },
   9194                         .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
   9195                         .patterns = &.{
   9196                             .{ .src = .{ .mem, .simm32 } },
   9197                             .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
   9198                             .{ .src = .{ .to_gpr, .simm32 } },
   9199                             .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
   9200                             .{ .src = .{ .to_gpr, .mem } },
   9201                             .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
   9202                             .{ .src = .{ .to_gpr, .to_gpr } },
   9203                         },
   9204                         .dst_temps = .{.{ .cc = cc }},
   9205                         .clobbers = .{ .eflags = true },
   9206                         .each = .{ .once = &.{
   9207                             .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
   9208                         } },
   9209                     }, .{
                                // `avx2`, both operands in memory, for ints whose `remainder_int` of
                                // `yword` is `xword`. The loop at label 0 xors matching `y`-sized chunks
                                // and ORs them into tmp1, stepping tmp0 by 32 until the add carries; one
                                // more `x`-sized chunk at `add_size - 16` is then folded in before the
                                // final test. NOTE(review): assumes `.sia(16, .src0, .sub_size)` starts
                                // tmp0 at 16 minus the operand size — confirm.
   9210                         .required_features = .{ .avx2, null, null, null },
   9211                         .src_constraints = .{
   9212                             .{ .remainder_int = .{ .of = .yword, .is = .xword } },
   9213                             .{ .remainder_int = .{ .of = .yword, .is = .xword } },
   9214                         },
   9215                         .patterns = &.{
   9216                             .{ .src = .{ .to_mem, .to_mem } },
   9217                         },
   9218                         .extra_temps = .{
   9219                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9220                             .{ .kind = .{ .rc = .sse } },
   9221                             .{ .kind = .{ .rc = .sse } },
   9222                             .unused,
   9223                             .unused,
   9224                             .unused,
   9225                         },
   9226                         .dst_temps = .{.{ .cc = cc }},
   9227                         .clobbers = .{ .eflags = true },
   9228                         .each = .{ .once = &.{
   9229                             .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
   9230                             .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
   9231                             .{ .@"0:", .v_dqu, .mov, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
   9232                             .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
   9233                             .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
   9234                             .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   9235                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9236                             .{ ._, .v_dqa, .mov, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ },
   9237                             .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ },
   9238                             .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
   9239                             .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
   9240                         } },
   9241                     }, .{
                                // `avx2`, both operands in memory, no size constraint: the same `y`-sized
                                // loop without the trailing `x`-sized chunk.
   9242                         .required_features = .{ .avx2, null, null, null },
   9243                         .patterns = &.{
   9244                             .{ .src = .{ .to_mem, .to_mem } },
   9245                         },
   9246                         .extra_temps = .{
   9247                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9248                             .{ .kind = .{ .rc = .sse } },
   9249                             .{ .kind = .{ .rc = .sse } },
   9250                             .unused,
   9251                             .unused,
   9252                             .unused,
   9253                         },
   9254                         .dst_temps = .{.{ .cc = cc }},
   9255                         .clobbers = .{ .eflags = true },
   9256                         .each = .{ .once = &.{
   9257                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9258                             .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
   9259                             .{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   9260                             .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ },
   9261                             .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
   9262                             .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   9263                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9264                             .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
   9265                         } },
   9266                     }, .{
                                // `avx`-only counterpart of the `yword`/`xword` remainder loop: moves,
                                // xor and or use the `v_pd` forms; only the final test keeps `vp_`.
   9267                         .required_features = .{ .avx, null, null, null },
   9268                         .src_constraints = .{
   9269                             .{ .remainder_int = .{ .of = .yword, .is = .xword } },
   9270                             .{ .remainder_int = .{ .of = .yword, .is = .xword } },
   9271                         },
   9272                         .patterns = &.{
   9273                             .{ .src = .{ .to_mem, .to_mem } },
   9274                         },
   9275                         .extra_temps = .{
   9276                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9277                             .{ .kind = .{ .rc = .sse } },
   9278                             .{ .kind = .{ .rc = .sse } },
   9279                             .unused,
   9280                             .unused,
   9281                             .unused,
   9282                         },
   9283                         .dst_temps = .{.{ .cc = cc }},
   9284                         .clobbers = .{ .eflags = true },
   9285                         .each = .{ .once = &.{
   9286                             .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
   9287                             .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
   9288                             .{ .@"0:", .v_pd, .movu, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
   9289                             .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
   9290                             .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
   9291                             .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   9292                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9293                             .{ ._, .v_pd, .mova, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ },
   9294                             .{ ._, .v_pd, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ },
   9295                             .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
   9296                             .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
   9297                         } },
   9298                     }, .{
                                // `avx`-only counterpart of the unconstrained `y`-sized loop.
   9299                         .required_features = .{ .avx, null, null, null },
   9300                         .patterns = &.{
   9301                             .{ .src = .{ .to_mem, .to_mem } },
   9302                         },
   9303                         .extra_temps = .{
   9304                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9305                             .{ .kind = .{ .rc = .sse } },
   9306                             .{ .kind = .{ .rc = .sse } },
   9307                             .unused,
   9308                             .unused,
   9309                             .unused,
   9310                         },
   9311                         .dst_temps = .{.{ .cc = cc }},
   9312                         .clobbers = .{ .eflags = true },
   9313                         .each = .{ .once = &.{
   9314                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9315                             .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
   9316                             .{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
   9317                             .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ },
   9318                             .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
   9319                             .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
   9320                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9321                             .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
   9322                         } },
   9323                     }, .{
                                // `avx` loop over `x`-sized chunks, stepping by 16.
                                // NOTE(review): this entry has the same required features and patterns as
                                // the previous one and no extra constraint — check whether it can ever be
                                // selected.
   9324                         .required_features = .{ .avx, null, null, null },
   9325                         .patterns = &.{
   9326                             .{ .src = .{ .to_mem, .to_mem } },
   9327                         },
   9328                         .extra_temps = .{
   9329                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9330                             .{ .kind = .{ .rc = .sse } },
   9331                             .{ .kind = .{ .rc = .sse } },
   9332                             .unused,
   9333                             .unused,
   9334                             .unused,
   9335                         },
   9336                         .dst_temps = .{.{ .cc = cc }},
   9337                         .clobbers = .{ .eflags = true },
   9338                         .each = .{ .once = &.{
   9339                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9340                             .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ },
   9341                             .{ .@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   9342                             .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ },
   9343                             .{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ },
   9344                             .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   9345                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9346                             .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ },
   9347                         } },
   9348                     }, .{
                                // `sse4_1` loop over `x`-sized chunks using the two-operand `p_` forms,
                                // ending in a test of the accumulator.
   9349                         .required_features = .{ .sse4_1, null, null, null },
   9350                         .patterns = &.{
   9351                             .{ .src = .{ .to_mem, .to_mem } },
   9352                         },
   9353                         .extra_temps = .{
   9354                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9355                             .{ .kind = .{ .rc = .sse } },
   9356                             .{ .kind = .{ .rc = .sse } },
   9357                             .unused,
   9358                             .unused,
   9359                             .unused,
   9360                         },
   9361                         .dst_temps = .{.{ .cc = cc }},
   9362                         .clobbers = .{ .eflags = true },
   9363                         .each = .{ .once = &.{
   9364                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9365                             .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
   9366                             .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   9367                             .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   9368                             .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
   9369                             .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   9370                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9371                             .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ },
   9372                         } },
   9373                     }, .{
                                // `sse2` loop: same accumulation, but the accumulator is checked by
                                // byte-comparing it with a zeroed tmp2, extracting the mask into tmp0d and
                                // comparing that with 0xffff (a `cmp` here, not the `xor` used by the
                                // register-operand `sse2` entry).
   9374                         .required_features = .{ .sse2, null, null, null },
   9375                         .patterns = &.{
   9376                             .{ .src = .{ .to_mem, .to_mem } },
   9377                         },
   9378                         .extra_temps = .{
   9379                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9380                             .{ .kind = .{ .rc = .sse } },
   9381                             .{ .kind = .{ .rc = .sse } },
   9382                             .unused,
   9383                             .unused,
   9384                             .unused,
   9385                         },
   9386                         .dst_temps = .{.{ .cc = cc }},
   9387                         .clobbers = .{ .eflags = true },
   9388                         .each = .{ .once = &.{
   9389                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9390                             .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
   9391                             .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
   9392                             .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
   9393                             .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
   9394                             .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
   9395                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9396                             .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
   9397                             .{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ },
   9398                             .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
   9399                             .{ ._, ._, .cmp, .tmp0d, .si(0xffff), ._, ._ },
   9400                         } },
   9401                     }, .{
                                // mmx-register loop (`sse` and `mmx` required) over `q`-sized chunks,
                                // stepping by 8, with the mask compared against 0xff at the end.
   9402                         .required_features = .{ .sse, .mmx, null, null },
   9403                         .patterns = &.{
   9404                             .{ .src = .{ .to_mem, .to_mem } },
   9405                         },
   9406                         .extra_temps = .{
   9407                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9408                             .{ .kind = .{ .rc = .mmx } },
   9409                             .{ .kind = .{ .rc = .mmx } },
   9410                             .unused,
   9411                             .unused,
   9412                             .unused,
   9413                         },
   9414                         .dst_temps = .{.{ .cc = cc }},
   9415                         .clobbers = .{ .eflags = true },
   9416                         .each = .{ .once = &.{
   9417                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9418                             .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
   9419                             .{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
   9420                             .{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
   9421                             .{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ },
   9422                             .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
   9423                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9424                             .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ },
   9425                             .{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ },
   9426                             .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
   9427                             .{ ._, ._, .cmp, .tmp0d, .si(0xff), ._, ._ },
   9428                         } },
   9429                     }, .{
                                // No feature requirement: the same xor/or accumulation with three
                                // general-purpose temps. The step is `.sa(.tmp2, .add_size)` instead of a
                                // literal (presumably the byte size of the `usize` temp — confirm).
   9430                         .patterns = &.{
   9431                             .{ .src = .{ .to_mem, .to_mem } },
   9432                         },
   9433                         .extra_temps = .{
   9434                             .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
   9435                             .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   9436                             .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
   9437                             .unused,
   9438                             .unused,
   9439                             .unused,
   9440                         },
   9441                         .dst_temps = .{.{ .cc = cc }},
   9442                         .clobbers = .{ .eflags = true },
   9443                         .each = .{ .once = &.{
   9444                             .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
   9445                             .{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ },
   9446                             .{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
   9447                             .{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
   9448                             .{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ },
   9449                             .{ ._, ._, .add, .tmp0p, .sa(.tmp2, .add_size), ._, ._ },
   9450                             .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
   9451                             .{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ },
   9452                         } },
   9453                     } },
   9454                 }) catch |err| switch (err) {
                            // `SelectFailed` is turned into a codegen failure that names the AIR tag, the
                            // lhs type and both operands' tracking; every other error is returned as is.
   9455                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
   9456                         @tagName(air_tag),
   9457                         cg.typeOf(bin_op.lhs).fmt(pt),
   9458                         ops[0].tracking(cg),
   9459                         ops[1].tracking(cg),
   9460                     }),
   9461                     else => |e| return e,
   9462                 };
                        // Kill every operand temp whose index matches none of the result temps, then
                        // hand the result over to `inst`.
   9463                 for (ops) |op| for (res) |r| {
   9464                     if (op.index == r.index) break;
   9465                 } else try op.die(cg);
   9466                 try res[0].moveTo(inst, cg);
   9467             },
   9468 
   9469             .cond_br => try cg.airCondBr(inst), // this run of tags is always forwarded to a dedicated helper; there is no `use_old` split here
   9470             .switch_br => try cg.airSwitchBr(inst),
   9471             .loop_switch_br => try cg.airLoopSwitchBr(inst),
   9472             .switch_dispatch => try cg.airSwitchDispatch(inst),
   9473             .@"try", .try_cold => try cg.airTry(inst), // the `_cold` variant shares the same helper
   9474             .try_ptr, .try_ptr_cold => try cg.airTryPtr(inst),
   9475             .dbg_stmt => if (use_old) try cg.airDbgStmt(inst) else { // Record the line/column of a statement as a pseudo MIR instruction.
   9476                 const dbg_stmt = air_datas[@intFromEnum(inst)].dbg_stmt;
   9477                 _ = try cg.addInst(.{
   9478                     .tag = .pseudo,
   9479                     .ops = .pseudo_dbg_line_stmt_line_column, // statement variant: `.dbg_empty_stmt` looks for exactly this op on the previous instruction and demotes it
   9480                     .data = .{ .line_column = .{
   9481                         .line = dbg_stmt.line,
   9482                         .column = dbg_stmt.column,
   9483                     } },
   9484                 });
   9485             },
   9486             .dbg_empty_stmt => if (use_old) try cg.airDbgEmptyStmt() else { // Emit a `nop`, first demoting a directly preceding statement line marker.
   9487                 if (cg.mir_instructions.len > 0) { // nothing to inspect when no MIR has been emitted yet
   9488                     const prev_mir_op = &cg.mir_instructions.items(.ops)[cg.mir_instructions.len - 1];
   9489                     if (prev_mir_op.* == .pseudo_dbg_line_stmt_line_column)
   9490                         prev_mir_op.* = .pseudo_dbg_line_line_column; // rewrite the last op in place to the non-`stmt` line marker
   9491                 }
   9492                 try cg.asmOpOnly(.{ ._, .nop });
   9493             },
   9494             .dbg_inline_block => if (use_old) try cg.airDbgInlineBlock(inst) else { // Lower an inlined block's body between enter/leave inline-function pseudo instructions.
   9495                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   9496                 const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
   9497                 const old_inline_func = cg.inline_func; // remembered so it can be restored and reported on leave
   9498                 defer cg.inline_func = old_inline_func; // restored on every exit from this prong, including error returns
   9499                 cg.inline_func = extra.data.func;
   9500                 _ = try cg.addInst(.{
   9501                     .tag = .pseudo,
   9502                     .ops = .pseudo_dbg_enter_inline_func,
   9503                     .data = .{ .func = extra.data.func },
   9504                 });
   9505                 try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); // body = the `body_len` entries of `air.extra` starting at `extra.end`
   9506                 _ = try cg.addInst(.{
   9507                     .tag = .pseudo,
   9508                     .ops = .pseudo_dbg_leave_inline_func,
   9509                     .data = .{ .func = old_inline_func }, // leave carries the function that was current before entering
   9510                 });
   9511             },
   9512             .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try cg.airDbgVar(inst) else { // Emit local debug info for the operand, then release it.
   9513                 const pl_op = air_datas[@intFromEnum(inst)].pl_op;
   9514                 var ops = try cg.tempsFromOperands(inst, .{pl_op.operand});
   9515                 try cg.genLocalDebugInfo(inst, ops[0].tracking(cg).short); // the operand is described by its current `short` tracking value
   9516                 try ops[0].die(cg);
   9517             },
   9518             .is_null_ptr => if (use_old) try cg.airIsNullPtr(inst) else { // Test whether the optional behind a pointer operand is null; the result lives in eflags.
   9519                 const un_op = air_datas[@intFromEnum(inst)].un_op;
   9520                 const opt_ty = cg.typeOf(un_op).childType(zcu);
   9521                 const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu);
   9522                 const opt_child_ty = opt_ty.optionalChild(zcu);
   9523                 const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu));
   9524                 var ops = try cg.tempsFromOperands(inst, .{un_op});
   9525                 if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg); // skip the payload; presumably the has-value byte follows it — TODO confirm
   9526                 while (try ops[0].toLea(cg)) {} // keep calling `toLea` until it returns false
   9527                 try cg.asmMemoryImmediate(
   9528                     .{ ._, .cmp },
   9529                     try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl)
   9530                         .byte
   9531                     else if (opt_child_ty.isSlice(zcu))
   9532                         .qword // slice payloads are compared as a single qword rather than by their full ABI size
   9533                     else
   9534                         .fromSize(opt_child_abi_size) }),
   9535                     .u(0),
   9536                 );
   9537                 var is_null = try cg.tempInit(.bool, .{ .eflags = .e }); // null when the compared memory equals 0
   9538                 try ops[0].die(cg);
   9539                 try is_null.moveTo(inst, cg);
   9540             },
   9541             .is_non_null_ptr => if (use_old) try cg.airIsNonNullPtr(inst) else { // Same comparison as `.is_null_ptr`, but the result is the `ne` condition.
   9542                 const un_op = air_datas[@intFromEnum(inst)].un_op;
   9543                 const opt_ty = cg.typeOf(un_op).childType(zcu);
   9544                 const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu);
   9545                 const opt_child_ty = opt_ty.optionalChild(zcu);
   9546                 const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu));
   9547                 var ops = try cg.tempsFromOperands(inst, .{un_op});
   9548                 if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg); // skip the payload; presumably the has-value byte follows it — TODO confirm
   9549                 while (try ops[0].toLea(cg)) {} // keep calling `toLea` until it returns false
   9550                 try cg.asmMemoryImmediate(
   9551                     .{ ._, .cmp },
   9552                     try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl)
   9553                         .byte
   9554                     else if (opt_child_ty.isSlice(zcu))
   9555                         .qword // slice payloads are compared as a single qword rather than by their full ABI size
   9556                     else
   9557                         .fromSize(opt_child_abi_size) }),
   9558                     .u(0),
   9559                 );
   9560                 var is_non_null = try cg.tempInit(.bool, .{ .eflags = .ne }); // non-null when the compared memory differs from 0
   9561                 try ops[0].die(cg);
   9562                 try is_non_null.moveTo(inst, cg);
   9563             },
   9564             .is_err_ptr => if (use_old) try cg.airIsErrPtr(inst) else { // Test whether the error union behind a pointer operand holds an error; the result lives in eflags.
   9565                 const un_op = air_datas[@intFromEnum(inst)].un_op;
   9566                 const eu_ty = cg.typeOf(un_op).childType(zcu);
   9567                 const eu_err_ty = eu_ty.errorUnionSet(zcu);
   9568                 const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
   9569                 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
   9570                 var ops = try cg.tempsFromOperands(inst, .{un_op});
   9571                 try ops[0].toOffset(eu_err_off, cg); // point at the error field inside the union
   9572                 while (try ops[0].toLea(cg)) {} // keep calling `toLea` until it returns false
   9573                 try cg.asmMemoryImmediate(
   9574                     .{ ._, .cmp },
   9575                     try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }),
   9576                     .u(0),
   9577                 );
   9578                 var is_err = try cg.tempInit(.bool, .{ .eflags = .ne }); // an error is present when the error field differs from 0
   9579                 try ops[0].die(cg);
   9580                 try is_err.moveTo(inst, cg);
   9581             },
   9582             .is_non_err_ptr => if (use_old) try cg.airIsNonErrPtr(inst) else { // Same comparison as `.is_err_ptr`, but the result is the `e` condition.
   9583                 const un_op = air_datas[@intFromEnum(inst)].un_op;
   9584                 const eu_ty = cg.typeOf(un_op).childType(zcu);
   9585                 const eu_err_ty = eu_ty.errorUnionSet(zcu);
   9586                 const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
   9587                 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
   9588                 var ops = try cg.tempsFromOperands(inst, .{un_op});
   9589                 try ops[0].toOffset(eu_err_off, cg); // point at the error field inside the union
   9590                 while (try ops[0].toLea(cg)) {} // keep calling `toLea` until it returns false
   9591                 try cg.asmMemoryImmediate(
   9592                     .{ ._, .cmp },
   9593                     try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }),
   9594                     .u(0),
   9595                 );
   9596                 var is_non_err = try cg.tempInit(.bool, .{ .eflags = .e }); // no error when the error field equals 0
   9597                 try ops[0].die(cg);
   9598                 try is_non_err.moveTo(inst, cg);
   9599             },
   9600             .load => if (use_old) try cg.airLoad(inst) else fallback: { // Load a value through a pointer operand, deferring to `airLoad` for the cases guarded below.
   9601                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9602                 const val_ty = ty_op.ty.toType();
   9603                 const ptr_ty = cg.typeOf(ty_op.operand);
   9604                 const ptr_info = ptr_ty.ptrInfo(zcu);
   9605                 if (ptr_info.packed_offset.host_size > 0 and
   9606                     (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
   9607                     break :fallback try cg.airLoad(inst); // pointers with a host size stay on the old path unless they carry a vector index to a non-bool value
   9608                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9609                 var res = try ops[0].load(val_ty, .{
   9610                     .disp = switch (ptr_info.flags.vector_index) {
   9611                         .none => 0,
   9612                         .runtime => unreachable,
   9613                         else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), // byte displacement = value ABI size * vector index
   9614                     },
   9615                 }, cg);
   9616                 for (ops) |op| if (op.index != res.index) try op.die(cg); // release operands that did not become the result
   9617                 try res.moveTo(inst, cg);
   9618             },
   9619             .int_from_ptr => if (use_old) try cg.airIntFromPtr(inst) else { // Reuse the operand temp as the result after `toSlicePtr`.
   9620                 const un_op = air_datas[@intFromEnum(inst)].un_op;
   9621                 var ops = try cg.tempsFromOperands(inst, .{un_op});
   9622                 try ops[0].toSlicePtr(cg); // NOTE(review): presumably reduces a slice operand to its pointer part — confirm against `Temp.toSlicePtr`
   9623                 try ops[0].moveTo(inst, cg);
   9624             },
   9625             .int_from_bool => if (use_old) try cg.airIntFromBool(inst) else { // The operand temp is handed over as the result unchanged; no instruction is emitted here.
   9626                 const un_op = air_datas[@intFromEnum(inst)].un_op;
   9627                 var ops = try cg.tempsFromOperands(inst, .{un_op});
   9628                 try ops[0].moveTo(inst, cg);
   9629             },
   9630             .ret => try cg.airRet(inst, false), // `.ret` and `.ret_safe` share `airRet`; only the boolean argument differs
   9631             .ret_safe => try cg.airRet(inst, true),
   9632             .ret_load => try cg.airRetLoad(inst),
   9633             .store, .store_safe => |air_tag| if (use_old) try cg.airStore(inst, switch (air_tag) { // Store `bin_op.rhs` through pointer `bin_op.lhs`; `.store_safe` maps to `safe = true`.
   9634                 else => unreachable,
   9635                 .store => false,
   9636                 .store_safe => true,
   9637             }) else fallback: {
   9638                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
   9639                 const ptr_ty = cg.typeOf(bin_op.lhs);
   9640                 const ptr_info = ptr_ty.ptrInfo(zcu);
   9641                 const val_ty = cg.typeOf(bin_op.rhs);
   9642                 if (ptr_info.packed_offset.host_size > 0 and
   9643                     (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
   9644                     break :fallback try cg.airStore(inst, switch (air_tag) { // same guard as `.load`: these pointers stay on the old path
   9645                         else => unreachable,
   9646                         .store => false,
   9647                         .store_safe => true,
   9648                     });
   9649                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   9650                 try ops[0].store(&ops[1], .{
   9651                     .disp = switch (ptr_info.flags.vector_index) {
   9652                         .none => 0,
   9653                         .runtime => unreachable,
   9654                         else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), // byte displacement = value ABI size * vector index
   9655                     },
   9656                     .safe = switch (air_tag) {
   9657                         else => unreachable,
   9658                         .store => false,
   9659                         .store_safe => true,
   9660                     },
   9661                 }, cg);
   9662                 for (ops) |op| try op.die(cg); // a store produces no result, so both operands are released
   9663             },
   9664             .unreach => {}, // nothing is emitted for this tag
   9665             .optional_payload_ptr => if (use_old) try cg.airOptionalPayloadPtr(inst) else { // The operand pointer is handed over as the result with no offset applied.
   9666                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9667                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9668                 try ops[0].moveTo(inst, cg);
   9669             },
   9670             .optional_payload_ptr_set => if (use_old) try cg.airOptionalPayloadPtrSet(inst) else { // Write 1 just past the payload when the optional has a separate representation, then return the operand pointer.
   9671                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9672                 const opt_ty = cg.typeOf(ty_op.operand).childType(zcu);
   9673                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9674                 if (!opt_ty.optionalReprIsPayload(zcu)) { // nothing to write when the optional is represented by its payload
   9675                     const opt_child_ty = opt_ty.optionalChild(zcu);
   9676                     const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu));
   9677                     try ops[0].toOffset(opt_child_abi_size, cg); // advance by the payload's ABI size
   9678                     var has_value = try cg.tempInit(.bool, .{ .immediate = 1 });
   9679                     try ops[0].store(&has_value, .{}, cg);
   9680                     try has_value.die(cg);
   9681                     try ops[0].toOffset(-opt_child_abi_size, cg); // undo the advance so the result is the original pointer again
   9682                 }
   9683                 try ops[0].moveTo(inst, cg);
   9684             },
   9685             .unwrap_errunion_payload_ptr => if (use_old) try cg.airUnwrapErrUnionPayloadPtr(inst) else { // Offset the error-union pointer to its payload field.
   9686                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9687                 const eu_ty = cg.typeOf(ty_op.operand).childType(zcu);
   9688                 const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
   9689                 const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu));
   9690                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9691                 try ops[0].toOffset(eu_pl_off, cg);
   9692                 try ops[0].moveTo(inst, cg);
   9693             },
   9694             .unwrap_errunion_err_ptr => if (use_old) try cg.airUnwrapErrUnionErrPtr(inst) else { // Load the error value stored in the error union behind the operand pointer.
   9695                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9696                 const eu_ty = cg.typeOf(ty_op.operand).childType(zcu);
   9697                 const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
   9698                 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
   9699                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9700                 try ops[0].toOffset(eu_err_off, cg); // point at the error field
   9701                 var err = try ops[0].load(eu_ty.errorUnionSet(zcu), .{}, cg); // unlike the payload variant, the result is the loaded value, not a pointer
   9702                 try ops[0].die(cg);
   9703                 try err.moveTo(inst, cg);
   9704             },
   9705             .errunion_payload_ptr_set => if (use_old) try cg.airErrUnionPayloadPtrSet(inst) else { // Store 0 into the error field, then return a pointer to the payload field.
   9706                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9707                 const eu_ty = cg.typeOf(ty_op.operand).childType(zcu);
   9708                 const eu_err_ty = eu_ty.errorUnionSet(zcu);
   9709                 const eu_pl_ty = eu_ty.errorUnionPayload(zcu);
   9710                 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu));
   9711                 const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu));
   9712                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9713                 try ops[0].toOffset(eu_err_off, cg); // point at the error field
   9714                 var no_err = try cg.tempInit(eu_err_ty, .{ .immediate = 0 });
   9715                 try ops[0].store(&no_err, .{}, cg);
   9716                 try no_err.die(cg);
   9717                 try ops[0].toOffset(eu_pl_off - eu_err_off, cg); // the pointer already sits at the error field, so move by the difference
   9718                 try ops[0].moveTo(inst, cg);
   9719             },
   9720             .struct_field_ptr => if (use_old) try cg.airStructFieldPtr(inst) else { // Offset the aggregate pointer by the offset `cg.fieldOffset` reports for `extra.field_index`.
   9721                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   9722                 const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data;
   9723                 var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand});
   9724                 try ops[0].toOffset(cg.fieldOffset(
   9725                     cg.typeOf(extra.struct_operand),
   9726                     ty_pl.ty.toType(),
   9727                     extra.field_index,
   9728                 ), cg);
   9729                 try ops[0].moveTo(inst, cg);
   9730             },
   9731             .struct_field_ptr_index_0, // Same as `.struct_field_ptr`, with the field index (0..3) encoded in the tag itself.
   9732             .struct_field_ptr_index_1,
   9733             .struct_field_ptr_index_2,
   9734             .struct_field_ptr_index_3,
   9735             => |air_tag| if (use_old) try cg.airStructFieldPtrIndex(inst, switch (air_tag) {
   9736                 else => unreachable,
   9737                 .struct_field_ptr_index_0 => 0,
   9738                 .struct_field_ptr_index_1 => 1,
   9739                 .struct_field_ptr_index_2 => 2,
   9740                 .struct_field_ptr_index_3 => 3,
   9741             }) else {
   9742                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9743                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9744                 try ops[0].toOffset(cg.fieldOffset(
   9745                     cg.typeOf(ty_op.operand),
   9746                     ty_op.ty.toType(),
   9747                     switch (air_tag) { // the same tag-to-index mapping as the `use_old` branch above
   9748                         else => unreachable,
   9749                         .struct_field_ptr_index_0 => 0,
   9750                         .struct_field_ptr_index_1 => 1,
   9751                         .struct_field_ptr_index_2 => 2,
   9752                         .struct_field_ptr_index_3 => 3,
   9753                     },
   9754                 ), cg);
   9755                 try ops[0].moveTo(inst, cg);
   9756             },
   9757             .struct_field_val => if (use_old) try cg.airStructFieldVal(inst) else fallback: { // Read one field out of an aggregate value; packed layouts defer to `airStructFieldVal`.
   9758                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   9759                 const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data;
   9760                 const agg_ty = cg.typeOf(extra.struct_operand);
   9761                 const field_ty = ty_pl.ty.toType();
   9762                 const field_off: u31 = switch (agg_ty.containerLayout(zcu)) {
   9763                     .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(extra.field_index, zcu)),
   9764                     .@"packed" => break :fallback try cg.airStructFieldVal(inst), // packed aggregates are only handled by the old path
   9765                 };
   9766                 if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
   9767                     var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand});
   9768                     var res = try ops[0].read(field_ty, .{ .disp = field_off }, cg);
   9769                     for (ops) |op| if (op.index != res.index) try op.die(cg); // release the aggregate unless it became the result
   9770                     try res.moveTo(inst, cg);
   9771                 } else {
   9772                     // hack around Sema OPV bugs
   9773                     const res = try cg.tempInit(field_ty, .none); // fields without runtime bits get a `.none` result and the operand is never materialized
   9774                     try res.moveTo(inst, cg);
   9775                 }
   9776             },
   9777             .set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else { // Store `bin_op.rhs` at the union's tag offset through pointer `bin_op.lhs`.
   9778                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
   9779                 const union_ty = cg.typeOf(bin_op.lhs).childType(zcu);
   9780                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   9781                 const union_layout = union_ty.unionGetLayout(zcu);
   9782                 // hack around Sema OPV bugs
   9783                 if (union_layout.tag_size > 0) try ops[0].store(&ops[1], .{ // a zero-sized tag is tolerated here and simply not stored
   9784                     .disp = @intCast(union_layout.tagOffset()),
   9785                 }, cg);
   9786                 for (ops) |op| try op.die(cg);
   9787             },
   9788             .get_union_tag => if (use_old) try cg.airGetUnionTag(inst) else { // Read the tag out of a union value at the layout's tag offset.
   9789                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9790                 const union_ty = cg.typeOf(ty_op.operand);
   9791                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9792                 const union_layout = union_ty.unionGetLayout(zcu);
   9793                 assert(union_layout.tag_size > 0); // unlike `.set_union_tag`, a zero-sized tag is treated as an invariant violation here
   9794                 var res = try ops[0].read(ty_op.ty.toType(), .{
   9795                     .disp = @intCast(union_layout.tagOffset()),
   9796                 }, cg);
   9797                 for (ops) |op| if (op.index != res.index) try op.die(cg); // release the union unless it became the result
   9798                 try res.moveTo(inst, cg);
   9799             },
   9800             .slice => if (use_old) try cg.airSlice(inst) else { // Combine the two operands of the `Air.Bin` payload into one temp via `toPair`.
   9801                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   9802                 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
   9803                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   9804                 try ops[0].toPair(&ops[1], cg); // NOTE(review): ops[1] is not released explicitly afterwards; presumably `toPair` consumes it — confirm
   9805                 try ops[0].moveTo(inst, cg);
   9806             },
   9807             .slice_len => if (use_old) try cg.airSliceLen(inst) else { // Convert the operand temp in place with `toSliceLen` and return it.
   9808                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9809                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9810                 try ops[0].toSliceLen(cg);
   9811                 try ops[0].moveTo(inst, cg);
   9812             },
   9813             .slice_ptr => if (use_old) try cg.airSlicePtr(inst) else { // Convert the operand temp in place with `toSlicePtr` and return it.
   9814                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9815                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9816                 try ops[0].toSlicePtr(cg);
   9817                 try ops[0].moveTo(inst, cg);
   9818             },
   9819             .ptr_slice_len_ptr => if (use_old) try cg.airPtrSliceLenPtr(inst) else { // Offset the operand pointer by a fixed 8 bytes and return it.
   9820                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9821                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9822                 try ops[0].toOffset(8, cg); // NOTE(review): hard-coded; presumably the byte offset of the len field within a slice on this target — confirm
   9823                 try ops[0].moveTo(inst, cg);
   9824             },
   9825             .ptr_slice_ptr_ptr => if (use_old) try cg.airPtrSlicePtrPtr(inst) else { // Return the operand pointer with a zero offset applied.
   9826                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
   9827                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
   9828                 try ops[0].toOffset(0, cg); // mirrors `.ptr_slice_len_ptr`, which offsets by 8 instead
   9829                 try ops[0].moveTo(inst, cg);
   9830             },
   9831             .slice_elem_val, .ptr_elem_val => |air_tag| if (use_old) switch (air_tag) { // Load the element at index `bin_op.rhs` from base `bin_op.lhs`.
   9832                 else => unreachable,
   9833                 .slice_elem_val => try cg.airSliceElemVal(inst),
   9834                 .ptr_elem_val => try cg.airPtrElemVal(inst),
   9835             } else {
   9836                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
   9837                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   9838                 try ops[0].toSlicePtr(cg);
   9839                 var res: [1]Temp = undefined; // filled either by `select` or by the `SelectFailed` fallback below
   9840                 const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu);
   9841                 cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ // patterns: one immediate-index and one register-index variant per integer size
   9842                     .dst_constraints = .{.{ .int = .byte }},
   9843                     .patterns = &.{
   9844                         .{ .src = .{ .to_gpr, .simm32 } },
   9845                     },
   9846                     .dst_temps = .{.{ .rc = .general_purpose }},
   9847                     .each = .{ .once = &.{
   9848                         .{ ._, ._, .movzx, .dst0d, .leaa(.byte, .src0, .add_src0_elem_size_times_src1), ._, ._ },
   9849                     } },
   9850                 }, .{
   9851                     .dst_constraints = .{.{ .int = .byte }},
   9852                     .patterns = &.{
   9853                         .{ .src = .{ .to_gpr, .to_gpr } },
   9854                     },
   9855                     .dst_temps = .{.{ .rc = .general_purpose }},
   9856                     .each = .{ .once = &.{
   9857                         .{ ._, ._, .movzx, .dst0d, .leai(.byte, .src0, .src1), ._, ._ },
   9858                     } },
   9859                 }, .{
   9860                     .dst_constraints = .{.{ .int = .word }},
   9861                     .patterns = &.{
   9862                         .{ .src = .{ .to_gpr, .simm32 } },
   9863                     },
   9864                     .dst_temps = .{.{ .rc = .general_purpose }},
   9865                     .each = .{ .once = &.{
   9866                         .{ ._, ._, .movzx, .dst0d, .leaa(.word, .src0, .add_src0_elem_size_times_src1), ._, ._ },
   9867                     } },
   9868                 }, .{
   9869                     .dst_constraints = .{.{ .int = .word }},
   9870                     .patterns = &.{
   9871                         .{ .src = .{ .to_gpr, .to_gpr } },
   9872                     },
   9873                     .dst_temps = .{.{ .rc = .general_purpose }},
   9874                     .each = .{ .once = &.{
   9875                         .{ ._, ._, .movzx, .dst0d, .leasi(.word, .src0, .@"2", .src1), ._, ._ },
   9876                     } },
   9877                 }, .{
   9878                     .dst_constraints = .{.{ .int = .dword }},
   9879                     .patterns = &.{
   9880                         .{ .src = .{ .to_gpr, .simm32 } },
   9881                     },
   9882                     .dst_temps = .{.{ .rc = .general_purpose }},
   9883                     .each = .{ .once = &.{
   9884                         .{ ._, ._, .mov, .dst0d, .leaa(.dword, .src0, .add_src0_elem_size_times_src1), ._, ._ },
   9885                     } },
   9886                 }, .{
   9887                     .dst_constraints = .{.{ .int = .dword }},
   9888                     .patterns = &.{
   9889                         .{ .src = .{ .to_gpr, .to_gpr } },
   9890                     },
   9891                     .dst_temps = .{.{ .rc = .general_purpose }},
   9892                     .each = .{ .once = &.{
   9893                         .{ ._, ._, .mov, .dst0d, .leasi(.dword, .src0, .@"4", .src1), ._, ._ },
   9894                     } },
   9895                 }, .{
                            .required_features = .{ .@"64bit", null, null, null }, // this variant also emits a 64-bit `mov`, so it is gated like the register-index qword variant below
   9896                     .dst_constraints = .{.{ .int = .qword }},
   9897                     .patterns = &.{
   9898                         .{ .src = .{ .to_gpr, .simm32 } },
   9899                     },
   9900                     .dst_temps = .{.{ .rc = .general_purpose }},
   9901                     .each = .{ .once = &.{
   9902                         .{ ._, ._, .mov, .dst0q, .leaa(.qword, .src0, .add_src0_elem_size_times_src1), ._, ._ },
   9903                     } },
   9904                 }, .{
   9905                     .required_features = .{ .@"64bit", null, null, null },
   9906                     .dst_constraints = .{.{ .int = .qword }},
   9907                     .patterns = &.{
   9908                         .{ .src = .{ .to_gpr, .to_gpr } },
   9909                     },
   9910                     .dst_temps = .{.{ .rc = .general_purpose }},
   9911                     .each = .{ .once = &.{
   9912                         .{ ._, ._, .mov, .dst0q, .leasi(.qword, .src0, .@"8", .src1), ._, ._ },
   9913                     } },
   9914                 } }) catch |err| switch (err) {
   9915                     error.SelectFailed => switch (res_ty.abiSize(zcu)) { // no pattern matched: compute the element address by hand, then load
   9916                         // hack around Sema OPV bugs
   9917                         0 => res[0] = try cg.tempInit(res_ty, .none),
   9918                         else => |elem_size| {
   9919                             while (true) for (&ops) |*op| { // repeat until a full pass finds `toRegClass` returning false for every operand
   9920                                 if (try op.toRegClass(true, .general_purpose, cg)) break;
   9921                             } else break;
   9922                             const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
   9923                             const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
   9924                             if (!std.math.isPowerOfTwo(elem_size)) { // arbitrary size: index *= elem_size via imul, then base += index
   9925                                 try cg.spillEflagsIfOccupied();
   9926                                 try cg.asmRegisterRegisterImmediate(
   9927                                     .{ .i_, .mul },
   9928                                     rhs_reg,
   9929                                     rhs_reg,
   9930                                     .u(elem_size),
   9931                                 );
   9932                                 try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
   9933                                     .base = .{ .reg = lhs_reg },
   9934                                     .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
   9935                                 });
   9936                             } else if (elem_size > 8) { // power of two above 8: shift the index left by log2(elem_size), then base += index
   9937                                 try cg.spillEflagsIfOccupied();
   9938                                 try cg.asmRegisterImmediate(
   9939                                     .{ ._l, .sh },
   9940                                     rhs_reg,
   9941                                     .u(std.math.log2_int(u64, elem_size)),
   9942                                 );
   9943                                 try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
   9944                                     .base = .{ .reg = lhs_reg },
   9945                                     .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
   9946                                 });
   9947                             } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ // power of two up to 8: a single lea with the size as scale factor
   9948                                 .base = .{ .reg = lhs_reg },
   9949                                 .mod = .{ .rm = .{
   9950                                     .size = .qword,
   9951                                     .index = rhs_reg,
   9952                                     .scale = .fromFactor(@intCast(elem_size)),
   9953                                 } },
   9954                             });
   9955                             res[0] = try ops[0].load(res_ty, .{}, cg);
   9956                         },
   9957                     },
   9958                     else => |e| return e,
   9959                 };
   9960                 for (ops) |op| for (res) |r| { // release every operand that was not reused as the result
   9961                     if (op.index == r.index) break;
   9962                 } else try op.die(cg);
   9963                 try res[0].moveTo(inst, cg);
   9964             },
   9965             .slice_elem_ptr, .ptr_elem_ptr => |air_tag| if (use_old) switch (air_tag) {
   9966                 else => unreachable,
   9967                 .slice_elem_ptr => try cg.airSliceElemPtr(inst),
   9968                 .ptr_elem_ptr => try cg.airPtrElemPtr(inst),
   9969             } else {
   9970                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
   9971                 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
   9972                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
   9973                 try ops[0].toSlicePtr(cg);
   9974                 const dst_ty = ty_pl.ty.toType();
   9975                 if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: {
   9976                     const elem_size = dst_ty.childType(zcu).abiSize(zcu);
   9977                     // hack around Sema OPV bugs
   9978                     if (elem_size == 0) break :zero_offset;
   9979                     while (true) for (&ops) |*op| {
   9980                         if (try op.toRegClass(true, .general_purpose, cg)) break;
   9981                     } else break;
   9982                     const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
   9983                     const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
   9984                     if (!std.math.isPowerOfTwo(elem_size)) {
   9985                         try cg.spillEflagsIfOccupied();
   9986                         try cg.asmRegisterRegisterImmediate(
   9987                             .{ .i_, .mul },
   9988                             rhs_reg,
   9989                             rhs_reg,
   9990                             .u(elem_size),
   9991                         );
   9992                         try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
   9993                             .base = .{ .reg = lhs_reg },
   9994                             .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
   9995                         });
   9996                     } else if (elem_size > 8) {
   9997                         try cg.spillEflagsIfOccupied();
   9998                         try cg.asmRegisterImmediate(
   9999                             .{ ._l, .sh },
  10000                             rhs_reg,
  10001                             .u(std.math.log2_int(u64, elem_size)),
  10002                         );
  10003                         try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
  10004                             .base = .{ .reg = lhs_reg },
  10005                             .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
  10006                         });
  10007                     } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
  10008                         .base = .{ .reg = lhs_reg },
  10009                         .mod = .{ .rm = .{
  10010                             .size = .qword,
  10011                             .index = rhs_reg,
  10012                             .scale = .fromFactor(@intCast(elem_size)),
  10013                         } },
  10014                     });
  10015                 }
  10016                 try ops[1].die(cg);
  10017                 try ops[0].moveTo(inst, cg);
  10018             },
  10019             .array_to_slice => if (use_old) try cg.airArrayToSlice(inst) else {
  10020                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
  10021                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
  10022                 var len = try cg.tempInit(.usize, .{
  10023                     .immediate = cg.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu),
  10024                 });
  10025                 try ops[0].toPair(&len, cg);
  10026                 try ops[0].moveTo(inst, cg);
  10027             },
  10028             .error_set_has_value => return cg.fail("TODO implement error_set_has_value", .{}),
  10029             .union_init => if (use_old) try cg.airUnionInit(inst) else {
  10030                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
  10031                 const extra = cg.air.extraData(Air.UnionInit, ty_pl.payload).data;
  10032                 const union_ty = ty_pl.ty.toType();
  10033                 var ops = try cg.tempsFromOperands(inst, .{extra.init});
  10034                 var res = try cg.tempAllocMem(union_ty);
  10035                 const union_layout = union_ty.unionGetLayout(zcu);
  10036                 if (union_layout.tag_size > 0) {
  10037                     var tag_temp = try cg.tempFromValue(try pt.enumValueFieldIndex(
  10038                         union_ty.unionTagTypeSafety(zcu).?,
  10039                         extra.field_index,
  10040                     ));
  10041                     try res.write(&tag_temp, .{
  10042                         .disp = @intCast(union_layout.tagOffset()),
  10043                     }, cg);
  10044                     try tag_temp.die(cg);
  10045                 }
  10046                 try res.write(&ops[0], .{
  10047                     .disp = @intCast(union_layout.payloadOffset()),
  10048                 }, cg);
  10049                 try ops[0].die(cg);
  10050                 try res.moveTo(inst, cg);
  10051             },
  10052             .field_parent_ptr => if (use_old) try cg.airFieldParentPtr(inst) else {
  10053                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
  10054                 const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
  10055                 var ops = try cg.tempsFromOperands(inst, .{extra.field_ptr});
  10056                 try ops[0].toOffset(-cg.fieldOffset(
  10057                     ty_pl.ty.toType(),
  10058                     cg.typeOf(extra.field_ptr),
  10059                     extra.field_index,
  10060                 ), cg);
  10061                 try ops[0].moveTo(inst, cg);
  10062             },
  10063 
  10064             .is_named_enum_value => return cg.fail("TODO implement is_named_enum_value", .{}),
  10065 
  10066             .wasm_memory_size => unreachable,
  10067             .wasm_memory_grow => unreachable,
  10068 
  10069             .addrspace_cast => {
  10070                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
  10071                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
  10072                 try ops[0].moveTo(inst, cg);
  10073             },
  10074 
  10075             .vector_store_elem => return cg.fail("TODO implement vector_store_elem", .{}),
  10076 
  10077             .c_va_arg => try cg.airVaArg(inst),
  10078             .c_va_copy => try cg.airVaCopy(inst),
  10079             .c_va_end => try cg.airVaEnd(inst),
  10080             .c_va_start => try cg.airVaStart(inst),
  10081 
  10082             .work_item_id => unreachable,
  10083             .work_group_size => unreachable,
  10084             .work_group_id => unreachable,
  10085         }
  10086         cg.resetTemps();
  10087         cg.checkInvariantsAfterAirInst();
  10088     }
  10089     verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
  10090 }
  10091 
  10092 fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
            // Emits the machine code body for a lazy symbol. Only enum types are handled: the
            // generated routine (logged as `<enum>.@tagName`) compares the incoming enum value
            // against each field's tag value and, on a match, stores the name's address and
            // length through `ret_reg`. Every other type fails with a TODO error.
  10093     const pt = self.pt;
  10094     const zcu = pt.zcu;
  10095     const ip = &zcu.intern_pool;
  10096     switch (Type.fromInterned(lazy_sym.ty).zigTypeTag(zcu)) {
  10097         .@"enum" => {
  10098             const enum_ty: Type = .fromInterned(lazy_sym.ty);
  10099             wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(pt)});
  10100 
                    // The first two integer parameter registers are locked for the whole routine:
                    // param_regs[0] is the base address the result is written through and
                    // param_regs[1] holds the enum value being looked up.
  10101             const param_regs = abi.getCAbiIntParamRegs(.auto);
  10102             const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*);
  10103             defer for (param_locks) |lock| self.register_manager.unlockReg(lock);
  10104 
  10105             const ret_reg = param_regs[0];
  10106             const enum_mcv = MCValue{ .register = param_regs[1] };
  10107 
                    // One pending jump per field; all of them are patched to the final `ret` below.
  10108             const epilogue_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
  10109             defer self.gpa.free(epilogue_relocs);
  10110 
                    // data_reg receives the address of this enum's `.const_data` lazy symbol.
  10111             const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  10112             const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
  10113             defer self.register_manager.unlockReg(data_lock);
  10114             try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty.toIntern() });
  10115 
  10116             var data_off: i32 = 0;
  10117             const tag_names = enum_ty.enumFields(zcu);
  10118             for (epilogue_relocs, 0..) |*epilogue_reloc, tag_index| {
  10119                 const tag_name_len = tag_names.get(ip)[tag_index].length(ip);
  10120                 const tag_val = try pt.enumValueFieldIndex(enum_ty, @intCast(tag_index));
  10121                 const tag_mcv = try self.genTypedValue(tag_val);
                        // Compare against this field's tag and jump over its stores on mismatch.
  10122                 try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
  10123                 const skip_reloc = try self.asmJccReloc(.ne, undefined);
  10124 
                        // Match: store `data_reg + data_off` at offset 0 and the name length at
                        // offset 8 relative to `ret_reg`.
  10125                 try self.genSetMem(
  10126                     .{ .reg = ret_reg },
  10127                     0,
  10128                     .usize,
  10129                     .{ .register_offset = .{ .reg = data_reg, .off = data_off } },
  10130                     .{},
  10131                 );
  10132                 try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{});
  10133 
  10134                 epilogue_reloc.* = try self.asmJmpReloc(undefined);
  10135                 self.performReloc(skip_reloc);
  10136 
                        // Advance past this name plus one extra byte (presumably a NUL terminator
                        // in the const_data blob; confirm against the code that emits it).
  10137                 data_off += @intCast(tag_name_len + 1);
  10138             }
  10139 
                    // Falls through to here only when no field matched the value.
  10140             try self.asmOpOnly(.{ ._, .ud2 });
  10141 
  10142             for (epilogue_relocs) |reloc| self.performReloc(reloc);
  10143             try self.asmOpOnly(.{ ._, .ret });
  10144         },
  10145         else => return self.fail(
  10146             "TODO implement {s} for {}",
  10147             .{ @tagName(lazy_sym.kind), Type.fromInterned(lazy_sym.ty).fmt(pt) },
  10148         ),
  10149     }
  10150 }
  10151 
  10152 fn getValue(self: *CodeGen, value: MCValue, inst: ?Air.Inst.Index) !void {
            // Claims every register referenced by `value` for `inst` through the register
            // manager's `getReg`, and records `inst` as the eflags owner when `value` is
            // `.eflags` or `.register_overflow`.
  10153     for (value.getRegs()) |reg| try self.register_manager.getReg(reg, inst);
  10154     switch (value) {
  10155         else => {},
  10156         .eflags, .register_overflow => self.eflags_inst = inst,
  10157     }
  10158 }
  10159 
  10160 fn getValueIfFree(self: *CodeGen, value: MCValue, inst: ?Air.Inst.Index) void {
            // Infallible variant of `getValue`: only registers of `value` that are currently
            // free are assigned to `inst`; occupied registers are left alone and `eflags_inst`
            // is not touched.
  10161     for (value.getRegs()) |reg| if (self.register_manager.isRegFree(reg))
  10162         self.register_manager.getRegAssumeFree(reg, inst);
  10163 }
  10164 
  10165 fn freeReg(self: *CodeGen, reg: Register) !void {
            // Releases `reg` in the register manager. For x87-class registers an instruction
            // with mnemonic `.{ .f_, .free }` is additionally emitted on `reg` (presumably
            // `ffree`, marking the x87 stack slot empty; confirm in the encoder tables).
  10166     self.register_manager.freeReg(reg);
  10167     if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg);
  10168 }
  10169 
  10170 fn freeValue(self: *CodeGen, value: MCValue) !void {
            // Releases the machine resources held by `value`: every register it references is
            // freed through `freeReg`, and `eflags_inst` is cleared for the variants that live
            // in eflags. All other variants are currently ignored.
  10171     switch (value) {
  10172         .register => |reg| try self.freeReg(reg),
  10173         inline .register_pair,
  10174         .register_triple,
  10175         .register_quadruple,
  10176         => |regs| for (regs) |reg| try self.freeReg(reg),
  10177         .register_offset, .indirect => |reg_off| try self.freeReg(reg_off.reg),
                // Overflow results occupy both a register and eflags.
  10178         .register_overflow => |reg_ov| {
  10179             try self.freeReg(reg_ov.reg);
  10180             self.eflags_inst = null;
  10181         },
  10182         .register_mask => |reg_mask| try self.freeReg(reg_mask.reg),
  10183         .eflags => self.eflags_inst = null,
  10184         else => {}, // TODO process stack allocation death
  10185     }
  10186 }
  10187 
  10188 fn feed(self: *CodeGen, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) !void {
            // Advances the big tomb by one operand. When `bt.feed()` returns true and `operand`
            // resolves to an instruction index, that instruction's death is processed.
  10189     if (bt.feed()) if (operand.toIndex()) |inst| try self.processDeath(inst);
  10190 }
  10191 
  10192 /// Kills `inst` by calling `die` on its tracking entry. Asserts that `inst` has an entry in `inst_tracking`.
  10193 fn processDeath(self: *CodeGen, inst: Air.Inst.Index) !void {
  10194     try self.inst_tracking.getPtr(inst).?.die(self, inst);
  10195 }
  10196 
  10197 fn finishAirResult(self: *CodeGen, inst: Air.Inst.Index, result: MCValue) void {
            // Records `result` as the tracked value of `inst`. For an unused instruction that
            // is not an `.arg`, nothing is tracked and `result` must already be
            // `.none`, `.dead` or `.unreach`.
            // Asserts that `inst_tracking` has spare capacity and no existing entry for `inst`.
  10198     if (self.liveness.isUnused(inst) and self.air.instructions.items(.tag)[@intFromEnum(inst)] != .arg) switch (result) {
  10199         .none, .dead, .unreach => {},
  10200         else => unreachable, // Why didn't the result die?
  10201     } else {
  10202         tracking_log.debug("{} => {} (birth)", .{ inst, result });
  10203         self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result));
  10204         // In some cases, an operand may be reused as the result.
  10205         // If that operand died and was a register, it was freed by
  10206         // processDeath, so we have to "re-allocate" the register.
  10207         self.getValueIfFree(result, inst);
  10208     }
  10209 }
  10210 
  10211 fn finishAir(
  10212     self: *CodeGen,
  10213     inst: Air.Inst.Index,
  10214     result: MCValue,
  10215     operands: [Liveness.bpi - 1]Air.Inst.Ref,
  10216 ) !void {
            // Finishes lowering of `inst`: processes the death of every operand whose tomb bit
            // is set, then records `result` through `finishAirResult`.
  10217     const tomb_bits = self.liveness.getTombBits(inst);
  10218     for (0.., operands) |op_index, op| {
                // Bit `op_index` of the tomb bits tells whether this operand dies here.
  10219         if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue;
                // Operands marked in `reused_operands` are not killed here.
  10220         if (self.reused_operands.isSet(op_index)) continue;
                // Refs that do not resolve to an instruction index are skipped.
  10221         try self.processDeath(op.toIndexAllowNone() orelse continue);
  10222     }
  10223     self.finishAirResult(inst, result);
  10224 }
  10225 
  10226 const FrameLayout = struct {
            // Result of `computeFrameLayout`.
            // `maxInt(u32)` shifted left by `@intFromEnum(needed_align)` when the stack has to
            // be realigned, otherwise all ones (presumably ANDed into rsp by the prologue).
  10227     stack_mask: u32,
            // Final `rsp_offset` minus the frame offset of `call_frame`.
  10228     stack_adjust: u32,
            // Callee-preserved registers that the register manager reports as allocated.
  10229     save_reg_list: Mir.RegisterList,
  10230 };
  10231 
  10232 fn setFrameLoc(
  10233     self: *CodeGen,
  10234     frame_index: FrameIndex,
  10235     base: Register,
  10236     offset: *i32,
  10237     comptime aligned: bool,
  10238 ) void {
            // Places `frame_index` at `base + offset.*` and advances `offset.*` by the
            // allocation's `abi_size`. When `aligned` is set, `offset.*` is first rounded up to
            // the allocation's `abi_align`.
  10239     const frame_i = @intFromEnum(frame_index);
  10240     if (aligned) {
  10241         const alignment = self.frame_allocs.items(.abi_align)[frame_i];
  10242         offset.* = @intCast(alignment.forward(@intCast(offset.*)));
  10243     }
  10244     self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* });
  10245     offset.* += self.frame_allocs.items(.abi_size)[frame_i];
  10246 }
  10247 
  10248 fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention.Tag) !FrameLayout {
            // Assigns a base register and displacement to every frame allocation (stored in
            // `frame_locs`) and returns the stack mask, stack adjustment and the list of
            // callee-preserved registers to save for calling convention `cc`.
  10249     const frame_allocs_len = self.frame_allocs.len;
  10250     try self.frame_locs.resize(self.gpa, frame_allocs_len);
            // Scratch ordering for the allocations that follow the `FrameIndex.named_count`
            // named ones.
  10251     const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count);
  10252     defer self.gpa.free(stack_frame_order);
  10253 
  10254     const frame_size = self.frame_allocs.items(.abi_size);
  10255     const frame_align = self.frame_allocs.items(.abi_align);
  10256     const frame_offset = self.frame_locs.items(.disp);
  10257 
  10258     for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index|
  10259         frame_order.* = @enumFromInt(frame_index);
  10260     {
                // Sort the unnamed allocations by descending alignment.
  10261         const SortContext = struct {
  10262             frame_align: @TypeOf(frame_align),
  10263             pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool {
  10264                 return context.frame_align[@intFromEnum(lhs)].compare(.gt, context.frame_align[@intFromEnum(rhs)]);
  10265             }
  10266         };
  10267         const sort_context = SortContext{ .frame_align = frame_align };
  10268         std.mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan);
  10269     }
  10270 
            // The stack must be explicitly realigned when the call/stack frames need more
            // alignment than the args frame provides.
  10271     const call_frame_align = frame_align[@intFromEnum(FrameIndex.call_frame)];
  10272     const stack_frame_align = frame_align[@intFromEnum(FrameIndex.stack_frame)];
  10273     const args_frame_align = frame_align[@intFromEnum(FrameIndex.args_frame)];
  10274     const needed_align = call_frame_align.max(stack_frame_align);
  10275     const need_align_stack = needed_align.compare(.gt, args_frame_align);
  10276 
  10277     // Create list of registers to save in the prologue.
  10278     // TODO handle register classes
  10279     var save_reg_list: Mir.RegisterList = .empty;
  10280     const callee_preserved_regs = abi.getCalleePreservedRegs(cc);
  10281     for (callee_preserved_regs) |reg| {
  10282         if (self.register_manager.isRegAllocated(reg)) {
  10283             save_reg_list.push(callee_preserved_regs, reg);
  10284         }
  10285     }
  10286 
            // rbp-relative area: base pointer, return address and args frame, packed in that
            // order without extra alignment.
  10287     var rbp_offset: i32 = 0;
  10288     self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
  10289     self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
  10290     self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
            // Without realignment, the saved registers and the args frame displacement are
            // included when rounding the rsp-relative size below.
  10291     const stack_frame_align_offset = if (need_align_stack)
  10292         0
  10293     else
  10294         save_reg_list.size(self.target) + frame_offset[@intFromEnum(FrameIndex.args_frame)];
  10295 
            // rsp-relative area: call frame, stack frame, then the sorted allocations, each
            // aligned to its own `abi_align`.
  10296     var rsp_offset: i32 = 0;
  10297     self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
  10298     self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true);
  10299     for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true);
            // Round up so that `rsp_offset + stack_frame_align_offset` is a multiple of
            // `needed_align`.
  10300     rsp_offset += stack_frame_align_offset;
  10301     rsp_offset = @intCast(needed_align.forward(@intCast(rsp_offset)));
  10302     rsp_offset -= stack_frame_align_offset;
            // The size slot of `call_frame` is overwritten with the distance from the offset
            // of `stack_frame` to the final `rsp_offset`.
  10303     frame_size[@intFromEnum(FrameIndex.call_frame)] =
  10304         @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.stack_frame)]);
  10305 
  10306     return .{
  10307         .stack_mask = @as(u32, std.math.maxInt(u32)) << @intCast(if (need_align_stack) @intFromEnum(needed_align) else 0),
  10308         .stack_adjust = @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.call_frame)]),
  10309         .save_reg_list = save_reg_list,
  10310     };
  10311 }
  10312 
  10313 fn getFrameAddrAlignment(self: *CodeGen, frame_addr: bits.FrameAddr) InternPool.Alignment {
            // Alignment that can be assumed for `frame_addr`: the smaller of the allocation's
            // own alignment and the alignment implied by the trailing zero bits of the offset.
  10314     const alloc_align = self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_align;
  10315     return @enumFromInt(@min(@intFromEnum(alloc_align), @ctz(frame_addr.off)));
  10316 }
  10317 
  10318 fn getFrameAddrSize(self: *CodeGen, frame_addr: bits.FrameAddr) u32 {
            // Number of bytes from `frame_addr.off` to the end of its frame allocation.
            // The `@intCast` to `u31` asserts that the offset is non-negative.
  10319     return self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_size - @as(u31, @intCast(frame_addr.off));
  10320 }
  10321 
  10322 fn allocFrameIndex(self: *CodeGen, alloc: FrameAlloc) !FrameIndex {
            // Returns a frame index suitable for `alloc`, reusing a previously freed index of
            // exactly the same size when one exists and appending a new allocation otherwise.
  10323     const frame_allocs_slice = self.frame_allocs.slice();
  10324     const frame_size = frame_allocs_slice.items(.abi_size);
  10325     const frame_align = frame_allocs_slice.items(.abi_align);
  10326 
            // The stack frame as a whole must be at least as aligned as any allocation in it.
  10327     const stack_frame_align = &frame_align[@intFromEnum(FrameIndex.stack_frame)];
  10328     stack_frame_align.* = stack_frame_align.max(alloc.abi_align);
  10329 
  10330     for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| {
  10331         const abi_size = frame_size[@intFromEnum(frame_index)];
  10332         if (abi_size != alloc.abi_size) continue;
                // Reuse this slot, raising its alignment if the new request needs more.
  10333         const abi_align = &frame_align[@intFromEnum(frame_index)];
  10334         abi_align.* = abi_align.max(alloc.abi_align);
  10335 
  10336         _ = self.free_frame_indices.swapRemoveAt(free_i);
  10337         return frame_index;
  10338     }
            // The new index is the position the allocation is about to be appended at.
  10339     const frame_index: FrameIndex = @enumFromInt(self.frame_allocs.len);
  10340     try self.frame_allocs.append(self.gpa, alloc);
  10341     return frame_index;
  10342 }
  10343 
  10344 /// Use a pointer instruction as the basis for allocating stack memory.
  10345 fn allocMemPtr(self: *CodeGen, inst: Air.Inst.Index) !FrameIndex {
            // The allocation is sized for the pointee type of `inst`'s pointer type and uses
            // the pointer's alignment, but never less than 1. Fails when the pointee's ABI size
            // does not fit in a `u32`.
  10346     const pt = self.pt;
  10347     const zcu = pt.zcu;
  10348     const ptr_ty = self.typeOfIndex(inst);
  10349     const val_ty = ptr_ty.childType(zcu);
  10350     return self.allocFrameIndex(.init(.{
  10351         .size = std.math.cast(u32, val_ty.abiSize(zcu)) orelse {
  10352             return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(pt)});
  10353         },
  10354         .alignment = ptr_ty.ptrAlignment(zcu).max(.@"1"),
  10355     }));
  10356 }
  10357 
  10358 fn allocRegOrMem(self: *CodeGen, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
            // Allocates storage for the result of `inst`, using the instruction's own type and
            // passing `inst` on as the owner to `allocRegOrMemAdvanced`.
  10359     return self.allocRegOrMemAdvanced(self.typeOfIndex(inst), inst, reg_ok);
  10360 }
  10361 
  10362 fn allocTempRegOrMem(self: *CodeGen, elem_ty: Type, reg_ok: bool) !MCValue {
            // Allocates storage for a value of type `elem_ty` with no owning instruction
            // (`null` is passed as the owner to `allocRegOrMemAdvanced`).
  10363     return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok);
  10364 }
  10365 
  10366 fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
            // Allocates storage for a value of type `ty`. A register is returned when `reg_ok`
            // is set, the type qualifies (see below) and `tryAllocReg` succeeds; otherwise a
            // spill slot is allocated and returned as `.load_frame`.
            // Fails when the ABI size of `ty` does not fit in a `u32`.
  10367     const pt = self.pt;
  10368     const zcu = pt.zcu;
  10369     const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse {
  10370         return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(pt)});
  10371     };
  10372 
  10373     if (reg_ok) need_mem: {
                // A register is only considered for power-of-two sizes up to a per-type limit:
                // 16 bytes for 16/32/64/128-bit floats, `vectorSize` for vectors and 8 bytes
                // for everything else. 80-bit floats (scalar or as vector elements) always go
                // to memory.
  10374         if (std.math.isPowerOfTwo(abi_size) and abi_size <= @as(u32, switch (ty.zigTypeTag(zcu)) {
  10375             .float => switch (ty.floatBits(self.target.*)) {
  10376                 16, 32, 64, 128 => 16,
  10377                 80 => break :need_mem,
  10378                 else => unreachable,
  10379             },
  10380             .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  10381                 .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  10382                     16, 32, 64, 128 => self.vectorSize(.float),
  10383                     80 => break :need_mem,
  10384                     else => unreachable,
  10385                 },
  10386                 else => self.vectorSize(.int),
  10387             },
  10388             else => 8,
  10389         })) {
                    // If no register is available, fall through to the stack allocation.
  10390             if (self.register_manager.tryAllocReg(inst, self.regSetForType(ty))) |reg| {
  10391                 return MCValue{ .register = registerAlias(reg, abi_size) };
  10392             }
  10393         }
  10394     }
  10395 
  10396     const frame_index = try self.allocFrameIndex(.initSpill(ty, zcu));
  10397     return .{ .load_frame = .{ .index = frame_index } };
  10398 }
  10399 
  10400 fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
            // Maps `ty` to the register class used for its values: 80-bit floats use x87, other
            // floats use SSE, vectors use SSE except vectors of `bool` which use general
            // purpose registers, and everything else uses general purpose registers.
  10401     const pt = self.pt;
  10402     const zcu = pt.zcu;
  10403     return switch (ty.zigTypeTag(zcu)) {
  10404         .float => switch (ty.floatBits(self.target.*)) {
  10405             80 => .x87,
  10406             else => .sse,
  10407         },
  10408         .vector => switch (ty.childType(zcu).toIntern()) {
  10409             .bool_type => .general_purpose,
  10410             else => .sse,
  10411         },
  10412         else => .general_purpose,
  10413     };
  10414 }
  10415 
  10416 fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet {
            // Returns the allocatable register set for class `rc`. Segment and instruction
            // pointer classes must never be requested; MMX is not implemented and panics.
  10417     return switch (rc) {
  10418         .general_purpose => abi.RegisterClass.gp,
  10419         .segment, .ip => unreachable,
  10420         .x87 => abi.RegisterClass.x87,
  10421         .mmx => @panic("TODO"),
  10422         .sse => abi.RegisterClass.sse,
  10423     };
  10424 }
  10425 
  10426 fn regSetForType(self: *CodeGen, ty: Type) RegisterManager.RegisterBitSet {
            // Register set for the class that `regClassForType` selects for `ty`.
  10427     return regSetForRegClass(self.regClassForType(ty));
  10428 }
  10429 
  10430 fn vectorSize(cg: *CodeGen, kind: enum { int, float }) u6 {
            // Largest vector size, in bytes, assumed for the given element kind: 32 when the
            // target has AVX2 (integers) or AVX (floats), otherwise 16 with SSE, otherwise 8.
  10431     return if (cg.hasFeature(switch (kind) {
  10432         .int => .avx2,
  10433         .float => .avx,
  10434     })) 32 else if (cg.hasFeature(.sse)) 16 else 8;
  10435 }
  10436 
  10437 fn limbType(cg: *CodeGen, ty: Type) Type {
            // Intended to build a vector type of `ty`'s scalar type whose length is
            // `vectorSize / scalar size`.
            // NOTE(review): this body looks unfinished. The `scalar:` block yields no value
            // when the scalar is larger than the vector size, and the `pt.vectorType` result
            // below is neither returned nor error-handled. Presumably this only builds because
            // nothing references the function; confirm the intended behaviour before use.
  10438     const pt = cg.pt;
  10439     const zcu = pt.zcu;
  10440     const vector_size = cg.vectorSize(if (ty.isRuntimeFloat()) .float else .int);
  10441     const scalar_ty, const scalar_size = scalar: {
  10442         const scalar_ty = ty.scalarType(zcu);
  10443         const scalar_size = scalar_ty.abiSize(zcu);
  10444         if (scalar_size <= vector_size) break :scalar .{ scalar_ty, scalar_size };
  10445     };
  10446     pt.vectorType(.{
  10447         .len = @divExact(vector_size, scalar_size),
  10448         .child = scalar_ty.toIntern(),
  10449     });
  10450 }
  10451 
  10452 const State = struct {
            // Snapshot of register allocation state, filled in by `initRetroactiveState` and
            // `saveRetroactiveState` and consumed by `restoreState`.
            // Tracked instruction per register; only written for registers that were not free.
  10453     registers: RegisterManager.TrackedRegisters,
            // Copy of the tracking entry of each occupied register's instruction.
  10454     reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
            // The register manager's free set at the time of the snapshot.
  10455     free_registers: RegisterManager.RegisterBitSet,
            // Number of entries in `inst_tracking` at the time of the snapshot.
  10456     inst_tracking_len: u32,
            // Value of `scope_generation` at the time of the snapshot.
  10457     scope_generation: u32,
  10458 };
  10459 
  10460 fn initRetroactiveState(self: *CodeGen) State {
            // Starts a snapshot by recording only the tracking count and scope generation.
            // The register fields stay `undefined` until `saveRetroactiveState` fills them in.
  10461     var state: State = undefined;
  10462     state.inst_tracking_len = @intCast(self.inst_tracking.count());
  10463     state.scope_generation = self.scope_generation;
  10464     return state;
  10465 }
  10466 
  10467 fn saveRetroactiveState(self: *CodeGen, state: *State) !void {
            // Completes a snapshot started by `initRetroactiveState`: eflags is spilled first,
            // then the occupant and tracking entry of every non-free register are copied into
            // `state` together with the free register set.
  10468     try self.spillEflagsIfOccupied();
  10469     const free_registers = self.register_manager.free_registers;
            // Iterate the unset bits, i.e. the registers that are currently occupied.
  10470     var it = free_registers.iterator(.{ .kind = .unset });
  10471     while (it.next()) |index| {
  10472         const tracked_inst = self.register_manager.registers[index];
  10473         state.registers[index] = tracked_inst;
                // Asserts that every occupied register's instruction has a tracking entry.
  10474         state.reg_tracking[index] = self.inst_tracking.get(tracked_inst).?;
  10475     }
  10476     state.free_registers = free_registers;
  10477 }
  10478 
  10479 fn saveState(self: *CodeGen) !State {
            // Takes a complete snapshot in one step: `initRetroactiveState` followed by
            // `saveRetroactiveState`.
  10480     var state = self.initRetroactiveState();
  10481     try self.saveRetroactiveState(&state);
  10482     return state;
  10483 }
  10484 
  10485 fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, comptime opts: struct {
  10486     emit_instructions: bool,
  10487     update_tracking: bool,
  10488     resurrect: bool,
  10489     close_scope: bool,
  10490 }) !void {
            // Reconciles the current register/eflags state with a previously saved `state`.
            // `deaths` are processed with `processDeath` before registers are reconciled.
            // Comptime options:
            //  - close_scope: kill and drop every tracking entry added after `state` was taken.
            //  - resurrect: call `resurrect` on the tracking entries between `Temp.Index.max`
            //    and `state.inst_tracking_len`.
            //  - emit_instructions: emit spill/materialize code for each register's current and
            //    target occupant, and spill eflags.
            //  - update_tracking: update the register manager and tracking entries to match
            //    `state`; when unset, the target registers are only locked for the duration of
            //    this call.
  10491     if (opts.close_scope) {
  10492         for (
  10493             self.inst_tracking.keys()[state.inst_tracking_len..],
  10494             self.inst_tracking.values()[state.inst_tracking_len..],
  10495         ) |inst, *tracking| try tracking.die(self, inst);
  10496         self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
  10497     }
  10498 
  10499     if (opts.resurrect) for (
  10500         self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len],
  10501         self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len],
  10502     ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
  10503     for (deaths) |death| try self.processDeath(death);
  10504 
            // `stack` and `reg_locks` only exist when tracking is not updated; otherwise both
            // are `void`. The lock list is sized for one lock per tracked register.
  10505     const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).array.len]RegisterLock;
  10506     var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
  10507         if (opts.update_tracking)
  10508     {} else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
  10509 
  10510     var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity(
  10511         stack.get(),
  10512         @typeInfo(ExpectedContents).array.len,
  10513     );
  10514     defer if (!opts.update_tracking) {
  10515         for (reg_locks.items) |lock| self.register_manager.unlockReg(lock);
  10516         reg_locks.deinit();
  10517     };
  10518 
  10519     for (
  10520         0..,
  10521         self.register_manager.registers,
  10522         state.registers,
  10523         state.reg_tracking,
  10524     ) |reg_i, current_slot, target_slot, reg_tracking| {
  10525         const reg_index: RegisterManager.TrackedIndex = @intCast(reg_i);
                // A slot only names an occupant when the matching free bit is clear.
  10526         const current_maybe_inst = if (self.register_manager.isRegIndexFree(reg_index)) null else current_slot;
  10527         const target_maybe_inst = if (state.free_registers.isSet(reg_index)) null else target_slot;
                // The target occupant must have been tracked before the snapshot was taken.
  10528         if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst|
  10529             assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len);
  10530         if (opts.emit_instructions) {
  10531             if (current_maybe_inst) |current_inst|
  10532                 try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst);
  10533             if (target_maybe_inst) |target_inst|
  10534                 try self.inst_tracking.getPtr(target_inst).?.materialize(self, target_inst, reg_tracking);
  10535         }
  10536         if (opts.update_tracking) {
  10537             if (current_maybe_inst) |current_inst| {
  10538                 try self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst);
  10539                 self.register_manager.freeRegIndex(reg_index);
  10540             }
  10541             if (target_maybe_inst) |target_inst| {
  10542                 self.register_manager.getRegIndexAssumeFree(reg_index, target_maybe_inst);
  10543                 self.inst_tracking.getPtr(target_inst).?.trackMaterialize(target_inst, reg_tracking);
  10544             }
  10545         } else if (target_maybe_inst) |_|
  10546             try reg_locks.append(self.register_manager.lockRegIndexAssumeUnused(reg_index));
  10547     }
            // eflags is handled like a register whose target state is always "free".
  10548     if (opts.emit_instructions) if (self.eflags_inst) |inst|
  10549         try self.inst_tracking.getPtr(inst).?.spill(self, inst);
  10550     if (opts.update_tracking) if (self.eflags_inst) |inst| {
  10551         self.eflags_inst = null;
  10552         try self.inst_tracking.getPtr(inst).?.trackSpill(self, inst);
  10553     };
  10554 
            // Safety check: after updating tracking, the live state must equal the snapshot.
  10555     if (opts.update_tracking and std.debug.runtime_safety) {
  10556         assert(self.eflags_inst == null);
  10557         assert(self.register_manager.free_registers.eql(state.free_registers));
  10558         var used_reg_it = state.free_registers.iterator(.{ .kind = .unset });
  10559         while (used_reg_it.next()) |index|
  10560             assert(self.register_manager.registers[index] == state.registers[index]);
  10561     }
  10562 }
  10563 
  10564 pub fn spillInstruction(self: *CodeGen, reg: Register, inst: Air.Inst.Index) !void {
            // Spills `inst` out of its registers and updates its tracking accordingly.
            // Does nothing when `inst` has no tracking entry. Otherwise `reg` must be one of the
            // registers tracked for `inst`.
  10565     const tracking = self.inst_tracking.getPtr(inst) orelse return;
  10566     for (tracking.getRegs()) |tracked_reg| {
  10567         if (tracked_reg.id() == reg.id()) break;
  10568     } else unreachable; // spilled reg not tracked with spilled instruction
  10569     try tracking.spill(self, inst);
  10570     try tracking.trackSpill(self, inst);
  10571 }
  10572 
  10573 pub fn spillEflagsIfOccupied(self: *CodeGen) !void {
            // If an instruction currently owns eflags, clears that ownership and spills the
            // instruction's value. Does nothing when eflags is unowned.
  10574     if (self.eflags_inst) |inst| {
  10575         self.eflags_inst = null;
  10576         const tracking = self.inst_tracking.getPtr(inst).?;
                // The owner's tracked value must actually carry a condition.
  10577         assert(tracking.getCondition() != null);
  10578         try tracking.spill(self, inst);
  10579         try tracking.trackSpill(self, inst);
  10580     }
  10581 }
  10582 
  10583 pub fn spillCallerPreservedRegs(self: *CodeGen, cc: std.builtin.CallingConvention.Tag) !void {
            // Spills the caller-preserved registers of calling convention `cc`.
            // Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported. The `inline` prongs
            // make the tag comptime-known, as `spillRegisters` takes a comptime register list.
  10584     switch (cc) {
  10585         inline .auto, .x86_64_sysv, .x86_64_win => |tag| try self.spillRegisters(abi.getCallerPreservedRegs(tag)),
  10586         else => unreachable,
  10587     }
  10588 }
  10589 
  10590 pub fn spillRegisters(self: *CodeGen, comptime registers: []const Register) !void {
            // Requests each register in the comptime-known list from the register manager with
            // no owning instruction (`null`), unrolled at compile time. Presumably this evicts
            // whatever currently occupies the register; confirm against `getKnownReg`.
  10591     inline for (registers) |reg| try self.register_manager.getKnownReg(reg, null);
  10592 }
  10593 
  10594 /// Copies a value to a register without tracking the register. The register is not considered
  10595 /// allocated. A second call to `copyToTmpRegister` may return the same register.
  10596 /// This can have a side effect of spilling instructions to the stack to free up a register.
  10597 fn copyToTmpRegister(self: *CodeGen, ty: Type, mcv: MCValue) !Register {
  10598     const reg = try self.register_manager.allocReg(null, self.regSetForType(ty));
  10599     try self.genSetReg(reg, ty, mcv, .{});
  10600     return reg;
  10601 }
  10602 
  10603 /// Allocates a new register and copies `mcv` into it.
  10604 /// `reg_owner` is the instruction that gets associated with the register in the register table.
  10605 /// This can have a side effect of spilling instructions to the stack to free up a register.
  10606 /// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
  10607 fn copyToRegisterWithInstTracking(
  10608     self: *CodeGen,
  10609     reg_owner: Air.Inst.Index,
  10610     ty: Type,
  10611     mcv: MCValue,
  10612 ) !MCValue {
  10613     const reg: Register = try self.register_manager.allocReg(reg_owner, self.regSetForType(ty));
  10614     try self.genSetReg(reg, ty, mcv, .{});
  10615     return MCValue{ .register = reg };
  10616 }
  10617 
  10618 fn airAlloc(self: *CodeGen, inst: Air.Inst.Index) !void {
  10619     const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } };
  10620     return self.finishAir(inst, result, .{ .none, .none, .none });
  10621 }
  10622 
  10623 fn airRetPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  10624     const result: MCValue = switch (self.ret_mcv.long) {
  10625         else => unreachable,
  10626         .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
  10627         .load_frame => .{ .register_offset = .{
  10628             .reg = (try self.copyToRegisterWithInstTracking(
  10629                 inst,
  10630                 self.typeOfIndex(inst),
  10631                 self.ret_mcv.long,
  10632             )).register,
  10633             .off = self.ret_mcv.short.indirect.off,
  10634         } },
  10635     };
  10636     return self.finishAir(inst, result, .{ .none, .none, .none });
  10637 }
  10638 
  10639 fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
  10640     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  10641     const dst_ty = self.typeOfIndex(inst);
  10642     const dst_bits = dst_ty.floatBits(self.target.*);
  10643     const src_ty = self.typeOf(ty_op.operand);
  10644     const src_bits = src_ty.floatBits(self.target.*);
  10645 
  10646     const result = result: {
  10647         if (switch (dst_bits) {
  10648             16 => switch (src_bits) {
  10649                 32 => !self.hasFeature(.f16c),
  10650                 64, 80, 128 => true,
  10651                 else => unreachable,
  10652             },
  10653             32 => switch (src_bits) {
  10654                 64 => false,
  10655                 80, 128 => true,
  10656                 else => unreachable,
  10657             },
  10658             64 => switch (src_bits) {
  10659                 80, 128 => true,
  10660                 else => unreachable,
  10661             },
  10662             80 => switch (src_bits) {
  10663                 128 => true,
  10664                 else => unreachable,
  10665             },
  10666             else => unreachable,
  10667         }) {
  10668             var callee_buf: ["__trunc?f?f2".len]u8 = undefined;
  10669             break :result try self.genCall(.{ .lib = .{
  10670                 .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(),
  10671                 .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()},
  10672                 .callee = std.fmt.bufPrint(&callee_buf, "__trunc{c}f{c}f2", .{
  10673                     floatCompilerRtAbiName(src_bits),
  10674                     floatCompilerRtAbiName(dst_bits),
  10675                 }) catch unreachable,
  10676             } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
  10677         }
  10678 
  10679         const src_mcv = try self.resolveInst(ty_op.operand);
  10680         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10681             src_mcv
  10682         else
  10683             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
  10684         const dst_reg = dst_mcv.getReg().?.to128();
  10685         const dst_lock = self.register_manager.lockReg(dst_reg);
  10686         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  10687 
  10688         if (dst_bits == 16) {
  10689             assert(self.hasFeature(.f16c));
  10690             switch (src_bits) {
  10691                 32 => {
  10692                     const mat_src_reg = if (src_mcv.isRegister())
  10693                         src_mcv.getReg().?
  10694                     else
  10695                         try self.copyToTmpRegister(src_ty, src_mcv);
  10696                     try self.asmRegisterRegisterImmediate(
  10697                         .{ .v_, .cvtps2ph },
  10698                         dst_reg,
  10699                         mat_src_reg.to128(),
  10700                         .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  10701                     );
  10702                 },
  10703                 else => unreachable,
  10704             }
  10705         } else {
  10706             assert(src_bits == 64 and dst_bits == 32);
  10707             if (self.hasFeature(.avx)) if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
  10708                 .{ .v_ss, .cvtsd2 },
  10709                 dst_reg,
  10710                 dst_reg,
  10711                 try src_mcv.mem(self, .{ .size = .qword }),
  10712             ) else try self.asmRegisterRegisterRegister(
  10713                 .{ .v_ss, .cvtsd2 },
  10714                 dst_reg,
  10715                 dst_reg,
  10716                 (if (src_mcv.isRegister())
  10717                     src_mcv.getReg().?
  10718                 else
  10719                     try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
  10720             ) else if (src_mcv.isBase()) try self.asmRegisterMemory(
  10721                 .{ ._ss, .cvtsd2 },
  10722                 dst_reg,
  10723                 try src_mcv.mem(self, .{ .size = .qword }),
  10724             ) else try self.asmRegisterRegister(
  10725                 .{ ._ss, .cvtsd2 },
  10726                 dst_reg,
  10727                 (if (src_mcv.isRegister())
  10728                     src_mcv.getReg().?
  10729                 else
  10730                     try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
  10731             );
  10732         }
  10733         break :result dst_mcv;
  10734     };
  10735     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  10736 }
  10737 
  10738 fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void {
  10739     const pt = self.pt;
  10740     const zcu = pt.zcu;
  10741     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  10742     const dst_ty = self.typeOfIndex(inst);
  10743     const dst_scalar_ty = dst_ty.scalarType(zcu);
  10744     const dst_bits = dst_scalar_ty.floatBits(self.target.*);
  10745     const src_ty = self.typeOf(ty_op.operand);
  10746     const src_scalar_ty = src_ty.scalarType(zcu);
  10747     const src_bits = src_scalar_ty.floatBits(self.target.*);
  10748 
  10749     const result = result: {
  10750         if (switch (src_bits) {
  10751             16 => switch (dst_bits) {
  10752                 32, 64 => !self.hasFeature(.f16c),
  10753                 80, 128 => true,
  10754                 else => unreachable,
  10755             },
  10756             32 => switch (dst_bits) {
  10757                 64 => false,
  10758                 80, 128 => true,
  10759                 else => unreachable,
  10760             },
  10761             64 => switch (dst_bits) {
  10762                 80, 128 => true,
  10763                 else => unreachable,
  10764             },
  10765             80 => switch (dst_bits) {
  10766                 128 => true,
  10767                 else => unreachable,
  10768             },
  10769             else => unreachable,
  10770         }) {
  10771             if (dst_ty.isVector(zcu)) break :result null;
  10772             var callee_buf: ["__extend?f?f2".len]u8 = undefined;
  10773             break :result try self.genCall(.{ .lib = .{
  10774                 .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(),
  10775                 .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()},
  10776                 .callee = std.fmt.bufPrint(&callee_buf, "__extend{c}f{c}f2", .{
  10777                     floatCompilerRtAbiName(src_bits),
  10778                     floatCompilerRtAbiName(dst_bits),
  10779                 }) catch unreachable,
  10780             } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
  10781         }
  10782 
  10783         const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  10784         const src_mcv = try self.resolveInst(ty_op.operand);
  10785         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10786             src_mcv
  10787         else
  10788             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
  10789         const dst_reg = dst_mcv.getReg().?;
  10790         const dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(zcu), 16)));
  10791         const dst_lock = self.register_manager.lockReg(dst_reg);
  10792         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  10793 
  10794         const vec_len = if (dst_ty.isVector(zcu)) dst_ty.vectorLen(zcu) else 1;
  10795         if (src_bits == 16) {
  10796             assert(self.hasFeature(.f16c));
  10797             const mat_src_reg = if (src_mcv.isRegister())
  10798                 src_mcv.getReg().?
  10799             else
  10800                 try self.copyToTmpRegister(src_ty, src_mcv);
  10801             try self.asmRegisterRegister(
  10802                 .{ .v_ps, .cvtph2 },
  10803                 dst_alias,
  10804                 registerAlias(mat_src_reg, src_abi_size),
  10805             );
  10806             switch (dst_bits) {
  10807                 32 => {},
  10808                 64 => try self.asmRegisterRegisterRegister(
  10809                     .{ .v_sd, .cvtss2 },
  10810                     dst_alias,
  10811                     dst_alias,
  10812                     dst_alias,
  10813                 ),
  10814                 else => unreachable,
  10815             }
  10816         } else {
  10817             assert(src_bits == 32 and dst_bits == 64);
  10818             if (self.hasFeature(.avx)) switch (vec_len) {
  10819                 1 => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
  10820                     .{ .v_sd, .cvtss2 },
  10821                     dst_alias,
  10822                     dst_alias,
  10823                     try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
  10824                 ) else try self.asmRegisterRegisterRegister(
  10825                     .{ .v_sd, .cvtss2 },
  10826                     dst_alias,
  10827                     dst_alias,
  10828                     registerAlias(if (src_mcv.isRegister())
  10829                         src_mcv.getReg().?
  10830                     else
  10831                         try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
  10832                 ),
  10833                 2...4 => if (src_mcv.isBase()) try self.asmRegisterMemory(
  10834                     .{ .v_pd, .cvtps2 },
  10835                     dst_alias,
  10836                     try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
  10837                 ) else try self.asmRegisterRegister(
  10838                     .{ .v_pd, .cvtps2 },
  10839                     dst_alias,
  10840                     registerAlias(if (src_mcv.isRegister())
  10841                         src_mcv.getReg().?
  10842                     else
  10843                         try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
  10844                 ),
  10845                 else => break :result null,
  10846             } else if (src_mcv.isBase()) try self.asmRegisterMemory(
  10847                 switch (vec_len) {
  10848                     1 => .{ ._sd, .cvtss2 },
  10849                     2 => .{ ._pd, .cvtps2 },
  10850                     else => break :result null,
  10851                 },
  10852                 dst_alias,
  10853                 try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
  10854             ) else try self.asmRegisterRegister(
  10855                 switch (vec_len) {
  10856                     1 => .{ ._sd, .cvtss2 },
  10857                     2 => .{ ._pd, .cvtps2 },
  10858                     else => break :result null,
  10859                 },
  10860                 dst_alias,
  10861                 registerAlias(if (src_mcv.isRegister())
  10862                     src_mcv.getReg().?
  10863                 else
  10864                     try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
  10865             );
  10866         }
  10867         break :result dst_mcv;
  10868     } orelse return self.fail("TODO implement airFpext from {} to {}", .{
  10869         src_ty.fmt(pt), dst_ty.fmt(pt),
  10870     });
  10871     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  10872 }
  10873 
  10874 fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
  10875     const pt = self.pt;
  10876     const zcu = pt.zcu;
  10877     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  10878     const src_ty = self.typeOf(ty_op.operand);
  10879     const dst_ty = self.typeOfIndex(inst);
  10880 
  10881     const result = @as(?MCValue, result: {
  10882         const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  10883 
  10884         const src_int_info = src_ty.intInfo(zcu);
  10885         const dst_int_info = dst_ty.intInfo(zcu);
  10886         const extend = switch (src_int_info.signedness) {
  10887             .signed => dst_int_info,
  10888             .unsigned => src_int_info,
  10889         }.signedness;
  10890 
  10891         const src_mcv = try self.resolveInst(ty_op.operand);
  10892         if (dst_ty.isVector(zcu)) {
  10893             const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  10894             const max_abi_size = @max(dst_abi_size, src_abi_size);
  10895             if (max_abi_size > self.vectorSize(.int)) break :result null;
  10896             const has_avx = self.hasFeature(.avx);
  10897 
  10898             const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu);
  10899             const src_elem_abi_size = src_ty.childType(zcu).abiSize(zcu);
  10900             switch (std.math.order(dst_elem_abi_size, src_elem_abi_size)) {
  10901                 .lt => {
  10902                     const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
  10903                         else => break :result null,
  10904                         1 => switch (src_elem_abi_size) {
  10905                             else => break :result null,
  10906                             2 => switch (dst_int_info.signedness) {
  10907                                 .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw },
  10908                                 .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
  10909                             },
  10910                         },
  10911                         2 => switch (src_elem_abi_size) {
  10912                             else => break :result null,
  10913                             4 => switch (dst_int_info.signedness) {
  10914                                 .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd },
  10915                                 .unsigned => if (has_avx)
  10916                                     .{ .vp_w, .ackusd }
  10917                                 else if (self.hasFeature(.sse4_1))
  10918                                     .{ .p_w, .ackusd }
  10919                                 else
  10920                                     break :result null,
  10921                             },
  10922                         },
  10923                     };
  10924 
  10925                     const dst_mcv: MCValue = if (src_mcv.isRegister() and
  10926                         self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10927                         src_mcv
  10928                     else if (has_avx and src_mcv.isRegister())
  10929                         .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  10930                     else
  10931                         try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv);
  10932                     const dst_reg = dst_mcv.getReg().?;
  10933                     const dst_alias = registerAlias(dst_reg, dst_abi_size);
  10934 
  10935                     if (has_avx) try self.asmRegisterRegisterRegister(
  10936                         mir_tag,
  10937                         dst_alias,
  10938                         registerAlias(if (src_mcv.isRegister())
  10939                             src_mcv.getReg().?
  10940                         else
  10941                             dst_reg, src_abi_size),
  10942                         dst_alias,
  10943                     ) else try self.asmRegisterRegister(
  10944                         mir_tag,
  10945                         dst_alias,
  10946                         dst_alias,
  10947                     );
  10948                     break :result dst_mcv;
  10949                 },
  10950                 .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10951                     break :result src_mcv
  10952                 else {
  10953                     const dst_mcv = try self.allocRegOrMem(inst, true);
  10954                     try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
  10955                     break :result dst_mcv;
  10956                 },
  10957                 .gt => if (self.hasFeature(.sse4_1)) {
  10958                     const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) {
  10959                         else => break :result null,
  10960                         2 => if (has_avx) .vp_w else .p_w,
  10961                         4 => if (has_avx) .vp_d else .p_d,
  10962                         8 => if (has_avx) .vp_q else .p_q,
  10963                     }, switch (src_elem_abi_size) {
  10964                         else => break :result null,
  10965                         1 => switch (extend) {
  10966                             .signed => .movsxb,
  10967                             .unsigned => .movzxb,
  10968                         },
  10969                         2 => switch (extend) {
  10970                             .signed => .movsxw,
  10971                             .unsigned => .movzxw,
  10972                         },
  10973                         4 => switch (extend) {
  10974                             .signed => .movsxd,
  10975                             .unsigned => .movzxd,
  10976                         },
  10977                     } };
  10978 
  10979                     const dst_mcv: MCValue = if (src_mcv.isRegister() and
  10980                         self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  10981                         src_mcv
  10982                     else
  10983                         .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) };
  10984                     const dst_reg = dst_mcv.getReg().?;
  10985                     const dst_alias = registerAlias(dst_reg, dst_abi_size);
  10986 
  10987                     if (src_mcv.isBase()) try self.asmRegisterMemory(
  10988                         mir_tag,
  10989                         dst_alias,
  10990                         try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
  10991                     ) else try self.asmRegisterRegister(
  10992                         mir_tag,
  10993                         dst_alias,
  10994                         registerAlias(if (src_mcv.isRegister())
  10995                             src_mcv.getReg().?
  10996                         else
  10997                             try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
  10998                     );
  10999                     break :result dst_mcv;
  11000                 } else {
  11001                     const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
  11002                         else => break :result null,
  11003                         2 => switch (src_elem_abi_size) {
  11004                             else => break :result null,
  11005                             1 => .{ .p_, .unpcklbw },
  11006                         },
  11007                         4 => switch (src_elem_abi_size) {
  11008                             else => break :result null,
  11009                             2 => .{ .p_, .unpcklwd },
  11010                         },
  11011                         8 => switch (src_elem_abi_size) {
  11012                             else => break :result null,
  11013                             2 => .{ .p_, .unpckldq },
  11014                         },
  11015                     };
  11016 
  11017                     const dst_mcv: MCValue = if (src_mcv.isRegister() and
  11018                         self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  11019                         src_mcv
  11020                     else
  11021                         try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
  11022                     const dst_reg = dst_mcv.getReg().?;
  11023 
  11024                     const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
  11025                     const ext_alias = registerAlias(ext_reg, src_abi_size);
  11026                     const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg);
  11027                     defer self.register_manager.unlockReg(ext_lock);
  11028 
  11029                     try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias);
  11030                     switch (extend) {
  11031                         .signed => try self.asmRegisterRegister(
  11032                             .{ switch (src_elem_abi_size) {
  11033                                 else => unreachable,
  11034                                 1 => .p_b,
  11035                                 2 => .p_w,
  11036                                 4 => .p_d,
  11037                             }, .cmpgt },
  11038                             ext_alias,
  11039                             registerAlias(dst_reg, src_abi_size),
  11040                         ),
  11041                         .unsigned => {},
  11042                     }
  11043                     try self.asmRegisterRegister(
  11044                         mir_tag,
  11045                         registerAlias(dst_reg, dst_abi_size),
  11046                         registerAlias(ext_reg, dst_abi_size),
  11047                     );
  11048                     break :result dst_mcv;
  11049                 },
  11050             }
  11051             @compileError("unreachable");
  11052         }
  11053 
  11054         const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
  11055 
  11056         const src_storage_bits: u16 = switch (src_mcv) {
  11057             .register, .register_offset => 64,
  11058             .register_pair => 128,
  11059             .load_frame => |frame_addr| @intCast(self.getFrameAddrSize(frame_addr) * 8),
  11060             else => src_int_info.bits,
  11061         };
  11062 
  11063         const dst_mcv = if (dst_int_info.bits <= src_storage_bits and
  11064             std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
  11065             std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and
  11066             self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
  11067             const dst_mcv = try self.allocRegOrMem(inst, true);
  11068             try self.genCopy(min_ty, dst_mcv, src_mcv, .{});
  11069             break :dst dst_mcv;
  11070         };
  11071 
  11072         if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
  11073             .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }
  11074         else
  11075             dst_mcv;
  11076 
  11077         if (dst_mcv.isRegister()) {
  11078             try self.truncateRegister(src_ty, dst_mcv.getReg().?);
  11079             break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) };
  11080         }
  11081 
  11082         const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
  11083         const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;
  11084 
  11085         const high_mcv: MCValue = if (dst_mcv.isBase())
  11086             dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
  11087         else
  11088             .{ .register = dst_mcv.register_pair[1] };
  11089         const high_reg = if (high_mcv.isRegister())
  11090             high_mcv.getReg().?
  11091         else
  11092             try self.copyToTmpRegister(switch (src_int_info.signedness) {
  11093                 .signed => .isize,
  11094                 .unsigned => .usize,
  11095             }, high_mcv);
  11096         const high_lock = self.register_manager.lockRegAssumeUnused(high_reg);
  11097         defer self.register_manager.unlockReg(high_lock);
  11098 
  11099         const high_bits = src_int_info.bits % 64;
  11100         if (high_bits > 0) {
  11101             try self.truncateRegister(src_ty, high_reg);
  11102             const high_ty: Type = if (dst_int_info.bits >= 64) .usize else dst_ty;
  11103             try self.genCopy(high_ty, high_mcv, .{ .register = high_reg }, .{});
  11104         }
  11105 
  11106         if (dst_limbs_len > src_limbs_len) try self.genInlineMemset(
  11107             dst_mcv.address().offset(src_limbs_len * 8),
  11108             switch (extend) {
  11109                 .signed => extend: {
  11110                     const extend_mcv = MCValue{ .register = high_reg };
  11111                     try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, extend_mcv, .u8, .{ .immediate = 63 });
  11112                     break :extend extend_mcv;
  11113                 },
  11114                 .unsigned => .{ .immediate = 0 },
  11115             },
  11116             .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 },
  11117             .{},
  11118         );
  11119 
  11120         break :result dst_mcv;
  11121     }) orelse return self.fail("TODO implement airIntCast from {} to {}", .{
  11122         src_ty.fmt(pt), dst_ty.fmt(pt),
  11123     });
  11124     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  11125 }
  11126 
  11127 fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
  11128     const pt = self.pt;
  11129     const zcu = pt.zcu;
  11130     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  11131 
  11132     const dst_ty = self.typeOfIndex(inst);
  11133     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  11134     const src_ty = self.typeOf(ty_op.operand);
  11135     const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  11136 
  11137     const result = result: {
  11138         const src_mcv = try self.resolveInst(ty_op.operand);
  11139         const src_lock =
  11140             if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
  11141         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  11142 
  11143         const dst_mcv = if (src_mcv.isRegister() and src_mcv.getReg().?.class() == self.regClassForType(dst_ty) and
  11144             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  11145             src_mcv
  11146         else if (dst_abi_size <= 8)
  11147             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv)
  11148         else if (dst_abi_size <= 16 and !dst_ty.isVector(zcu)) dst: {
  11149             const dst_regs =
  11150                 try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
  11151             const dst_mcv: MCValue = .{ .register_pair = dst_regs };
  11152             const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
  11153             defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
  11154 
  11155             try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
  11156             break :dst dst_mcv;
  11157         } else dst: {
  11158             const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true);
  11159             try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
  11160             break :dst dst_mcv;
  11161         };
  11162 
  11163         if (dst_ty.zigTypeTag(zcu) == .vector) {
  11164             assert(src_ty.zigTypeTag(zcu) == .vector and dst_ty.vectorLen(zcu) == src_ty.vectorLen(zcu));
  11165             const dst_elem_ty = dst_ty.childType(zcu);
  11166             const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(zcu));
  11167             const src_elem_ty = src_ty.childType(zcu);
  11168             const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(zcu));
  11169 
  11170             const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) {
  11171                 1 => switch (src_elem_abi_size) {
  11172                     2 => switch (dst_ty.vectorLen(zcu)) {
  11173                         1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
  11174                         9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null,
  11175                         else => null,
  11176                     },
  11177                     else => null,
  11178                 },
  11179                 2 => switch (src_elem_abi_size) {
  11180                     4 => switch (dst_ty.vectorLen(zcu)) {
  11181                         1...4 => if (self.hasFeature(.avx))
  11182                             .{ .vp_w, .ackusd }
  11183                         else if (self.hasFeature(.sse4_1))
  11184                             .{ .p_w, .ackusd }
  11185                         else
  11186                             null,
  11187                         5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null,
  11188                         else => null,
  11189                     },
  11190                     else => null,
  11191                 },
  11192                 else => null,
  11193             }) orelse return self.fail("TODO implement airTrunc for {}", .{dst_ty.fmt(pt)});
  11194 
  11195             const dst_info = dst_elem_ty.intInfo(zcu);
  11196             const src_info = src_elem_ty.intInfo(zcu);
  11197 
  11198             const mask_val = try pt.intValue(src_elem_ty, @as(u64, std.math.maxInt(u64)) >> @intCast(64 - dst_info.bits));
  11199 
  11200             const splat_ty = try pt.vectorType(.{
  11201                 .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)),
  11202                 .child = src_elem_ty.ip_index,
  11203             });
  11204             const splat_abi_size: u32 = @intCast(splat_ty.abiSize(zcu));
  11205 
  11206             const splat_val = try pt.intern(.{ .aggregate = .{
  11207                 .ty = splat_ty.ip_index,
  11208                 .storage = .{ .repeated_elem = mask_val.ip_index },
  11209             } });
  11210 
  11211             const splat_mcv = try self.genTypedValue(.fromInterned(splat_val));
  11212             const splat_addr_mcv: MCValue = switch (splat_mcv) {
  11213                 .memory, .indirect, .load_frame => splat_mcv.address(),
  11214                 else => .{ .register = try self.copyToTmpRegister(.usize, splat_mcv.address()) },
  11215             };
  11216 
  11217             const dst_reg = dst_mcv.getReg().?;
  11218             const dst_alias = registerAlias(dst_reg, src_abi_size);
  11219             if (self.hasFeature(.avx)) {
  11220                 try self.asmRegisterRegisterMemory(
  11221                     .{ .vp_, .@"and" },
  11222                     dst_alias,
  11223                     dst_alias,
  11224                     try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }),
  11225                 );
  11226                 if (src_abi_size > 16) {
  11227                     const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
  11228                     const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
  11229                     defer self.register_manager.unlockReg(temp_lock);
  11230 
  11231                     try self.asmRegisterRegisterImmediate(
  11232                         .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract },
  11233                         registerAlias(temp_reg, dst_abi_size),
  11234                         dst_alias,
  11235                         .u(1),
  11236                     );
  11237                     try self.asmRegisterRegisterRegister(
  11238                         mir_tag,
  11239                         registerAlias(dst_reg, dst_abi_size),
  11240                         registerAlias(dst_reg, dst_abi_size),
  11241                         registerAlias(temp_reg, dst_abi_size),
  11242                     );
  11243                 } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias);
  11244             } else {
  11245                 try self.asmRegisterMemory(
  11246                     .{ .p_, .@"and" },
  11247                     dst_alias,
  11248                     try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }),
  11249                 );
  11250                 try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias);
  11251             }
  11252             break :result dst_mcv;
  11253         }
  11254 
  11255         // when truncating a `u16` to `u5`, for example, those top 3 bits in the result
  11256         // have to be removed. this only happens if the dst is not a power-of-two size.
  11257         if (dst_abi_size <= 8) {
  11258             if (self.regExtraBits(dst_ty) > 0) {
  11259                 try self.truncateRegister(dst_ty, dst_mcv.register.to64());
  11260             }
  11261         } else if (dst_abi_size <= 16) {
  11262             const dst_info = dst_ty.intInfo(zcu);
  11263             const high_ty = try pt.intType(dst_info.signedness, dst_info.bits - 64);
  11264             if (self.regExtraBits(high_ty) > 0) {
  11265                 try self.truncateRegister(high_ty, dst_mcv.register_pair[1].to64());
  11266             }
  11267         }
  11268 
  11269         break :result dst_mcv;
  11270     };
  11271     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  11272 }
  11273 
  11274 fn airIntFromBool(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers an `un_op` instruction whose result takes over the operand's value
            // unchanged: no conversion instruction is emitted in this function.
  11275     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  11276     const ty = self.typeOfIndex(inst);
  11277 
  11278     const operand = try self.resolveInst(un_op);
            // Take over the operand's location when `reuseOperand` allows it; otherwise
            // copy the operand into a register tracked for `inst`, using the result type.
  11279     const dst_mcv = if (self.reuseOperand(inst, un_op, 0, operand))
  11280         operand
  11281     else
  11282         try self.copyToRegisterWithInstTracking(inst, ty, operand);
  11283 
  11284     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  11285 }
  11286 
  11287 fn airSlice(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Builds the result in a freshly allocated frame slot sized for the slice type:
            // `bin_op.lhs` is stored at offset 0 and `bin_op.rhs` directly after it. The
            // instruction's result is a `load_frame` of that slot.
  11288     const zcu = self.pt.zcu;
  11289     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
            // The two operands live in the extra data as an `Air.Bin` payload.
  11290     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
  11291 
  11292     const slice_ty = self.typeOfIndex(inst);
  11293     const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu));
  11294 
            // First field: the lhs operand, written at offset 0.
  11295     const ptr_ty = self.typeOf(bin_op.lhs);
  11296     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = bin_op.lhs }, .{});
  11297 
            // Second field: the rhs operand, written at the ABI size of the lhs type.
  11298     const len_ty = self.typeOf(bin_op.rhs);
  11299     try self.genSetMem(
  11300         .{ .frame = frame_index },
  11301         @intCast(ptr_ty.abiSize(zcu)),
  11302         len_ty,
  11303         .{ .air_ref = bin_op.rhs },
  11304         .{},
  11305     );
  11306 
  11307     const result = MCValue{ .load_frame = .{ .index = frame_index } };
  11308     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  11309 }
  11310 
  11311 fn airUnOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Thin wrapper for `ty_op` unary instructions: all code generation is done by
            // `genUnOp`; this function only extracts the operand and finishes the instruction.
  11312     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  11313     const dst_mcv = try self.genUnOp(inst, tag, ty_op.operand);
  11314     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  11315 }
  11316 
  11317 fn airBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Generates a `bin_op` instruction through `genBinOp`, then truncates the result
            // when its type is an ABI integer whose bit size is smaller than its ABI size
            // in bits.
  11318     const pt = self.pt;
  11319     const zcu = pt.zcu;
  11320     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  11321     const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
  11322 
  11323     const dst_ty = self.typeOfIndex(inst);
  11324     if (dst_ty.isAbiInt(zcu)) {
  11325         const abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  11326         const bit_size: u32 = @intCast(dst_ty.bitSize(zcu));
                // Only types with padding bits above `bit_size` need fixing up.
  11327         if (abi_size * 8 > bit_size) {
                    // Keep a register result locked while the truncation code is emitted.
  11328             const dst_lock = switch (dst_mcv) {
  11329                 .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg),
  11330                 else => null,
  11331             };
  11332             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  11333 
  11334             if (dst_mcv.isRegister()) {
  11335                 try self.truncateRegister(dst_ty, dst_mcv.getReg().?);
  11336             } else {
                        // Non-register result: load the highest, partially used 64-bit chunk
                        // into a temporary, truncate it there, and store it back.
  11337                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11338                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  11339                 defer self.register_manager.unlockReg(tmp_lock);
  11340 
                        // `hi_ty` is an unsigned integer as wide as the used bits of that chunk
                        // (1...64); the chunk sits at byte offset `bit_size / 64 * 8`.
  11341                 const hi_ty = try pt.intType(.unsigned, @intCast((dst_ty.bitSize(zcu) - 1) % 64 + 1));
  11342                 const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
  11343                 try self.genSetReg(tmp_reg, hi_ty, hi_mcv, .{});
  11344                 try self.truncateRegister(dst_ty, tmp_reg);
  11345                 try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg }, .{});
  11346             }
  11347         }
  11348     }
  11349     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
  11350 }
  11351 
  11352 fn airPtrArithmetic(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
            // Thin wrapper for `ty_pl` binary instructions: the operands are read from the
            // `Air.Bin` extra payload and all code generation is delegated to `genBinOp`.
  11353     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  11354     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
  11355     const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
  11356     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
  11357 }
  11358 
  11359 fn activeIntBits(self: *CodeGen, dst_air: Air.Inst.Ref) u16 {
            // Returns a bit count for the integer value referenced by `dst_air`:
            //   * result of an `intcast` instruction: the source type's bit count, adjusted
            //     for a signedness change, capped at the bit count of `dst_air`'s own type;
            //   * interned (constant) value: derived from the constant itself;
            //   * anything else: the full bit count of `dst_air`'s type.
  11360     const pt = self.pt;
  11361     const zcu = pt.zcu;
  11362     const air_tag = self.air.instructions.items(.tag);
  11363     const air_data = self.air.instructions.items(.data);
  11364 
  11365     const dst_ty = self.typeOf(dst_air);
  11366     const dst_info = dst_ty.intInfo(zcu);
  11367     if (dst_air.toIndex()) |inst| {
  11368         switch (air_tag[@intFromEnum(inst)]) {
  11369             .intcast => {
  11370                 const src_ty = self.typeOf(air_data[@intFromEnum(inst)].ty_op.operand);
  11371                 const src_info = src_ty.intInfo(zcu);
                        // signed -> unsigned drops one bit, unsigned -> signed adds one bit,
                        // same signedness keeps the source width.
  11372                 return @min(switch (src_info.signedness) {
  11373                     .signed => switch (dst_info.signedness) {
  11374                         .signed => src_info.bits,
  11375                         .unsigned => src_info.bits - 1,
  11376                     },
  11377                     .unsigned => switch (dst_info.signedness) {
  11378                         .signed => src_info.bits + 1,
  11379                         .unsigned => src_info.bits,
  11380                     },
  11381                 }, dst_info.bits);
  11382             },
  11383             else => {},
  11384         }
  11385     } else if (dst_air.toInterned()) |ip_index| {
                // Constant operand: use the two's-complement bit count reported for the
                // value, plus one when the value is positive and the type is signed.
  11386         var space: Value.BigIntSpace = undefined;
  11387         const src_int = Value.fromInterned(ip_index).toBigInt(&space, zcu);
  11388         return @as(u16, @intCast(src_int.bitCountTwosComp())) +
  11389             @intFromBool(src_int.positive and dst_info.signedness == .signed);
  11390     }
            // Fallback for instructions other than `intcast`.
  11391     return dst_info.bits;
  11392 }
  11393 
  11394 fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers the `bin_op` instructions `mul`, `mul_wrap`, `div_trunc`, `div_floor`,
            // `div_exact`, `rem` and `mod` (any other tag reaches `unreachable` below).
            //   * float and vector results are delegated to `genBinOp`;
            //   * division-family operations where both the result type and the computed
            //     operation type are 16 bytes are lowered to library calls;
            //   * everything else goes to `genMulDivBinOp` with rax, rcx and rdx spilled
            //     and locked.
  11395     const pt = self.pt;
  11396     const zcu = pt.zcu;
  11397     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  11398     const result = result: {
  11399         const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
  11400         const dst_ty = self.typeOfIndex(inst);
  11401         switch (dst_ty.zigTypeTag(zcu)) {
  11402             .float, .vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
  11403             else => {},
  11404         }
  11405         const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  11406 
                // `src_ty` is the integer type the operation is carried out in. For multiplies
                // it is as wide as the larger of the operands' `activeIntBits` and half the
                // destination width; for the division family it is the destination width.
  11407         const dst_info = dst_ty.intInfo(zcu);
  11408         const src_ty = try pt.intType(dst_info.signedness, switch (tag) {
  11409             else => unreachable,
  11410             .mul, .mul_wrap => @max(
  11411                 self.activeIntBits(bin_op.lhs),
  11412                 self.activeIntBits(bin_op.rhs),
  11413                 dst_info.bits / 2,
  11414             ),
  11415             .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits,
  11416         });
  11417         const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  11418 
                // 16-byte case: multiplies fall through to the generic path below, the
                // division family is handled entirely inside this switch via libcalls.
  11419         if (dst_abi_size == 16 and src_abi_size == 16) switch (tag) {
  11420             else => unreachable,
  11421             .mul, .mul_wrap => {},
  11422             .div_trunc, .div_floor, .div_exact, .rem, .mod => {
  11423                 const signed = dst_ty.isSignedInt(zcu);
                    // Sized for the longest name formatted below; `?` stands for the single
                    // character produced by `intCompilerRtAbiName`.
  11424                 var callee_buf: ["__udiv?i3".len]u8 = undefined;
                    // Only initialized for signed `div_floor`; it stays `undefined` otherwise
                    // and is only read again in the signed `.div_floor` branch further down.
  11425                 const signed_div_floor_state: struct {
  11426                     frame_index: FrameIndex,
  11427                     state: State,
  11428                     reloc: Mir.Inst.Index,
  11429                 } = if (signed and tag == .div_floor) state: {
                        // Zero a qword frame slot. Its low byte is set later when the
                        // remainder is nonzero, and the qword is subtracted from the quotient.
  11430                     const frame_index = try self.allocFrameIndex(.initType(.usize, zcu));
  11431                     try self.asmMemoryImmediate(
  11432                         .{ ._, .mov },
  11433                         .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
  11434                         .u(0),
  11435                     );
  11436 
  11437                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11438                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  11439                     defer self.register_manager.unlockReg(tmp_lock);
  11440 
                        // tmp_reg = high qword of lhs (from memory at offset 8, or from the
                        // second register of the pair).
  11441                     const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11442                     const mat_lhs_mcv = switch (lhs_mcv) {
  11443                         .load_symbol => mat_lhs_mcv: {
  11444                             // TODO clean this up!
  11445                             const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
  11446                             break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  11447                         },
  11448                         else => lhs_mcv,
  11449                     };
  11450                     const mat_lhs_lock = switch (mat_lhs_mcv) {
  11451                         .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  11452                         else => null,
  11453                     };
  11454                     defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
  11455                     if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
  11456                         .{ ._, .mov },
  11457                         tmp_reg,
  11458                         try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  11459                     ) else try self.asmRegisterRegister(
  11460                         .{ ._, .mov },
  11461                         tmp_reg,
  11462                         mat_lhs_mcv.register_pair[1],
  11463                     );
  11464 
                        // tmp_reg ^= high qword of rhs, so the sign flag left by the xor is set
                        // exactly when the two operands' top bits differ.
  11465                     const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11466                     const mat_rhs_mcv = switch (rhs_mcv) {
  11467                         .load_symbol => mat_rhs_mcv: {
  11468                             // TODO clean this up!
  11469                             const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
  11470                             break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  11471                         },
  11472                         else => rhs_mcv,
  11473                     };
  11474                     const mat_rhs_lock = switch (mat_rhs_mcv) {
  11475                         .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  11476                         else => null,
  11477                     };
  11478                     defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11479                     if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
  11480                         .{ ._, .xor },
  11481                         tmp_reg,
  11482                         try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  11483                     ) else try self.asmRegisterRegister(
  11484                         .{ ._, .xor },
  11485                         tmp_reg,
  11486                         mat_rhs_mcv.register_pair[1],
  11487                     );
                        // Save the state, then emit a conditional jump on `.ns`; its target is
                        // filled in by `performReloc` after the remainder check below.
  11488                     const state = try self.saveState();
  11489                     const reloc = try self.asmJccReloc(.ns, undefined);
  11490 
  11491                     break :state .{ .frame_index = frame_index, .state = state, .reloc = reloc };
  11492                 } else undefined;
                    // First libcall. The name is "__" + ("u" when unsigned) + "div"/"mod" +
                    // ABI character + "i3". Signed `div_floor` asks for the "mod" routine
                    // here; the actual division is issued in the `.div_floor` branch below.
  11493                 const call_mcv = try self.genCall(
  11494                     .{ .lib = .{
  11495                         .return_type = dst_ty.toIntern(),
  11496                         .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
  11497                         .callee = std.fmt.bufPrint(&callee_buf, "__{s}{s}{c}i3", .{
  11498                             if (signed) "" else "u",
  11499                             switch (tag) {
  11500                                 .div_trunc, .div_exact => "div",
  11501                                 .div_floor => if (signed) "mod" else "div",
  11502                                 .rem, .mod => "mod",
  11503                                 else => unreachable,
  11504                             },
  11505                             intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))),
  11506                         }) catch unreachable,
  11507                     } },
  11508                     &.{ src_ty, src_ty },
  11509                     &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
  11510                     .{},
  11511                 );
                    // Unsigned results, and signed tags other than `div_floor`/`mod`, use the
                    // libcall result as is.
  11512                 break :result if (signed) switch (tag) {
  11513                     .div_floor => {
                            // OR the two halves of the remainder together; the `.nz` setcc then
                            // writes 1 into the frame slot's low byte iff the remainder is
                            // nonzero.
  11514                         try self.asmRegisterRegister(
  11515                             .{ ._, .@"or" },
  11516                             call_mcv.register_pair[0],
  11517                             call_mcv.register_pair[1],
  11518                         );
  11519                         try self.asmSetccMemory(.nz, .{
  11520                             .base = .{ .frame = signed_div_floor_state.frame_index },
  11521                             .mod = .{ .rm = .{ .size = .byte } },
  11522                         });
                            // Return to the state saved before the jump and make this point
                            // the jump's target, so both paths continue with the division.
  11523                         try self.restoreState(signed_div_floor_state.state, &.{}, .{
  11524                             .emit_instructions = true,
  11525                             .update_tracking = true,
  11526                             .resurrect = true,
  11527                             .close_scope = true,
  11528                         });
  11529                         self.performReloc(signed_div_floor_state.reloc);
  11530                         const dst_mcv = try self.genCall(
  11531                             .{ .lib = .{
  11532                                 .return_type = dst_ty.toIntern(),
  11533                                 .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
  11534                                 .callee = std.fmt.bufPrint(&callee_buf, "__div{c}i3", .{
  11535                                     intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))),
  11536                                 }) catch unreachable,
  11537                             } },
  11538                             &.{ src_ty, src_ty },
  11539                             &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
  11540                             .{},
  11541                         );
                            // Subtract the frame slot (0 or 1) from the quotient: `sub` on the
                            // low half, `sbb 0` to carry the borrow into the high half.
  11542                         try self.asmRegisterMemory(
  11543                             .{ ._, .sub },
  11544                             dst_mcv.register_pair[0],
  11545                             .{
  11546                                 .base = .{ .frame = signed_div_floor_state.frame_index },
  11547                                 .mod = .{ .rm = .{ .size = .qword } },
  11548                             },
  11549                         );
  11550                         try self.asmRegisterImmediate(.{ ._, .sbb }, dst_mcv.register_pair[1], .u(0));
  11551                         try self.freeValue(
  11552                             .{ .load_frame = .{ .index = signed_div_floor_state.frame_index } },
  11553                         );
  11554                         break :result dst_mcv;
  11555                     },
  11556                     .mod => {
                            // Signed `mod`: compute tmp = remainder + rhs and select it over
                            // the libcall's remainder when the remainder's high half is
                            // negative.
  11557                         const dst_regs = call_mcv.register_pair;
  11558                         const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
  11559                         defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
  11560 
  11561                         const tmp_regs =
  11562                             try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
  11563                         const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
  11564                         defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
  11565 
  11566                         const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11567                         const mat_rhs_mcv = switch (rhs_mcv) {
  11568                             .load_symbol => mat_rhs_mcv: {
  11569                                 // TODO clean this up!
  11570                                 const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
  11571                                 break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  11572                             },
  11573                             else => rhs_mcv,
  11574                         };
  11575                         const mat_rhs_lock = switch (mat_rhs_mcv) {
  11576                             .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  11577                             else => null,
  11578                         };
  11579                         defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11580 
                            // Copy both halves of the remainder, then add rhs with carry
                            // (`add` on the low half, `adc` on the high half).
  11581                         for (tmp_regs, dst_regs) |tmp_reg, dst_reg|
  11582                             try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg);
  11583                         if (mat_rhs_mcv.isBase()) {
  11584                             try self.asmRegisterMemory(
  11585                                 .{ ._, .add },
  11586                                 tmp_regs[0],
  11587                                 try mat_rhs_mcv.mem(self, .{ .size = .qword }),
  11588                             );
  11589                             try self.asmRegisterMemory(
  11590                                 .{ ._, .adc },
  11591                                 tmp_regs[1],
  11592                                 try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  11593                             );
  11594                         } else for (
  11595                             [_]Mir.Inst.Tag{ .add, .adc },
  11596                             tmp_regs,
  11597                             mat_rhs_mcv.register_pair,
  11598                         ) |op, tmp_reg, rhs_reg|
  11599                             try self.asmRegisterRegister(.{ ._, op }, tmp_reg, rhs_reg);
                            // `test` on the remainder's high half sets the sign flag that the
                            // `.s` conditional moves below depend on.
  11600                         try self.asmRegisterRegister(.{ ._, .@"test" }, dst_regs[1], dst_regs[1]);
  11601                         for (dst_regs, tmp_regs) |dst_reg, tmp_reg|
  11602                             try self.asmCmovccRegisterRegister(.s, dst_reg, tmp_reg);
  11603                         break :result call_mcv;
  11604                     },
  11605                     else => call_mcv,
  11606                 } else call_mcv;
  11607             },
  11608         };
  11609 
                // Generic path: free up eflags and rax/rcx/rdx, and keep those registers
                // locked while `genMulDivBinOp` runs.
  11610         try self.spillEflagsIfOccupied();
  11611         try self.spillRegisters(&.{ .rax, .rcx, .rdx });
  11612         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
  11613         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  11614 
  11615         const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11616         const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11617         break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs_mcv, rhs_mcv);
  11618     };
  11619     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  11620 }
  11621 
  11622 fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers saturating addition for non-vector types of at most 8 bytes; vectors
            // and wider types fail with a TODO error. The sum is computed into `dst_reg`
            // and, when the condition chosen below holds, replaced with a precomputed
            // limit via a conditional move.
  11623     const pt = self.pt;
  11624     const zcu = pt.zcu;
  11625     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  11626     const ty = self.typeOf(bin_op.lhs);
  11627     if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
  11628         "TODO implement airAddSat for {}",
  11629         .{ty.fmt(pt)},
  11630     );
  11631 
            // The destination is the lhs register when it can be reused, otherwise a copy.
  11632     const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11633     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
  11634         lhs_mcv
  11635     else
  11636         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
  11637     const dst_reg = dst_mcv.register;
  11638     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  11639     defer self.register_manager.unlockReg(dst_lock);
  11640 
  11641     const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11642     const rhs_lock = switch (rhs_mcv) {
  11643         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  11644         else => null,
  11645     };
  11646     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11647 
            // `limit_reg` holds the value the result saturates to.
  11648     const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11649     const limit_mcv = MCValue{ .register = limit_reg };
  11650     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
  11651     defer self.register_manager.unlockReg(limit_lock);
  11652 
            // NOTE(review): `regExtraBits` is presumably the number of register bits above
            // `ty`'s own width, and `regBitSize` the width of the register used - confirm.
  11653     const reg_bits = self.regBitSize(ty);
  11654     const reg_extra_bits = self.regExtraBits(ty);
  11655     const cc: Condition = if (ty.isSignedInt(zcu)) cc: {
                // Signed: shift the lhs left by the extra bits first when there are any.
  11656         if (reg_extra_bits > 0) {
  11657             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });
  11658         }
                // limit = (lhs sar (reg_bits - 1)) xor ((1 << (reg_bits - 1)) - 1)
  11659         try self.genSetReg(limit_reg, ty, dst_mcv, .{});
  11660         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 });
  11661         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
  11662             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
  11663         });
  11664         if (reg_extra_bits > 0) {
                    // The rhs gets the same left shift (in a temporary) before the add.
  11665             const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
  11666             const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
  11667             const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
  11668             defer self.register_manager.unlockReg(shifted_rhs_lock);
  11669 
  11670             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits });
  11671             try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, shifted_rhs_mcv);
  11672         } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
                // Saturate when the add set the overflow condition.
  11673         break :cc .o;
  11674     } else cc: {
                // Unsigned: the limit is all ones in the low `ty.bitSize` bits.
  11675         try self.genSetReg(limit_reg, ty, .{
  11676             .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - ty.bitSize(zcu)),
  11677         }, .{});
  11678 
  11679         try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
                // With extra bits, compare the sum against the limit and saturate when it is
                // above; otherwise saturate on carry out of the add.
  11680         if (reg_extra_bits > 0) {
  11681             try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, limit_mcv);
  11682             break :cc .a;
  11683         }
  11684         break :cc .c;
  11685     };
  11686 
            // The conditional move uses at least a 2-byte register alias.
            // NOTE(review): presumably because cmov has no 8-bit operand form - confirm.
  11687     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
  11688     try self.asmCmovccRegisterRegister(
  11689         cc,
  11690         registerAlias(dst_reg, cmov_abi_size),
  11691         registerAlias(limit_reg, cmov_abi_size),
  11692     );
  11693 
            // Undo the earlier left shift of the signed path with an arithmetic right shift.
  11694     if (reg_extra_bits > 0 and ty.isSignedInt(zcu))
  11695         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });
  11696 
  11697     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
  11698 }
  11699 
  11700 fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers saturating subtraction for non-vector types of at most 8 bytes; vectors
            // and wider types fail with a TODO error. The difference is computed into
            // `dst_reg` and, when the condition chosen below holds, replaced with a
            // precomputed limit via a conditional move.
  11701     const pt = self.pt;
  11702     const zcu = pt.zcu;
  11703     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  11704     const ty = self.typeOf(bin_op.lhs);
  11705     if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
  11706         "TODO implement airSubSat for {}",
  11707         .{ty.fmt(pt)},
  11708     );
  11709 
            // The destination is the lhs register when it can be reused, otherwise a copy.
  11710     const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11711     const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
  11712         lhs_mcv
  11713     else
  11714         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
  11715     const dst_reg = dst_mcv.register;
  11716     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  11717     defer self.register_manager.unlockReg(dst_lock);
  11718 
  11719     const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11720     const rhs_lock = switch (rhs_mcv) {
  11721         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  11722         else => null,
  11723     };
  11724     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11725 
            // `limit_reg` holds the value the result saturates to.
  11726     const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11727     const limit_mcv = MCValue{ .register = limit_reg };
  11728     const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
  11729     defer self.register_manager.unlockReg(limit_lock);
  11730 
            // NOTE(review): `regExtraBits` is presumably the number of register bits above
            // `ty`'s own width, and `regBitSize` the width of the register used - confirm.
  11731     const reg_bits = self.regBitSize(ty);
  11732     const reg_extra_bits = self.regExtraBits(ty);
  11733     const cc: Condition = if (ty.isSignedInt(zcu)) cc: {
                // Signed: shift the lhs left by the extra bits first when there are any.
  11734         if (reg_extra_bits > 0) {
  11735             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });
  11736         }
                // limit = (lhs sar (reg_bits - 1)) xor ((1 << (reg_bits - 1)) - 1)
  11737         try self.genSetReg(limit_reg, ty, dst_mcv, .{});
  11738         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 });
  11739         try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
  11740             .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
  11741         });
  11742         if (reg_extra_bits > 0) {
                    // The rhs gets the same left shift (in a temporary) before the subtract.
  11743             const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
  11744             const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
  11745             const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
  11746             defer self.register_manager.unlockReg(shifted_rhs_lock);
  11747 
  11748             try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits });
  11749             try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv);
  11750         } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
                // Saturate when the subtract set the overflow condition.
  11751         break :cc .o;
  11752     } else cc: {
                // Unsigned: the limit is 0, selected when the subtract sets the carry
                // condition.
  11753         try self.genSetReg(limit_reg, ty, .{ .immediate = 0 }, .{});
  11754         try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
  11755         break :cc .c;
  11756     };
  11757 
            // The conditional move uses at least a 2-byte register alias.
            // NOTE(review): presumably because cmov has no 8-bit operand form - confirm.
  11758     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
  11759     try self.asmCmovccRegisterRegister(
  11760         cc,
  11761         registerAlias(dst_reg, cmov_abi_size),
  11762         registerAlias(limit_reg, cmov_abi_size),
  11763     );
  11764 
            // Undo the earlier left shift of the signed path with an arithmetic right shift.
  11765     if (reg_extra_bits > 0 and ty.isSignedInt(zcu))
  11766         try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });
  11767 
  11768     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
  11769 }
  11770 
  11771 fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void {
  11772     const pt = self.pt;
  11773     const zcu = pt.zcu;
  11774     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  11775     const ty = self.typeOf(bin_op.lhs);
  11776 
  11777     const result = result: {
  11778         if (ty.toIntern() == .i128_type) {
  11779             const ptr_c_int = try pt.singleMutPtrType(.c_int);
  11780             const overflow = try self.allocTempRegOrMem(.c_int, false);
  11781 
  11782             const dst_mcv = try self.genCall(.{ .lib = .{
  11783                 .return_type = .i128_type,
  11784                 .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
  11785                 .callee = "__muloti4",
  11786             } }, &.{ .i128, .i128, ptr_c_int }, &.{
  11787                 .{ .air_ref = bin_op.lhs },
  11788                 .{ .air_ref = bin_op.rhs },
  11789                 overflow.address(),
  11790             }, .{});
  11791             const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_mcv.register_pair);
  11792             defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
  11793 
  11794             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11795             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  11796             defer self.register_manager.unlockReg(tmp_lock);
  11797 
  11798             const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11799             const mat_lhs_mcv = switch (lhs_mcv) {
  11800                 .load_symbol => mat_lhs_mcv: {
  11801                     // TODO clean this up!
  11802                     const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
  11803                     break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  11804                 },
  11805                 else => lhs_mcv,
  11806             };
  11807             const mat_lhs_lock = switch (mat_lhs_mcv) {
  11808                 .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  11809                 else => null,
  11810             };
  11811             defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
  11812             if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
  11813                 .{ ._, .mov },
  11814                 tmp_reg,
  11815                 try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  11816             ) else try self.asmRegisterRegister(
  11817                 .{ ._, .mov },
  11818                 tmp_reg,
  11819                 mat_lhs_mcv.register_pair[1],
  11820             );
  11821 
  11822             const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11823             const mat_rhs_mcv = switch (rhs_mcv) {
  11824                 .load_symbol => mat_rhs_mcv: {
  11825                     // TODO clean this up!
  11826                     const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
  11827                     break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  11828                 },
  11829                 else => rhs_mcv,
  11830             };
  11831             const mat_rhs_lock = switch (mat_rhs_mcv) {
  11832                 .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  11833                 else => null,
  11834             };
  11835             defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11836             if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
  11837                 .{ ._, .xor },
  11838                 tmp_reg,
  11839                 try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  11840             ) else try self.asmRegisterRegister(
  11841                 .{ ._, .xor },
  11842                 tmp_reg,
  11843                 mat_rhs_mcv.register_pair[1],
  11844             );
  11845 
  11846             try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63));
  11847             try self.asmRegister(.{ ._, .not }, tmp_reg);
  11848             try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .{ .size = .dword }), .s(0));
  11849             try self.freeValue(overflow);
  11850             try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg);
  11851             try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, .u(63));
  11852             try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[1], tmp_reg);
  11853             break :result dst_mcv;
  11854         }
  11855 
  11856         if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
  11857             "TODO implement airMulSat for {}",
  11858             .{ty.fmt(pt)},
  11859         );
  11860 
  11861         try self.spillRegisters(&.{ .rax, .rcx, .rdx });
  11862         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
  11863         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  11864 
  11865         const lhs_mcv = try self.resolveInst(bin_op.lhs);
  11866         const lhs_lock = switch (lhs_mcv) {
  11867             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  11868             else => null,
  11869         };
  11870         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  11871 
  11872         const rhs_mcv = try self.resolveInst(bin_op.rhs);
  11873         const rhs_lock = switch (rhs_mcv) {
  11874             .register => |reg| self.register_manager.lockReg(reg),
  11875             else => null,
  11876         };
  11877         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  11878 
  11879         const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  11880         const limit_mcv = MCValue{ .register = limit_reg };
  11881         const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
  11882         defer self.register_manager.unlockReg(limit_lock);
  11883 
  11884         const reg_bits = self.regBitSize(ty);
  11885         const cc: Condition = if (ty.isSignedInt(zcu)) cc: {
  11886             try self.genSetReg(limit_reg, ty, lhs_mcv, .{});
  11887             try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv);
  11888             try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 });
  11889             try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
  11890                 .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
  11891             });
  11892             break :cc .o;
  11893         } else cc: {
  11894             try self.genSetReg(limit_reg, ty, .{
  11895                 .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - reg_bits),
  11896             }, .{});
  11897             break :cc .c;
  11898         };
  11899 
  11900         const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
  11901         const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
  11902         try self.asmCmovccRegisterRegister(
  11903             cc,
  11904             registerAlias(dst_mcv.register, cmov_abi_size),
  11905             registerAlias(limit_reg, cmov_abi_size),
  11906         );
  11907         break :result dst_mcv;
  11908     };
  11909     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  11910 }
  11911 
  11912 fn airAddSubWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
  11913     const pt = self.pt;
  11914     const zcu = pt.zcu;
  11915     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  11916     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
  11917     const result: MCValue = result: {
  11918         const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
  11919         const ty = self.typeOf(bin_op.lhs);
  11920         switch (ty.zigTypeTag(zcu)) {
  11921             .vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}),
  11922             .int => {
  11923                 try self.spillEflagsIfOccupied();
  11924                 try self.spillRegisters(&.{ .rcx, .rdi, .rsi });
  11925                 const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi });
  11926                 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  11927 
  11928                 const partial_mcv = try self.genBinOp(null, switch (tag) {
  11929                     .add_with_overflow => .add,
  11930                     .sub_with_overflow => .sub,
  11931                     else => unreachable,
  11932                 }, bin_op.lhs, bin_op.rhs);
  11933                 const int_info = ty.intInfo(zcu);
  11934                 const cc: Condition = switch (int_info.signedness) {
  11935                     .unsigned => .c,
  11936                     .signed => .o,
  11937                 };
  11938 
  11939                 const tuple_ty = self.typeOfIndex(inst);
  11940                 if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) {
  11941                     switch (partial_mcv) {
  11942                         .register => |reg| {
  11943                             self.eflags_inst = inst;
  11944                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
  11945                         },
  11946                         else => {},
  11947                     }
  11948 
  11949                     const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
  11950                     try self.genSetMem(
  11951                         .{ .frame = frame_index },
  11952                         @intCast(tuple_ty.structFieldOffset(1, zcu)),
  11953                         .u1,
  11954                         .{ .eflags = cc },
  11955                         .{},
  11956                     );
  11957                     try self.genSetMem(
  11958                         .{ .frame = frame_index },
  11959                         @intCast(tuple_ty.structFieldOffset(0, zcu)),
  11960                         ty,
  11961                         partial_mcv,
  11962                         .{},
  11963                     );
  11964                     break :result .{ .load_frame = .{ .index = frame_index } };
  11965                 }
  11966 
  11967                 const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
  11968                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
  11969                 break :result .{ .load_frame = .{ .index = frame_index } };
  11970             },
  11971             else => unreachable,
  11972         }
  11973     };
  11974     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  11975 }
  11976 
  11977 fn airShlWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
  11978     const pt = self.pt;
  11979     const zcu = pt.zcu;
  11980     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  11981     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
  11982     const result: MCValue = result: {
  11983         const lhs_ty = self.typeOf(bin_op.lhs);
  11984         const rhs_ty = self.typeOf(bin_op.rhs);
  11985         switch (lhs_ty.zigTypeTag(zcu)) {
  11986             .vector => return self.fail("TODO implement shl with overflow for Vector type", .{}),
  11987             .int => {
  11988                 try self.spillEflagsIfOccupied();
  11989                 try self.spillRegisters(&.{ .rcx, .rdi, .rsi });
  11990                 const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi });
  11991                 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  11992 
  11993                 const lhs = try self.resolveInst(bin_op.lhs);
  11994                 const rhs = try self.resolveInst(bin_op.rhs);
  11995 
  11996                 const int_info = lhs_ty.intInfo(zcu);
  11997 
  11998                 const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty);
  11999                 const partial_lock = switch (partial_mcv) {
  12000                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  12001                     else => null,
  12002                 };
  12003                 defer if (partial_lock) |lock| self.register_manager.unlockReg(lock);
  12004 
  12005                 const tmp_mcv = try self.genShiftBinOp(.shr, null, partial_mcv, rhs, lhs_ty, rhs_ty);
  12006                 const tmp_lock = switch (tmp_mcv) {
  12007                     .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  12008                     else => null,
  12009                 };
  12010                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
  12011 
  12012                 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs);
  12013                 const cc = Condition.ne;
  12014 
  12015                 const tuple_ty = self.typeOfIndex(inst);
  12016                 if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) {
  12017                     switch (partial_mcv) {
  12018                         .register => |reg| {
  12019                             self.eflags_inst = inst;
  12020                             break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
  12021                         },
  12022                         else => {},
  12023                     }
  12024 
  12025                     const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
  12026                     try self.genSetMem(
  12027                         .{ .frame = frame_index },
  12028                         @intCast(tuple_ty.structFieldOffset(1, zcu)),
  12029                         tuple_ty.fieldType(1, zcu),
  12030                         .{ .eflags = cc },
  12031                         .{},
  12032                     );
  12033                     try self.genSetMem(
  12034                         .{ .frame = frame_index },
  12035                         @intCast(tuple_ty.structFieldOffset(0, zcu)),
  12036                         tuple_ty.fieldType(0, zcu),
  12037                         partial_mcv,
  12038                         .{},
  12039                     );
  12040                     break :result .{ .load_frame = .{ .index = frame_index } };
  12041                 }
  12042 
  12043                 const frame_index =
  12044                     try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
  12045                 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
  12046                 break :result .{ .load_frame = .{ .index = frame_index } };
  12047             },
  12048             else => unreachable,
  12049         }
  12050     };
  12051     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  12052 }
  12053 
  12054 fn genSetFrameTruncatedOverflowCompare(
  12055     self: *CodeGen,
  12056     tuple_ty: Type,
  12057     frame_index: FrameIndex,
  12058     src_mcv: MCValue,
  12059     overflow_cc: ?Condition,
  12060 ) !void {
  12061     const pt = self.pt;
  12062     const zcu = pt.zcu;
  12063     const src_lock = switch (src_mcv) {
  12064         .register => |reg| self.register_manager.lockReg(reg),
  12065         else => null,
  12066     };
  12067     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  12068 
  12069     const ty = tuple_ty.fieldType(0, zcu);
  12070     const int_info = ty.intInfo(zcu);
  12071 
  12072     const hi_bits = (int_info.bits - 1) % 64 + 1;
  12073     const hi_ty = try pt.intType(int_info.signedness, hi_bits);
  12074 
  12075     const limb_bits: u16 = @intCast(if (int_info.bits <= 64) self.regBitSize(ty) else 64);
  12076     const limb_ty = try pt.intType(int_info.signedness, limb_bits);
  12077 
  12078     const rest_ty = try pt.intType(.unsigned, int_info.bits - hi_bits);
  12079 
  12080     const temp_regs =
  12081         try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp);
  12082     const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs);
  12083     defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
  12084 
  12085     const overflow_reg = temp_regs[0];
  12086     if (overflow_cc) |cc| try self.asmSetccRegister(cc, overflow_reg.to8());
  12087 
  12088     const scratch_reg = temp_regs[1];
  12089     const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
  12090     const hi_limb_mcv = if (hi_limb_off > 0)
  12091         src_mcv.address().offset(int_info.bits / 64 * 8).deref()
  12092     else
  12093         src_mcv;
  12094     try self.genSetReg(scratch_reg, limb_ty, hi_limb_mcv, .{});
  12095     try self.truncateRegister(hi_ty, scratch_reg);
  12096     try self.genBinOpMir(.{ ._, .cmp }, limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);
  12097 
  12098     const eq_reg = temp_regs[2];
  12099     if (overflow_cc) |_| {
  12100         try self.asmSetccRegister(.ne, eq_reg.to8());
  12101         try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg });
  12102     }
  12103 
  12104     const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu));
  12105     if (hi_limb_off > 0) try self.genSetMem(
  12106         .{ .frame = frame_index },
  12107         payload_off,
  12108         rest_ty,
  12109         src_mcv,
  12110         .{},
  12111     );
  12112     try self.genSetMem(
  12113         .{ .frame = frame_index },
  12114         payload_off + hi_limb_off,
  12115         limb_ty,
  12116         .{ .register = scratch_reg },
  12117         .{},
  12118     );
  12119     try self.genSetMem(
  12120         .{ .frame = frame_index },
  12121         @intCast(tuple_ty.structFieldOffset(1, zcu)),
  12122         tuple_ty.fieldType(1, zcu),
  12123         if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
  12124         .{},
  12125     );
  12126 }
  12127 
  12128 fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
  12129     const pt = self.pt;
  12130     const zcu = pt.zcu;
  12131     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  12132     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
  12133     const tuple_ty = self.typeOfIndex(inst);
  12134     const dst_ty = self.typeOf(bin_op.lhs);
  12135     const result: MCValue = switch (dst_ty.zigTypeTag(zcu)) {
  12136         .vector => return self.fail("TODO implement airMulWithOverflow for {}", .{dst_ty.fmt(pt)}),
  12137         .int => result: {
  12138             const dst_info = dst_ty.intInfo(zcu);
  12139             if (dst_info.bits > 128 and dst_info.signedness == .unsigned) {
  12140                 const slow_inc = self.hasFeature(.slow_incdec);
  12141                 const abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  12142                 const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
  12143 
  12144                 try self.spillRegisters(&.{ .rax, .rcx, .rdx });
  12145                 const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
  12146                 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  12147 
  12148                 const dst_mcv = try self.allocRegOrMem(inst, false);
  12149                 try self.genInlineMemset(
  12150                     dst_mcv.address(),
  12151                     .{ .immediate = 0 },
  12152                     .{ .immediate = tuple_ty.abiSize(zcu) },
  12153                     .{},
  12154                 );
  12155                 const lhs_mcv = try self.resolveInst(bin_op.lhs);
  12156                 const rhs_mcv = try self.resolveInst(bin_op.rhs);
  12157 
  12158                 const temp_regs =
  12159                     try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
  12160                 const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
  12161                 defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
  12162 
  12163                 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());
  12164 
  12165                 const outer_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  12166                 try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[1].to64(), .{
  12167                     .base = .{ .frame = rhs_mcv.load_frame.index },
  12168                     .mod = .{ .rm = .{
  12169                         .size = .qword,
  12170                         .index = temp_regs[0].to64(),
  12171                         .scale = .@"8",
  12172                         .disp = rhs_mcv.load_frame.off,
  12173                     } },
  12174                 });
  12175                 try self.asmRegisterRegister(.{ ._, .@"test" }, temp_regs[1].to64(), temp_regs[1].to64());
  12176                 const skip_inner = try self.asmJccReloc(.z, undefined);
  12177 
  12178                 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[2].to32(), temp_regs[2].to32());
  12179                 try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[3].to32(), temp_regs[0].to32());
  12180                 try self.asmRegisterRegister(.{ ._, .xor }, .ecx, .ecx);
  12181                 try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx);
  12182 
  12183                 const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  12184                 try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1));
  12185                 try self.asmMemoryRegister(.{ ._, .adc }, .{
  12186                     .base = .{ .frame = dst_mcv.load_frame.index },
  12187                     .mod = .{ .rm = .{
  12188                         .size = .qword,
  12189                         .index = temp_regs[3].to64(),
  12190                         .scale = .@"8",
  12191                         .disp = dst_mcv.load_frame.off +
  12192                             @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))),
  12193                     } },
  12194                 }, .rdx);
  12195                 try self.asmSetccRegister(.c, .cl);
  12196 
  12197                 try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{
  12198                     .base = .{ .frame = lhs_mcv.load_frame.index },
  12199                     .mod = .{ .rm = .{
  12200                         .size = .qword,
  12201                         .index = temp_regs[2].to64(),
  12202                         .scale = .@"8",
  12203                         .disp = lhs_mcv.load_frame.off,
  12204                     } },
  12205                 });
  12206                 try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64());
  12207 
  12208                 try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1));
  12209                 try self.asmMemoryRegister(.{ ._, .adc }, .{
  12210                     .base = .{ .frame = dst_mcv.load_frame.index },
  12211                     .mod = .{ .rm = .{
  12212                         .size = .qword,
  12213                         .index = temp_regs[3].to64(),
  12214                         .scale = .@"8",
  12215                         .disp = dst_mcv.load_frame.off +
  12216                             @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))),
  12217                     } },
  12218                 }, .rax);
  12219                 try self.asmSetccRegister(.c, .ch);
  12220 
  12221                 if (slow_inc) {
  12222                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
  12223                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1));
  12224                 } else {
  12225                     try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
  12226                     try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32());
  12227                 }
  12228                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len));
  12229                 _ = try self.asmJccReloc(.b, inner_loop);
  12230 
  12231                 try self.asmRegisterRegister(.{ ._, .@"or" }, .rdx, .rcx);
  12232                 const overflow = try self.asmJccReloc(.nz, undefined);
  12233                 const overflow_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  12234                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[2].to32(), .u(limb_len));
  12235                 const no_overflow = try self.asmJccReloc(.nb, undefined);
  12236                 if (slow_inc) {
  12237                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
  12238                 } else {
  12239                     try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
  12240                 }
  12241                 try self.asmMemoryImmediate(.{ ._, .cmp }, .{
  12242                     .base = .{ .frame = lhs_mcv.load_frame.index },
  12243                     .mod = .{ .rm = .{
  12244                         .size = .qword,
  12245                         .index = temp_regs[2].to64(),
  12246                         .scale = .@"8",
  12247                         .disp = lhs_mcv.load_frame.off - 8,
  12248                     } },
  12249                 }, .u(0));
  12250                 _ = try self.asmJccReloc(.z, overflow_loop);
  12251                 self.performReloc(overflow);
  12252                 try self.asmMemoryImmediate(.{ ._, .mov }, .{
  12253                     .base = .{ .frame = dst_mcv.load_frame.index },
  12254                     .mod = .{ .rm = .{
  12255                         .size = .byte,
  12256                         .disp = dst_mcv.load_frame.off +
  12257                             @as(i32, @intCast(tuple_ty.structFieldOffset(1, zcu))),
  12258                     } },
  12259                 }, .u(1));
  12260                 self.performReloc(no_overflow);
  12261 
  12262                 self.performReloc(skip_inner);
  12263                 if (slow_inc) {
  12264                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
  12265                 } else {
  12266                     try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
  12267                 }
  12268                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len));
  12269                 _ = try self.asmJccReloc(.b, outer_loop);
  12270 
  12271                 break :result dst_mcv;
  12272             }
  12273 
  12274             const lhs_active_bits = self.activeIntBits(bin_op.lhs);
  12275             const rhs_active_bits = self.activeIntBits(bin_op.rhs);
  12276             const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2);
  12277             const src_ty = try pt.intType(dst_info.signedness, src_bits);
  12278             if (src_bits > 64 and src_bits <= 128 and
  12279                 dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) {
  12280                 .signed => {
  12281                     const ptr_c_int = try pt.singleMutPtrType(.c_int);
  12282                     const overflow = try self.allocTempRegOrMem(.c_int, false);
  12283                     const result = try self.genCall(.{ .lib = .{
  12284                         .return_type = .i128_type,
  12285                         .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
  12286                         .callee = "__muloti4",
  12287                     } }, &.{ .i128, .i128, ptr_c_int }, &.{
  12288                         .{ .air_ref = bin_op.lhs },
  12289                         .{ .air_ref = bin_op.rhs },
  12290                         overflow.address(),
  12291                     }, .{});
  12292 
  12293                     const dst_mcv = try self.allocRegOrMem(inst, false);
  12294                     try self.genSetMem(
  12295                         .{ .frame = dst_mcv.load_frame.index },
  12296                         @intCast(tuple_ty.structFieldOffset(0, zcu)),
  12297                         tuple_ty.fieldType(0, zcu),
  12298                         result,
  12299                         .{},
  12300                     );
  12301                     try self.asmMemoryImmediate(
  12302                         .{ ._, .cmp },
  12303                         try overflow.mem(self, .{ .size = self.memSize(.c_int) }),
  12304                         .s(0),
  12305                     );
  12306                     try self.genSetMem(
  12307                         .{ .frame = dst_mcv.load_frame.index },
  12308                         @intCast(tuple_ty.structFieldOffset(1, zcu)),
  12309                         tuple_ty.fieldType(1, zcu),
  12310                         .{ .eflags = .ne },
  12311                         .{},
  12312                     );
  12313                     try self.freeValue(overflow);
  12314                     break :result dst_mcv;
  12315                 },
  12316                 .unsigned => {
  12317                     try self.spillEflagsIfOccupied();
  12318                     try self.spillRegisters(&.{ .rax, .rdx });
  12319                     const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx });
  12320                     defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  12321 
  12322                     const tmp_regs =
  12323                         try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
  12324                     const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs);
  12325                     defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
  12326 
  12327                     const lhs_mcv = try self.resolveInst(bin_op.lhs);
  12328                     const rhs_mcv = try self.resolveInst(bin_op.rhs);
  12329                     const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) {
  12330                         .register => |lhs_reg| switch (lhs_reg.class()) {
  12331                             else => lhs_mcv,
  12332                             .sse => {
  12333                                 const mat_lhs_mcv: MCValue = .{
  12334                                     .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp),
  12335                                 };
  12336                                 try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{});
  12337                                 break :mat_lhs_mcv mat_lhs_mcv;
  12338                             },
  12339                         },
  12340                         .load_symbol => {
  12341                             // TODO clean this up!
  12342                             const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
  12343                             break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  12344                         },
  12345                         else => lhs_mcv,
  12346                     };
  12347                     const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) {
  12348                         .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs),
  12349                         .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null },
  12350                         else => @splat(null),
  12351                     };
  12352                     defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
  12353                     const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) {
  12354                         .register => |rhs_reg| switch (rhs_reg.class()) {
  12355                             else => rhs_mcv,
  12356                             .sse => {
  12357                                 const mat_rhs_mcv: MCValue = .{
  12358                                     .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp),
  12359                                 };
  12360                                 try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{});
  12361                                 break :mat_rhs_mcv mat_rhs_mcv;
  12362                             },
  12363                         },
  12364                         .load_symbol => {
  12365                             // TODO clean this up!
  12366                             const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
  12367                             break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  12368                         },
  12369                         else => rhs_mcv,
  12370                     };
  12371                     const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) {
  12372                         .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs),
  12373                         .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null },
  12374                         else => @splat(null),
  12375                     };
  12376                     defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
  12377 
  12378                     if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
  12379                         .{ ._, .mov },
  12380                         .rax,
  12381                         try mat_lhs_mcv.mem(self, .{ .size = .qword }),
  12382                     ) else try self.asmRegisterRegister(
  12383                         .{ ._, .mov },
  12384                         .rax,
  12385                         mat_lhs_mcv.register_pair[0],
  12386                     );
  12387                     if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
  12388                         .{ ._, .mov },
  12389                         tmp_regs[0],
  12390                         try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  12391                     ) else try self.asmRegisterRegister(
  12392                         .{ ._, .mov },
  12393                         tmp_regs[0],
  12394                         mat_rhs_mcv.register_pair[1],
  12395                     );
  12396                     try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
  12397                     try self.asmSetccRegister(.nz, tmp_regs[1].to8());
  12398                     try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax);
  12399                     try self.asmSetccRegister(.o, tmp_regs[2].to8());
  12400                     if (mat_rhs_mcv.isBase())
  12401                         try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
  12402                     else
  12403                         try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
  12404                     try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
  12405                     try self.asmSetccRegister(.c, tmp_regs[3].to8());
  12406                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8());
  12407                     if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
  12408                         .{ ._, .mov },
  12409                         tmp_regs[0],
  12410                         try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  12411                     ) else try self.asmRegisterRegister(
  12412                         .{ ._, .mov },
  12413                         tmp_regs[0],
  12414                         mat_lhs_mcv.register_pair[1],
  12415                     );
  12416                     try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
  12417                     try self.asmSetccRegister(.nz, tmp_regs[3].to8());
  12418                     try self.asmRegisterRegister(
  12419                         .{ ._, .@"and" },
  12420                         tmp_regs[1].to8(),
  12421                         tmp_regs[3].to8(),
  12422                     );
  12423                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
  12424                     if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
  12425                         .{ .i_, .mul },
  12426                         tmp_regs[0],
  12427                         try mat_rhs_mcv.mem(self, .{ .size = .qword }),
  12428                     ) else try self.asmRegisterRegister(
  12429                         .{ .i_, .mul },
  12430                         tmp_regs[0],
  12431                         mat_rhs_mcv.register_pair[0],
  12432                     );
  12433                     try self.asmSetccRegister(.o, tmp_regs[2].to8());
  12434                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
  12435                     try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
  12436                     try self.asmSetccRegister(.c, tmp_regs[2].to8());
  12437                     try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
  12438 
  12439                     const dst_mcv = try self.allocRegOrMem(inst, false);
  12440                     try self.genSetMem(
  12441                         .{ .frame = dst_mcv.load_frame.index },
  12442                         @intCast(tuple_ty.structFieldOffset(0, zcu)),
  12443                         tuple_ty.fieldType(0, zcu),
  12444                         .{ .register_pair = .{ .rax, .rdx } },
  12445                         .{},
  12446                     );
  12447                     try self.genSetMem(
  12448                         .{ .frame = dst_mcv.load_frame.index },
  12449                         @intCast(tuple_ty.structFieldOffset(1, zcu)),
  12450                         tuple_ty.fieldType(1, zcu),
  12451                         .{ .register = tmp_regs[1] },
  12452                         .{},
  12453                     );
  12454                     break :result dst_mcv;
  12455                 },
  12456             };
  12457 
  12458             try self.spillEflagsIfOccupied();
  12459             try self.spillRegisters(&.{ .rax, .rcx, .rdx, .rdi, .rsi });
  12460             const reg_locks = self.register_manager.lockRegsAssumeUnused(5, .{ .rax, .rcx, .rdx, .rdi, .rsi });
  12461             defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  12462 
  12463             const cc: Condition = switch (dst_info.signedness) {
  12464                 .unsigned => .c,
  12465                 .signed => .o,
  12466             };
  12467 
  12468             const lhs = try self.resolveInst(bin_op.lhs);
  12469             const rhs = try self.resolveInst(bin_op.rhs);
  12470 
  12471             const extra_bits = if (dst_info.bits <= 64)
  12472                 self.regExtraBits(dst_ty)
  12473             else
  12474                 dst_info.bits % 64;
  12475             const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs);
  12476 
  12477             switch (partial_mcv) {
  12478                 .register => |reg| if (extra_bits == 0) {
  12479                     self.eflags_inst = inst;
  12480                     break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
  12481                 } else {
  12482                     const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
  12483                     try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
  12484                     break :result .{ .load_frame = .{ .index = frame_index } };
  12485                 },
  12486                 else => {
  12487                     // For now, this is the only supported multiply that doesn't fit in a register.
  12488                     if (dst_info.bits > 128 or src_bits != 64)
  12489                         return self.fail("TODO implement airWithOverflow from {} to {}", .{
  12490                             src_ty.fmt(pt), dst_ty.fmt(pt),
  12491                         });
  12492 
  12493                     const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
  12494                     if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
  12495                         try self.genSetMem(
  12496                             .{ .frame = frame_index },
  12497                             @intCast(tuple_ty.structFieldOffset(0, zcu)),
  12498                             tuple_ty.fieldType(0, zcu),
  12499                             partial_mcv,
  12500                             .{},
  12501                         );
  12502                         try self.genSetMem(
  12503                             .{ .frame = frame_index },
  12504                             @intCast(tuple_ty.structFieldOffset(1, zcu)),
  12505                             tuple_ty.fieldType(1, zcu),
  12506                             .{ .immediate = 0 }, // cc being set is impossible
  12507                             .{},
  12508                         );
  12509                     } else try self.genSetFrameTruncatedOverflowCompare(
  12510                         tuple_ty,
  12511                         frame_index,
  12512                         partial_mcv,
  12513                         null,
  12514                     );
  12515                     break :result .{ .load_frame = .{ .index = frame_index } };
  12516                 },
  12517             }
  12518         },
  12519         else => unreachable,
  12520     };
  12521     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  12522 }
  12523 
  12524 /// Generates signed or unsigned integer multiplication/division.
  12525 /// Clobbers .rax and .rdx registers.
  12526 /// Quotient is saved in .rax and remainder in .rdx.
        ///
        /// `tag[1]` selects the operation (`.mul` or `.div`; anything else is unreachable).
        /// For `.div`, `tag[0]` selects how the high half of the dividend is prepared:
        /// `._` clears it with an `xor`, `.i_` sign-extends into it.
        /// `lhs` is always loaded into .rax. `rhs` is used in place when it is a `.register`,
        /// `.indirect` or `.load_frame` value; any other value is first copied to a temporary register.
        /// Fails with a TODO error when the ABI size of `ty` is larger than 8 bytes.
  12527 fn genIntMulDivOpMir(self: *CodeGen, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
  12528     const pt = self.pt;
  12529     const abi_size: u32 = @intCast(ty.abiSize(pt.zcu));
  12530     const bit_size: u32 = @intCast(self.regBitSize(ty));
  12531     if (abi_size > 8) {
  12532         return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
  12533     }
  12534 
            // The left operand (multiplicand or dividend) always goes through .rax.
  12535     try self.genSetReg(.rax, ty, lhs, .{});
  12536     switch (tag[1]) {
  12537         else => unreachable,
                // Multiplication needs no preparation beyond loading .rax.
  12538         .mul => {},
  12539         .div => switch (tag[0]) {
                    // `._` form: clear the register holding the high half of the dividend.
                    // For an 8-bit operand that register is .ah, for wider operands it is .edx.
  12540             ._ => {
  12541                 const hi_reg: Register =
  12542                     switch (bit_size) {
  12543                     8 => .ah,
  12544                     16, 32, 64 => .edx,
  12545                     else => unreachable,
  12546                 };
  12547                 try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg);
  12548             },
                    // `.i_` form: emit the sign-extension instruction that matches the operand
                    // width (cbw / cwd / cdq / cqo) to fill the high half of the dividend.
  12549             .i_ => try self.asmOpOnly(.{ ._, switch (bit_size) {
  12550                 8 => .cbw,
  12551                 16 => .cwd,
  12552                 32 => .cdq,
  12553                 64 => .cqo,
  12554                 else => unreachable,
  12555             } }),
  12556             else => unreachable,
  12557         },
  12558     }
  12559 
            // Materialize the right operand: registers and frame/indirect memory are used as-is,
            // every other kind of value is copied into a temporary register first.
  12560     const mat_rhs: MCValue = switch (rhs) {
  12561         .register, .indirect, .load_frame => rhs,
  12562         else => .{ .register = try self.copyToTmpRegister(ty, rhs) },
  12563     };
            // Emit the single-operand instruction described by `tag` on the materialized operand.
            // NOTE(review): the switch above never yields `.memory`, so that arm looks unreachable
            // from here — confirm before relying on it.
  12564     switch (mat_rhs) {
  12565         .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)),
  12566         .memory, .indirect, .load_frame => try self.asmMemory(
  12567             tag,
  12568             try mat_rhs.mem(self, .{ .size = .fromSize(abi_size) }),
  12569         ),
  12570         else => unreachable,
  12571     }
            // After an 8-bit division, copy .ah into .dl so that the remainder is found in .rdx
            // as documented above.
  12572     if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah);
  12573 }
  12574 
  12575 /// Always returns a register.
  12576 /// Clobbers .rax and .rdx registers.
        ///
        /// Emits a division through `genIntMulDivOpMir` (signed or unsigned according to `ty`) and
        /// then adds a correction term to the quotient in .rax. The correction is derived from
        /// the sign bit of `divisor ^ dividend` and is replaced by the (zero) remainder when the
        /// remainder in .rdx is zero. The returned register is the one that held the divisor.
        /// `lhs` and `rhs` are copied to temporary registers unless they already are registers.
  12577 fn genInlineIntDivFloor(self: *CodeGen, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue {
  12578     const pt = self.pt;
  12579     const zcu = pt.zcu;
  12580     const abi_size: u32 = @intCast(ty.abiSize(zcu));
  12581     const int_info = ty.intInfo(zcu);
            // Both operands must live in registers; each one stays locked until this function returns.
  12582     const dividend = switch (lhs) {
  12583         .register => |reg| reg,
  12584         else => try self.copyToTmpRegister(ty, lhs),
  12585     };
  12586     const dividend_lock = self.register_manager.lockReg(dividend);
  12587     defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock);
  12588 
  12589     const divisor = switch (rhs) {
  12590         .register => |reg| reg,
  12591         else => try self.copyToTmpRegister(ty, rhs),
  12592     };
  12593     const divisor_lock = self.register_manager.lockReg(divisor);
  12594     defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock);
  12595 
            // Perform the division itself; the signedness of `ty` picks the `.i_` or `._` form.
  12596     try self.genIntMulDivOpMir(
  12597         switch (int_info.signedness) {
  12598             .signed => .{ .i_, .div },
  12599             .unsigned => .{ ._, .div },
  12600         },
  12601         ty,
  12602         .{ .register = dividend },
  12603         .{ .register = divisor },
  12604     );
  12605 
            // divisor ^= dividend: the top bit of the result is set when the operand signs differ.
            // NOTE(review): this reads `dividend` after the division, so it presumably must not
            // alias .rax/.rdx — confirm against the callers.
  12606     try self.asmRegisterRegister(
  12607         .{ ._, .xor },
  12608         registerAlias(divisor, abi_size),
  12609         registerAlias(dividend, abi_size),
  12610     );
            // Arithmetic shift right by `bits - 1` spreads that sign bit over the value, giving
            // -1 or 0 (assumes the sign bit of the type is bit `bits - 1` of the aliased
            // register — TODO confirm for integer widths that do not fill the register).
  12611     try self.asmRegisterImmediate(
  12612         .{ ._r, .sa },
  12613         registerAlias(divisor, abi_size),
  12614         .u(int_info.bits - 1),
  12615     );
            // Set the zero flag from the remainder left in .rdx by the division.
  12616     try self.asmRegisterRegister(
  12617         .{ ._, .@"test" },
  12618         registerAlias(.rdx, abi_size),
  12619         registerAlias(.rdx, abi_size),
  12620     );
            // When the remainder is zero, overwrite the correction with .rdx (which is zero).
            // The aliases are widened to at least 2 bytes for the conditional move.
  12621     try self.asmCmovccRegisterRegister(
  12622         .z,
  12623         registerAlias(divisor, @max(abi_size, 2)),
  12624         registerAlias(.rdx, @max(abi_size, 2)),
  12625     );
            // correction += quotient (.rax); the sum is the result and lives in `divisor`.
  12626     try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax });
  12627     return MCValue{ .register = divisor };
  12628 }
  12629 
  12630 fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers the shift AIR instructions `shl`, `shl_exact`, `shr` and `shr_exact`.
            // Scalar integers go through `genShiftBinOp`. Integer vectors are only handled for
            // 16/32/64-bit elements when the shift amount is either a comptime-known repeated
            // element or the result of a `.splat` instruction. Every other case fails with a TODO.
  12631     const pt = self.pt;
  12632     const zcu = pt.zcu;
  12633     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  12634 
  12635     const air_tags = self.air.instructions.items(.tag);
  12636     const tag = air_tags[@intFromEnum(inst)];
  12637     const lhs_ty = self.typeOf(bin_op.lhs);
  12638     const rhs_ty = self.typeOf(bin_op.rhs);
  12639     const result: MCValue = result: {
  12640         switch (lhs_ty.zigTypeTag(zcu)) {
  12641             .int => {
                        // .rcx is spilled and claimed before the operands are resolved
                        // (presumably because the shift count is passed in cl — confirm in genShiftBinOp).
  12642                 try self.spillRegisters(&.{.rcx});
  12643                 try self.register_manager.getKnownReg(.rcx, null);
  12644                 const lhs_mcv = try self.resolveInst(bin_op.lhs);
  12645                 const rhs_mcv = try self.resolveInst(bin_op.rhs);
  12646 
  12647                 const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
  12648                 switch (tag) {
                            // These tags need no post-processing of the shifted value.
  12649                     .shr, .shr_exact, .shl_exact => {},
                            // A plain `shl` result is truncated back to `lhs_ty` afterwards.
  12650                     .shl => switch (dst_mcv) {
  12651                         .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
                                // For a register pair only the second register is truncated.
  12652                         .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
                                // Result in the frame: load the 8-byte limb that contains the top bit
                                // of `lhs_ty`, truncate it in a temporary register and store it back.
  12653                         .load_frame => |frame_addr| {
  12654                             const tmp_reg =
  12655                                 try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  12656                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  12657                             defer self.register_manager.unlockReg(tmp_lock);
  12658 
  12659                             const lhs_bits: u31 = @intCast(lhs_ty.bitSize(zcu));
                                    // Integers wider than 64 bits are accessed one `usize` limb at a time.
  12660                             const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty;
                                    // Byte offset of the limb holding bit `lhs_bits - 1`.
  12661                             const off = frame_addr.off + (lhs_bits - 1) / 64 * 8;
  12662                             try self.genSetReg(
  12663                                 tmp_reg,
  12664                                 tmp_ty,
  12665                                 .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
  12666                                 .{},
  12667                             );
  12668                             try self.truncateRegister(lhs_ty, tmp_reg);
  12669                             try self.genSetMem(
  12670                                 .{ .frame = frame_addr.index },
  12671                                 off,
  12672                                 tmp_ty,
  12673                                 .{ .register = tmp_reg },
  12674                                 .{},
  12675                             );
  12676                         },
  12677                         else => {},
  12678                     },
  12679                     else => unreachable,
  12680                 }
  12681                 break :result dst_mcv;
  12682             },
  12683             .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
                        // The switch below picks the MIR tag for a packed shift, or null when the
                        // element width / vector length / CPU feature combination is not supported.
                        // Vectors of up to 128 bits use the `vp_*` tags with AVX and the `p_*` tags
                        // otherwise; vectors of up to 256 bits require AVX2.
                        // Signed right shifts select `sra`, unsigned ones `srl`, left shifts `sll`.
  12684                 .int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(zcu).intInfo(zcu).bits) {
  12685                     else => null,
  12686                     16 => switch (lhs_ty.vectorLen(zcu)) {
  12687                         else => null,
  12688                         1...8 => switch (tag) {
  12689                             else => unreachable,
  12690                             .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  12691                                 .signed => if (self.hasFeature(.avx))
  12692                                     .{ .vp_w, .sra }
  12693                                 else
  12694                                     .{ .p_w, .sra },
  12695                                 .unsigned => if (self.hasFeature(.avx))
  12696                                     .{ .vp_w, .srl }
  12697                                 else
  12698                                     .{ .p_w, .srl },
  12699                             },
  12700                             .shl, .shl_exact => if (self.hasFeature(.avx))
  12701                                 .{ .vp_w, .sll }
  12702                             else
  12703                                 .{ .p_w, .sll },
  12704                         },
  12705                         9...16 => switch (tag) {
  12706                             else => unreachable,
  12707                             .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  12708                                 .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null,
  12709                                 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null,
  12710                             },
  12711                             .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null,
  12712                         },
  12713                     },
  12714                     32 => switch (lhs_ty.vectorLen(zcu)) {
  12715                         else => null,
  12716                         1...4 => switch (tag) {
  12717                             else => unreachable,
  12718                             .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  12719                                 .signed => if (self.hasFeature(.avx))
  12720                                     .{ .vp_d, .sra }
  12721                                 else
  12722                                     .{ .p_d, .sra },
  12723                                 .unsigned => if (self.hasFeature(.avx))
  12724                                     .{ .vp_d, .srl }
  12725                                 else
  12726                                     .{ .p_d, .srl },
  12727                             },
  12728                             .shl, .shl_exact => if (self.hasFeature(.avx))
  12729                                 .{ .vp_d, .sll }
  12730                             else
  12731                                 .{ .p_d, .sll },
  12732                         },
  12733                         5...8 => switch (tag) {
  12734                             else => unreachable,
  12735                             .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  12736                                 .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null,
  12737                                 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null,
  12738                             },
  12739                             .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null,
  12740                         },
  12741                     },
                            // NOTE(review): the signed prongs below select a quadword arithmetic right
                            // shift (`sra` with a `_q` tag). The SSE2/AVX/AVX2 instruction sets appear
                            // to provide only word and doubleword forms of that shift — verify that
                            // this tag can be encoded for the feature levels checked here.
  12742                     64 => switch (lhs_ty.vectorLen(zcu)) {
  12743                         else => null,
  12744                         1...2 => switch (tag) {
  12745                             else => unreachable,
  12746                             .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  12747                                 .signed => if (self.hasFeature(.avx))
  12748                                     .{ .vp_q, .sra }
  12749                                 else
  12750                                     .{ .p_q, .sra },
  12751                                 .unsigned => if (self.hasFeature(.avx))
  12752                                     .{ .vp_q, .srl }
  12753                                 else
  12754                                     .{ .p_q, .srl },
  12755                             },
  12756                             .shl, .shl_exact => if (self.hasFeature(.avx))
  12757                                 .{ .vp_q, .sll }
  12758                             else
  12759                                 .{ .p_q, .sll },
  12760                         },
  12761                         3...4 => switch (tag) {
  12762                             else => unreachable,
  12763                             .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  12764                                 .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null,
  12765                                 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null,
  12766                             },
  12767                             .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null,
  12768                         },
  12769                     },
                        // Case 1: the shift amount has a comptime-known value. Only an aggregate
                        // stored as one repeated element is handled; it becomes an immediate operand.
  12770                 })) |mir_tag| if (try self.air.value(bin_op.rhs, pt)) |rhs_val| {
  12771                     switch (zcu.intern_pool.indexToKey(rhs_val.toIntern())) {
  12772                         .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) {
  12773                             .repeated_elem => |rhs_elem| {
  12774                                 const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
  12775 
  12776                                 const lhs_mcv = try self.resolveInst(bin_op.lhs);
                                        // Choose destination and source registers:
                                        //  - reuse the operand's register for both when that is allowed;
                                        //  - with AVX, allocate a fresh destination and read lhs in place;
                                        //  - otherwise copy lhs into a new tracked register used for both.
  12777                                 const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
  12778                                     self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
  12779                                     .{lhs_mcv.getReg().?} ** 2
  12780                                 else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
  12781                                     try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
  12782                                     lhs_mcv.getReg().?,
  12783                                 } else .{(try self.copyToRegisterWithInstTracking(
  12784                                     inst,
  12785                                     lhs_ty,
  12786                                     lhs_mcv,
  12787                                 )).register} ** 2;
  12788                                 const reg_locks =
  12789                                     self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
  12790                                 defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
  12791                                     self.register_manager.unlockReg(lock);
  12792 
                                        // The repeated element's unsigned value is the shift count.
  12793                                 const shift_imm: Immediate =
  12794                                     .u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(zcu)));
                                        // AVX uses the form with separate destination and source registers;
                                        // without AVX the shift is done in place, so both must be the same.
  12795                                 if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate(
  12796                                     mir_tag,
  12797                                     registerAlias(dst_reg, abi_size),
  12798                                     registerAlias(lhs_reg, abi_size),
  12799                                     shift_imm,
  12800                                 ) else {
  12801                                     assert(dst_reg.id() == lhs_reg.id());
  12802                                     try self.asmRegisterImmediate(
  12803                                         mir_tag,
  12804                                         registerAlias(dst_reg, abi_size),
  12805                                         shift_imm,
  12806                                     );
  12807                                 }
  12808                                 break :result .{ .register = dst_reg };
  12809                             },
                                    // Other aggregate storages fall through to the TODO failure below.
  12810                             else => {},
  12811                         },
  12812                         else => {},
  12813                     }
                        // Case 2: the shift amount is produced by a `.splat` instruction at runtime.
  12814                 } else if (bin_op.rhs.toIndex()) |rhs_inst| switch (air_tags[@intFromEnum(rhs_inst)]) {
  12815                     .splat => {
  12816                         const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
  12817 
  12818                         const lhs_mcv = try self.resolveInst(bin_op.lhs);
                                // Same destination/source register selection as in the comptime case above.
  12819                         const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
  12820                             self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
  12821                             .{lhs_mcv.getReg().?} ** 2
  12822                         else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
  12823                             try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
  12824                             lhs_mcv.getReg().?,
  12825                         } else .{(try self.copyToRegisterWithInstTracking(
  12826                             inst,
  12827                             lhs_ty,
  12828                             lhs_mcv,
  12829                         )).register} ** 2;
  12830                         const reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
  12831                         defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
  12832                             self.register_manager.unlockReg(lock);
  12833 
                                // Copy the shift-amount vector into a temporary register that may be modified.
  12834                         const shift_reg =
  12835                             try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs });
  12836                         const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg);
  12837                         defer self.register_manager.unlockReg(shift_lock);
  12838 
                                // Build a 16-byte constant mask: the first byte is the maximum value of the
                                // shift-amount element type (as a u8), the remaining 15 bytes are zero.
  12839                         const mask_ty = try pt.vectorType(.{ .len = 16, .child = .u8_type });
  12840                         const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
  12841                             .ty = mask_ty.toIntern(),
  12842                             .storage = .{ .elems = &([1]InternPool.Index{
  12843                                 (try rhs_ty.childType(zcu).maxIntScalar(pt, .u8)).toIntern(),
  12844                             } ++ [1]InternPool.Index{
  12845                                 (try pt.intValue(.u8, 0)).toIntern(),
  12846                             } ** 15) },
  12847                         } })));
                                // The mask is addressed through a temporary register holding its address.
  12848                         const mask_addr_reg = try self.copyToTmpRegister(.usize, mask_mcv.address());
  12849                         const mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg);
  12850                         defer self.register_manager.unlockReg(mask_addr_lock);
  12851 
                                // AND the shift register with the mask so that only the masked first byte
                                // remains, then shift by that register.
  12852                         if (self.hasFeature(.avx)) {
  12853                             try self.asmRegisterRegisterMemory(
  12854                                 .{ .vp_, .@"and" },
  12855                                 shift_reg.to128(),
  12856                                 shift_reg.to128(),
  12857                                 .{
  12858                                     .base = .{ .reg = mask_addr_reg },
  12859                                     .mod = .{ .rm = .{ .size = .xword } },
  12860                                 },
  12861                             );
  12862                             try self.asmRegisterRegisterRegister(
  12863                                 mir_tag,
  12864                                 registerAlias(dst_reg, abi_size),
  12865                                 registerAlias(lhs_reg, abi_size),
  12866                                 shift_reg.to128(),
  12867                             );
  12868                         } else {
  12869                             try self.asmRegisterMemory(
  12870                                 .{ .p_, .@"and" },
  12871                                 shift_reg.to128(),
  12872                                 .{
  12873                                     .base = .{ .reg = mask_addr_reg },
  12874                                     .mod = .{ .rm = .{ .size = .xword } },
  12875                                 },
  12876                             );
                                    // The non-AVX shift works in place, so destination and source must match.
  12877                             assert(dst_reg.id() == lhs_reg.id());
  12878                             try self.asmRegisterRegister(
  12879                                 mir_tag,
  12880                                 registerAlias(dst_reg, abi_size),
  12881                                 shift_reg.to128(),
  12882                             );
  12883                         }
  12884                         break :result .{ .register = dst_reg };
  12885                     },
  12886                     else => {},
  12887                 },
  12888                 else => {},
  12889             },
  12890             else => {},
  12891         }
                // Reached whenever no case above produced a result via `break :result`.
  12892         return self.fail("TODO implement airShlShrBinOp for {}", .{lhs_ty.fmt(pt)});
  12893     };
  12894     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  12895 }
  12896 
  12897 fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void {
  12898     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  12899     _ = bin_op;
  12900     return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
  12901     //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  12902 }
  12903 
  12904 fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
  12905     const zcu = self.pt.zcu;
  12906     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  12907     const result: MCValue = result: {
  12908         const pl_ty = self.typeOfIndex(inst);
  12909         if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
  12910 
  12911         const opt_mcv = try self.resolveInst(ty_op.operand);
  12912         if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) {
  12913             const pl_mcv: MCValue = switch (opt_mcv) {
  12914                 .register_overflow => |ro| pl: {
  12915                     self.eflags_inst = null; // actually stop tracking the overflow part
  12916                     break :pl .{ .register = ro.reg };
  12917                 },
  12918                 else => opt_mcv,
  12919             };
  12920             switch (pl_mcv) {
  12921                 .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg),
  12922                 else => {},
  12923             }
  12924             break :result pl_mcv;
  12925         }
  12926 
  12927         const pl_mcv = try self.allocRegOrMem(inst, true);
  12928         try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) {
  12929             else => opt_mcv,
  12930             .register_overflow => |ro| .{ .register = ro.reg },
  12931         }, .{});
  12932         break :result pl_mcv;
  12933     };
  12934     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  12935 }
  12936 
  12937 fn airOptionalPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  12938     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  12939 
  12940     const dst_ty = self.typeOfIndex(inst);
  12941     const opt_mcv = try self.resolveInst(ty_op.operand);
  12942 
  12943     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
  12944         opt_mcv
  12945     else
  12946         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
  12947     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  12948 }
  12949 
        /// Handles `inst` whose operand is a pointer to an optional and whose result is a
        /// pointer of type `dst_ty`.
        ///
        /// If the optional's representation is just its payload, the pointer is passed
        /// through (reusing the operand's location when possible). Otherwise a `bool` 1 is
        /// stored at byte offset `abiSize(payload)` from the pointer (presumably the
        /// optional's "has value" flag; confirm against the optional layout) and the
        /// pointer register becomes the result.
        /// The result is `.unreach` whenever liveness reports `inst` as unused.
  12950 fn airOptionalPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void {
  12951     const pt = self.pt;
  12952     const zcu = pt.zcu;
  12953     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  12954     const result = result: {
  12955         const dst_ty = self.typeOfIndex(inst);
  12956         const src_ty = self.typeOf(ty_op.operand);
  12957         const opt_ty = src_ty.childType(zcu);
  12958         const src_mcv = try self.resolveInst(ty_op.operand);
  12959 
                // No store is emitted in this case; the result is the operand pointer.
  12960         if (opt_ty.optionalReprIsPayload(zcu)) {
  12961             break :result if (self.liveness.isUnused(inst))
  12962                 .unreach
  12963             else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  12964                 src_mcv
  12965             else
  12966                 try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
  12967         }
  12968 
                // The store below addresses memory through a register, so the pointer is
                // brought into one. An unused result only gets a temporary register.
  12969         const dst_mcv: MCValue = if (src_mcv.isRegister() and
  12970             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  12971             src_mcv
  12972         else if (self.liveness.isUnused(inst))
  12973             .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) }
  12974         else
  12975             try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
  12976 
  12977         const pl_ty = dst_ty.childType(zcu);
  12978         const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu));
                // Store `true` at displacement `pl_abi_size` from the pointer register.
  12979         try self.genSetMem(
  12980             .{ .reg = dst_mcv.getReg().? },
  12981             pl_abi_size,
  12982             .bool,
  12983             .{ .immediate = 1 },
  12984             .{},
  12985         );
  12986         break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
  12987     };
  12988     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  12989 }
  12990 
        /// Produces the error value of the error-union operand of `inst`.
        ///
        /// Results, in the order they are checked:
        /// * immediate 0 when the error set is empty;
        /// * the operand itself when the payload type has no runtime bits;
        /// * for a register operand: a copy tracked for `inst`, shifted by
        ///   `err_off * 8` bits when the error sits at a non-zero offset, otherwise
        ///   truncated to `anyerror`;
        /// * for a frame operand: the same frame slot displaced by `err_off`.
        /// Any other operand location is reported through `self.fail`.
  12991 fn airUnwrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void {
  12992     const pt = self.pt;
  12993     const zcu = pt.zcu;
  12994     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  12995     const err_union_ty = self.typeOf(ty_op.operand);
  12996     const err_ty = err_union_ty.errorUnionSet(zcu);
  12997     const payload_ty = err_union_ty.errorUnionPayload(zcu);
  12998     const operand = try self.resolveInst(ty_op.operand);
  12999 
  13000     const result: MCValue = result: {
  13001         if (err_ty.errorSetIsEmpty(zcu)) {
  13002             break :result MCValue{ .immediate = 0 };
  13003         }
  13004 
  13005         if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
  13006             break :result operand;
  13007         }
  13008 
  13009         const err_off = codegen.errUnionErrorOffset(payload_ty, zcu);
  13010         switch (operand) {
  13011             .register => |reg| {
  13012                 // TODO reuse operand
                        // Keep the source register locked while the copy is allocated.
  13013                 const eu_lock = self.register_manager.lockReg(reg);
  13014                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
  13015 
  13016                 const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
                        // `.{ ._r, .sh }` is presumably a logical right shift, moving the
                        // error bits down to bit 0. The `u6` cast limits the shift to 63.
  13017                 if (err_off > 0) try self.genShiftBinOpMir(
  13018                     .{ ._r, .sh },
  13019                     err_union_ty,
  13020                     result,
  13021                     .u8,
  13022                     .{ .immediate = @as(u6, @intCast(err_off * 8)) },
  13023                 ) else try self.truncateRegister(.anyerror, result.register);
  13024                 break :result result;
  13025             },
  13026             .load_frame => |frame_addr| break :result .{ .load_frame = .{
  13027                 .index = frame_addr.index,
  13028                 .off = frame_addr.off + @as(i32, @intCast(err_off)),
  13029             } },
  13030             else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}),
  13031         }
  13032     };
  13033     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13034 }
  13035 
  13036 fn airUnwrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
  13037     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13038     const operand_ty = self.typeOf(ty_op.operand);
  13039     const operand = try self.resolveInst(ty_op.operand);
  13040     const result = try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand);
  13041     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13042 }
  13043 
  13044 // *(E!T) -> E
        // Loads the error value through a pointer to an error union: the pointer is
        // brought into a register, a general-purpose register is allocated for `inst`,
        // and a `mov` of the error set's ABI size reads from `[pointer + err_off]`.
  13045 fn airUnwrapErrUnionErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13046     const pt = self.pt;
  13047     const zcu = pt.zcu;
  13048     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13049 
  13050     const src_ty = self.typeOf(ty_op.operand);
  13051     const src_mcv = try self.resolveInst(ty_op.operand);
            // The memory operand below needs the pointer in a register.
  13052     const src_reg = switch (src_mcv) {
  13053         .register => |reg| reg,
  13054         else => try self.copyToTmpRegister(src_ty, src_mcv),
  13055     };
            // Locked so that allocating `dst_reg` cannot hand out the pointer register.
  13056     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  13057     defer self.register_manager.unlockReg(src_lock);
  13058 
  13059     const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  13060     const dst_mcv = MCValue{ .register = dst_reg };
  13061     const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  13062     defer self.register_manager.unlockReg(dst_lock);
  13063 
  13064     const eu_ty = src_ty.childType(zcu);
  13065     const pl_ty = eu_ty.errorUnionPayload(zcu);
  13066     const err_ty = eu_ty.errorUnionSet(zcu);
  13067     const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
  13068     const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu));
            // mov dst, [src_reg + err_off], sized to the error set's ABI size.
  13069     try self.asmRegisterMemory(
  13070         .{ ._, .mov },
  13071         registerAlias(dst_reg, err_abi_size),
  13072         .{
  13073             .base = .{ .reg = src_reg },
  13074             .mod = .{ .rm = .{
  13075                 .size = .fromSize(err_abi_size),
  13076                 .disp = err_off,
  13077             } },
  13078         },
  13079     );
  13080 
  13081     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  13082 }
  13083 
  13084 // *(E!T) -> *T
  13085 fn airUnwrapErrUnionPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13086     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13087     const operand_ty = self.typeOf(ty_op.operand);
  13088     const operand = try self.resolveInst(ty_op.operand);
  13089     const result = try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand);
  13090     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13091 }
  13092 
        /// Given a pointer to an error union, stores 0 into the error field through the
        /// pointer and, if the result of `inst` is used, yields the address of the
        /// payload field (`pointer + payload offset`) in a register.
        /// The result is `.unreach` when liveness reports `inst` as unused; the store is
        /// emitted either way.
  13093 fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void {
  13094     const pt = self.pt;
  13095     const zcu = pt.zcu;
  13096     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13097     const result: MCValue = result: {
  13098         const src_ty = self.typeOf(ty_op.operand);
  13099         const src_mcv = try self.resolveInst(ty_op.operand);
                // The memory operands below need the pointer in a register.
  13100         const src_reg = switch (src_mcv) {
  13101             .register => |reg| reg,
  13102             else => try self.copyToTmpRegister(src_ty, src_mcv),
  13103         };
  13104         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  13105         defer self.register_manager.unlockReg(src_lock);
  13106 
  13107         const eu_ty = src_ty.childType(zcu);
  13108         const pl_ty = eu_ty.errorUnionPayload(zcu);
  13109         const err_ty = eu_ty.errorUnionSet(zcu);
  13110         const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
  13111         const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu));
                // mov [src_reg + err_off], 0 with the error set's ABI size.
  13112         try self.asmMemoryImmediate(
  13113             .{ ._, .mov },
  13114             .{
  13115                 .base = .{ .reg = src_reg },
  13116                 .mod = .{ .rm = .{
  13117                     .size = .fromSize(err_abi_size),
  13118                     .disp = err_off,
  13119                 } },
  13120             },
  13121             .u(0),
  13122         );
  13123 
  13124         if (self.liveness.isUnused(inst)) break :result .unreach;
  13125 
  13126         const dst_ty = self.typeOfIndex(inst);
                // Reuse the pointer register for the result when allowed, else allocate one.
  13127         const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  13128             src_reg
  13129         else
  13130             try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
                // `lockReg` returns an optional lock, so a null lock is tolerated here
                // (`dst_reg` may be the `src_reg` locked above).
  13131         const dst_lock = self.register_manager.lockReg(dst_reg);
  13132         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  13133 
  13134         const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu));
  13135         const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
                // lea dst, [src_reg + pl_off]
  13136         try self.asmRegisterMemory(
  13137             .{ ._, .lea },
  13138             registerAlias(dst_reg, dst_abi_size),
  13139             .{
  13140                 .base = .{ .reg = src_reg },
  13141                 .mod = .{ .rm = .{ .size = .qword, .disp = pl_off } },
  13142             },
  13143         );
  13144         break :result .{ .register = dst_reg };
  13145     };
  13146     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13147 }
  13148 
        /// Extracts the payload of the error-union value `err_union`, whose type is
        /// `err_union_ty`, and returns where the payload lives.
        ///
        /// * `maybe_inst`: when non-null, register results are obtained with
        ///   `copyToRegisterWithInstTracking` for that instruction; when null, with
        ///   `copyToTmpRegister`.
        ///
        /// Returns `.none` when the payload has no runtime bits. A frame operand yields
        /// the same frame slot displaced by the payload offset. A register operand is
        /// copied, then shifted by `payload_off * 8` bits (or truncated to the payload
        /// type when the offset is 0). Other operand locations go through `self.fail`.
  13149 fn genUnwrapErrUnionPayloadMir(
  13150     self: *CodeGen,
  13151     maybe_inst: ?Air.Inst.Index,
  13152     err_union_ty: Type,
  13153     err_union: MCValue,
  13154 ) !MCValue {
  13155     const pt = self.pt;
  13156     const zcu = pt.zcu;
  13157     const payload_ty = err_union_ty.errorUnionPayload(zcu);
  13158 
  13159     const result: MCValue = result: {
  13160         if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
  13161 
  13162         const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu));
  13163         switch (err_union) {
  13164             .load_frame => |frame_addr| break :result .{ .load_frame = .{
  13165                 .index = frame_addr.index,
  13166                 .off = frame_addr.off + payload_off,
  13167             } },
  13168             .register => |reg| {
  13169                 // TODO reuse operand
                        // Keep the source register locked while copies are allocated.
  13170                 const eu_lock = self.register_manager.lockReg(reg);
  13171                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
  13172 
                        // True when the register set chosen for the payload type covers
                        // the general-purpose class. Only then is the shifted copy itself
                        // used as the result.
  13173                 const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp);
  13174                 const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null)
  13175                     try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union)
  13176                 else
  13177                     .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
                        // `.{ ._r, .sh }` is presumably a logical right shift, moving the
                        // payload bits down to bit 0.
  13178                 if (payload_off > 0) try self.genShiftBinOpMir(
  13179                     .{ ._r, .sh },
  13180                     err_union_ty,
  13181                     result_mcv,
  13182                     .u8,
  13183                     .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
  13184                 ) else try self.truncateRegister(payload_ty, result_mcv.register);
                        // Otherwise the extracted bits are copied once more, this time
                        // using the payload type.
  13185                 break :result if (payload_in_gp)
  13186                     result_mcv
  13187                 else if (maybe_inst) |inst|
  13188                     try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv)
  13189                 else
  13190                     .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) };
  13191             },
  13192             else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {}", .{err_union}),
  13193         }
  13194     };
  13195 
  13196     return result;
  13197 }
  13198 
  13199 fn genUnwrapErrUnionPayloadPtrMir(
  13200     self: *CodeGen,
  13201     maybe_inst: ?Air.Inst.Index,
  13202     ptr_ty: Type,
  13203     ptr_mcv: MCValue,
  13204 ) !MCValue {
  13205     const pt = self.pt;
  13206     const zcu = pt.zcu;
  13207     const err_union_ty = ptr_ty.childType(zcu);
  13208     const payload_ty = err_union_ty.errorUnionPayload(zcu);
  13209 
  13210     const result: MCValue = result: {
  13211         const payload_off = codegen.errUnionPayloadOffset(payload_ty, zcu);
  13212         const result_mcv: MCValue = if (maybe_inst) |inst|
  13213             try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv)
  13214         else
  13215             .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) };
  13216         try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off });
  13217         break :result result_mcv;
  13218     };
  13219 
  13220     return result;
  13221 }
  13222 
  13223 fn airErrReturnTrace(self: *CodeGen, inst: Air.Inst.Index) !void {
  13224     _ = inst;
  13225     return self.fail("TODO implement airErrReturnTrace for {}", .{self.target.cpu.arch});
  13226     //return self.finishAir(inst, result, .{ .none, .none, .none });
  13227 }
  13228 
  13229 fn airSetErrReturnTrace(self: *CodeGen, inst: Air.Inst.Index) !void {
  13230     _ = inst;
  13231     return self.fail("TODO implement airSetErrReturnTrace for {}", .{self.target.cpu.arch});
  13232 }
  13233 
  13234 fn airSaveErrReturnTraceIndex(self: *CodeGen, inst: Air.Inst.Index) !void {
  13235     _ = inst;
  13236     return self.fail("TODO implement airSaveErrReturnTraceIndex for {}", .{self.target.cpu.arch});
  13237 }
  13238 
        /// Wraps the payload operand of `inst` into the optional type of `inst`.
        ///
        /// * Payload without runtime bits: the result is immediate 1.
        /// * Optional represented by its payload: the operand is reused when possible,
        ///   otherwise copied, with nothing else written.
        /// * Otherwise the payload is copied into a new register or frame slot and a
        ///   flag is set just past the payload: bit `pl_abi_size * 8` of the register,
        ///   or the byte at offset `pl_abi_size` in the frame slot.
  13239 fn airWrapOptional(self: *CodeGen, inst: Air.Inst.Index) !void {
  13240     const pt = self.pt;
  13241     const zcu = pt.zcu;
  13242     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13243     const result: MCValue = result: {
  13244         const pl_ty = self.typeOf(ty_op.operand);
  13245         if (!pl_ty.hasRuntimeBits(zcu)) break :result .{ .immediate = 1 };
  13246 
  13247         const opt_ty = self.typeOfIndex(inst);
  13248         const pl_mcv = try self.resolveInst(ty_op.operand);
  13249         const same_repr = opt_ty.optionalReprIsPayload(zcu);
  13250         if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv;
  13251 
                // Keep a register-held payload locked while the result is allocated.
  13252         const pl_lock: ?RegisterLock = switch (pl_mcv) {
  13253             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  13254             else => null,
  13255         };
  13256         defer if (pl_lock) |lock| self.register_manager.unlockReg(lock);
  13257 
  13258         const opt_mcv = try self.allocRegOrMem(inst, true);
  13259         try self.genCopy(pl_ty, opt_mcv, pl_mcv, .{});
  13260 
  13261         if (!same_repr) {
  13262             const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu));
  13263             switch (opt_mcv) {
  13264                 else => unreachable,
  13265 
  13266                 .register => |opt_reg| {
                            // Truncate to the payload type, then apply `.{ ._s, .bt }`
                            // (presumably `bts`, bit test and set) to the first bit above
                            // the payload.
  13267                     try self.truncateRegister(pl_ty, opt_reg);
  13268                     try self.asmRegisterImmediate(
  13269                         .{ ._s, .bt },
  13270                         opt_reg,
  13271                         .u(@as(u6, @intCast(pl_abi_size * 8))),
  13272                     );
  13273                 },
  13274 
                        // mov byte [frame + off + pl_abi_size], 1
  13275                 .load_frame => |frame_addr| try self.asmMemoryImmediate(
  13276                     .{ ._, .mov },
  13277                     .{
  13278                         .base = .{ .frame = frame_addr.index },
  13279                         .mod = .{ .rm = .{
  13280                             .size = .byte,
  13281                             .disp = frame_addr.off + pl_abi_size,
  13282                         } },
  13283                     },
  13284                     .u(1),
  13285                 ),
  13286             }
  13287         }
  13288         break :result opt_mcv;
  13289     };
  13290     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13291 }
  13292 
  13293 /// T to E!T
  13294 fn airWrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
  13295     const pt = self.pt;
  13296     const zcu = pt.zcu;
  13297     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13298 
  13299     const eu_ty = ty_op.ty.toType();
  13300     const pl_ty = eu_ty.errorUnionPayload(zcu);
  13301     const err_ty = eu_ty.errorUnionSet(zcu);
  13302     const operand = try self.resolveInst(ty_op.operand);
  13303 
  13304     const result: MCValue = result: {
  13305         if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .{ .immediate = 0 };
  13306 
  13307         const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu));
  13308         const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu));
  13309         const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
  13310         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand, .{});
  13311         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 }, .{});
  13312         break :result .{ .load_frame = .{ .index = frame_index } };
  13313     };
  13314     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13315 }
  13316 
  13317 /// E to E!T
  13318 fn airWrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13319     const pt = self.pt;
  13320     const zcu = pt.zcu;
  13321     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13322 
  13323     const eu_ty = ty_op.ty.toType();
  13324     const pl_ty = eu_ty.errorUnionPayload(zcu);
  13325     const err_ty = eu_ty.errorUnionSet(zcu);
  13326 
  13327     const result: MCValue = result: {
  13328         if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result try self.resolveInst(ty_op.operand);
  13329 
  13330         const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu));
  13331         const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu));
  13332         const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
  13333         try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef, .{});
  13334         const operand = try self.resolveInst(ty_op.operand);
  13335         try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand, .{});
  13336         break :result .{ .load_frame = .{ .index = frame_index } };
  13337     };
  13338     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13339 }
  13340 
  13341 fn airSlicePtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13342     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13343     const result = result: {
  13344         const src_mcv = try self.resolveInst(ty_op.operand);
  13345         const ptr_mcv: MCValue = switch (src_mcv) {
  13346             .register_pair => |regs| .{ .register = regs[0] },
  13347             else => src_mcv,
  13348         };
  13349         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
  13350             switch (src_mcv) {
  13351                 .register_pair => |regs| try self.freeValue(.{ .register = regs[1] }),
  13352                 else => {},
  13353             }
  13354             break :result ptr_mcv;
  13355         }
  13356 
  13357         const dst_mcv = try self.allocRegOrMem(inst, true);
  13358         try self.genCopy(self.typeOfIndex(inst), dst_mcv, ptr_mcv, .{});
  13359         break :result dst_mcv;
  13360     };
  13361     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13362 }
  13363 
  13364 fn airSliceLen(self: *CodeGen, inst: Air.Inst.Index) !void {
  13365     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13366     const result = result: {
  13367         const src_mcv = try self.resolveInst(ty_op.operand);
  13368         const len_mcv: MCValue = switch (src_mcv) {
  13369             .register_pair => |regs| .{ .register = regs[1] },
  13370             .load_frame => |frame_addr| .{ .load_frame = .{
  13371                 .index = frame_addr.index,
  13372                 .off = frame_addr.off + 8,
  13373             } },
  13374             else => return self.fail("TODO implement slice_len for {}", .{src_mcv}),
  13375         };
  13376         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
  13377             switch (src_mcv) {
  13378                 .register_pair => |regs| try self.freeValue(.{ .register = regs[0] }),
  13379                 .load_frame => {},
  13380                 else => unreachable,
  13381             }
  13382             break :result len_mcv;
  13383         }
  13384 
  13385         const dst_mcv = try self.allocRegOrMem(inst, true);
  13386         try self.genCopy(self.typeOfIndex(inst), dst_mcv, len_mcv, .{});
  13387         break :result dst_mcv;
  13388     };
  13389     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  13390 }
  13391 
        /// Computes `operand + 8` with an `lea`, where the operand of `inst` is a pointer
        /// held in (or first copied into) a register. Going by the function name, the
        /// operand points at a slice and offset 8 is its length field.
  13392 fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13393     const pt = self.pt;
  13394     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13395 
  13396     const src_ty = self.typeOf(ty_op.operand);
  13397     const src_mcv = try self.resolveInst(ty_op.operand);
            // The `lea` below needs the pointer in a register.
  13398     const src_reg = switch (src_mcv) {
  13399         .register => |reg| reg,
  13400         else => try self.copyToTmpRegister(src_ty, src_mcv),
  13401     };
  13402     const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  13403     defer self.register_manager.unlockReg(src_lock);
  13404 
  13405     const dst_ty = self.typeOfIndex(inst);
            // Reuse the pointer register for the result when allowed, else allocate one.
  13406     const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  13407         src_reg
  13408     else
  13409         try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  13410     const dst_mcv = MCValue{ .register = dst_reg };
            // `lockReg` returns an optional lock, so a null lock is tolerated here
            // (`dst_reg` may be the `src_reg` locked above).
  13411     const dst_lock = self.register_manager.lockReg(dst_reg);
  13412     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  13413 
  13414     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu));
            // lea dst, [src_reg + 8]
  13415     try self.asmRegisterMemory(
  13416         .{ ._, .lea },
  13417         registerAlias(dst_reg, dst_abi_size),
  13418         .{
  13419             .base = .{ .reg = src_reg },
  13420             .mod = .{ .rm = .{ .size = .qword, .disp = 8 } },
  13421         },
  13422     );
  13423 
  13424     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  13425 }
  13426 
  13427 fn airPtrSlicePtrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13428     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13429 
  13430     const dst_ty = self.typeOfIndex(inst);
  13431     const opt_mcv = try self.resolveInst(ty_op.operand);
  13432 
  13433     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
  13434         opt_mcv
  13435     else
  13436         try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
  13437     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  13438 }
  13439 
  13440 fn elemOffset(self: *CodeGen, index_ty: Type, index: MCValue, elem_size: u64) !Register {
  13441     const reg: Register = blk: {
  13442         switch (index) {
  13443             .immediate => |imm| {
  13444                 // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
  13445                 // and set the register directly to the scaled offset as an immediate.
  13446                 const reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  13447                 try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size }, .{});
  13448                 break :blk reg;
  13449             },
  13450             else => {
  13451                 const reg = try self.copyToTmpRegister(index_ty, index);
  13452                 try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
  13453                 break :blk reg;
  13454             },
  13455         }
  13456     };
  13457     return reg;
  13458 }
  13459 
        /// Computes the address of element `rhs` of the slice `lhs` and returns it in a
        /// newly allocated general-purpose register (as its 64-bit alias).
        ///
        /// The address is built as: value of `lhs` loaded as a `usize` (presumably the
        /// slice's pointer; this relies on how `genSetReg` treats a slice operand, so
        /// confirm) plus `index * abiSize(element)`, the latter computed by `elemOffset`.
  13460 fn genSliceElemPtr(self: *CodeGen, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
  13461     const pt = self.pt;
  13462     const zcu = pt.zcu;
  13463     const slice_ty = self.typeOf(lhs);
  13464     const slice_mcv = try self.resolveInst(lhs);
            // Only a single-register slice value is locked here; other locations get no lock.
  13465     const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) {
  13466         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  13467         else => null,
  13468     };
  13469     defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock);
  13470 
  13471     const elem_ty = slice_ty.childType(zcu);
  13472     const elem_size = elem_ty.abiSize(zcu);
  13473     const slice_ptr_field_type = slice_ty.slicePtrFieldType(zcu);
  13474 
  13475     const index_ty = self.typeOf(rhs);
  13476     const index_mcv = try self.resolveInst(rhs);
  13477     const index_mcv_lock: ?RegisterLock = switch (index_mcv) {
  13478         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  13479         else => null,
  13480     };
  13481     defer if (index_mcv_lock) |lock| self.register_manager.unlockReg(lock);
  13482 
            // The scaled offset stays locked while the address register is allocated.
  13483     const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
  13484     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
  13485     defer self.register_manager.unlockReg(offset_reg_lock);
  13486 
  13487     const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  13488     try self.genSetReg(addr_reg, .usize, slice_mcv, .{});
  13489     // TODO we could allocate register here, but need to expect addr register and potentially
  13490     // offset register.
  13491     try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{
  13492         .register = offset_reg,
  13493     });
  13494     return MCValue{ .register = addr_reg.to64() };
  13495 }
  13496 
  13497 fn airSliceElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
  13498     const pt = self.pt;
  13499     const zcu = pt.zcu;
  13500     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  13501 
  13502     const result: MCValue = result: {
  13503         const elem_ty = self.typeOfIndex(inst);
  13504         if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
  13505 
  13506         const slice_ty = self.typeOf(bin_op.lhs);
  13507         const slice_ptr_field_type = slice_ty.slicePtrFieldType(zcu);
  13508         const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
  13509         const dst_mcv = try self.allocRegOrMem(inst, false);
  13510         try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
  13511         break :result dst_mcv;
  13512     };
  13513     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  13514 }
  13515 
  13516 fn airSliceElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  13517     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  13518     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
  13519     const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs);
  13520     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
  13521 }
  13522 
  13523 fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers an element read where `bin_op.lhs` is the array/vector value and
            // `bin_op.rhs` is the index. Vectors whose element type is one bit wide are
            // read with a `bt` bit test; every other case computes `base + offset` in a
            // register and copies the element out through that address.
  13524     const pt = self.pt;
  13525     const zcu = pt.zcu;
  13526     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  13527
  13528     const result: MCValue = result: {
  13529         const array_ty = self.typeOf(bin_op.lhs);
  13530         const elem_ty = array_ty.childType(zcu);
  13531
                // Operands that currently live in a register stay locked until the end of
                // this block (see the paired `defer`s).
  13532         const array_mcv = try self.resolveInst(bin_op.lhs);
  13533         const array_lock: ?RegisterLock = switch (array_mcv) {
  13534             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  13535             else => null,
  13536         };
  13537         defer if (array_lock) |lock| self.register_manager.unlockReg(lock);
  13538
  13539         const index_ty = self.typeOf(bin_op.rhs);
  13540         const index_mcv = try self.resolveInst(bin_op.rhs);
  13541         const index_lock = switch (index_mcv) {
  13542             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  13543             else => null,
  13544         };
  13545         defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
  13546
                // Done before anything is emitted: the bit-test path below reports its
                // result through the carry condition (`setcc` on `.c`).
  13547         try self.spillEflagsIfOccupied();
  13548         if (array_ty.isVector(zcu) and elem_ty.bitSize(zcu) == 1) {
                    // A `.register_mask` operand is first copied into a temporary register;
                    // all other locations are used as they are.
  13549             const array_mat_mcv: MCValue = switch (array_mcv) {
  13550                 else => array_mcv,
  13551                 .register_mask => .{ .register = try self.copyToTmpRegister(array_ty, array_mcv) },
  13552             };
                    // `lockReg` (not the `AssumeUnused` variant) because the register may be
                    // the one already locked through `array_lock` above.
  13553             const array_mat_lock = switch (array_mat_mcv) {
  13554                 .register => |reg| self.register_manager.lockReg(reg),
  13555                 else => null,
  13556             };
  13557             defer if (array_mat_lock) |lock| self.register_manager.unlockReg(lock);
  13558
                    // Emit `bt <vector>, <index>`. The index is used as an immediate when it is
                    // one, as its own register when it has one, and otherwise is copied into
                    // a temporary register.
  13559             switch (array_mat_mcv) {
  13560                 .register => |array_reg| switch (array_reg.class()) {
  13561                     .general_purpose => switch (index_mcv) {
  13562                         .immediate => |index_imm| try self.asmRegisterImmediate(
  13563                             .{ ._, .bt },
  13564                             array_reg.to64(),
  13565                             .u(index_imm),
  13566                         ),
  13567                         else => try self.asmRegisterRegister(
  13568                             .{ ._, .bt },
  13569                             array_reg.to64(),
  13570                             switch (index_mcv) {
  13571                                 .register => |index_reg| index_reg,
  13572                                 else => try self.copyToTmpRegister(index_ty, index_mcv),
  13573                             }.to64(),
  13574                         ),
  13575                     },
  13576                     .sse => {
                                // The vector is stored to a freshly allocated frame slot and the
                                // bit test is performed on that memory instead.
  13577                         const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
  13578                         try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mat_mcv, .{});
  13579                         switch (index_mcv) {
  13580                             .immediate => |index_imm| try self.asmMemoryImmediate(
  13581                                 .{ ._, .bt },
  13582                                 .{
  13583                                     .base = .{ .frame = frame_index },
  13584                                     .mod = .{ .rm = .{ .size = .qword } },
  13585                                 },
  13586                                 .u(index_imm),
  13587                             ),
  13588                             else => try self.asmMemoryRegister(
  13589                                 .{ ._, .bt },
  13590                                 .{
  13591                                     .base = .{ .frame = frame_index },
  13592                                     .mod = .{ .rm = .{ .size = .qword } },
  13593                                 },
  13594                                 switch (index_mcv) {
  13595                                     .register => |index_reg| index_reg,
  13596                                     else => try self.copyToTmpRegister(index_ty, index_mcv),
  13597                                 }.to64(),
  13598                             ),
  13599                         }
  13600                     },
  13601                     else => unreachable,
  13602                 },
  13603                 .load_frame => switch (index_mcv) {
  13604                     .immediate => |index_imm| try self.asmMemoryImmediate(
  13605                         .{ ._, .bt },
  13606                         try array_mat_mcv.mem(self, .{ .size = .qword }),
  13607                         .u(index_imm),
  13608                     ),
  13609                     else => try self.asmMemoryRegister(
  13610                         .{ ._, .bt },
  13611                         try array_mat_mcv.mem(self, .{ .size = .qword }),
  13612                         switch (index_mcv) {
  13613                             .register => |index_reg| index_reg,
  13614                             else => try self.copyToTmpRegister(index_ty, index_mcv),
  13615                         }.to64(),
  13616                     ),
  13617                 },
                        // For these locations the vector's address is first loaded into a
                        // temporary register, which then serves as the memory base.
                        // NOTE(review): that address temporary is not locked before the index
                        // temporary is requested in the `else` arm — confirm the two
                        // `copyToTmpRegister` calls cannot hand back the same register.
  13618                 .memory, .load_symbol, .load_direct, .load_got, .load_tlv => switch (index_mcv) {
  13619                     .immediate => |index_imm| try self.asmMemoryImmediate(
  13620                         .{ ._, .bt },
  13621                         .{
  13622                             .base = .{
  13623                                 .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
  13624                             },
  13625                             .mod = .{ .rm = .{ .size = .qword } },
  13626                         },
  13627                         .u(index_imm),
  13628                     ),
  13629                     else => try self.asmMemoryRegister(
  13630                         .{ ._, .bt },
  13631                         .{
  13632                             .base = .{
  13633                                 .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
  13634                             },
  13635                             .mod = .{ .rm = .{ .size = .qword } },
  13636                         },
  13637                         switch (index_mcv) {
  13638                             .register => |index_reg| index_reg,
  13639                             else => try self.copyToTmpRegister(index_ty, index_mcv),
  13640                         }.to64(),
  13641                     ),
  13642                 },
  13643                 else => return self.fail("TODO airArrayElemVal for {s} of {}", .{
  13644                     @tagName(array_mat_mcv), array_ty.fmt(pt),
  13645                 }),
  13646             }
  13647
                    // Turn the carry condition left by `bt` into a value in the 8-bit alias of
                    // a newly allocated general-purpose register.
  13648             const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  13649             try self.asmSetccRegister(.c, dst_reg.to8());
  13650             break :result .{ .register = dst_reg };
  13651         }
  13652
                // General path: `addr_reg` receives the address of the array, then the
                // scaled index is added to it.
  13653         const elem_abi_size = elem_ty.abiSize(zcu);
  13654         const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  13655         const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  13656         defer self.register_manager.unlockReg(addr_lock);
  13657
  13658         switch (array_mcv) {
  13659             .register => {
                        // A register-resident array is written to a new frame slot so that
                        // an address for it can be formed with `lea`.
  13660                 const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
  13661                 try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{});
  13662                 try self.asmRegisterMemory(
  13663                     .{ ._, .lea },
  13664                     addr_reg,
  13665                     .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
  13666                 );
  13667             },
  13668             .load_frame => |frame_addr| try self.asmRegisterMemory(
  13669                 .{ ._, .lea },
  13670                 addr_reg,
  13671                 .{
  13672                     .base = .{ .frame = frame_addr.index },
  13673                     .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } },
  13674                 },
  13675             ),
  13676             .memory,
  13677             .load_symbol,
  13678             .load_direct,
  13679             .load_got,
  13680             .load_tlv,
  13681             => try self.genSetReg(addr_reg, .usize, array_mcv.address(), .{}),
  13682             .lea_symbol, .lea_direct, .lea_tlv => unreachable,
  13683             else => return self.fail("TODO airArrayElemVal_val for {s} of {}", .{
  13684                 @tagName(array_mcv), array_ty.fmt(pt),
  13685             }),
  13686         }
  13687
  13688         const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
  13689         const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
  13690         defer self.register_manager.unlockReg(offset_lock);
  13691
  13692         // TODO we could allocate register here, but need to except addr register and potentially
  13693         // offset register.
  13694         const dst_mcv = try self.allocRegOrMem(inst, false);
                // addr_reg += offset_reg, then copy the element from `[addr_reg]`.
  13695         try self.genBinOpMir(.{ ._, .add }, .usize, .{ .register = addr_reg }, .{ .register = offset_reg });
  13696         try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{});
  13697         break :result dst_mcv;
  13698     };
  13699     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  13700 }
  13701 
  13702 fn airPtrElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers an element load through a pointer: `lhs` is the pointer, `rhs` the
            // index. Yields `.none` when the element type has no runtime bits.
  13703     const pt = self.pt;
  13704     const zcu = pt.zcu;
  13705     const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  13706     const ptr_ty = self.typeOf(operands.lhs);
  13707
  13708     // this is identical to the `airPtrElemPtr` codegen except here an
  13709     // additional `mov` is needed at the end to get the actual value
  13710
  13711     const result = result: {
  13712         const elem_ty = ptr_ty.elemType2(zcu);
  13713         if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
  13714
  13715         const elem_size: u32 = @intCast(elem_ty.abiSize(zcu));
  13716         const index_ty = self.typeOf(operands.rhs);
  13717         const index_mcv = try self.resolveInst(operands.rhs);
  13718         const index_lock: ?RegisterLock = if (index_mcv == .register)
  13719             self.register_manager.lockRegAssumeUnused(index_mcv.register)
  13720         else
  13721             null;
  13722         defer if (index_lock) |held| self.register_manager.unlockReg(held);
  13723
  13724         const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
  13725         const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
  13726         defer self.register_manager.unlockReg(offset_lock);
  13727
  13728         const base_mcv = try self.resolveInst(operands.lhs);
                // The pointer's own register is modified in place only when it dies at
                // this instruction; otherwise a temporary copy is used.
  13729         const elem_addr_reg = reg: {
  13730             const reusable = base_mcv.isRegister() and self.liveness.operandDies(inst, 0);
  13731             if (reusable) break :reg base_mcv.register;
  13732             break :reg try self.copyToTmpRegister(ptr_ty, base_mcv);
  13733         };
  13734         const elem_addr_lock = self.register_manager.lockRegAssumeUnused(elem_addr_reg);
  13735         defer self.register_manager.unlockReg(elem_addr_lock);
  13736
  13737         try self.asmRegisterRegister(.{ ._, .add }, elem_addr_reg, offset_reg);
  13738
  13739         const dst_mcv = try self.allocRegOrMem(inst, true);
  13740         const dst_lock: ?RegisterLock = if (dst_mcv == .register)
  13741             self.register_manager.lockRegAssumeUnused(dst_mcv.register)
  13742         else
  13743             null;
  13744         defer if (dst_lock) |held| self.register_manager.unlockReg(held);
  13745
  13746         const elem_addr_mcv: MCValue = .{ .register = elem_addr_reg };
  13747         try self.load(dst_mcv, ptr_ty, elem_addr_mcv);
  13748         break :result dst_mcv;
  13749     };
  13750     return self.finishAir(inst, result, .{ operands.lhs, operands.rhs, .none });
  13751 }
  13752 
  13753 fn airPtrElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers pointer-to-element computation: result = base pointer (`lhs`) plus
            // the offset that `elemOffset` derives from the index (`rhs`).
  13754     const pt = self.pt;
  13755     const zcu = pt.zcu;
  13756     const payload_index = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
  13757     const bin = self.air.extraData(Air.Bin, payload_index).data;
  13758
  13759     const result = result: {
  13760         const result_ptr_ty = self.typeOfIndex(inst);
  13761         const ptr_ty = self.typeOf(bin.lhs);
  13762
  13763         const ptr_mcv = try self.resolveInst(bin.lhs);
  13764         const ptr_lock: ?RegisterLock = if (ptr_mcv == .register)
  13765             self.register_manager.lockRegAssumeUnused(ptr_mcv.register)
  13766         else
  13767             null;
  13768         defer if (ptr_lock) |held| self.register_manager.unlockReg(held);
  13769
                // When the result pointer type carries a vector index, the base pointer
                // value is passed through unchanged (reused or copied to a register).
  13770         if (result_ptr_ty.ptrInfo(zcu).flags.vector_index != .none) {
  13771             if (self.reuseOperand(inst, bin.lhs, 0, ptr_mcv)) break :result ptr_mcv;
  13772             break :result try self.copyToRegisterWithInstTracking(inst, result_ptr_ty, ptr_mcv);
  13773         }
  13774
  13775         const elem_ty = ptr_ty.elemType2(zcu);
  13776         const elem_size = elem_ty.abiSize(zcu);
  13777         const index_ty = self.typeOf(bin.rhs);
  13778         const index_mcv = try self.resolveInst(bin.rhs);
  13779         const index_lock: ?RegisterLock = if (index_mcv == .register)
  13780             self.register_manager.lockRegAssumeUnused(index_mcv.register)
  13781         else
  13782             null;
  13783         defer if (index_lock) |held| self.register_manager.unlockReg(held);
  13784
  13785         const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
  13786         const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
  13787         defer self.register_manager.unlockReg(offset_lock);
  13788
  13789         const elem_ptr_mcv = try self.copyToRegisterWithInstTracking(inst, result_ptr_ty, ptr_mcv);
  13790         try self.genBinOpMir(.{ ._, .add }, result_ptr_ty, elem_ptr_mcv, .{
  13791             .register = offset_reg,
  13792         });
  13793
  13794         break :result elem_ptr_mcv;
  13795     };
  13796     return self.finishAir(inst, result, .{ bin.lhs, bin.rhs, .none });
  13797 }
  13798 
  13799 fn airSetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Stores the tag value (`rhs`) into the union pointed to by `lhs`. Produces no
            // result value; does nothing when the union layout has a zero-sized tag.
  13800     const pt = self.pt;
  13801     const zcu = pt.zcu;
  13802     const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  13803     const ptr_union_ty = self.typeOf(operands.lhs);
  13804     const union_ty = ptr_union_ty.childType(zcu);
  13805     const tag_ty = self.typeOf(operands.rhs);
  13806     const layout = union_ty.unionGetLayout(zcu);
  13807
  13808     if (layout.tag_size == 0) {
  13809         return self.finishAir(inst, .none, .{ operands.lhs, operands.rhs, .none });
  13810     }
  13811
  13812     const union_ptr = try self.resolveInst(operands.lhs);
  13813     const union_ptr_lock: ?RegisterLock = if (union_ptr == .register)
  13814         self.register_manager.lockRegAssumeUnused(union_ptr.register)
  13815     else
  13816         null;
  13817     defer if (union_ptr_lock) |held| self.register_manager.unlockReg(held);
  13818
  13819     const tag_mcv = try self.resolveInst(operands.rhs);
  13820     const tag_lock: ?RegisterLock = if (tag_mcv == .register)
  13821         self.register_manager.lockRegAssumeUnused(tag_mcv.register)
  13822     else
  13823         null;
  13824     defer if (tag_lock) |held| self.register_manager.unlockReg(held);
  13825
            // With a non-empty payload and a tag less aligned than the payload, the
            // store address is the union pointer advanced by `payload_size`.
  13826     const tag_follows_payload = layout.payload_size > 0 and
  13827         layout.tag_align.compare(.lt, layout.payload_align);
  13828     const tag_ptr: MCValue = if (tag_follows_payload) tag_ptr: {
  13829         // TODO reusing the operand
  13830         const tmp_reg = try self.copyToTmpRegister(ptr_union_ty, union_ptr);
  13831         const tmp_mcv: MCValue = .{ .register = tmp_reg };
  13832         try self.genBinOpMir(.{ ._, .add }, ptr_union_ty, tmp_mcv, .{
  13833             .immediate = layout.payload_size,
  13834         });
  13835         break :tag_ptr tmp_mcv;
  13836     } else union_ptr;
  13837
  13838     const ptr_tag_ty = try pt.adjustPtrTypeChild(ptr_union_ty, tag_ty);
  13839     try self.store(ptr_tag_ty, tag_ptr, tag_mcv, .{});
  13840
  13841     return self.finishAir(inst, .none, .{ operands.lhs, operands.rhs, .none });
  13842 }
  13843 
  13844 fn airGetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Extracts the tag from a union value. Handles a union held in a frame slot
            // (tag ABI size up to 8 bytes) or in a single register; every other operand
            // location fails with a TODO message.
  13845     const zcu = self.pt.zcu;
  13846     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13847
  13848     const tag_ty = self.typeOfIndex(inst);
  13849     const union_ty = self.typeOf(un_op.operand);
  13850     const layout = union_ty.unionGetLayout(zcu);
  13851
  13852     if (layout.tag_size == 0) {
  13853         return self.finishAir(inst, .none, .{ un_op.operand, .none, .none });
  13854     }
  13855
  13856     // TODO reusing the operand
  13857     const union_mcv = try self.resolveInst(un_op.operand);
  13858     const union_lock: ?RegisterLock = if (union_mcv == .register)
  13859         self.register_manager.lockRegAssumeUnused(union_mcv.register)
  13860     else
  13861         null;
  13862     defer if (union_lock) |held| self.register_manager.unlockReg(held);
  13863
  13864     const tag_abi_size = tag_ty.abiSize(zcu);
  13865     const tag_mcv: MCValue = switch (union_mcv) {
  13866         .load_frame => |frame_addr| in_memory: {
  13867             if (tag_abi_size > 8) return self.fail(
  13868                 "TODO implement get_union_tag for ABI larger than 8 bytes and operand {}",
  13869                 .{union_mcv},
  13870             );
  13871
                    // Load the tag straight from its offset inside the frame slot.
  13872             const tag_off: i32 = @intCast(layout.tagOffset());
  13873             const tag_slot: MCValue = .{ .load_frame = .{
  13874                 .index = frame_addr.index,
  13875                 .off = frame_addr.off + tag_off,
  13876             } };
  13877             break :in_memory try self.copyToRegisterWithInstTracking(inst, tag_ty, tag_slot);
  13878         },
  13879         .register => in_register: {
                    // Copy the whole union, shift the tag down to bit 0, then view the
                    // register at the tag's width.
  13880             const shift_bits: u6 = @intCast(layout.tagOffset() * 8);
  13881             const shifted = try self.copyToRegisterWithInstTracking(inst, union_ty, union_mcv);
  13882             try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, shifted, .u8, .{
  13883                 .immediate = shift_bits,
  13884             });
  13885
  13886             const tag_reg = registerAlias(shifted.register, @intCast(layout.tag_size));
  13887             break :in_register MCValue{ .register = tag_reg };
  13888         },
  13889         else => return self.fail("TODO implement get_union_tag for {}", .{union_mcv}),
  13890     };
  13891
  13892     return self.finishAir(inst, tag_mcv, .{ un_op.operand, .none, .none });
  13893 }
  13894 
  13895 fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers count-leading-zeros for a scalar integer operand (vector operands
            // fail with a TODO). Three strategies, chosen by operand width and CPU features:
            //   1. wider than 128 bits with `lzcnt` (64 without): scan 64-bit limbs in memory;
            //   2. `lzcnt` available: one or two `lzcnt` instructions plus a width correction;
            //   3. otherwise: `bsr` plus a `cmov` that substitutes a constant for zero input.
  13896     const pt = self.pt;
  13897     const zcu = pt.zcu;
  13898     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  13899     const result = result: {
  13900         try self.spillEflagsIfOccupied();
  13901
  13902         const dst_ty = self.typeOfIndex(inst);
  13903         const src_ty = self.typeOf(ty_op.operand);
  13904         if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airClz for {}", .{
  13905             src_ty.fmt(pt),
  13906         });
  13907
                // An immediate operand is first copied into a temporary register.
  13908         const src_mcv = try self.resolveInst(ty_op.operand);
  13909         const mat_src_mcv = switch (src_mcv) {
  13910             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
  13911             else => src_mcv,
  13912         };
  13913         const mat_src_lock = switch (mat_src_mcv) {
  13914             .register => |reg| self.register_manager.lockReg(reg),
  13915             else => null,
  13916         };
  13917         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
  13918
  13919         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  13920         const dst_mcv = MCValue{ .register = dst_reg };
  13921         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  13922         defer self.register_manager.unlockReg(dst_lock);
  13923
  13924         const abi_size: u31 = @intCast(src_ty.abiSize(zcu));
  13925         const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
  13926         const has_lzcnt = self.hasFeature(.lzcnt);
  13927         if (src_bits > @as(u32, if (has_lzcnt) 128 else 64)) {
                    // Strategy 1: the operand must be in a frame slot so its limbs can be
                    // addressed as `[frame + index * 8 + off]`.
  13928             const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
  13929                 .load_frame => |src_frame_addr| src_frame_addr,
  13930                 else => {
  13931                     const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
  13932                     try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
  13933                     break :src_frame_addr .{ .index = src_frame_addr };
  13934                 },
  13935             };
  13936
  13937             const limbs_len =
  13938                 std.math.divCeil(u32, abi_size, 8) catch unreachable;
  13939
  13940             const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  13941             const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
  13942             defer self.register_manager.unlockReg(index_lock);
  13943
  13944             try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .u(limbs_len));
  13945             // Seed the running bit position with -1. `bsr` below only runs for a
  13946             // non-zero limb, so this seed survives only when every limb is zero, and
  13947             // the final `(src_bits - 1) - position` must then produce `src_bits`.
  13948             try self.asmRegisterImmediate(
  13949                 .{ ._, .mov },
  13950                 dst_reg.to32(),
  13951                 .s(-1),
  13952             );
                    // loop: stop when index reaches zero; otherwise step down one limb and
                    // keep looping while that limb compares equal to zero.
  13953             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  13954             try self.asmRegisterRegister(.{ ._, .@"test" }, index_reg.to32(), index_reg.to32());
  13955             const zero = try self.asmJccReloc(.z, undefined);
  13956             if (self.hasFeature(.slow_incdec)) {
  13957                 try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1));
  13958             } else {
  13959                 try self.asmRegister(.{ ._, .dec }, index_reg.to32());
  13960             }
  13961             try self.asmMemoryImmediate(.{ ._, .cmp }, .{
  13962                 .base = .{ .frame = src_frame_addr.index },
  13963                 .mod = .{ .rm = .{
  13964                     .size = .qword,
  13965                     .index = index_reg.to64(),
  13966                     .scale = .@"8",
  13967                     .disp = src_frame_addr.off,
  13968                 } },
  13969             }, .u(0));
  13970             _ = try self.asmJccReloc(.e, loop);
                    // First non-zero limb found: `bsr` yields the bit index within the limb.
  13971             try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{
  13972                 .base = .{ .frame = src_frame_addr.index },
  13973                 .mod = .{ .rm = .{
  13974                     .size = .qword,
  13975                     .index = index_reg.to64(),
  13976                     .scale = .@"8",
  13977                     .disp = src_frame_addr.off,
  13978                 } },
  13979             });
  13980             self.performReloc(zero);
                    // position = index * 64 + dst; result = (src_bits - 1) - position.
  13981             try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6));
  13982             try self.asmRegisterRegister(.{ ._, .add }, index_reg.to32(), dst_reg.to32());
  13983             try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(src_bits - 1));
  13984             try self.asmRegisterRegister(.{ ._, .sub }, dst_reg.to32(), index_reg.to32());
  13985             break :result dst_mcv;
  13986         }
  13987
  13988         if (has_lzcnt) {
                    // Strategy 2.
  13989             if (src_bits <= 8) {
                        // Count in a truncated copy at 32-bit width, then remove the
                        // `32 - src_bits` zeros that the widening contributed.
  13990                 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
  13991                 try self.truncateRegister(src_ty, wide_reg);
  13992                 try self.genBinOpMir(.{ ._, .lzcnt }, .u32, dst_mcv, .{ .register = wide_reg });
  13993                 try self.genBinOpMir(
  13994                     .{ ._, .sub },
  13995                     dst_ty,
  13996                     dst_mcv,
  13997                     .{ .immediate = 32 - src_bits },
  13998                 );
  13999             } else if (src_bits <= 64) {
  14000                 try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
  14001                 const extra_bits = self.regExtraBits(src_ty);
  14002                 if (extra_bits > 0) {
  14003                     try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits });
  14004                 }
  14005             } else {
                        // Two 64-bit halves: dst = lzcnt(first half) + 64 and
                        // tmp = lzcnt(second half); `cmovnc` replaces dst with tmp when the
                        // second `lzcnt` left the carry flag clear. The first half is the
                        // operand at offset 0 / `register_pair[0]`, the second the operand
                        // at offset 8 / `register_pair[1]`.
  14006                 assert(src_bits <= 128);
  14007                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14008                 const tmp_mcv = MCValue{ .register = tmp_reg };
  14009                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  14010                 defer self.register_manager.unlockReg(tmp_lock);
  14011
  14012                 try self.genBinOpMir(.{ ._, .lzcnt }, .u64, dst_mcv, if (mat_src_mcv.isBase())
  14013                     mat_src_mcv
  14014                 else
  14015                     .{ .register = mat_src_mcv.register_pair[0] });
  14016                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
  14017                 try self.genBinOpMir(.{ ._, .lzcnt }, .u64, tmp_mcv, if (mat_src_mcv.isBase())
  14018                     mat_src_mcv.address().offset(8).deref()
  14019                 else
  14020                     .{ .register = mat_src_mcv.register_pair[1] });
  14021                 try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
  14022
  14023                 if (src_bits < 128) try self.genBinOpMir(
  14024                     .{ ._, .sub },
  14025                     dst_ty,
  14026                     dst_mcv,
  14027                     .{ .immediate = 128 - src_bits },
  14028                 );
  14029             }
  14030             break :result dst_mcv;
  14031         }
  14032
                // Strategy 3: `bsr` fallback. The `cmov` aliases are at least 2 bytes wide.
  14033         assert(src_bits <= 64);
  14034         const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2);
  14035         if (std.math.isPowerOfTwo(src_bits)) {
                    // For a power-of-two width, `bit_index ^ (src_bits - 1)` equals
                    // `(src_bits - 1) - bit_index`. On the zero-flag condition after `bsr`
                    // the constant `src_bits ^ (src_bits - 1)` is substituted, which the
                    // same xor turns into `src_bits`.
  14036             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
  14037                 .immediate = src_bits ^ (src_bits - 1),
  14038             });
  14039             const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
  14040             defer self.register_manager.unlockReg(imm_lock);
  14041
  14042             if (src_bits <= 8) {
  14043                 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
  14044                 const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
  14045                 defer self.register_manager.unlockReg(wide_lock);
  14046
  14047                 try self.truncateRegister(src_ty, wide_reg);
  14048                 try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg });
  14049             } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv);
  14050
  14051             try self.asmCmovccRegisterRegister(
  14052                 .z,
  14053                 registerAlias(dst_reg, cmov_abi_size),
  14054                 registerAlias(imm_reg, cmov_abi_size),
  14055             );
  14056
  14057             try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 });
  14058         } else {
                    // Other widths: `imm_reg` starts as all ones at the register width of
                    // `dst_ty` and is overwritten with the `bsr` result on the not-zero
                    // condition; the result is `(src_bits - 1) - imm_reg`.
  14059             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
  14060                 .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)),
  14061             });
  14062             const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
  14063             defer self.register_manager.unlockReg(imm_lock);
  14064
  14065             const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
  14066             const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
  14067             defer self.register_manager.unlockReg(wide_lock);
  14068
  14069             try self.truncateRegister(src_ty, wide_reg);
  14070             try self.genBinOpMir(
  14071                 .{ ._r, .bs },
  14072                 if (src_bits <= 8) .u16 else src_ty,
  14073                 dst_mcv,
  14074                 .{ .register = wide_reg },
  14075             );
  14076
  14077             try self.asmCmovccRegisterRegister(
  14078                 .nz,
  14079                 registerAlias(imm_reg, cmov_abi_size),
  14080                 registerAlias(dst_reg, cmov_abi_size),
  14081             );
  14082
  14083             try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }, .{});
  14084             try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg });
  14085         }
  14086         break :result dst_mcv;
  14087     };
  14088     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  14089 }
  14090 
  14091 fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
  14092     const pt = self.pt;
  14093     const zcu = pt.zcu;
  14094     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14095     const result = result: {
  14096         try self.spillEflagsIfOccupied();
  14097 
  14098         const dst_ty = self.typeOfIndex(inst);
  14099         const src_ty = self.typeOf(ty_op.operand);
  14100         if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airCtz for {}", .{
  14101             src_ty.fmt(pt),
  14102         });
  14103 
  14104         const src_mcv = try self.resolveInst(ty_op.operand);
  14105         const mat_src_mcv = switch (src_mcv) {
  14106             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
  14107             else => src_mcv,
  14108         };
  14109         const mat_src_lock = switch (mat_src_mcv) {
  14110             .register => |reg| self.register_manager.lockReg(reg),
  14111             else => null,
  14112         };
  14113         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
  14114 
  14115         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  14116         const dst_mcv = MCValue{ .register = dst_reg };
  14117         const dst_lock = self.register_manager.lockReg(dst_reg);
  14118         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  14119 
  14120         const abi_size: u31 = @intCast(src_ty.abiSize(zcu));
  14121         const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
  14122         const has_bmi = self.hasFeature(.bmi);
  14123         if (src_bits > @as(u32, if (has_bmi) 128 else 64)) {
  14124             const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
  14125                 .load_frame => |src_frame_addr| src_frame_addr,
  14126                 else => {
  14127                     const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
  14128                     try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
  14129                     break :src_frame_addr .{ .index = src_frame_addr };
  14130                 },
  14131             };
  14132 
  14133             const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
  14134             const extra_bits = abi_size * 8 - src_bits;
  14135 
  14136             const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14137             const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
  14138             defer self.register_manager.unlockReg(index_lock);
  14139 
  14140             try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .s(-1));
  14141             switch (extra_bits) {
  14142                 0 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()),
  14143                 1 => try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to32(), dst_reg.to32()),
  14144                 else => try self.asmRegisterImmediate(
  14145                     .{ ._, .mov },
  14146                     dst_reg.to32(),
  14147                     .s(-@as(i32, extra_bits)),
  14148                 ),
  14149             }
  14150             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  14151             if (self.hasFeature(.slow_incdec)) {
  14152                 try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
  14153             } else {
  14154                 try self.asmRegister(.{ ._, .inc }, index_reg.to32());
  14155             }
  14156             try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len));
  14157             const zero = try self.asmJccReloc(.nb, undefined);
  14158             try self.asmMemoryImmediate(.{ ._, .cmp }, .{
  14159                 .base = .{ .frame = src_frame_addr.index },
  14160                 .mod = .{ .rm = .{
  14161                     .size = .qword,
  14162                     .index = index_reg.to64(),
  14163                     .scale = .@"8",
  14164                     .disp = src_frame_addr.off,
  14165                 } },
  14166             }, .u(0));
  14167             _ = try self.asmJccReloc(.e, loop);
  14168             try self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{
  14169                 .base = .{ .frame = src_frame_addr.index },
  14170                 .mod = .{ .rm = .{
  14171                     .size = .qword,
  14172                     .index = index_reg.to64(),
  14173                     .scale = .@"8",
  14174                     .disp = src_frame_addr.off,
  14175                 } },
  14176             });
  14177             self.performReloc(zero);
  14178             try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6));
  14179             try self.asmRegisterRegister(.{ ._, .add }, dst_reg.to32(), index_reg.to32());
  14180             break :result dst_mcv;
  14181         }
  14182 
  14183         const wide_ty: Type = if (src_bits <= 8) .u16 else src_ty;
  14184         if (has_bmi) {
  14185             if (src_bits <= 64) {
  14186                 const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
  14187                 const masked_mcv = if (extra_bits > 0) masked: {
  14188                     const tmp_mcv = tmp: {
  14189                         if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
  14190                             break :tmp src_mcv;
  14191                         try self.genSetReg(dst_reg, wide_ty, src_mcv, .{});
  14192                         break :tmp dst_mcv;
  14193                     };
  14194                     try self.genBinOpMir(
  14195                         .{ ._, .@"or" },
  14196                         wide_ty,
  14197                         tmp_mcv,
  14198                         .{ .immediate = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - extra_bits)) <<
  14199                             @intCast(src_bits) },
  14200                     );
  14201                     break :masked tmp_mcv;
  14202                 } else mat_src_mcv;
  14203                 try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv);
  14204             } else {
  14205                 assert(src_bits <= 128);
  14206                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14207                 const tmp_mcv = MCValue{ .register = tmp_reg };
  14208                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  14209                 defer self.register_manager.unlockReg(tmp_lock);
  14210 
  14211                 const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
  14212                     mat_src_mcv
  14213                 else
  14214                     .{ .register = mat_src_mcv.register_pair[0] };
  14215                 const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
  14216                     mat_src_mcv.address().offset(8).deref()
  14217                 else
  14218                     .{ .register = mat_src_mcv.register_pair[1] };
  14219                 const masked_mcv = if (src_bits < 128) masked: {
  14220                     try self.genCopy(.u64, dst_mcv, hi_mat_src_mcv, .{});
  14221                     try self.genBinOpMir(
  14222                         .{ ._, .@"or" },
  14223                         .u64,
  14224                         dst_mcv,
  14225                         .{ .immediate = @as(u64, std.math.maxInt(u64)) << @intCast(src_bits - 64) },
  14226                     );
  14227                     break :masked dst_mcv;
  14228                 } else hi_mat_src_mcv;
  14229                 try self.genBinOpMir(.{ ._, .tzcnt }, .u64, dst_mcv, masked_mcv);
  14230                 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
  14231                 try self.genBinOpMir(.{ ._, .tzcnt }, .u64, tmp_mcv, lo_mat_src_mcv);
  14232                 try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
  14233             }
  14234             break :result dst_mcv;
  14235         }
  14236 
  14237         assert(src_bits <= 64);
  14238         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
  14239         const width_lock = self.register_manager.lockRegAssumeUnused(width_reg);
  14240         defer self.register_manager.unlockReg(width_lock);
  14241 
  14242         if (src_bits <= 8 or !std.math.isPowerOfTwo(src_bits)) {
  14243             const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
  14244             const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
  14245             defer self.register_manager.unlockReg(wide_lock);
  14246 
  14247             try self.truncateRegister(src_ty, wide_reg);
  14248             try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg });
  14249         } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv);
  14250 
  14251         const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2);
  14252         try self.asmCmovccRegisterRegister(
  14253             .z,
  14254             registerAlias(dst_reg, cmov_abi_size),
  14255             registerAlias(width_reg, cmov_abi_size),
  14256         );
  14257         break :result dst_mcv;
  14258     };
  14259     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  14260 }
  14261 
  14262 fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers the population-count AIR instruction `inst` (a `ty_op` instruction)
            // and hands the resulting MCValue to `finishAir`.
            //
            // - Vector operands and operands whose ABI size exceeds 16 bytes are rejected
            //   with a TODO failure.
            // - Operands of at most 8 bytes are counted into one gp register.
            // - Operands of 9 to 16 bytes are counted as two limbs whose counts are added.
  14263     const pt = self.pt;
  14264     const zcu = pt.zcu;
  14265     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14266     const result: MCValue = result: {
                // NOTE(review): presumably needed because the instructions emitted below
                // modify eflags — confirm against `spillEflagsIfOccupied`.
  14267         try self.spillEflagsIfOccupied();
  14268 
  14269         const src_ty = self.typeOf(ty_op.operand);
  14270         const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  14271         if (src_ty.zigTypeTag(zcu) == .vector or src_abi_size > 16)
  14272             return self.fail("TODO implement airPopCount for {}", .{src_ty.fmt(pt)});
  14273         const src_mcv = try self.resolveInst(ty_op.operand);
  14274 
                // An immediate operand is first copied into a temporary register; every
                // other kind of operand is used as it is.
  14275         const mat_src_mcv = switch (src_mcv) {
  14276             .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
  14277             else => src_mcv,
  14278         };
                // Only a single-register source is locked here; the lock is released when
                // the enclosing `result:` block is left.
  14279         const mat_src_lock = switch (mat_src_mcv) {
  14280             .register => |reg| self.register_manager.lockReg(reg),
  14281             else => null,
  14282         };
  14283         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
  14284 
  14285         if (src_abi_size <= 8) {
                    // Single-limb case. When the operand is in a register and
                    // `reuseOperand` succeeds, that register doubles as the destination;
                    // otherwise a fresh gp register is allocated for `inst`.
  14286             const dst_contains_src =
  14287                 src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv);
  14288             const dst_reg = if (dst_contains_src)
  14289                 src_mcv.getReg().?
  14290             else
  14291                 try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  14292             const dst_lock = self.register_manager.lockReg(dst_reg);
  14293             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  14294 
  14295             try self.genPopCount(dst_reg, src_ty, mat_src_mcv, dst_contains_src);
  14296             break :result .{ .register = dst_reg };
  14297         }
  14298 
                // Two-limb case. Two gp registers are allocated: the first is associated
                // with `inst` and ends up holding the result, the second is associated
                // with no instruction and only holds the count of the high limb.
  14299         assert(src_abi_size > 8 and src_abi_size <= 16);
  14300         const tmp_regs = try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
  14301         const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
  14302         defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
  14303 
                // Low limb: counted as a `usize`, read either from the operand itself
                // (when `isBase()` is true) or from the first register of the pair.
  14304         try self.genPopCount(tmp_regs[0], .usize, if (mat_src_mcv.isBase())
  14305             mat_src_mcv
  14306         else
  14307             .{ .register = mat_src_mcv.register_pair[0] }, false);
                // High limb: counted as an integer of the operand's signedness that is
                // `(bits - 1) % 64 + 1` bits wide, read from offset 8 of the operand or
                // from the second register of the pair.
  14308         const src_info = src_ty.intInfo(zcu);
  14309         const hi_ty = try pt.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1);
  14310         try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isBase())
  14311             mat_src_mcv.address().offset(8).deref()
  14312         else
  14313             .{ .register = mat_src_mcv.register_pair[1] }, false);
                // The two counts are summed through the 8-bit aliases of the registers.
                // NOTE(review): presumably sufficient because each count is at most 64 —
                // confirm that the upper bits of tmp_regs[0] are already zero.
  14314         try self.asmRegisterRegister(.{ ._, .add }, tmp_regs[0].to8(), tmp_regs[1].to8());
  14315         break :result .{ .register = tmp_regs[0] };
  14316     };
  14317     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  14318 }
  14319 
  14320 fn genPopCount(
  14321     self: *CodeGen,
  14322     dst_reg: Register,
  14323     src_ty: Type,
  14324     src_mcv: MCValue,
  14325     dst_contains_src: bool,
  14326 ) !void {
            // Emits code that leaves the population count of `src_mcv`, interpreted as a
            // value of type `src_ty`, in `dst_reg`.
            //
            // `dst_contains_src` tells this function that `dst_reg` already holds the
            // operand, in which case the initial copy into `dst_reg` is skipped.
            //
            // Two strategies are used:
            // - with the `popcnt` feature, a single `popcnt` instruction;
            // - otherwise, the shift-and-mask sequence annotated step by step below.
            // NOTE(review): the mask computation below assumes the ABI size of `src_ty`
            // is between 1 and 8 bytes — confirm against callers.
  14327     const pt = self.pt;
  14328 
  14329     const src_abi_size: u32 = @intCast(src_ty.abiSize(pt.zcu));
            // `popcnt` path. Operands wider than one byte are counted directly from
            // `src_mcv`. One-byte operands are instead copied into `dst_reg` (unless
            // already there), truncated as the unsigned counterpart of `src_ty`, and
            // counted as a u32.
            // NOTE(review): presumably because `popcnt` has no 8-bit operand form.
  14330     if (self.hasFeature(.popcnt)) return self.genBinOpMir(
  14331         .{ ._, .popcnt },
  14332         if (src_abi_size > 1) src_ty else .u32,
  14333         .{ .register = dst_reg },
  14334         if (src_abi_size > 1) src_mcv else src: {
  14335             if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv, .{});
  14336             try self.truncateRegister(try src_ty.toUnsigned(pt), dst_reg);
  14337             break :src .{ .register = dst_reg };
  14338         },
  14339     );
  14340 
            // `mask` has the low `src_abi_size` bytes set. Dividing it by 0b11, 0b0101,
            // 0b0001_0001 and 0b1111_1111 yields the repeating byte patterns 0x55, 0x33,
            // 0x0f and 0x01 respectively, sized to the operand.
  14341     const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8);
  14342     const imm_0_1: Immediate = .u(mask / 0b1_1);
  14343     const imm_00_11: Immediate = .u(mask / 0b01_01);
  14344     const imm_0000_1111: Immediate = .u(mask / 0b0001_0001);
  14345     const imm_0000_0001: Immediate = .u(mask / 0b1111_1111);
  14346 
  14347     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14348     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  14349     defer self.register_manager.unlockReg(tmp_lock);
  14350 
            // `dst` and `tmp` are the aliases of the two registers at the operand's size.
            // `imm` is only allocated for operands wider than 4 bytes, where every mask
            // is first moved into a register; it is left undefined otherwise and is only
            // referenced inside `src_abi_size > 4` branches.
            // NOTE(review): presumably because 64-bit masks cannot be encoded as
            // immediates of `and` / `imul`.
  14351     const dst = registerAlias(dst_reg, src_abi_size);
  14352     const tmp = registerAlias(tmp_reg, src_abi_size);
  14353     const imm = if (src_abi_size > 4)
  14354         try self.register_manager.allocReg(null, abi.RegisterClass.gp)
  14355     else
  14356         undefined;
  14357 
  14358     if (!dst_contains_src) try self.genSetReg(dst, src_ty, src_mcv, .{});
  14359     // dst = operand
  14360     try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
  14361     // tmp = operand
  14362     try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1));
  14363     // tmp = operand >> 1
  14364     if (src_abi_size > 4) {
  14365         try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
  14366         try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
  14367     } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
  14368     // tmp = (operand >> 1) & 0x55...55
  14369     try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp);
  14370     // dst = temp1 = operand - ((operand >> 1) & 0x55...55)
  14371     try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
  14372     // tmp = temp1
  14373     try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2));
  14374     // dst = temp1 >> 2
  14375     if (src_abi_size > 4) {
  14376         try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
  14377         try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
  14378         try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
  14379     } else {
  14380         try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
  14381         try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
  14382     }
  14383     // tmp = temp1 & 0x33...33
  14384     // dst = (temp1 >> 2) & 0x33...33
  14385     try self.asmRegisterRegister(.{ ._, .add }, tmp, dst);
  14386     // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33)
  14387     try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
  14388     // dst = temp2
  14389     try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(4));
  14390     // tmp = temp2 >> 4
  14391     try self.asmRegisterRegister(.{ ._, .add }, dst, tmp);
  14392     // dst = temp2 + (temp2 >> 4)
            // For operands wider than 4 bytes, `tmp` is reused to hold the 0x01...01
            // multiplier. One-byte operands skip the multiplication entirely.
  14393     if (src_abi_size > 4) {
  14394         try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
  14395         try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001);
  14396         try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
  14397         try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp);
  14398     } else {
  14399         try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
  14400         if (src_abi_size > 1) {
  14401             try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001);
  14402         }
  14403     }
  14404     // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
  14405     // dst = temp3 * 0x01...01
            // The final shift moves the top byte of the product down to the lowest byte;
            // it is skipped for one-byte operands, where the shift amount would be zero.
  14406     if (src_abi_size > 1) {
  14407         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u((src_abi_size - 1) * 8));
  14408     }
  14409     // dst = (temp3 * 0x01...01) >> (bits - 8)
  14410 }
  14411 
  14412 fn genByteSwap(
  14413     self: *CodeGen,
  14414     inst: Air.Inst.Index,
  14415     src_ty: Type,
  14416     src_mcv: MCValue,
  14417     mem_ok: bool,
  14418 ) !MCValue {
            // Emits code that reverses the byte order of `src_mcv` over the ABI size of
            // `src_ty` and returns the MCValue that holds the swapped value for `inst`.
            //
            // `mem_ok` controls whether the result of an operand of at most 8 bytes may
            // stay in memory: with `mem_ok` false, such an operand is only modified in
            // place when it is a register, and the fallback at the end always produces
            // a register.
            //
            // Vector types are rejected with a TODO failure. The strategy is chosen by
            // ABI size; 2-byte and 3..8-byte operands that cannot be modified in place
            // fall through the switch to the code at the end of the function.
  14419     const pt = self.pt;
  14420     const zcu = pt.zcu;
  14421     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14422     const has_movbe = self.hasFeature(.movbe);
  14423 
  14424     if (src_ty.zigTypeTag(zcu) == .vector) return self.fail(
  14425         "TODO implement genByteSwap for {}",
  14426         .{src_ty.fmt(pt)},
  14427     );
  14428 
            // Only a single-register source is locked; the lock lasts until return.
  14429     const src_lock = switch (src_mcv) {
  14430         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  14431         else => null,
  14432     };
  14433     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  14434 
  14435     const abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  14436     switch (abi_size) {
  14437         0 => unreachable,
                // One byte: nothing to swap. The operand itself is returned when it may
                // be reused, otherwise a register copy tracked for `inst`.
  14438         1 => return if ((mem_ok or src_mcv.isRegister()) and
  14439             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  14440             src_mcv
  14441         else
  14442             try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv),
                // Two bytes: a rotate left by 8 swaps the bytes in place when the
                // operand may be reused.
  14443         2 => if ((mem_ok or src_mcv.isRegister()) and
  14444             self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  14445         {
  14446             try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 });
  14447             return src_mcv;
  14448         },
                // Three to eight bytes: `bswap` in place, which is only done on a
                // reusable register operand regardless of `mem_ok`.
  14449         3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
  14450             try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv);
  14451             return src_mcv;
  14452         },
                // Nine to sixteen bytes: each 8-byte limb is byte-swapped and the two
                // limbs trade places in the returned register pair.
  14453         9...16 => {
                    // A reusable register pair is swapped in place.
  14454             switch (src_mcv) {
  14455                 .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
  14456                     for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64());
  14457                     return .{ .register_pair = .{ src_regs[1], src_regs[0] } };
  14458                 },
  14459                 else => {},
  14460             }
  14461 
                    // Otherwise two fresh registers, both associated with `inst`, receive
                    // the limbs.
  14462             const dst_regs =
  14463                 try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
  14464             const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
  14465             defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
  14466 
  14467             for (dst_regs, 0..) |dst_reg, limb_index| {
  14468                 if (src_mcv.isBase()) {
                            // Operand addressable as memory: load the limb with `movbe`
                            // when available, otherwise with `mov` followed by `bswap`.
  14469                     try self.asmRegisterMemory(
  14470                         .{ ._, if (has_movbe) .movbe else .mov },
  14471                         dst_reg.to64(),
  14472                         try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }),
  14473                     );
  14474                     if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
  14475                 } else {
                            // Otherwise the operand is accessed as a register pair.
  14476                     try self.asmRegisterRegister(
  14477                         .{ ._, .mov },
  14478                         dst_reg.to64(),
  14479                         src_mcv.register_pair[limb_index].to64(),
  14480                     );
  14481                     try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
  14482                 }
  14483             }
  14484             return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } };
  14485         },
                // More than sixteen bytes: a loop over 8-byte limbs, walking one index up
                // from 0 and one down from `limbs_len - 1`. Each iteration loads the two
                // addressed limbs, byte-swaps them, and stores each at the other's
                // position. The result is accessed as a frame slot (`.load_frame`).
                // NOTE(review): every load and store in this branch addresses the frame
                // slot of `dst_mcv`; no copy from `src_mcv` into `dst_mcv` is visible in
                // this function — confirm where the operand bytes come from.
  14486         else => {
  14487             const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
  14488 
                    // temp_regs[0]: ascending limb index, temp_regs[1]: descending limb
                    // index, temp_regs[2] and temp_regs[3]: the two limbs being swapped.
  14489             const temp_regs =
  14490                 try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
  14491             const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
  14492             defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
  14493 
  14494             const dst_mcv = try self.allocRegOrMem(inst, false);
  14495             try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());
  14496             try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1));
  14497 
                    // Index of the first instruction of the loop body, used as the
                    // backward jump target below.
  14498             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  14499             try self.asmRegisterMemory(
  14500                 .{ ._, if (has_movbe) .movbe else .mov },
  14501                 temp_regs[2].to64(),
  14502                 .{
  14503                     .base = .{ .frame = dst_mcv.load_frame.index },
  14504                     .mod = .{ .rm = .{
  14505                         .size = .qword,
  14506                         .index = temp_regs[0].to64(),
  14507                         .scale = .@"8",
  14508                         .disp = dst_mcv.load_frame.off,
  14509                     } },
  14510                 },
  14511             );
  14512             try self.asmRegisterMemory(
  14513                 .{ ._, if (has_movbe) .movbe else .mov },
  14514                 temp_regs[3].to64(),
  14515                 .{
  14516                     .base = .{ .frame = dst_mcv.load_frame.index },
  14517                     .mod = .{ .rm = .{
  14518                         .size = .qword,
  14519                         .index = temp_regs[1].to64(),
  14520                         .scale = .@"8",
  14521                         .disp = dst_mcv.load_frame.off,
  14522                     } },
  14523                 },
  14524             );
  14525             if (!has_movbe) {
  14526                 try self.asmRegister(.{ ._, .bswap }, temp_regs[2].to64());
  14527                 try self.asmRegister(.{ ._, .bswap }, temp_regs[3].to64());
  14528             }
                    // Cross-store: the limb loaded through the descending index goes to
                    // the ascending position and vice versa.
  14529             try self.asmMemoryRegister(.{ ._, .mov }, .{
  14530                 .base = .{ .frame = dst_mcv.load_frame.index },
  14531                 .mod = .{ .rm = .{
  14532                     .size = .qword,
  14533                     .index = temp_regs[0].to64(),
  14534                     .scale = .@"8",
  14535                     .disp = dst_mcv.load_frame.off,
  14536                 } },
  14537             }, temp_regs[3].to64());
  14538             try self.asmMemoryRegister(.{ ._, .mov }, .{
  14539                 .base = .{ .frame = dst_mcv.load_frame.index },
  14540                 .mod = .{ .rm = .{
  14541                     .size = .qword,
  14542                     .index = temp_regs[1].to64(),
  14543                     .scale = .@"8",
  14544                     .disp = dst_mcv.load_frame.off,
  14545                 } },
  14546             }, temp_regs[2].to64());
                    // Advance both indices, using add/sub instead of inc/dec on targets
                    // with the `slow_incdec` feature.
  14547             if (self.hasFeature(.slow_incdec)) {
  14548                 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
  14549                 try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
  14550             } else {
  14551                 try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
  14552                 try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
  14553             }
                    // Repeat while the ascending index is below or equal to the
                    // descending one; with an odd limb count the middle limb is therefore
                    // processed once with both indices equal.
  14554             try self.asmRegisterRegister(.{ ._, .cmp }, temp_regs[0].to32(), temp_regs[1].to32());
  14555             _ = try self.asmJccReloc(.be, loop);
  14556             return dst_mcv;
  14557         },
  14558     }
  14559 
            // Only reached by 2-byte and 3..8-byte operands that were not modified in
            // place above. The destination may be a register or memory only when
            // `mem_ok` is set, `movbe` is available and the source is a register;
            // otherwise it is a fresh gp register.
  14560     const dst_mcv: MCValue = if (mem_ok and has_movbe and src_mcv.isRegister())
  14561         try self.allocRegOrMem(inst, true)
  14562     else
  14563         .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) };
  14564     if (dst_mcv.getReg()) |dst_reg| {
                // Register destination: copy the operand, then rotate (2 bytes) or
                // `bswap` (3..8 bytes) the copy.
  14565         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
  14566         defer self.register_manager.unlockReg(dst_lock);
  14567 
  14568         try self.genSetReg(dst_reg, src_ty, src_mcv, .{});
  14569         switch (abi_size) {
  14570             else => unreachable,
  14571             2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
  14572             3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
  14573         }
            // Non-register destination: a single `movbe` from the source operand.
  14574     } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
  14575     return dst_mcv;
  14576 }
  14577 
  14578 fn airByteSwap(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers the byte-swap AIR instruction `inst` (a `ty_op` instruction).
            //
            // `genByteSwap` is called with `mem_ok` set to true. Its result is then
            // shifted right by the number of padding bits of the operand type
            // (`abiSize * 8 - bitSize`), and finally handed to `finishAir`.
  14579     const pt = self.pt;
  14580     const zcu = pt.zcu;
  14581     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14582 
  14583     const src_ty = self.typeOf(ty_op.operand);
  14584     const src_bits: u32 = @intCast(src_ty.bitSize(zcu));
  14585     const src_mcv = try self.resolveInst(ty_op.operand);
  14586 
  14587     const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, true);
            // The shift is arithmetic for signed ABI integers and logical for everything
            // else. The shift-amount type is u16 for operands wider than 256 bits and u8
            // otherwise. The shift is emitted unconditionally, including when the number
            // of padding bits is zero.
  14588     try self.genShiftBinOpMir(
  14589         .{ ._r, switch (if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned) {
  14590             .signed => .sa,
  14591             .unsigned => .sh,
  14592         } },
  14593         src_ty,
  14594         dst_mcv,
  14595         if (src_bits > 256) .u16 else .u8,
  14596         .{ .immediate = src_ty.abiSize(zcu) * 8 - src_bits },
  14597     );
  14598     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  14599 }
  14600 
  14601 fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers the bit-reverse AIR instruction `inst` (a `ty_op` instruction).
            //
            // Steps:
            // 1. `genByteSwap` (with `mem_ok` false) reverses the byte order.
            // 2. For every register of the result, the bits inside each byte are
            //    reversed in three exchange stages: 4-bit groups, 2-bit groups, then
            //    single bits.
            // 3. If the type has padding bits (`abi_size * 8 - bit_size`), the result is
            //    shifted right by that amount.
  14602     const pt = self.pt;
  14603     const zcu = pt.zcu;
  14604     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  14605 
  14606     const src_ty = self.typeOf(ty_op.operand);
  14607     const abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  14608     const bit_size: u32 = @intCast(src_ty.bitSize(zcu));
  14609     const src_mcv = try self.resolveInst(ty_op.operand);
  14610 
  14611     const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, false);
            // Only a register or register-pair result is handled here.
            // NOTE(review): `genByteSwap` returns a value accessed as `.load_frame` for
            // operands larger than 16 bytes, which would reach `unreachable` below —
            // presumably such operands never get here; confirm.
  14612     const dst_locks: [2]?RegisterLock = switch (dst_mcv) {
  14613         .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
  14614         .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs),
  14615         else => unreachable,
  14616     };
  14617     defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  14618 
  14619     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  14620     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  14621     defer self.register_manager.unlockReg(tmp_lock);
  14622 
            // Each register is processed as a limb of at most 8 bytes. `imm` is only
            // allocated when the limb is wider than 4 bytes, where the masks are moved
            // into a register first; otherwise it is left undefined and is only
            // referenced inside `limb_abi_size > 4` branches.
  14623     const limb_abi_size: u32 = @min(abi_size, 8);
  14624     const tmp = registerAlias(tmp_reg, limb_abi_size);
  14625     const imm = if (limb_abi_size > 4)
  14626         try self.register_manager.allocReg(null, abi.RegisterClass.gp)
  14627     else
  14628         undefined;
  14629 
            // `mask` has the low `limb_abi_size` bytes set. Dividing it by 0b0001_0001,
            // 0b0101 and 0b11 yields the repeating byte patterns 0x0f, 0x33 and 0x55
            // respectively, sized to the limb.
  14630     const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_size * 8);
  14631     const imm_0000_1111: Immediate = .u(mask / 0b0001_0001);
  14632     const imm_00_11: Immediate = .u(mask / 0b01_01);
  14633     const imm_0_1: Immediate = .u(mask / 0b1_1);
  14634 
  14635     for (dst_mcv.getRegs()) |dst_reg| {
  14636         const dst = registerAlias(dst_reg, limb_abi_size);
  14637 
  14638         // dst = temp1 = bswap(operand)
  14639         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
  14640         // tmp = temp1
  14641         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(4));
  14642         // dst = temp1 >> 4
  14643         if (limb_abi_size > 4) {
  14644             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
  14645             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
  14646             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
  14647         } else {
  14648             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111);
  14649             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
  14650         }
  14651         // tmp = temp1 & 0x0F...0F
  14652         // dst = (temp1 >> 4) & 0x0F...0F
  14653         try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, .u(4));
  14654         // tmp = (temp1 & 0x0F...0F) << 4
  14655         try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp);
  14656         // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4)
  14657         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
  14658         // tmp = temp2
  14659         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2));
  14660         // dst = temp2 >> 2
  14661         if (limb_abi_size > 4) {
  14662             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
  14663             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
  14664             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
  14665         } else {
  14666             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
  14667             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
  14668         }
  14669         // tmp = temp2 & 0x33...33
  14670         // dst = (temp2 >> 2) & 0x33...33
                // A single `lea` with scale 4 performs both the shift left by 2 and the
                // addition of the two halves.
  14671         try self.asmRegisterMemory(
  14672             .{ ._, .lea },
  14673             if (limb_abi_size > 4) tmp.to64() else tmp.to32(),
  14674             .{
  14675                 .base = .{ .reg = dst.to64() },
  14676                 .mod = .{ .rm = .{
  14677                     .size = .qword,
  14678                     .index = tmp.to64(),
  14679                     .scale = .@"4",
  14680                 } },
  14681             },
  14682         );
  14683         // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2)
  14684         try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
  14685         // dst = temp3
  14686         try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1));
  14687         // tmp = temp3 >> 1
  14688         if (limb_abi_size > 4) {
  14689             try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
  14690             try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
  14691             try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
  14692         } else {
  14693             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1);
  14694             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
  14695         }
  14696         // dst = temp3 & 0x55...55
  14697         // tmp = (temp3 >> 1) & 0x55...55
                // As above, `lea` with scale 2 combines the shift left by 1 with the
                // addition.
  14698         try self.asmRegisterMemory(
  14699             .{ ._, .lea },
  14700             if (limb_abi_size > 4) dst.to64() else dst.to32(),
  14701             .{
  14702                 .base = .{ .reg = tmp.to64() },
  14703                 .mod = .{ .rm = .{
  14704                     .size = .qword,
  14705                     .index = dst.to64(),
  14706                     .scale = .@"2",
  14707                 } },
  14708             },
  14709         );
  14710         // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1)
  14711     }
  14712 
            // Shift out the padding bits, if any: arithmetic shift for signed ABI
            // integers, logical shift for everything else.
  14713     const extra_bits = abi_size * 8 - bit_size;
  14714     const signedness: std.builtin.Signedness =
  14715         if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned;
  14716     if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) {
  14717         .signed => .{ ._r, .sa },
  14718         .unsigned => .{ ._r, .sh },
  14719     }, src_ty, dst_mcv, .u8, .{ .immediate = extra_bits });
  14720 
  14721     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  14722 }
  14723 
  14724 fn floatSign(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) !void {
  14725     const pt = self.pt;
  14726     const zcu = pt.zcu;
  14727     const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
  14728 
  14729     const result = result: {
  14730         const scalar_bits = ty.scalarType(zcu).floatBits(self.target.*);
  14731         if (scalar_bits == 80) {
  14732             if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement floatSign for {}", .{
  14733                 ty.fmt(pt),
  14734             });
  14735 
  14736             const src_mcv = try self.resolveInst(operand);
  14737             const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  14738             defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  14739 
  14740             const dst_mcv: MCValue = .{ .register = .st0 };
  14741             if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv))
  14742                 try self.register_manager.getKnownReg(.st0, inst);
  14743 
  14744             try self.genCopy(ty, dst_mcv, src_mcv, .{});
  14745             switch (tag) {
  14746                 .neg => try self.asmOpOnly(.{ .f_, .chs }),
  14747                 .abs => try self.asmOpOnly(.{ .f_, .abs }),
  14748                 else => unreachable,
  14749             }
  14750             break :result dst_mcv;
  14751         }
  14752 
  14753         const abi_size: u32 = switch (ty.abiSize(zcu)) {
  14754             1...16 => 16,
  14755             17...32 => 32,
  14756             else => return self.fail("TODO implement floatSign for {}", .{
  14757                 ty.fmt(pt),
  14758             }),
  14759         };
  14760 
  14761         const src_mcv = try self.resolveInst(operand);
  14762         const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  14763         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  14764 
  14765         const dst_mcv: MCValue = if (src_mcv.isRegister() and
  14766             self.reuseOperand(inst, operand, 0, src_mcv))
  14767             src_mcv
  14768         else if (self.hasFeature(.avx))
  14769             .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  14770         else
  14771             try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
  14772         const dst_reg = dst_mcv.getReg().?;
  14773         const dst_lock = self.register_manager.lockReg(dst_reg);
  14774         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  14775 
  14776         const vec_ty = try pt.vectorType(.{
  14777             .len = @divExact(abi_size * 8, scalar_bits),
  14778             .child = (try pt.intType(.signed, scalar_bits)).ip_index,
  14779         });
  14780 
  14781         const sign_mcv = try self.genTypedValue(switch (tag) {
  14782             .neg => try vec_ty.minInt(pt, vec_ty),
  14783             .abs => try vec_ty.maxInt(pt, vec_ty),
  14784             else => unreachable,
  14785         });
  14786         const sign_mem: Memory = if (sign_mcv.isBase())
  14787             try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) })
  14788         else
  14789             .{
  14790                 .base = .{ .reg = try self.copyToTmpRegister(.usize, sign_mcv.address()) },
  14791                 .mod = .{ .rm = .{ .size = .fromSize(abi_size) } },
  14792             };
  14793 
  14794         if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
  14795             switch (scalar_bits) {
  14796                 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
  14797                     .neg => .{ .vp_, .xor },
  14798                     .abs => .{ .vp_, .@"and" },
  14799                     else => unreachable,
  14800                 } else switch (tag) {
  14801                     .neg => .{ .v_ps, .xor },
  14802                     .abs => .{ .v_ps, .@"and" },
  14803                     else => unreachable,
  14804                 },
  14805                 32 => switch (tag) {
  14806                     .neg => .{ .v_ps, .xor },
  14807                     .abs => .{ .v_ps, .@"and" },
  14808                     else => unreachable,
  14809                 },
  14810                 64 => switch (tag) {
  14811                     .neg => .{ .v_pd, .xor },
  14812                     .abs => .{ .v_pd, .@"and" },
  14813                     else => unreachable,
  14814                 },
  14815                 80 => return self.fail("TODO implement floatSign for {}", .{ty.fmt(pt)}),
  14816                 else => unreachable,
  14817             },
  14818             registerAlias(dst_reg, abi_size),
  14819             registerAlias(if (src_mcv.isRegister())
  14820                 src_mcv.getReg().?
  14821             else
  14822                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
  14823             sign_mem,
  14824         ) else try self.asmRegisterMemory(
  14825             switch (scalar_bits) {
  14826                 16, 128 => switch (tag) {
  14827                     .neg => .{ .p_, .xor },
  14828                     .abs => .{ .p_, .@"and" },
  14829                     else => unreachable,
  14830                 },
  14831                 32 => switch (tag) {
  14832                     .neg => .{ ._ps, .xor },
  14833                     .abs => .{ ._ps, .@"and" },
  14834                     else => unreachable,
  14835                 },
  14836                 64 => switch (tag) {
  14837                     .neg => .{ ._pd, .xor },
  14838                     .abs => .{ ._pd, .@"and" },
  14839                     else => unreachable,
  14840                 },
  14841                 80 => return self.fail("TODO implement floatSign for {}", .{ty.fmt(pt)}),
  14842                 else => unreachable,
  14843             },
  14844             registerAlias(dst_reg, abi_size),
  14845             sign_mem,
  14846         );
  14847         break :result dst_mcv;
  14848     };
  14849     return self.finishAir(inst, result, .{ operand, .none, .none });
  14850 }
  14851 
  14852 fn airFloatSign(self: *CodeGen, inst: Air.Inst.Index) !void {
  14853     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  14854     const ty = self.typeOf(un_op);
  14855     return self.floatSign(inst, un_op, ty);
  14856 }
  14857 
  14858 const RoundMode = packed struct(u5) {
  14859     mode: enum(u4) {
  14860         /// Round to nearest (even)
  14861         nearest = 0b0_00,
  14862         /// Round down (toward -∞)
  14863         down = 0b0_01,
  14864         /// Round up (toward +∞)
  14865         up = 0b0_10,
  14866         /// Round toward zero (truncate)
  14867         zero = 0b0_11,
  14868         /// Use current rounding mode of MXCSR.RC
  14869         mxcsr = 0b1_00,
  14870     },
  14871     precision: enum(u1) {
  14872         normal = 0b0,
  14873         inexact = 0b1,
  14874     } = .normal,
  14875 };
  14876 
  14877 fn airRound(self: *CodeGen, inst: Air.Inst.Index, mode: RoundMode) !void {
  14878     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  14879     const ty = self.typeOf(un_op);
  14880 
  14881     const result = result: {
  14882         switch (try self.genRoundLibcall(ty, .{ .air_ref = un_op }, mode)) {
  14883             .none => {},
  14884             else => |dst_mcv| break :result dst_mcv,
  14885         }
  14886 
  14887         const src_mcv = try self.resolveInst(un_op);
  14888         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
  14889             src_mcv
  14890         else
  14891             try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
  14892         const dst_reg = dst_mcv.getReg().?;
  14893         const dst_lock = self.register_manager.lockReg(dst_reg);
  14894         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  14895         try self.genRound(ty, dst_reg, src_mcv, mode);
  14896         break :result dst_mcv;
  14897     };
  14898     return self.finishAir(inst, result, .{ un_op, .none, .none });
  14899 }
  14900 
  14901 fn getRoundTag(self: *CodeGen, ty: Type) ?Mir.Inst.FixedTag {
  14902     const pt = self.pt;
  14903     const zcu = pt.zcu;
  14904     return if (self.hasFeature(.sse4_1)) switch (ty.zigTypeTag(zcu)) {
  14905         .float => switch (ty.floatBits(self.target.*)) {
  14906             32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
  14907             64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
  14908             16, 80, 128 => null,
  14909             else => unreachable,
  14910         },
  14911         .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  14912             .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  14913                 32 => switch (ty.vectorLen(zcu)) {
  14914                     1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
  14915                     2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
  14916                     5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
  14917                     else => null,
  14918                 },
  14919                 64 => switch (ty.vectorLen(zcu)) {
  14920                     1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
  14921                     2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
  14922                     3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
  14923                     else => null,
  14924                 },
  14925                 16, 80, 128 => null,
  14926                 else => unreachable,
  14927             },
  14928             else => null,
  14929         },
  14930         else => unreachable,
  14931     } else null;
  14932 }
  14933 
  14934 fn genRoundLibcall(self: *CodeGen, ty: Type, src_mcv: MCValue, mode: RoundMode) !MCValue {
  14935     const pt = self.pt;
  14936     const zcu = pt.zcu;
  14937     if (self.getRoundTag(ty)) |_| return .none;
  14938 
  14939     if (ty.zigTypeTag(zcu) != .float)
  14940         return self.fail("TODO implement genRound for {}", .{ty.fmt(pt)});
  14941 
  14942     var callee_buf: ["__trunc?".len]u8 = undefined;
  14943     return try self.genCall(.{ .lib = .{
  14944         .return_type = ty.toIntern(),
  14945         .param_types = &.{ty.toIntern()},
  14946         .callee = std.fmt.bufPrint(&callee_buf, "{s}{s}{s}", .{
  14947             floatLibcAbiPrefix(ty),
  14948             switch (mode.mode) {
  14949                 .down => "floor",
  14950                 .up => "ceil",
  14951                 .zero => "trunc",
  14952                 else => unreachable,
  14953             },
  14954             floatLibcAbiSuffix(ty),
  14955         }) catch unreachable,
  14956     } }, &.{ty}, &.{src_mcv}, .{});
  14957 }
  14958 
  14959 fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: RoundMode) !void {
  14960     const pt = self.pt;
  14961     const mir_tag = self.getRoundTag(ty) orelse {
  14962         const result = try self.genRoundLibcall(ty, src_mcv, mode);
  14963         return self.genSetReg(dst_reg, ty, result, .{});
  14964     };
  14965     const abi_size: u32 = @intCast(ty.abiSize(pt.zcu));
  14966     const dst_alias = registerAlias(dst_reg, abi_size);
  14967     switch (mir_tag[0]) {
  14968         .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  14969             mir_tag,
  14970             dst_alias,
  14971             dst_alias,
  14972             try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  14973             .u(@as(u5, @bitCast(mode))),
  14974         ) else try self.asmRegisterRegisterRegisterImmediate(
  14975             mir_tag,
  14976             dst_alias,
  14977             dst_alias,
  14978             registerAlias(if (src_mcv.isRegister())
  14979                 src_mcv.getReg().?
  14980             else
  14981                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
  14982             .u(@as(u5, @bitCast(mode))),
  14983         ),
  14984         else => if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  14985             mir_tag,
  14986             dst_alias,
  14987             try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  14988             .u(@as(u5, @bitCast(mode))),
  14989         ) else try self.asmRegisterRegisterImmediate(
  14990             mir_tag,
  14991             dst_alias,
  14992             registerAlias(if (src_mcv.isRegister())
  14993                 src_mcv.getReg().?
  14994             else
  14995                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
  14996             .u(@as(u5, @bitCast(mode))),
  14997         ),
  14998     }
  14999 }
  15000 
  15001 fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void {
  15002     const pt = self.pt;
  15003     const zcu = pt.zcu;
  15004     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  15005     const ty = self.typeOf(ty_op.operand);
  15006 
  15007     const result: MCValue = result: {
  15008         const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) {
  15009             else => null,
  15010             .int => switch (ty.abiSize(zcu)) {
  15011                 0 => unreachable,
  15012                 1...8 => {
  15013                     try self.spillEflagsIfOccupied();
  15014                     const src_mcv = try self.resolveInst(ty_op.operand);
  15015                     const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
  15016 
  15017                     try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
  15018 
  15019                     const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
  15020                     switch (src_mcv) {
  15021                         .register => |val_reg| try self.asmCmovccRegisterRegister(
  15022                             .l,
  15023                             registerAlias(dst_mcv.register, cmov_abi_size),
  15024                             registerAlias(val_reg, cmov_abi_size),
  15025                         ),
  15026                         .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
  15027                             .l,
  15028                             registerAlias(dst_mcv.register, cmov_abi_size),
  15029                             try src_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }),
  15030                         ),
  15031                         else => {
  15032                             const val_reg = try self.copyToTmpRegister(ty, src_mcv);
  15033                             try self.asmCmovccRegisterRegister(
  15034                                 .l,
  15035                                 registerAlias(dst_mcv.register, cmov_abi_size),
  15036                                 registerAlias(val_reg, cmov_abi_size),
  15037                             );
  15038                         },
  15039                     }
  15040                     break :result dst_mcv;
  15041                 },
  15042                 9...16 => {
  15043                     try self.spillEflagsIfOccupied();
  15044                     const src_mcv = try self.resolveInst(ty_op.operand);
  15045                     const dst_mcv = if (src_mcv == .register_pair and
  15046                         self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
  15047                         const dst_regs = try self.register_manager.allocRegs(
  15048                             2,
  15049                             .{ inst, inst },
  15050                             abi.RegisterClass.gp,
  15051                         );
  15052                         const dst_mcv: MCValue = .{ .register_pair = dst_regs };
  15053                         const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
  15054                         defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
  15055 
  15056                         try self.genCopy(ty, dst_mcv, src_mcv, .{});
  15057                         break :dst dst_mcv;
  15058                     };
  15059                     const dst_regs = dst_mcv.register_pair;
  15060                     const dst_locks = self.register_manager.lockRegs(2, dst_regs);
  15061                     defer for (dst_locks) |dst_lock| if (dst_lock) |lock|
  15062                         self.register_manager.unlockReg(lock);
  15063 
  15064                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  15065                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  15066                     defer self.register_manager.unlockReg(tmp_lock);
  15067 
  15068                     try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
  15069                     try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63));
  15070                     try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[0], tmp_reg);
  15071                     try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[1], tmp_reg);
  15072                     try self.asmRegisterRegister(.{ ._, .sub }, dst_regs[0], tmp_reg);
  15073                     try self.asmRegisterRegister(.{ ._, .sbb }, dst_regs[1], tmp_reg);
  15074 
  15075                     break :result dst_mcv;
  15076                 },
  15077                 else => {
  15078                     const abi_size: u31 = @intCast(ty.abiSize(zcu));
  15079                     const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable;
  15080 
  15081                     const tmp_regs =
  15082                         try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp);
  15083                     const tmp_locks = self.register_manager.lockRegsAssumeUnused(3, tmp_regs);
  15084                     defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
  15085 
  15086                     try self.spillEflagsIfOccupied();
  15087                     const src_mcv = try self.resolveInst(ty_op.operand);
  15088                     const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  15089                         src_mcv
  15090                     else
  15091                         try self.allocRegOrMem(inst, false);
  15092 
  15093                     try self.asmMemoryImmediate(
  15094                         .{ ._, .cmp },
  15095                         try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .{ .size = .qword }),
  15096                         .u(0),
  15097                     );
  15098                     const positive = try self.asmJccReloc(.ns, undefined);
  15099 
  15100                     try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[0].to32(), tmp_regs[0].to32());
  15101                     try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[1].to8(), tmp_regs[1].to8());
  15102 
  15103                     const neg_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  15104                     try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[2].to32(), tmp_regs[2].to32());
  15105                     try self.asmRegisterImmediate(.{ ._r, .sh }, tmp_regs[1].to8(), .u(1));
  15106                     try self.asmRegisterMemory(.{ ._, .sbb }, tmp_regs[2].to64(), .{
  15107                         .base = .{ .frame = dst_mcv.load_frame.index },
  15108                         .mod = .{ .rm = .{
  15109                             .size = .qword,
  15110                             .index = tmp_regs[0].to64(),
  15111                             .scale = .@"8",
  15112                             .disp = dst_mcv.load_frame.off,
  15113                         } },
  15114                     });
  15115                     try self.asmSetccRegister(.c, tmp_regs[1].to8());
  15116                     try self.asmMemoryRegister(.{ ._, .mov }, .{
  15117                         .base = .{ .frame = dst_mcv.load_frame.index },
  15118                         .mod = .{ .rm = .{
  15119                             .size = .qword,
  15120                             .index = tmp_regs[0].to64(),
  15121                             .scale = .@"8",
  15122                             .disp = dst_mcv.load_frame.off,
  15123                         } },
  15124                     }, tmp_regs[2].to64());
  15125 
  15126                     if (self.hasFeature(.slow_incdec)) {
  15127                         try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1));
  15128                     } else {
  15129                         try self.asmRegister(.{ ._, .inc }, tmp_regs[0].to32());
  15130                     }
  15131                     try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len));
  15132                     _ = try self.asmJccReloc(.b, neg_loop);
  15133 
  15134                     self.performReloc(positive);
  15135                     break :result dst_mcv;
  15136                 },
  15137             },
  15138             .float => return self.floatSign(inst, ty_op.operand, ty),
  15139             .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  15140                 else => null,
  15141                 .int => switch (ty.childType(zcu).intInfo(zcu).bits) {
  15142                     else => null,
  15143                     8 => switch (ty.vectorLen(zcu)) {
  15144                         else => null,
  15145                         1...16 => if (self.hasFeature(.avx))
  15146                             .{ .vp_b, .abs }
  15147                         else if (self.hasFeature(.ssse3))
  15148                             .{ .p_b, .abs }
  15149                         else
  15150                             null,
  15151                         17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
  15152                     },
  15153                     16 => switch (ty.vectorLen(zcu)) {
  15154                         else => null,
  15155                         1...8 => if (self.hasFeature(.avx))
  15156                             .{ .vp_w, .abs }
  15157                         else if (self.hasFeature(.ssse3))
  15158                             .{ .p_w, .abs }
  15159                         else
  15160                             null,
  15161                         9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
  15162                     },
  15163                     32 => switch (ty.vectorLen(zcu)) {
  15164                         else => null,
  15165                         1...4 => if (self.hasFeature(.avx))
  15166                             .{ .vp_d, .abs }
  15167                         else if (self.hasFeature(.ssse3))
  15168                             .{ .p_d, .abs }
  15169                         else
  15170                             null,
  15171                         5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
  15172                     },
  15173                 },
  15174                 .float => return self.floatSign(inst, ty_op.operand, ty),
  15175             },
  15176         }) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(pt)});
  15177 
  15178         const abi_size: u32 = @intCast(ty.abiSize(zcu));
  15179         const src_mcv = try self.resolveInst(ty_op.operand);
  15180         const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  15181             src_mcv.getReg().?
  15182         else
  15183             try self.register_manager.allocReg(inst, self.regSetForType(ty));
  15184         const dst_alias = registerAlias(dst_reg, abi_size);
  15185         if (src_mcv.isBase()) try self.asmRegisterMemory(
  15186             mir_tag,
  15187             dst_alias,
  15188             try src_mcv.mem(self, .{ .size = self.memSize(ty) }),
  15189         ) else try self.asmRegisterRegister(
  15190             mir_tag,
  15191             dst_alias,
  15192             registerAlias(if (src_mcv.isRegister())
  15193                 src_mcv.getReg().?
  15194             else
  15195                 try self.copyToTmpRegister(ty, src_mcv), abi_size),
  15196         );
  15197         break :result .{ .register = dst_reg };
  15198     };
  15199     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  15200 }
  15201 
  15202 fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void {
  15203     const pt = self.pt;
  15204     const zcu = pt.zcu;
  15205     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  15206     const ty = self.typeOf(un_op);
  15207     const abi_size: u32 = @intCast(ty.abiSize(zcu));
  15208 
  15209     const result: MCValue = result: {
  15210         switch (ty.zigTypeTag(zcu)) {
  15211             .float => {
  15212                 const float_bits = ty.floatBits(self.target.*);
  15213                 if (switch (float_bits) {
  15214                     16 => !self.hasFeature(.f16c),
  15215                     32, 64 => false,
  15216                     80, 128 => true,
  15217                     else => unreachable,
  15218                 }) {
  15219                     var callee_buf: ["__sqrt?".len]u8 = undefined;
  15220                     break :result try self.genCall(.{ .lib = .{
  15221                         .return_type = ty.toIntern(),
  15222                         .param_types = &.{ty.toIntern()},
  15223                         .callee = std.fmt.bufPrint(&callee_buf, "{s}sqrt{s}", .{
  15224                             floatLibcAbiPrefix(ty),
  15225                             floatLibcAbiSuffix(ty),
  15226                         }) catch unreachable,
  15227                     } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{});
  15228                 }
  15229             },
  15230             else => {},
  15231         }
  15232 
  15233         const src_mcv = try self.resolveInst(un_op);
  15234         const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
  15235             src_mcv
  15236         else
  15237             try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
  15238         const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
  15239         const dst_lock = self.register_manager.lockReg(dst_reg);
  15240         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  15241 
  15242         const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) {
  15243             .float => switch (ty.floatBits(self.target.*)) {
  15244                 16 => {
  15245                     assert(self.hasFeature(.f16c));
  15246                     const mat_src_reg = if (src_mcv.isRegister())
  15247                         src_mcv.getReg().?
  15248                     else
  15249                         try self.copyToTmpRegister(ty, src_mcv);
  15250                     try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
  15251                     try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
  15252                     try self.asmRegisterRegisterImmediate(
  15253                         .{ .v_, .cvtps2ph },
  15254                         dst_reg,
  15255                         dst_reg,
  15256                         .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  15257                     );
  15258                     break :result dst_mcv;
  15259                 },
  15260                 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
  15261                 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
  15262                 else => unreachable,
  15263             },
  15264             .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  15265                 .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  15266                     16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(zcu)) {
  15267                         1 => {
  15268                             try self.asmRegisterRegister(
  15269                                 .{ .v_ps, .cvtph2 },
  15270                                 dst_reg,
  15271                                 (if (src_mcv.isRegister())
  15272                                     src_mcv.getReg().?
  15273                                 else
  15274                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
  15275                             );
  15276                             try self.asmRegisterRegisterRegister(
  15277                                 .{ .v_ss, .sqrt },
  15278                                 dst_reg,
  15279                                 dst_reg,
  15280                                 dst_reg,
  15281                             );
  15282                             try self.asmRegisterRegisterImmediate(
  15283                                 .{ .v_, .cvtps2ph },
  15284                                 dst_reg,
  15285                                 dst_reg,
  15286                                 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  15287                             );
  15288                             break :result dst_mcv;
  15289                         },
  15290                         2...8 => {
  15291                             const wide_reg = registerAlias(dst_reg, abi_size * 2);
  15292                             if (src_mcv.isBase()) try self.asmRegisterMemory(
  15293                                 .{ .v_ps, .cvtph2 },
  15294                                 wide_reg,
  15295                                 try src_mcv.mem(self, .{ .size = .fromSize(
  15296                                     @intCast(@divExact(wide_reg.bitSize(), 16)),
  15297                                 ) }),
  15298                             ) else try self.asmRegisterRegister(
  15299                                 .{ .v_ps, .cvtph2 },
  15300                                 wide_reg,
  15301                                 (if (src_mcv.isRegister())
  15302                                     src_mcv.getReg().?
  15303                                 else
  15304                                     try self.copyToTmpRegister(ty, src_mcv)).to128(),
  15305                             );
  15306                             try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
  15307                             try self.asmRegisterRegisterImmediate(
  15308                                 .{ .v_, .cvtps2ph },
  15309                                 dst_reg,
  15310                                 wide_reg,
  15311                                 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  15312                             );
  15313                             break :result dst_mcv;
  15314                         },
  15315                         else => null,
  15316                     } else null,
  15317                     32 => switch (ty.vectorLen(zcu)) {
  15318                         1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
  15319                         2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
  15320                         5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
  15321                         else => null,
  15322                     },
  15323                     64 => switch (ty.vectorLen(zcu)) {
  15324                         1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
  15325                         2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
  15326                         3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
  15327                         else => null,
  15328                     },
  15329                     80, 128 => null,
  15330                     else => unreachable,
  15331                 },
  15332                 else => unreachable,
  15333             },
  15334             else => unreachable,
  15335         }) orelse return self.fail("TODO implement airSqrt for {}", .{ty.fmt(pt)});
  15336         switch (mir_tag[0]) {
  15337             .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
  15338                 mir_tag,
  15339                 dst_reg,
  15340                 dst_reg,
  15341                 try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  15342             ) else try self.asmRegisterRegisterRegister(
  15343                 mir_tag,
  15344                 dst_reg,
  15345                 dst_reg,
  15346                 registerAlias(if (src_mcv.isRegister())
  15347                     src_mcv.getReg().?
  15348                 else
  15349                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
  15350             ),
  15351             else => if (src_mcv.isBase()) try self.asmRegisterMemory(
  15352                 mir_tag,
  15353                 dst_reg,
  15354                 try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  15355             ) else try self.asmRegisterRegister(
  15356                 mir_tag,
  15357                 dst_reg,
  15358                 registerAlias(if (src_mcv.isRegister())
  15359                     src_mcv.getReg().?
  15360                 else
  15361                     try self.copyToTmpRegister(ty, src_mcv), abi_size),
  15362             ),
  15363         }
  15364         break :result dst_mcv;
  15365     };
  15366     return self.finishAir(inst, result, .{ un_op, .none, .none });
  15367 }
  15368 
  /// Lowers a unary floating-point math AIR instruction to a library call.
  ///
  /// The callee name is assembled as `floatLibcAbiPrefix(ty)` + the AIR tag name +
  /// `floatLibcAbiSuffix(ty)`. The operand type is used both as the single parameter
  /// type and as the return type. Only the tags listed in the switch below are
  /// accepted; any other `tag` hits `unreachable`.
  15369 fn airUnaryMath(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
  15370     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  15371     const ty = self.typeOf(un_op);
            // Fixed 8-byte stack buffer (the length of "__round?"). `bufPrint` failing is
            // declared impossible via `catch unreachable` below, so every
            // prefix/name/suffix combination is assumed to fit in it.
  15372     var callee_buf: ["__round?".len]u8 = undefined;
  15373     const result = try self.genCall(.{ .lib = .{
  15374         .return_type = ty.toIntern(),
  15375         .param_types = &.{ty.toIntern()},
  15376         .callee = std.fmt.bufPrint(&callee_buf, "{s}{s}{s}", .{
  15377             floatLibcAbiPrefix(ty),
  15378             switch (tag) {
  15379                 .sin,
  15380                 .cos,
  15381                 .tan,
  15382                 .exp,
  15383                 .exp2,
  15384                 .log,
  15385                 .log2,
  15386                 .log10,
  15387                 .round,
  15388                 => @tagName(tag),
  15389                 else => unreachable,
  15390             },
  15391             floatLibcAbiSuffix(ty),
  15392         }) catch unreachable,
  15393     } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{});
  15394     return self.finishAir(inst, result, .{ un_op, .none, .none });
  15395 }
  15396 
  /// Convenience wrapper around `reuseOperandAdvanced` that passes `inst` itself as
  /// the instruction that takes over tracking of the operand's storage.
  /// Returns whatever `reuseOperandAdvanced` returns.
  15397 fn reuseOperand(
  15398     self: *CodeGen,
  15399     inst: Air.Inst.Index,
  15400     operand: Air.Inst.Ref,
  15401     op_index: Liveness.OperandInt,
  15402     mcv: MCValue,
  15403 ) bool {
  15404     return self.reuseOperandAdvanced(inst, operand, op_index, mcv, inst);
  15405 }
  15406 
  /// Attempts to hand the storage described by `mcv` (the value of operand number
  /// `op_index` of `inst`) over to `maybe_tracked_inst`.
  ///
  /// Returns `false` without side effects when liveness says the operand does not die
  /// at `inst`, when `mcv` is not one of the register-like variants or `.load_frame`,
  /// or when it is a `.load_frame` whose frame index `isNamed()`.
  ///
  /// On success the operand is recorded in `reused_operands`, the operand's resolved
  /// instruction value is told about the reuse, and `true` is returned. When
  /// `maybe_tracked_inst` is `null` the registers of `mcv` are freed instead of being
  /// re-associated with an instruction.
  15407 fn reuseOperandAdvanced(
  15408     self: *CodeGen,
  15409     inst: Air.Inst.Index,
  15410     operand: Air.Inst.Ref,
  15411     op_index: Liveness.OperandInt,
  15412     mcv: MCValue,
  15413     maybe_tracked_inst: ?Air.Inst.Index,
  15414 ) bool {
  15415     if (!self.liveness.operandDies(inst, op_index))
  15416         return false;
  15417 
  15418     switch (mcv) {
  15419         .register, .register_pair, .register_overflow, .register_mask => for (mcv.getRegs()) |reg| {
  15420             // If it's in the registers table, need to associate the register(s) with the
  15421             // new instruction.
  15422             if (maybe_tracked_inst) |tracked_inst| {
  15423                 if (!self.register_manager.isRegFree(reg)) {
  15424                     if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
  15425                         self.register_manager.registers[index] = tracked_inst;
  15426                     }
  15427                 }
  15428             } else self.register_manager.freeReg(reg);
  15429         },
                // Frame slots are reusable unless their index reports `isNamed()`.
  15430         .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
  15431         else => return false,
  15432     }
            // NOTE(review): `.eflags` appears unreachable here because the switch above
            // returns `false` for it via `else`; only `.register_overflow` can arrive.
  15433     switch (mcv) {
  15434         .eflags, .register_overflow => self.eflags_inst = maybe_tracked_inst,
  15435         else => {},
  15436     }
  15437 
  15438     // Prevent the operand deaths processing code from deallocating it.
  15439     self.reused_operands.set(op_index);
            // `.?` asserts that `operand` refers to an instruction index.
  15440     const op_inst = operand.toIndex().?;
  15441     self.getResolvedInstValue(op_inst).reuse(self, maybe_tracked_inst, op_inst);
  15442 
  15443     return true;
  15444 }
  15445 
  /// Loads the child value of `ptr_ty` from `ptr_mcv` into `dst_mcv`, honoring the
  /// pointer's packed bit offset and (comptime-known) vector index.
  ///
  /// Does nothing when the child type has no runtime bits. A `.runtime` vector index
  /// is `unreachable` here.
  ///
  /// Two strategies are used:
  /// * Bit offset is a multiple of 8: the pointer is advanced by whole bytes and the
  ///   regular `load` is used, followed by truncation of any bits above the value's
  ///   bit size.
  /// * Otherwise: the value is fetched with one `mov` + right shift, or with two
  ///   `mov`s + a double-precision right shift, then truncated and copied to
  ///   `dst_mcv`. Values whose ABI size exceeds 8 bytes fail with a TODO error.
  15446 fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
  15447     const pt = self.pt;
  15448     const zcu = pt.zcu;
  15449 
  15450     const ptr_info = ptr_ty.ptrInfo(zcu);
  15451     const val_ty: Type = .fromInterned(ptr_info.child);
  15452     if (!val_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
  15453     const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu));
  15454 
  15455     const val_bit_size: u32 = @intCast(val_ty.bitSize(zcu));
            // Total bit offset from the pointer: the packed offset plus, for vector element
            // pointers, the element index times the element bit size.
  15456     const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
  15457         .none => 0,
  15458         .runtime => unreachable,
  15459         else => |vector_index| @intFromEnum(vector_index) * val_bit_size,
  15460     };
            // Byte-aligned case: plain load from the byte-adjusted address.
  15461     if (ptr_bit_off % 8 == 0) {
  15462         {
                    // Pointer forms that cannot take a byte offset directly are first
                    // copied into a temporary register.
  15463             const mat_ptr_mcv: MCValue = switch (ptr_mcv) {
  15464                 .immediate, .register, .register_offset, .lea_frame => ptr_mcv,
  15465                 else => .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
  15466             };
  15467             const mat_ptr_lock = switch (mat_ptr_mcv) {
  15468                 .register => |mat_ptr_reg| self.register_manager.lockReg(mat_ptr_reg),
  15469                 else => null,
  15470             };
  15471             defer if (mat_ptr_lock) |lock| self.register_manager.unlockReg(lock);
  15472 
  15473             try self.load(dst_mcv, ptr_ty, mat_ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8))));
  15474         }
  15475 
                // The load fetched whole ABI bytes; drop the bits beyond the value's bit size.
  15476         if (val_abi_size * 8 > val_bit_size) {
  15477             if (dst_mcv.isRegister()) {
  15478                 try self.truncateRegister(val_ty, dst_mcv.getReg().?);
  15479             } else {
                        // Destination is not a register: round-trip the 8-byte chunk at
                        // byte offset `val_bit_size / 64 * 8` through a temporary register.
  15480                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  15481                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  15482                 defer self.register_manager.unlockReg(tmp_lock);
  15483 
  15484                 const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
  15485                 try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
  15486                 try self.truncateRegister(val_ty, tmp_reg);
  15487                 try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
  15488             }
  15489         }
  15490         return;
  15491     }
  15492 
  15493     if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{val_ty.fmt(pt)});
  15494 
            // Split the bit offset into a limb-aligned byte displacement and a residual
            // bit offset inside that limb.
  15495     const limb_abi_size: u31 = @min(val_abi_size, 8);
  15496     const limb_abi_bits = limb_abi_size * 8;
  15497     const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
  15498     const val_bit_off = ptr_bit_off % limb_abi_bits;
  15499     const val_extra_bits = self.regExtraBits(val_ty);
  15500 
  15501     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
  15502     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
  15503     defer self.register_manager.unlockReg(ptr_lock);
  15504 
            // Work in the destination register when there is one, otherwise in a scratch
            // general-purpose register that is copied to `dst_mcv` at the end.
  15505     const dst_reg = switch (dst_mcv) {
  15506         .register => |reg| reg,
  15507         else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
  15508     };
  15509     const dst_lock = self.register_manager.lockReg(dst_reg);
  15510     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  15511 
            // If the residual bit offset is smaller than the type's extra bits, a load of
            // `val_abi_size` bytes is used; otherwise twice that many bytes are read.
  15512     const load_abi_size =
  15513         if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
  15514     if (load_abi_size <= 8) {
                // Single load followed by a right shift by the residual bit offset.
  15515         const load_reg = registerAlias(dst_reg, load_abi_size);
  15516         try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
  15517             .base = .{ .reg = ptr_reg },
  15518             .mod = .{ .rm = .{
  15519                 .size = .fromSize(load_abi_size),
  15520                 .disp = val_byte_off,
  15521             } },
  15522         });
                // Presumably needed because the shift below overwrites EFLAGS.
  15523         try self.spillEflagsIfOccupied();
  15524         try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(val_bit_off));
  15525     } else {
                // Two adjacent limbs are loaded (low into `dst_alias`, high into `tmp_reg`)
                // and combined with a double-precision right shift.
  15526         const tmp_reg =
  15527             registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size);
  15528         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  15529         defer self.register_manager.unlockReg(tmp_lock);
  15530 
  15531         const dst_alias = registerAlias(dst_reg, val_abi_size);
  15532         try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
  15533             .base = .{ .reg = ptr_reg },
  15534             .mod = .{ .rm = .{
  15535                 .size = .fromSize(val_abi_size),
  15536                 .disp = val_byte_off,
  15537             } },
  15538         });
  15539         try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
  15540             .base = .{ .reg = ptr_reg },
  15541             .mod = .{ .rm = .{
  15542                 .size = .fromSize(val_abi_size),
  15543                 .disp = val_byte_off + limb_abi_size,
  15544             } },
  15545         });
  15546         try self.spillEflagsIfOccupied();
  15547         try self.asmRegisterRegisterImmediate(.{ ._rd, .sh }, dst_alias, tmp_reg, .u(val_bit_off));
  15548     }
  15549 
  15550     if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
  15551     try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }, .{});
  15552 }
  15553 
  /// Copies the value pointed to by `ptr_mcv` (of pointer type `ptr_ty`) into
  /// `dst_mcv`.
  ///
  /// Does nothing when the pointee type has no runtime bits. `ptr_mcv` variants that
  /// cannot represent a pointer are `unreachable`. Address-like variants are
  /// dereferenced directly; variants where the pointer itself lives in memory are
  /// first copied into a temporary register; `.air_ref` is resolved and the function
  /// recurses.
  15554 fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
  15555     const pt = self.pt;
  15556     const zcu = pt.zcu;
  15557     const dst_ty = ptr_ty.childType(zcu);
  15558     if (!dst_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
  15559     switch (ptr_mcv) {
  15560         .none,
  15561         .unreach,
  15562         .dead,
  15563         .undef,
  15564         .eflags,
  15565         .register_pair,
  15566         .register_triple,
  15567         .register_quadruple,
  15568         .register_overflow,
  15569         .register_mask,
  15570         .elementwise_regs_then_frame,
  15571         .reserved_frame,
  15572         => unreachable, // not a valid pointer
                // The pointer value is directly usable as an address: copy from its `deref()`.
  15573         .immediate,
  15574         .register,
  15575         .register_offset,
  15576         .lea_symbol,
  15577         .lea_direct,
  15578         .lea_got,
  15579         .lea_tlv,
  15580         .lea_frame,
  15581         => try self.genCopy(dst_ty, dst_mcv, ptr_mcv.deref(), .{}),
                // The pointer must be fetched first: copy it into a locked temporary register
                // and load through `.indirect`.
  15582         .memory,
  15583         .indirect,
  15584         .load_symbol,
  15585         .load_direct,
  15586         .load_got,
  15587         .load_tlv,
  15588         .load_frame,
  15589         => {
  15590             const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
  15591             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  15592             defer self.register_manager.unlockReg(addr_lock);
  15593 
  15594             try self.genCopy(dst_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{});
  15595         },
  15596         .air_ref => |ptr_ref| try self.load(dst_mcv, ptr_ty, try self.resolveInst(ptr_ref)),
  15597     }
  15598 }
  15599 
  /// Lowers an AIR load instruction.
  ///
  /// The result is `.none` when the loaded type has no runtime bits. Otherwise the
  /// destination is either the pointer operand's own storage (when it can be reused)
  /// or freshly allocated storage, and the value is fetched with `packedLoad` (pointer
  /// has a vector index or a non-zero packed host size) or `load`. For ABI integers
  /// whose ABI size in bits exceeds their bit size, the most significant part of the
  /// result is truncated afterwards.
  15600 fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
  15601     const pt = self.pt;
  15602     const zcu = pt.zcu;
  15603     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  15604     const elem_ty = self.typeOfIndex(inst);
  15605     const result: MCValue = result: {
  15606         if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
  15607 
                // NOTE(review): rdi, rsi and rcx are spilled and locked for the whole
                // lowering; the reason is not visible in this function — presumably a copy
                // sequence used further down needs them. Confirm.
  15608         try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
  15609         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
  15610         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  15611 
  15612         const ptr_ty = self.typeOf(ty_op.operand);
  15613         const elem_size = elem_ty.abiSize(zcu);
  15614 
  15615         const elem_rs = self.regSetForType(elem_ty);
  15616         const ptr_rs = self.regSetForType(ptr_ty);
  15617 
  15618         const ptr_mcv = try self.resolveInst(ty_op.operand);
                // Reuse requires: element size at most 8 bytes and a power of two, the
                // element's register set covering the pointer's, and a dying operand.
  15619         const dst_mcv = if (elem_size <= 8 and std.math.isPowerOfTwo(elem_size) and
  15620             elem_rs.supersetOf(ptr_rs) and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
  15621             // The MCValue that holds the pointer can be re-used as the value.
  15622             ptr_mcv
  15623         else
  15624             try self.allocRegOrMem(inst, true);
  15625 
  15626         const ptr_info = ptr_ty.ptrInfo(zcu);
  15627         if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
  15628             try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
  15629         } else {
  15630             try self.load(dst_mcv, ptr_ty, ptr_mcv);
  15631         }
  15632 
                // Truncate the part of the result that holds the integer's top bits.
  15633         if (elem_ty.isAbiInt(zcu) and elem_size * 8 > elem_ty.bitSize(zcu)) {
                    // Locate that part: the register itself, the second register of a pair,
                    // or the 8-byte chunk at byte offset `(elem_size - 1) / 8 * 8`.
  15634             const high_mcv: MCValue = switch (dst_mcv) {
  15635                 .register => |dst_reg| .{ .register = dst_reg },
  15636                 .register_pair => |dst_regs| .{ .register = dst_regs[1] },
  15637                 else => dst_mcv.address().offset(@intCast((elem_size - 1) / 8 * 8)).deref(),
  15638             };
  15639             const high_reg = if (high_mcv.isRegister())
  15640                 high_mcv.getReg().?
  15641             else
  15642                 try self.copyToTmpRegister(.usize, high_mcv);
  15643             const high_lock = self.register_manager.lockReg(high_reg);
  15644             defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
  15645 
  15646             try self.truncateRegister(elem_ty, high_reg);
                    // When the high part lives in memory, write the truncated copy back.
  15647             if (!high_mcv.isRegister()) try self.genCopy(
  15648                 if (elem_size <= 8) elem_ty else .usize,
  15649                 high_mcv,
  15650                 .{ .register = high_reg },
  15651                 .{},
  15652             );
  15653         }
  15654         break :result dst_mcv;
  15655     };
  15656     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  15657 }
  15658 
  /// Stores `src_mcv` through `ptr_mcv`, a pointer carrying a packed bit offset and/or
  /// a comptime-known vector index, using a read-modify-write per limb.
  ///
  /// Does nothing when the pointee type has no runtime bits. A `.runtime` vector index
  /// is `unreachable` here. The limb size is the pointer's packed host size capped at
  /// 8 bytes. For every limb that the destination bit range touches, the target bits
  /// are first cleared in memory with an `and`, then the matching source bits are
  /// merged in with an `or`.
  ///
  /// Supported sources: at most 64 bits at any bit offset, or at most 128 bits at bit
  /// offset 0 within the limb. Anything else fails with a TODO error.
  15659 fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
  15660     const pt = self.pt;
  15661     const zcu = pt.zcu;
  15662     const ptr_info = ptr_ty.ptrInfo(zcu);
  15663     const src_ty: Type = .fromInterned(ptr_info.child);
  15664     if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
  15665 
  15666     const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
  15667     const limb_abi_bits = limb_abi_size * 8;
  15668     const limb_ty = try pt.intType(.unsigned, limb_abi_bits);
  15669 
  15670     const src_bit_size = src_ty.bitSize(zcu);
  15671     const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
  15672         .none => 0,
  15673         .runtime => unreachable,
  15674         else => |vector_index| @intFromEnum(vector_index) * src_bit_size,
  15675     };
            // Byte displacement of the first limb touched, and the bit offset within it.
  15676     const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
  15677     const src_bit_off = ptr_bit_off % limb_abi_bits;
  15678 
  15679     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
  15680     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
  15681     defer self.register_manager.unlockReg(ptr_lock);
  15682 
            // Visit each limb overlapped by the bit range [src_bit_off, src_bit_off + src_bit_size).
  15683     var limb_i: u16 = 0;
  15684     while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
                // Portion of the destination range that falls inside this limb.
  15685         const part_bit_off = if (limb_i == 0) src_bit_off else 0;
  15686         const part_bit_size =
  15687             @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
  15688         const limb_mem: Memory = .{
  15689             .base = .{ .reg = ptr_reg },
  15690             .mod = .{ .rm = .{
  15691                 .size = .fromSize(limb_abi_size),
  15692                 .disp = src_byte_off + limb_i * limb_abi_size,
  15693             } },
  15694         };
  15695 
                // `part_mask` selects the bits being written; `part_mask_not` selects the
                // remaining bits of the limb, which must be preserved.
  15696         const part_mask = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
  15697             @intCast(part_bit_off);
  15698         const part_mask_not = part_mask ^ (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
                // Clear the target bits in memory. Limbs of at most 4 bytes use an unsigned
                // immediate; wider limbs use a signed 32-bit immediate when the mask fits,
                // else the mask is first moved into a scratch register.
  15699         if (limb_abi_size <= 4) {
  15700             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .u(part_mask_not));
  15701         } else if (std.math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
  15702             try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .s(small));
  15703         } else {
  15704             const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  15705             try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, .u(part_mask_not));
  15706             try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
  15707         }
  15708 
  15709         if (src_bit_size <= 64) {
                    // The source is loaded into a scratch register, then aligned to this
                    // limb, masked, and merged into memory.
  15710             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  15711             const tmp_mcv = MCValue{ .register = tmp_reg };
  15712             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  15713             defer self.register_manager.unlockReg(tmp_lock);
  15714 
  15715             try self.genSetReg(tmp_reg, limb_ty, src_mcv, .{});
                    // Limb 0 receives the low source bits shifted up to the bit offset;
                    // limb 1 receives the bits that spilled past the first limb. A third
                    // limb is declared impossible for a source of at most 64 bits.
  15716             switch (limb_i) {
  15717                 0 => try self.genShiftBinOpMir(
  15718                     .{ ._l, .sh },
  15719                     limb_ty,
  15720                     tmp_mcv,
  15721                     .u8,
  15722                     .{ .immediate = src_bit_off },
  15723                 ),
  15724                 1 => try self.genShiftBinOpMir(
  15725                     .{ ._r, .sh },
  15726                     limb_ty,
  15727                     tmp_mcv,
  15728                     .u8,
  15729                     .{ .immediate = limb_abi_bits - src_bit_off },
  15730                 ),
  15731                 else => unreachable,
  15732             }
  15733             try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
  15734             try self.asmMemoryRegister(
  15735                 .{ ._, .@"or" },
  15736                 limb_mem,
  15737                 registerAlias(tmp_reg, limb_abi_size),
  15738             );
  15739         } else if (src_bit_size <= 128 and src_bit_off == 0) {
                    // Limb-aligned wide source: each limb is taken from the matching byte
                    // offset of the source, so no shifting is needed.
  15740             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  15741             const tmp_mcv = MCValue{ .register = tmp_reg };
  15742             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  15743             defer self.register_manager.unlockReg(tmp_lock);
  15744 
  15745             try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
  15746                 0 => src_mcv,
  15747                 else => src_mcv.address().offset(limb_i * limb_abi_size).deref(),
  15748             }, .{});
  15749             try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
  15750             try self.asmMemoryRegister(
  15751                 .{ ._, .@"or" },
  15752                 limb_mem,
  15753                 registerAlias(tmp_reg, limb_abi_size),
  15754             );
  15755         } else return self.fail("TODO: implement packed store of {}", .{src_ty.fmt(pt)});
  15756     }
  15757 }
  15758 
  /// Copies `src_mcv` into the memory pointed to by `ptr_mcv` (of pointer type
  /// `ptr_ty`), forwarding `opts` to `genCopy`.
  ///
  /// Does nothing when the pointee type has no runtime bits. `ptr_mcv` variants that
  /// cannot represent a pointer are `unreachable`. Address-like variants are
  /// dereferenced directly; variants where the pointer itself lives in memory are
  /// first copied into a temporary register; `.air_ref` is resolved and the function
  /// recurses.
  15759 fn store(
  15760     self: *CodeGen,
  15761     ptr_ty: Type,
  15762     ptr_mcv: MCValue,
  15763     src_mcv: MCValue,
  15764     opts: CopyOptions,
  15765 ) InnerError!void {
  15766     const pt = self.pt;
  15767     const zcu = pt.zcu;
  15768     const src_ty = ptr_ty.childType(zcu);
  15769     if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
  15770     switch (ptr_mcv) {
  15771         .none,
  15772         .unreach,
  15773         .dead,
  15774         .undef,
  15775         .eflags,
  15776         .register_pair,
  15777         .register_triple,
  15778         .register_quadruple,
  15779         .register_overflow,
  15780         .register_mask,
  15781         .elementwise_regs_then_frame,
  15782         .reserved_frame,
  15783         => unreachable, // not a valid pointer
                // The pointer value is directly usable as an address: copy into its `deref()`.
  15784         .immediate,
  15785         .register,
  15786         .register_offset,
  15787         .lea_symbol,
  15788         .lea_direct,
  15789         .lea_got,
  15790         .lea_tlv,
  15791         .lea_frame,
  15792         => try self.genCopy(src_ty, ptr_mcv.deref(), src_mcv, opts),
                // The pointer must be fetched first: copy it into a locked temporary register
                // and store through `.indirect`.
  15793         .memory,
  15794         .indirect,
  15795         .load_symbol,
  15796         .load_direct,
  15797         .load_got,
  15798         .load_tlv,
  15799         .load_frame,
  15800         => {
  15801             const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
  15802             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  15803             defer self.register_manager.unlockReg(addr_lock);
  15804 
  15805             try self.genCopy(src_ty, .{ .indirect = .{ .reg = addr_reg } }, src_mcv, opts);
  15806         },
  15807         .air_ref => |ptr_ref| try self.store(ptr_ty, try self.resolveInst(ptr_ref), src_mcv, opts),
  15808     }
  15809 }
  15810 
  /// Lowers an AIR store instruction (`bin_op.lhs` is the pointer, `bin_op.rhs` the
  /// value).
  ///
  /// When `safety` is false and the value resolves to `.undef`, no code is emitted.
  /// Otherwise the store goes through `packedStore` (pointer has a vector index or a
  /// non-zero packed host size) or `store`, with `safety` forwarded in the copy
  /// options of the latter. The instruction itself produces no result.
  15811 fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
  15812     const pt = self.pt;
  15813     const zcu = pt.zcu;
  15814     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  15815 
  15816     result: {
  15817         if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;
  15818 
                // NOTE(review): rdi, rsi and rcx are spilled and locked for the whole
                // lowering; the reason is not visible in this function — presumably a copy
                // sequence used further down needs them. Confirm.
  15819         try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
  15820         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
  15821         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  15822 
                // Operands are resolved after the spill above.
  15823         const src_mcv = try self.resolveInst(bin_op.rhs);
  15824         const ptr_mcv = try self.resolveInst(bin_op.lhs);
  15825         const ptr_ty = self.typeOf(bin_op.lhs);
  15826 
  15827         const ptr_info = ptr_ty.ptrInfo(zcu);
  15828         if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
  15829             try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
  15830         } else {
  15831             try self.store(ptr_ty, ptr_mcv, src_mcv, .{ .safety = safety });
  15832         }
  15833     }
  15834     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
  15835 }
  15836 
  /// Lowers a struct-field-pointer AIR instruction whose aggregate operand and field
  /// index are stored in an `Air.StructField` extra payload. The pointer computation
  /// itself is delegated to `fieldPtr`.
  15837 fn airStructFieldPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  15838     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  15839     const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
  15840     const result = try self.fieldPtr(inst, extra.struct_operand, extra.field_index);
  15841     return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
  15842 }
  15843 
  /// Lowers a struct-field-pointer AIR instruction whose field index is supplied by
  /// the caller as `field_index` rather than read from an extra payload. The pointer
  /// computation itself is delegated to `fieldPtr`.
  15844 fn airStructFieldPtrIndex(self: *CodeGen, inst: Air.Inst.Index, field_index: u8) !void {
  15845     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  15846     const result = try self.fieldPtr(inst, ty_op.operand, field_index);
  15847     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  15848 }
  15849 
  /// Computes the machine value of a pointer to field `field_index` of the aggregate
  /// pointed to by `operand`.
  ///
  /// The base pointer is used as-is when it is an `.immediate` or `.lea_frame`, or
  /// when it is a `.register`/`.register_offset` whose operand can be reused by
  /// `inst`; otherwise it is copied into a register tracked for `inst`. The result is
  /// that base offset by `fieldOffset(...)` bytes.
  15850 fn fieldPtr(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, field_index: u32) !MCValue {
  15851     const ptr_field_ty = self.typeOfIndex(inst);
  15852 
  15853     const src_mcv = try self.resolveInst(operand);
  15854     const dst_mcv = if (switch (src_mcv) {
  15855         .immediate, .lea_frame => true,
  15856         .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv),
  15857         else => false,
  15858     }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv);
  15859     return dst_mcv.offset(self.fieldOffset(self.typeOf(operand), ptr_field_ty, field_index));
  15860 }
  15861 
  /// Returns the byte offset to add to a pointer of type `ptr_agg_ty` to obtain a
  /// pointer of type `ptr_field_ty` to field `field_index`.
  ///
  /// For `auto` and `extern` layouts this is the field's struct offset. For `packed`
  /// layouts it is the aggregate pointer's bit offset plus the field's bit offset
  /// within the packed struct (0 when the aggregate is not a struct), minus the bit
  /// offset carried by the field pointer type, divided by 8. `@divExact` asserts that
  /// this difference is a whole number of bytes.
  15862 fn fieldOffset(self: *CodeGen, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u32) i32 {
  15863     const pt = self.pt;
  15864     const zcu = pt.zcu;
  15865     const agg_ty = ptr_agg_ty.childType(zcu);
  15866     return switch (agg_ty.containerLayout(zcu)) {
  15867         .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(field_index, zcu)),
  15868         .@"packed" => @divExact(@as(i32, ptr_agg_ty.ptrInfo(zcu).packed_offset.bit_offset) +
  15869             (if (zcu.typeToStruct(agg_ty)) |loaded_struct| pt.structPackedFieldBitOffset(loaded_struct, field_index) else 0) -
  15870             ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8),
  15871     };
  15872 }
  15873 
  15874 fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void {
  15875     const pt = self.pt;
  15876     const zcu = pt.zcu;
  15877     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  15878     const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
  15879     const result: MCValue = result: {
  15880         const operand = extra.struct_operand;
  15881         const index = extra.field_index;
  15882 
  15883         const container_ty = self.typeOf(operand);
  15884         const container_rc = self.regSetForType(container_ty);
  15885         const field_ty = container_ty.fieldType(index, zcu);
  15886         if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
  15887         const field_rc = self.regSetForType(field_ty);
  15888         const field_is_gp = field_rc.supersetOf(abi.RegisterClass.gp);
  15889 
  15890         const src_mcv = try self.resolveInst(operand);
  15891         const field_off: u32 = switch (container_ty.containerLayout(zcu)) {
  15892             .auto, .@"extern" => @intCast(container_ty.structFieldOffset(extra.field_index, zcu) * 8),
  15893             .@"packed" => if (zcu.typeToStruct(container_ty)) |loaded_struct|
  15894                 pt.structPackedFieldBitOffset(loaded_struct, extra.field_index)
  15895             else
  15896                 0,
  15897         };
  15898 
  15899         switch (src_mcv) {
  15900             .register => |src_reg| {
  15901                 const src_reg_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  15902                 defer self.register_manager.unlockReg(src_reg_lock);
  15903 
  15904                 const src_in_field_rc =
  15905                     field_rc.isSet(RegisterManager.indexOfRegIntoTracked(src_reg).?);
  15906                 const dst_reg = if (src_in_field_rc and self.reuseOperand(inst, operand, 0, src_mcv))
  15907                     src_reg
  15908                 else if (field_off == 0)
  15909                     (try self.copyToRegisterWithInstTracking(inst, field_ty, src_mcv)).register
  15910                 else
  15911                     try self.copyToTmpRegister(.usize, .{ .register = src_reg });
  15912                 const dst_mcv: MCValue = .{ .register = dst_reg };
  15913                 const dst_lock = self.register_manager.lockReg(dst_reg);
  15914                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  15915 
  15916                 if (field_off > 0) {
  15917                     try self.spillEflagsIfOccupied();
  15918                     try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, dst_mcv, .u8, .{ .immediate = field_off });
  15919                 }
  15920                 if (abi.RegisterClass.gp.isSet(RegisterManager.indexOfRegIntoTracked(dst_reg).?) and
  15921                     container_ty.abiSize(zcu) * 8 > field_ty.bitSize(zcu))
  15922                     try self.truncateRegister(field_ty, dst_reg);
  15923 
  15924                 break :result if (field_off == 0 or field_rc.supersetOf(abi.RegisterClass.gp))
  15925                     dst_mcv
  15926                 else
  15927                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
  15928             },
  15929             .register_pair => |src_regs| {
  15930                 const src_regs_lock = self.register_manager.lockRegsAssumeUnused(2, src_regs);
  15931                 defer for (src_regs_lock) |lock| self.register_manager.unlockReg(lock);
  15932 
  15933                 const field_bit_size: u32 = @intCast(field_ty.bitSize(zcu));
  15934                 const src_reg = if (field_off + field_bit_size <= 64)
  15935                     src_regs[0]
  15936                 else if (field_off >= 64)
  15937                     src_regs[1]
  15938                 else {
  15939                     const dst_regs: [2]Register = if (field_rc.supersetOf(container_rc) and
  15940                         self.reuseOperand(inst, operand, 0, src_mcv)) src_regs else dst: {
  15941                         const dst_regs =
  15942                             try self.register_manager.allocRegs(2, @splat(null), field_rc);
  15943                         const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
  15944                         defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
  15945 
  15946                         try self.genCopy(container_ty, .{ .register_pair = dst_regs }, src_mcv, .{});
  15947                         break :dst dst_regs;
  15948                     };
  15949                     const dst_mcv = MCValue{ .register_pair = dst_regs };
  15950                     const dst_locks = self.register_manager.lockRegs(2, dst_regs);
  15951                     defer for (dst_locks) |dst_lock| if (dst_lock) |lock|
  15952                         self.register_manager.unlockReg(lock);
  15953 
  15954                     if (field_off > 0) {
  15955                         try self.spillEflagsIfOccupied();
  15956                         try self.genShiftBinOpMir(.{ ._r, .sh }, .u128, dst_mcv, .u8, .{ .immediate = field_off });
  15957                     }
  15958 
  15959                     if (field_bit_size <= 64) {
  15960                         if (self.regExtraBits(field_ty) > 0)
  15961                             try self.truncateRegister(field_ty, dst_regs[0]);
  15962                         break :result if (field_rc.supersetOf(abi.RegisterClass.gp))
  15963                             .{ .register = dst_regs[0] }
  15964                         else
  15965                             try self.copyToRegisterWithInstTracking(inst, field_ty, .{
  15966                                 .register = dst_regs[0],
  15967                             });
  15968                     }
  15969 
  15970                     if (field_bit_size < 128) try self.truncateRegister(
  15971                         try pt.intType(.unsigned, @intCast(field_bit_size - 64)),
  15972                         dst_regs[1],
  15973                     );
  15974                     break :result if (field_rc.supersetOf(abi.RegisterClass.gp))
  15975                         dst_mcv
  15976                     else
  15977                         try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
  15978                 };
  15979 
  15980                 const dst_reg = try self.copyToTmpRegister(.usize, .{ .register = src_reg });
  15981                 const dst_mcv = MCValue{ .register = dst_reg };
  15982                 const dst_lock = self.register_manager.lockReg(dst_reg);
  15983                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  15984 
  15985                 if (field_off % 64 > 0) {
  15986                     try self.spillEflagsIfOccupied();
  15987                     try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, dst_mcv, .u8, .{ .immediate = field_off % 64 });
  15988                 }
  15989                 if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(field_ty, dst_reg);
  15990 
  15991                 break :result if (field_rc.supersetOf(abi.RegisterClass.gp))
  15992                     dst_mcv
  15993                 else
  15994                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
  15995             },
  15996             .register_overflow => |ro| {
  15997                 switch (index) {
  15998                     // Get wrapped value for overflow operation.
  15999                     0 => if (self.reuseOperand(inst, extra.struct_operand, 0, src_mcv)) {
  16000                         self.eflags_inst = null; // actually stop tracking the overflow part
  16001                         break :result .{ .register = ro.reg };
  16002                     } else break :result try self.copyToRegisterWithInstTracking(inst, .usize, .{ .register = ro.reg }),
  16003                     // Get overflow bit.
  16004                     1 => if (self.reuseOperandAdvanced(inst, extra.struct_operand, 0, src_mcv, null)) {
  16005                         self.eflags_inst = inst; // actually keep tracking the overflow part
  16006                         break :result .{ .eflags = ro.eflags };
  16007                     } else {
  16008                         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  16009                         try self.asmSetccRegister(ro.eflags, dst_reg.to8());
  16010                         break :result .{ .register = dst_reg.to8() };
  16011                     },
  16012                     else => unreachable,
  16013                 }
  16014             },
  16015             .load_frame => |frame_addr| {
  16016                 const field_abi_size: u32 = @intCast(field_ty.abiSize(zcu));
  16017                 if (field_off % 8 == 0) {
  16018                     const field_byte_off = @divExact(field_off, 8);
  16019                     const off_mcv = src_mcv.address().offset(@intCast(field_byte_off)).deref();
  16020                     const field_bit_size = field_ty.bitSize(zcu);
  16021 
  16022                     if (field_abi_size <= 8) {
  16023                         const int_ty = try pt.intType(
  16024                             if (field_ty.isAbiInt(zcu)) field_ty.intInfo(zcu).signedness else .unsigned,
  16025                             @intCast(field_bit_size),
  16026                         );
  16027 
  16028                         const dst_reg = try self.register_manager.allocReg(
  16029                             if (field_is_gp) inst else null,
  16030                             abi.RegisterClass.gp,
  16031                         );
  16032                         const dst_mcv = MCValue{ .register = dst_reg };
  16033                         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  16034                         defer self.register_manager.unlockReg(dst_lock);
  16035 
  16036                         try self.genCopy(int_ty, dst_mcv, off_mcv, .{});
  16037                         if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(int_ty, dst_reg);
  16038                         break :result if (field_is_gp)
  16039                             dst_mcv
  16040                         else
  16041                             try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
  16042                     }
  16043 
  16044                     const container_abi_size: u32 = @intCast(container_ty.abiSize(zcu));
  16045                     const dst_mcv = if (field_byte_off + field_abi_size <= container_abi_size and
  16046                         self.reuseOperand(inst, operand, 0, src_mcv))
  16047                         off_mcv
  16048                     else dst: {
  16049                         const dst_mcv = try self.allocRegOrMem(inst, true);
  16050                         try self.genCopy(field_ty, dst_mcv, off_mcv, .{});
  16051                         break :dst dst_mcv;
  16052                     };
  16053                     if (field_abi_size * 8 > field_bit_size and dst_mcv.isBase()) {
  16054                         const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16055                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  16056                         defer self.register_manager.unlockReg(tmp_lock);
  16057 
  16058                         const hi_mcv =
  16059                             dst_mcv.address().offset(@intCast(field_bit_size / 64 * 8)).deref();
  16060                         try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
  16061                         try self.truncateRegister(field_ty, tmp_reg);
  16062                         try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
  16063                     }
  16064                     break :result dst_mcv;
  16065                 }
  16066 
  16067                 const limb_abi_size: u31 = @min(field_abi_size, 8);
  16068                 const limb_abi_bits = limb_abi_size * 8;
  16069                 const field_byte_off: i32 = @intCast(field_off / limb_abi_bits * limb_abi_size);
  16070                 const field_bit_off = field_off % limb_abi_bits;
  16071 
  16072                 if (field_abi_size > 8) {
  16073                     return self.fail("TODO implement struct_field_val with large packed field", .{});
  16074                 }
  16075 
  16076                 const dst_reg = try self.register_manager.allocReg(
  16077                     if (field_is_gp) inst else null,
  16078                     abi.RegisterClass.gp,
  16079                 );
  16080                 const field_extra_bits = self.regExtraBits(field_ty);
  16081                 const load_abi_size =
  16082                     if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2;
  16083                 if (load_abi_size <= 8) {
  16084                     const load_reg = registerAlias(dst_reg, load_abi_size);
  16085                     try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
  16086                         .base = .{ .frame = frame_addr.index },
  16087                         .mod = .{ .rm = .{
  16088                             .size = .fromSize(load_abi_size),
  16089                             .disp = frame_addr.off + field_byte_off,
  16090                         } },
  16091                     });
  16092                     try self.spillEflagsIfOccupied();
  16093                     try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(field_bit_off));
  16094                 } else {
  16095                     const tmp_reg = registerAlias(
  16096                         try self.register_manager.allocReg(null, abi.RegisterClass.gp),
  16097                         field_abi_size,
  16098                     );
  16099                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  16100                     defer self.register_manager.unlockReg(tmp_lock);
  16101 
  16102                     const dst_alias = registerAlias(dst_reg, field_abi_size);
  16103                     try self.asmRegisterMemory(
  16104                         .{ ._, .mov },
  16105                         dst_alias,
  16106                         .{
  16107                             .base = .{ .frame = frame_addr.index },
  16108                             .mod = .{ .rm = .{
  16109                                 .size = .fromSize(field_abi_size),
  16110                                 .disp = frame_addr.off + field_byte_off,
  16111                             } },
  16112                         },
  16113                     );
  16114                     try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
  16115                         .base = .{ .frame = frame_addr.index },
  16116                         .mod = .{ .rm = .{
  16117                             .size = .fromSize(field_abi_size),
  16118                             .disp = frame_addr.off + field_byte_off + limb_abi_size,
  16119                         } },
  16120                     });
  16121                     try self.spillEflagsIfOccupied();
  16122                     try self.asmRegisterRegisterImmediate(
  16123                         .{ ._rd, .sh },
  16124                         dst_alias,
  16125                         tmp_reg,
  16126                         .u(field_bit_off),
  16127                     );
  16128                 }
  16129 
  16130                 if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg);
  16131 
  16132                 const dst_mcv = MCValue{ .register = dst_reg };
  16133                 break :result if (field_is_gp)
  16134                     dst_mcv
  16135                 else
  16136                     try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
  16137             },
  16138             else => return self.fail("TODO implement airStructFieldVal for {}", .{src_mcv}),
  16139         }
  16140     };
  16141     return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
  16142 }
  16143 
  16144 fn airFieldParentPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  16145     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  16146     const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
  16147 
  16148     const ptr_agg_ty = self.typeOfIndex(inst);
  16149     const src_mcv = try self.resolveInst(extra.field_ptr);
  16150     const dst_mcv = if (src_mcv.isRegisterOffset() and
  16151         self.reuseOperand(inst, extra.field_ptr, 0, src_mcv))
  16152         src_mcv
  16153     else
  16154         try self.copyToRegisterWithInstTracking(inst, ptr_agg_ty, src_mcv);
  16155     const result = dst_mcv.offset(-self.fieldOffset(ptr_agg_ty, self.typeOf(extra.field_ptr), extra.field_index));
  16156     return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none });
  16157 }
  16158 
  16159 fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
  16160     const pt = self.pt;
  16161     const zcu = pt.zcu;
  16162     const src_ty = self.typeOf(src_air);
  16163     if (src_ty.zigTypeTag(zcu) == .vector)
  16164         return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(pt)});
  16165 
  16166     var src_mcv = try self.resolveInst(src_air);
  16167     switch (src_mcv) {
  16168         .eflags => |cc| switch (tag) {
  16169             .not => {
  16170                 if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv))
  16171                     return .{ .eflags = cc.negate() };
  16172                 try self.spillEflagsIfOccupied();
  16173                 src_mcv = try self.resolveInst(src_air);
  16174             },
  16175             else => {},
  16176         },
  16177         else => {},
  16178     }
  16179 
  16180     const src_lock = switch (src_mcv) {
  16181         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  16182         else => null,
  16183     };
  16184     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  16185 
  16186     const dst_mcv: MCValue = dst: {
  16187         if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv;
  16188 
  16189         const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true);
  16190         try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
  16191         break :dst dst_mcv;
  16192     };
  16193     const dst_lock = switch (dst_mcv) {
  16194         .register => |reg| self.register_manager.lockReg(reg),
  16195         else => null,
  16196     };
  16197     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  16198 
  16199     const abi_size: u16 = @intCast(src_ty.abiSize(zcu));
  16200     switch (tag) {
  16201         .not => {
  16202             const limb_abi_size: u16 = @min(abi_size, 8);
  16203             const int_info: InternPool.Key.IntType = if (src_ty.ip_index == .bool_type)
  16204                 .{ .signedness = .unsigned, .bits = 1 }
  16205             else
  16206                 src_ty.intInfo(zcu);
  16207             var byte_off: i32 = 0;
  16208             while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) {
  16209                 const limb_bits: u16 = @intCast(@min(switch (int_info.signedness) {
  16210                     .signed => abi_size * 8,
  16211                     .unsigned => int_info.bits,
  16212                 } - byte_off * 8, limb_abi_size * 8));
  16213                 const limb_ty = try pt.intType(int_info.signedness, limb_bits);
  16214                 const limb_mcv = switch (byte_off) {
  16215                     0 => dst_mcv,
  16216                     else => dst_mcv.address().offset(byte_off).deref(),
  16217                 };
  16218 
  16219                 if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) {
  16220                     const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_bits);
  16221                     try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask });
  16222                 } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv);
  16223             }
  16224         },
  16225         .neg => {
  16226             try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv);
  16227             const bit_size = src_ty.intInfo(zcu).bits;
  16228             if (abi_size * 8 > bit_size) {
  16229                 if (dst_mcv.isRegister()) {
  16230                     try self.truncateRegister(src_ty, dst_mcv.getReg().?);
  16231                 } else {
  16232                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16233                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  16234                     defer self.register_manager.unlockReg(tmp_lock);
  16235 
  16236                     const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
  16237                     try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
  16238                     try self.truncateRegister(src_ty, tmp_reg);
  16239                     try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
  16240                 }
  16241             }
  16242         },
  16243         else => unreachable,
  16244     }
  16245     return dst_mcv;
  16246 }
  16247 
  16248 fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void {
  16249     const pt = self.pt;
  16250     const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu));
  16251     if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, dst_ty.fmt(pt) });
  16252     switch (dst_mcv) {
  16253         .none,
  16254         .unreach,
  16255         .dead,
  16256         .undef,
  16257         .immediate,
  16258         .register_offset,
  16259         .eflags,
  16260         .register_overflow,
  16261         .register_mask,
  16262         .lea_symbol,
  16263         .lea_direct,
  16264         .lea_got,
  16265         .lea_tlv,
  16266         .lea_frame,
  16267         .elementwise_regs_then_frame,
  16268         .reserved_frame,
  16269         .air_ref,
  16270         => unreachable, // unmodifiable destination
  16271         .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)),
  16272         .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented
  16273         .memory, .load_symbol, .load_got, .load_direct, .load_tlv => {
  16274             const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16275             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  16276             defer self.register_manager.unlockReg(addr_reg_lock);
  16277 
  16278             try self.genSetReg(addr_reg, .usize, dst_mcv.address(), .{});
  16279             try self.asmMemory(mir_tag, .{ .base = .{ .reg = addr_reg }, .mod = .{ .rm = .{
  16280                 .size = .fromSize(abi_size),
  16281             } } });
  16282         },
  16283         .indirect, .load_frame => try self.asmMemory(
  16284             mir_tag,
  16285             try dst_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  16286         ),
  16287     }
  16288 }
  16289 
  16290 /// Clobbers .rcx for non-immediate shift value.
  16291 fn genShiftBinOpMir(
  16292     self: *CodeGen,
  16293     tag: Mir.Inst.FixedTag,
  16294     lhs_ty: Type,
  16295     lhs_mcv: MCValue,
  16296     rhs_ty: Type,
  16297     rhs_mcv: MCValue,
  16298 ) !void {
  16299     const pt = self.pt;
  16300     const zcu = pt.zcu;
  16301     const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
  16302     const shift_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu));
  16303     try self.spillEflagsIfOccupied();
  16304 
  16305     if (abi_size > 16) {
  16306         const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
  16307         assert(shift_abi_size >= 1 and shift_abi_size <= 2);
  16308 
  16309         const rcx_lock: ?RegisterLock = switch (rhs_mcv) {
  16310             .immediate => |shift_imm| switch (shift_imm) {
  16311                 0 => return,
  16312                 else => null,
  16313             },
  16314             else => lock: {
  16315                 if (switch (rhs_mcv) {
  16316                     .register => |rhs_reg| rhs_reg.id() != Register.rcx.id(),
  16317                     else => true,
  16318                 }) {
  16319                     self.register_manager.getRegAssumeFree(.rcx, null);
  16320                     try self.genSetReg(.rcx, rhs_ty, rhs_mcv, .{});
  16321                 }
  16322                 break :lock self.register_manager.lockReg(.rcx);
  16323             },
  16324         };
  16325         defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock);
  16326 
  16327         const temp_regs = try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
  16328         const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
  16329         defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
  16330 
  16331         switch (tag[0]) {
  16332             ._l => {
  16333                 try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1));
  16334                 switch (rhs_mcv) {
  16335                     .immediate => |shift_imm| try self.asmRegisterImmediate(
  16336                         .{ ._, .mov },
  16337                         temp_regs[0].to32(),
  16338                         .u(limbs_len - (shift_imm >> 6) - 1),
  16339                     ),
  16340                     else => {
  16341                         try self.asmRegisterRegister(
  16342                             .{ ._, .movzx },
  16343                             temp_regs[2].to32(),
  16344                             registerAlias(.rcx, shift_abi_size),
  16345                         );
  16346                         try self.asmRegisterImmediate(.{ ._, .@"and" }, .cl, .u(std.math.maxInt(u6)));
  16347                         try self.asmRegisterImmediate(.{ ._r, .sh }, temp_regs[2].to32(), .u(6));
  16348                         try self.asmRegisterRegister(
  16349                             .{ ._, .mov },
  16350                             temp_regs[0].to32(),
  16351                             temp_regs[1].to32(),
  16352                         );
  16353                         try self.asmRegisterRegister(
  16354                             .{ ._, .sub },
  16355                             temp_regs[0].to32(),
  16356                             temp_regs[2].to32(),
  16357                         );
  16358                     },
  16359                 }
  16360             },
  16361             ._r => {
  16362                 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[1].to32(), temp_regs[1].to32());
  16363                 switch (rhs_mcv) {
  16364                     .immediate => |shift_imm| try self.asmRegisterImmediate(
  16365                         .{ ._, .mov },
  16366                         temp_regs[0].to32(),
  16367                         .u(shift_imm >> 6),
  16368                     ),
  16369                     else => {
  16370                         try self.asmRegisterRegister(
  16371                             .{ ._, .movzx },
  16372                             temp_regs[0].to32(),
  16373                             registerAlias(.rcx, shift_abi_size),
  16374                         );
  16375                         try self.asmRegisterImmediate(.{ ._, .@"and" }, .cl, .u(std.math.maxInt(u6)));
  16376                         try self.asmRegisterImmediate(.{ ._r, .sh }, temp_regs[0].to32(), .u(6));
  16377                     },
  16378                 }
  16379             },
  16380             else => unreachable,
  16381         }
  16382 
  16383         const slow_inc_dec = self.hasFeature(.slow_incdec);
  16384         if (switch (rhs_mcv) {
  16385             .immediate => |shift_imm| shift_imm >> 6 < limbs_len - 1,
  16386             else => true,
  16387         }) {
  16388             try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[2].to64(), .{
  16389                 .base = .{ .frame = lhs_mcv.load_frame.index },
  16390                 .mod = .{ .rm = .{
  16391                     .size = .qword,
  16392                     .index = temp_regs[0].to64(),
  16393                     .scale = .@"8",
  16394                     .disp = lhs_mcv.load_frame.off,
  16395                 } },
  16396             });
  16397             const skip = switch (rhs_mcv) {
  16398                 .immediate => undefined,
  16399                 else => switch (tag[0]) {
  16400                     ._l => try self.asmJccReloc(.z, undefined),
  16401                     ._r => skip: {
  16402                         try self.asmRegisterImmediate(
  16403                             .{ ._, .cmp },
  16404                             temp_regs[0].to32(),
  16405                             .u(limbs_len - 1),
  16406                         );
  16407                         break :skip try self.asmJccReloc(.nb, undefined);
  16408                     },
  16409                     else => unreachable,
  16410                 },
  16411             };
  16412             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  16413             try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[3].to64(), .{
  16414                 .base = .{ .frame = lhs_mcv.load_frame.index },
  16415                 .mod = .{ .rm = .{
  16416                     .size = .qword,
  16417                     .index = temp_regs[0].to64(),
  16418                     .scale = .@"8",
  16419                     .disp = switch (tag[0]) {
  16420                         ._l => lhs_mcv.load_frame.off - 8,
  16421                         ._r => lhs_mcv.load_frame.off + 8,
  16422                         else => unreachable,
  16423                     },
  16424                 } },
  16425             });
  16426             switch (rhs_mcv) {
  16427                 .immediate => |shift_imm| try self.asmRegisterRegisterImmediate(
  16428                     .{ switch (tag[0]) {
  16429                         ._l => ._ld,
  16430                         ._r => ._rd,
  16431                         else => unreachable,
  16432                     }, .sh },
  16433                     temp_regs[2].to64(),
  16434                     temp_regs[3].to64(),
  16435                     .u(shift_imm & std.math.maxInt(u6)),
  16436                 ),
  16437                 else => try self.asmRegisterRegisterRegister(.{ switch (tag[0]) {
  16438                     ._l => ._ld,
  16439                     ._r => ._rd,
  16440                     else => unreachable,
  16441                 }, .sh }, temp_regs[2].to64(), temp_regs[3].to64(), .cl),
  16442             }
  16443             try self.asmMemoryRegister(.{ ._, .mov }, .{
  16444                 .base = .{ .frame = lhs_mcv.load_frame.index },
  16445                 .mod = .{ .rm = .{
  16446                     .size = .qword,
  16447                     .index = temp_regs[1].to64(),
  16448                     .scale = .@"8",
  16449                     .disp = lhs_mcv.load_frame.off,
  16450                 } },
  16451             }, temp_regs[2].to64());
  16452             try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[2].to64(), temp_regs[3].to64());
  16453             switch (tag[0]) {
  16454                 ._l => {
  16455                     if (slow_inc_dec) {
  16456                         try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
  16457                         try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[0].to32(), .u(1));
  16458                     } else {
  16459                         try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
  16460                         try self.asmRegister(.{ ._, .dec }, temp_regs[0].to32());
  16461                     }
  16462                     _ = try self.asmJccReloc(.nz, loop);
  16463                 },
  16464                 ._r => {
  16465                     if (slow_inc_dec) {
  16466                         try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1));
  16467                         try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
  16468                     } else {
  16469                         try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32());
  16470                         try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
  16471                     }
  16472                     try self.asmRegisterImmediate(
  16473                         .{ ._, .cmp },
  16474                         temp_regs[0].to32(),
  16475                         .u(limbs_len - 1),
  16476                     );
  16477                     _ = try self.asmJccReloc(.b, loop);
  16478                 },
  16479                 else => unreachable,
  16480             }
  16481             switch (rhs_mcv) {
  16482                 .immediate => {},
  16483                 else => self.performReloc(skip),
  16484             }
  16485         }
  16486         switch (rhs_mcv) {
  16487             .immediate => |shift_imm| try self.asmRegisterImmediate(
  16488                 tag,
  16489                 temp_regs[2].to64(),
  16490                 .u(shift_imm & std.math.maxInt(u6)),
  16491             ),
  16492             else => try self.asmRegisterRegister(tag, temp_regs[2].to64(), .cl),
  16493         }
  16494         try self.asmMemoryRegister(.{ ._, .mov }, .{
  16495             .base = .{ .frame = lhs_mcv.load_frame.index },
  16496             .mod = .{ .rm = .{
  16497                 .size = .qword,
  16498                 .index = temp_regs[1].to64(),
  16499                 .scale = .@"8",
  16500                 .disp = lhs_mcv.load_frame.off,
  16501             } },
  16502         }, temp_regs[2].to64());
  16503         if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate(
  16504             tag,
  16505             temp_regs[2].to64(),
  16506             .u(63),
  16507         );
  16508         if (switch (rhs_mcv) {
  16509             .immediate => |shift_imm| shift_imm >> 6 > 0,
  16510             else => true,
  16511         }) {
  16512             const skip = switch (rhs_mcv) {
  16513                 .immediate => undefined,
  16514                 else => switch (tag[0]) {
  16515                     ._l => skip: {
  16516                         try self.asmRegisterRegister(
  16517                             .{ ._, .@"test" },
  16518                             temp_regs[1].to32(),
  16519                             temp_regs[1].to32(),
  16520                         );
  16521                         break :skip try self.asmJccReloc(.z, undefined);
  16522                     },
  16523                     ._r => skip: {
  16524                         try self.asmRegisterImmediate(
  16525                             .{ ._, .cmp },
  16526                             temp_regs[1].to32(),
  16527                             .u(limbs_len - 1),
  16528                         );
  16529                         break :skip try self.asmJccReloc(.nb, undefined);
  16530                     },
  16531                     else => unreachable,
  16532                 },
  16533             };
  16534             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  16535             switch (tag[0]) {
  16536                 ._l => if (slow_inc_dec) {
  16537                     try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
  16538                 } else {
  16539                     try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
  16540                 },
  16541                 ._r => if (slow_inc_dec) {
  16542                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1));
  16543                 } else {
  16544                     try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32());
  16545                 },
  16546                 else => unreachable,
  16547             }
  16548             if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryRegister(.{ ._, .mov }, .{
  16549                 .base = .{ .frame = lhs_mcv.load_frame.index },
  16550                 .mod = .{ .rm = .{
  16551                     .size = .qword,
  16552                     .index = temp_regs[1].to64(),
  16553                     .scale = .@"8",
  16554                     .disp = lhs_mcv.load_frame.off,
  16555                 } },
  16556             }, temp_regs[2].to64()) else try self.asmMemoryImmediate(.{ ._, .mov }, .{
  16557                 .base = .{ .frame = lhs_mcv.load_frame.index },
  16558                 .mod = .{ .rm = .{
  16559                     .size = .qword,
  16560                     .index = temp_regs[1].to64(),
  16561                     .scale = .@"8",
  16562                     .disp = lhs_mcv.load_frame.off,
  16563                 } },
  16564             }, .u(0));
  16565             switch (tag[0]) {
  16566                 ._l => _ = try self.asmJccReloc(.nz, loop),
  16567                 ._r => {
  16568                     try self.asmRegisterImmediate(
  16569                         .{ ._, .cmp },
  16570                         temp_regs[1].to32(),
  16571                         .u(limbs_len - 1),
  16572                     );
  16573                     _ = try self.asmJccReloc(.b, loop);
  16574                 },
  16575                 else => unreachable,
  16576             }
  16577             switch (rhs_mcv) {
  16578                 .immediate => {},
  16579                 else => self.performReloc(skip),
  16580             }
  16581         }
  16582         return;
  16583     }
  16584 
  16585     assert(shift_abi_size == 1);
  16586     const shift_mcv: MCValue = shift: {
  16587         switch (rhs_mcv) {
  16588             .immediate => |shift_imm| switch (shift_imm) {
  16589                 0 => return,
  16590                 else => break :shift rhs_mcv,
  16591             },
  16592             .register => |rhs_reg| if (rhs_reg.id() == Register.rcx.id())
  16593                 break :shift rhs_mcv,
  16594             else => {},
  16595         }
  16596         self.register_manager.getRegAssumeFree(.rcx, null);
  16597         try self.genSetReg(.cl, rhs_ty, rhs_mcv, .{});
  16598         break :shift .{ .register = .rcx };
  16599     };
  16600     if (abi_size > 8) {
  16601         const info: struct { indices: [2]u31, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) {
  16602             ._l => .{ .indices = .{ 0, 1 }, .double_tag = .{ ._ld, .sh } },
  16603             ._r => .{ .indices = .{ 1, 0 }, .double_tag = .{ ._rd, .sh } },
  16604             else => unreachable,
  16605         };
  16606         switch (lhs_mcv) {
  16607             .register_pair => |lhs_regs| switch (shift_mcv) {
  16608                 .immediate => |shift_imm| if (shift_imm > 0 and shift_imm < 64) {
  16609                     try self.asmRegisterRegisterImmediate(
  16610                         info.double_tag,
  16611                         lhs_regs[info.indices[1]],
  16612                         lhs_regs[info.indices[0]],
  16613                         .u(shift_imm),
  16614                     );
  16615                     try self.asmRegisterImmediate(
  16616                         tag,
  16617                         lhs_regs[info.indices[0]],
  16618                         .u(shift_imm),
  16619                     );
  16620                     return;
  16621                 } else {
  16622                     assert(shift_imm < 128);
  16623                     try self.asmRegisterRegister(
  16624                         .{ ._, .mov },
  16625                         lhs_regs[info.indices[1]],
  16626                         lhs_regs[info.indices[0]],
  16627                     );
  16628                     if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate(
  16629                         tag,
  16630                         lhs_regs[info.indices[0]],
  16631                         .u(63),
  16632                     ) else try self.asmRegisterRegister(
  16633                         .{ ._, .xor },
  16634                         lhs_regs[info.indices[0]],
  16635                         lhs_regs[info.indices[0]],
  16636                     );
  16637                     if (shift_imm > 64) try self.asmRegisterImmediate(
  16638                         tag,
  16639                         lhs_regs[info.indices[1]],
  16640                         .u(shift_imm - 64),
  16641                     );
  16642                     return;
  16643                 },
  16644                 .register => |shift_reg| {
  16645                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16646                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  16647                     defer self.register_manager.unlockReg(tmp_lock);
  16648 
  16649                     if (tag[0] == ._r and tag[1] == .sa) {
  16650                         try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, lhs_regs[info.indices[0]]);
  16651                         try self.asmRegisterImmediate(tag, tmp_reg, .u(63));
  16652                     } else try self.asmRegisterRegister(
  16653                         .{ ._, .xor },
  16654                         tmp_reg.to32(),
  16655                         tmp_reg.to32(),
  16656                     );
  16657                     try self.asmRegisterRegisterRegister(
  16658                         info.double_tag,
  16659                         lhs_regs[info.indices[1]],
  16660                         lhs_regs[info.indices[0]],
  16661                         registerAlias(shift_reg, 1),
  16662                     );
  16663                     try self.asmRegisterRegister(
  16664                         tag,
  16665                         lhs_regs[info.indices[0]],
  16666                         registerAlias(shift_reg, 1),
  16667                     );
  16668                     try self.asmRegisterImmediate(.{ ._, .cmp }, registerAlias(shift_reg, 1), .u(64));
  16669                     try self.asmCmovccRegisterRegister(
  16670                         .ae,
  16671                         lhs_regs[info.indices[1]],
  16672                         lhs_regs[info.indices[0]],
  16673                     );
  16674                     try self.asmCmovccRegisterRegister(.ae, lhs_regs[info.indices[0]], tmp_reg);
  16675                     return;
  16676                 },
  16677                 else => {},
  16678             },
  16679             .load_frame => |dst_frame_addr| {
  16680                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16681                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  16682                 defer self.register_manager.unlockReg(tmp_lock);
  16683 
  16684                 switch (shift_mcv) {
  16685                     .immediate => |shift_imm| if (shift_imm > 0 and shift_imm < 64) {
  16686                         try self.asmRegisterMemory(
  16687                             .{ ._, .mov },
  16688                             tmp_reg,
  16689                             .{
  16690                                 .base = .{ .frame = dst_frame_addr.index },
  16691                                 .mod = .{ .rm = .{
  16692                                     .size = .qword,
  16693                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
  16694                                 } },
  16695                             },
  16696                         );
  16697                         try self.asmMemoryRegisterImmediate(
  16698                             info.double_tag,
  16699                             .{
  16700                                 .base = .{ .frame = dst_frame_addr.index },
  16701                                 .mod = .{ .rm = .{
  16702                                     .size = .qword,
  16703                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
  16704                                 } },
  16705                             },
  16706                             tmp_reg,
  16707                             .u(shift_imm),
  16708                         );
  16709                         try self.asmMemoryImmediate(
  16710                             tag,
  16711                             .{
  16712                                 .base = .{ .frame = dst_frame_addr.index },
  16713                                 .mod = .{ .rm = .{
  16714                                     .size = .qword,
  16715                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
  16716                                 } },
  16717                             },
  16718                             .u(shift_imm),
  16719                         );
  16720                         return;
  16721                     } else {
  16722                         assert(shift_imm < 128);
  16723                         try self.asmRegisterMemory(
  16724                             .{ ._, .mov },
  16725                             tmp_reg,
  16726                             .{
  16727                                 .base = .{ .frame = dst_frame_addr.index },
  16728                                 .mod = .{ .rm = .{
  16729                                     .size = .qword,
  16730                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
  16731                                 } },
  16732                             },
  16733                         );
  16734                         if (shift_imm > 64) try self.asmRegisterImmediate(
  16735                             tag,
  16736                             tmp_reg,
  16737                             .u(shift_imm - 64),
  16738                         );
  16739                         try self.asmMemoryRegister(
  16740                             .{ ._, .mov },
  16741                             .{
  16742                                 .base = .{ .frame = dst_frame_addr.index },
  16743                                 .mod = .{ .rm = .{
  16744                                     .size = .qword,
  16745                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
  16746                                 } },
  16747                             },
  16748                             tmp_reg,
  16749                         );
  16750                         if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate(
  16751                             tag,
  16752                             .{
  16753                                 .base = .{ .frame = dst_frame_addr.index },
  16754                                 .mod = .{ .rm = .{
  16755                                     .size = .qword,
  16756                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
  16757                                 } },
  16758                             },
  16759                             .u(63),
  16760                         ) else {
  16761                             try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32());
  16762                             try self.asmMemoryRegister(
  16763                                 .{ ._, .mov },
  16764                                 .{
  16765                                     .base = .{ .frame = dst_frame_addr.index },
  16766                                     .mod = .{ .rm = .{
  16767                                         .size = .qword,
  16768                                         .disp = dst_frame_addr.off + info.indices[0] * 8,
  16769                                     } },
  16770                                 },
  16771                                 tmp_reg,
  16772                             );
  16773                         }
  16774                         return;
  16775                     },
  16776                     .register => |shift_reg| {
  16777                         const first_reg =
  16778                             try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16779                         const first_lock = self.register_manager.lockRegAssumeUnused(first_reg);
  16780                         defer self.register_manager.unlockReg(first_lock);
  16781 
  16782                         const second_reg =
  16783                             try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  16784                         const second_lock = self.register_manager.lockRegAssumeUnused(second_reg);
  16785                         defer self.register_manager.unlockReg(second_lock);
  16786 
  16787                         try self.asmRegisterMemory(
  16788                             .{ ._, .mov },
  16789                             first_reg,
  16790                             .{
  16791                                 .base = .{ .frame = dst_frame_addr.index },
  16792                                 .mod = .{ .rm = .{
  16793                                     .size = .qword,
  16794                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
  16795                                 } },
  16796                             },
  16797                         );
  16798                         try self.asmRegisterMemory(
  16799                             .{ ._, .mov },
  16800                             second_reg,
  16801                             .{
  16802                                 .base = .{ .frame = dst_frame_addr.index },
  16803                                 .mod = .{ .rm = .{
  16804                                     .size = .qword,
  16805                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
  16806                                 } },
  16807                             },
  16808                         );
  16809                         if (tag[0] == ._r and tag[1] == .sa) {
  16810                             try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg);
  16811                             try self.asmRegisterImmediate(tag, tmp_reg, .u(63));
  16812                         } else try self.asmRegisterRegister(
  16813                             .{ ._, .xor },
  16814                             tmp_reg.to32(),
  16815                             tmp_reg.to32(),
  16816                         );
  16817                         try self.asmRegisterRegisterRegister(
  16818                             info.double_tag,
  16819                             second_reg,
  16820                             first_reg,
  16821                             registerAlias(shift_reg, 1),
  16822                         );
  16823                         try self.asmRegisterRegister(tag, first_reg, registerAlias(shift_reg, 1));
  16824                         try self.asmRegisterImmediate(
  16825                             .{ ._, .cmp },
  16826                             registerAlias(shift_reg, 1),
  16827                             .u(64),
  16828                         );
  16829                         try self.asmCmovccRegisterRegister(.ae, second_reg, first_reg);
  16830                         try self.asmCmovccRegisterRegister(.ae, first_reg, tmp_reg);
  16831                         try self.asmMemoryRegister(
  16832                             .{ ._, .mov },
  16833                             .{
  16834                                 .base = .{ .frame = dst_frame_addr.index },
  16835                                 .mod = .{ .rm = .{
  16836                                     .size = .qword,
  16837                                     .disp = dst_frame_addr.off + info.indices[1] * 8,
  16838                                 } },
  16839                             },
  16840                             second_reg,
  16841                         );
  16842                         try self.asmMemoryRegister(
  16843                             .{ ._, .mov },
  16844                             .{
  16845                                 .base = .{ .frame = dst_frame_addr.index },
  16846                                 .mod = .{ .rm = .{
  16847                                     .size = .qword,
  16848                                     .disp = dst_frame_addr.off + info.indices[0] * 8,
  16849                                 } },
  16850                             },
  16851                             first_reg,
  16852                         );
  16853                         return;
  16854                     },
  16855                     else => {},
  16856                 }
  16857             },
  16858             else => {},
  16859         }
  16860     } else switch (lhs_mcv) {
  16861         .register => |lhs_reg| switch (shift_mcv) {
  16862             .immediate => |shift_imm| return self.asmRegisterImmediate(
  16863                 tag,
  16864                 registerAlias(lhs_reg, abi_size),
  16865                 .u(shift_imm),
  16866             ),
  16867             .register => |shift_reg| return self.asmRegisterRegister(
  16868                 tag,
  16869                 registerAlias(lhs_reg, abi_size),
  16870                 registerAlias(shift_reg, 1),
  16871             ),
  16872             else => {},
  16873         },
  16874         .memory, .indirect, .load_frame => {
  16875             const lhs_mem: Memory = switch (lhs_mcv) {
  16876                 .memory => |addr| .{
  16877                     .base = .{ .reg = .ds },
  16878                     .mod = .{ .rm = .{
  16879                         .size = .fromSize(abi_size),
  16880                         .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse
  16881                             return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
  16882                             @tagName(lhs_mcv),
  16883                             @tagName(shift_mcv),
  16884                         }),
  16885                     } },
  16886                 },
  16887                 .indirect => |reg_off| .{
  16888                     .base = .{ .reg = reg_off.reg },
  16889                     .mod = .{ .rm = .{
  16890                         .size = .fromSize(abi_size),
  16891                         .disp = reg_off.off,
  16892                     } },
  16893                 },
  16894                 .load_frame => |frame_addr| .{
  16895                     .base = .{ .frame = frame_addr.index },
  16896                     .mod = .{ .rm = .{
  16897                         .size = .fromSize(abi_size),
  16898                         .disp = frame_addr.off,
  16899                     } },
  16900                 },
  16901                 else => unreachable,
  16902             };
  16903             switch (shift_mcv) {
  16904                 .immediate => |shift_imm| return self.asmMemoryImmediate(tag, lhs_mem, .u(shift_imm)),
  16905                 .register => |shift_reg| return self.asmMemoryRegister(
  16906                     tag,
  16907                     lhs_mem,
  16908                     registerAlias(shift_reg, 1),
  16909                 ),
  16910                 else => {},
  16911             }
  16912         },
  16913         else => {},
  16914     }
  16915     return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
  16916         @tagName(lhs_mcv),
  16917         @tagName(shift_mcv),
  16918     });
  16919 }
  16920 
  16921 /// Result is always a register.
  16922 /// Clobbers .rcx for non-immediate rhs, therefore care is needed to spill .rcx upfront.
  16923 /// Asserts .rcx is free.
  16924 fn genShiftBinOp(
  16925     self: *CodeGen,
  16926     air_tag: Air.Inst.Tag,
  16927     maybe_inst: ?Air.Inst.Index,
  16928     lhs_mcv: MCValue,
  16929     rhs_mcv: MCValue,
  16930     lhs_ty: Type,
  16931     rhs_ty: Type,
  16932 ) !MCValue {
  16933     const pt = self.pt;
  16934     const zcu = pt.zcu;
  16935     if (lhs_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement genShiftBinOp for {}", .{
  16936         lhs_ty.fmt(pt),
  16937     });
  16938 
  16939     try self.register_manager.getKnownReg(.rcx, null);
  16940     const rcx_lock = self.register_manager.lockReg(.rcx);
  16941     defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock);
  16942 
  16943     const mat_lhs_mcv: MCValue, const can_reuse_lhs = switch (lhs_mcv) {
  16944         .register => |lhs_reg| switch (lhs_reg.class()) {
  16945             .general_purpose => .{ lhs_mcv, true },
  16946             else => lhs: {
  16947                 const mat_lhs_mcv = try self.allocTempRegOrMem(lhs_ty, true);
  16948                 try self.genCopy(lhs_ty, mat_lhs_mcv, lhs_mcv, .{});
  16949                 break :lhs .{ mat_lhs_mcv, false };
  16950             },
  16951         },
  16952         else => .{ lhs_mcv, true },
  16953     };
  16954     const lhs_lock = switch (mat_lhs_mcv) {
  16955         .register => |reg| self.register_manager.lockReg(reg),
  16956         else => null,
  16957     };
  16958     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  16959 
  16960     const rhs_lock = switch (rhs_mcv) {
  16961         .register => |reg| self.register_manager.lockReg(reg),
  16962         else => null,
  16963     };
  16964     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  16965 
  16966     const dst_mcv: MCValue = dst: {
  16967         if (can_reuse_lhs) if (maybe_inst) |inst| {
  16968             const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  16969             if (self.reuseOperand(inst, bin_op.lhs, 0, mat_lhs_mcv)) break :dst mat_lhs_mcv;
  16970         };
  16971         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
  16972         try self.genCopy(lhs_ty, dst_mcv, mat_lhs_mcv, .{});
  16973         break :dst dst_mcv;
  16974     };
  16975 
  16976     const signedness = lhs_ty.intInfo(zcu).signedness;
  16977     try self.genShiftBinOpMir(switch (air_tag) {
  16978         .shl, .shl_exact => switch (signedness) {
  16979             .signed => .{ ._l, .sa },
  16980             .unsigned => .{ ._l, .sh },
  16981         },
  16982         .shr, .shr_exact => switch (signedness) {
  16983             .signed => .{ ._r, .sa },
  16984             .unsigned => .{ ._r, .sh },
  16985         },
  16986         else => unreachable,
  16987     }, lhs_ty, dst_mcv, rhs_ty, rhs_mcv);
  16988     return dst_mcv;
  16989 }
  16990 
  16991 /// Result is always a register.
  16992 /// Clobbers .rax and .rdx therefore care is needed to spill .rax and .rdx upfront.
  16993 /// Asserts .rax and .rdx are free.
  16994 fn genMulDivBinOp(
  16995     self: *CodeGen,
  16996     tag: Air.Inst.Tag,
  16997     maybe_inst: ?Air.Inst.Index,
  16998     dst_ty: Type,
  16999     src_ty: Type,
  17000     lhs_mcv: MCValue,
  17001     rhs_mcv: MCValue,
  17002 ) !MCValue {
  17003     const pt = self.pt;
  17004     const zcu = pt.zcu;
  17005     if (dst_ty.zigTypeTag(zcu) == .vector or dst_ty.zigTypeTag(zcu) == .float) return self.fail(
  17006         "TODO implement genMulDivBinOp for {s} from {} to {}",
  17007         .{ @tagName(tag), src_ty.fmt(pt), dst_ty.fmt(pt) },
  17008     );
  17009     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  17010     const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
  17011 
  17012     assert(self.register_manager.isRegFree(.rax));
  17013     assert(self.register_manager.isRegFree(.rcx));
  17014     assert(self.register_manager.isRegFree(.rdx));
  17015     assert(self.eflags_inst == null);
  17016 
  17017     if (dst_abi_size == 16 and src_abi_size == 16) {
  17018         assert(tag == .mul or tag == .mul_wrap);
  17019         const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
  17020         defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
  17021 
  17022         const mat_lhs_mcv = switch (lhs_mcv) {
  17023             .load_symbol => mat_lhs_mcv: {
  17024                 // TODO clean this up!
  17025                 const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
  17026                 break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  17027             },
  17028             else => lhs_mcv,
  17029         };
  17030         const mat_lhs_lock = switch (mat_lhs_mcv) {
  17031             .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  17032             else => null,
  17033         };
  17034         defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
  17035         const mat_rhs_mcv = switch (rhs_mcv) {
  17036             .load_symbol => mat_rhs_mcv: {
  17037                 // TODO clean this up!
  17038                 const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
  17039                 break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
  17040             },
  17041             else => rhs_mcv,
  17042         };
  17043         const mat_rhs_lock = switch (mat_rhs_mcv) {
  17044             .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
  17045             else => null,
  17046         };
  17047         defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
  17048 
  17049         const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  17050         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  17051         defer self.register_manager.unlockReg(tmp_lock);
  17052 
  17053         if (mat_lhs_mcv.isBase())
  17054             try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .{ .size = .qword }))
  17055         else
  17056             try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]);
  17057         if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
  17058             .{ ._, .mov },
  17059             tmp_reg,
  17060             try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  17061         ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]);
  17062         try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax);
  17063         if (mat_rhs_mcv.isBase())
  17064             try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
  17065         else
  17066             try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
  17067         try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
  17068         if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
  17069             .{ ._, .mov },
  17070             tmp_reg,
  17071             try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  17072         ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]);
  17073         if (mat_rhs_mcv.isBase())
  17074             try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
  17075         else
  17076             try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]);
  17077         try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
  17078         return .{ .register_pair = .{ .rax, .rdx } };
  17079     }
  17080 
  17081     if (switch (tag) {
  17082         else => unreachable,
  17083         .mul, .mul_wrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2,
  17084         .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size,
  17085     } or src_abi_size > 8) {
  17086         const src_info = src_ty.intInfo(zcu);
  17087         switch (tag) {
  17088             .mul, .mul_wrap => {
  17089                 const slow_inc = self.hasFeature(.slow_incdec);
  17090                 const limb_len = std.math.divCeil(u32, src_abi_size, 8) catch unreachable;
  17091 
  17092                 try self.spillRegisters(&.{ .rax, .rcx, .rdx });
  17093                 const reg_locks = self.register_manager.lockRegs(3, .{ .rax, .rcx, .rdx });
  17094                 defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
  17095                     self.register_manager.unlockReg(lock);
  17096 
  17097                 const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
  17098                 try self.genInlineMemset(
  17099                     dst_mcv.address(),
  17100                     .{ .immediate = 0 },
  17101                     .{ .immediate = src_abi_size },
  17102                     .{},
  17103                 );
  17104 
  17105                 const temp_regs =
  17106                     try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
  17107                 const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
  17108                 defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
  17109 
  17110                 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());
  17111 
  17112                 const outer_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  17113                 try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[1].to64(), .{
  17114                     .base = .{ .frame = rhs_mcv.load_frame.index },
  17115                     .mod = .{ .rm = .{
  17116                         .size = .qword,
  17117                         .index = temp_regs[0].to64(),
  17118                         .scale = .@"8",
  17119                         .disp = rhs_mcv.load_frame.off,
  17120                     } },
  17121                 });
  17122                 try self.asmRegisterRegister(.{ ._, .@"test" }, temp_regs[1].to64(), temp_regs[1].to64());
  17123                 const skip_inner = try self.asmJccReloc(.z, undefined);
  17124 
  17125                 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[2].to32(), temp_regs[2].to32());
  17126                 try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[3].to32(), temp_regs[0].to32());
  17127                 try self.asmRegisterRegister(.{ ._, .xor }, .ecx, .ecx);
  17128                 try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx);
  17129 
  17130                 const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  17131                 try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1));
  17132                 try self.asmMemoryRegister(.{ ._, .adc }, .{
  17133                     .base = .{ .frame = dst_mcv.load_frame.index },
  17134                     .mod = .{ .rm = .{
  17135                         .size = .qword,
  17136                         .index = temp_regs[3].to64(),
  17137                         .scale = .@"8",
  17138                         .disp = dst_mcv.load_frame.off,
  17139                     } },
  17140                 }, .rdx);
  17141                 try self.asmSetccRegister(.c, .cl);
  17142 
  17143                 try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{
  17144                     .base = .{ .frame = lhs_mcv.load_frame.index },
  17145                     .mod = .{ .rm = .{
  17146                         .size = .qword,
  17147                         .index = temp_regs[2].to64(),
  17148                         .scale = .@"8",
  17149                         .disp = lhs_mcv.load_frame.off,
  17150                     } },
  17151                 });
  17152                 try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64());
  17153 
  17154                 try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1));
  17155                 try self.asmMemoryRegister(.{ ._, .adc }, .{
  17156                     .base = .{ .frame = dst_mcv.load_frame.index },
  17157                     .mod = .{ .rm = .{
  17158                         .size = .qword,
  17159                         .index = temp_regs[3].to64(),
  17160                         .scale = .@"8",
  17161                         .disp = dst_mcv.load_frame.off,
  17162                     } },
  17163                 }, .rax);
  17164                 try self.asmSetccRegister(.c, .ch);
  17165 
  17166                 if (slow_inc) {
  17167                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
  17168                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1));
  17169                 } else {
  17170                     try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
  17171                     try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32());
  17172                 }
  17173                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len));
  17174                 _ = try self.asmJccReloc(.b, inner_loop);
  17175 
  17176                 self.performReloc(skip_inner);
  17177                 if (slow_inc) {
  17178                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
  17179                 } else {
  17180                     try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
  17181                 }
  17182                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len));
  17183                 _ = try self.asmJccReloc(.b, outer_loop);
  17184 
  17185                 return dst_mcv;
  17186             },
  17187             .div_trunc, .div_floor, .div_exact, .rem, .mod => switch (src_info.signedness) {
  17188                 .signed => {},
  17189                 .unsigned => {
  17190                     const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
  17191                     const manyptr_u32_ty = try pt.ptrType(.{
  17192                         .child = .u32_type,
  17193                         .flags = .{
  17194                             .size = .many,
  17195                         },
  17196                     });
  17197                     const manyptr_const_u32_ty = try pt.ptrType(.{
  17198                         .child = .u32_type,
  17199                         .flags = .{
  17200                             .size = .many,
  17201                             .is_const = true,
  17202                         },
  17203                     });
  17204                     _ = try self.genCall(.{ .lib = .{
  17205                         .return_type = .void_type,
  17206                         .param_types = &.{
  17207                             manyptr_u32_ty.toIntern(),
  17208                             manyptr_const_u32_ty.toIntern(),
  17209                             manyptr_const_u32_ty.toIntern(),
  17210                             .usize_type,
  17211                         },
  17212                         .callee = switch (tag) {
  17213                             .div_trunc,
  17214                             .div_floor,
  17215                             .div_exact,
  17216                             => "__udivei4",
  17217                             .rem,
  17218                             .mod,
  17219                             => "__umodei4",
  17220                             else => unreachable,
  17221                         },
  17222                     } }, &.{
  17223                         manyptr_u32_ty,
  17224                         manyptr_const_u32_ty,
  17225                         manyptr_const_u32_ty,
  17226                         .usize,
  17227                     }, &.{
  17228                         dst_mcv.address(),
  17229                         lhs_mcv.address(),
  17230                         rhs_mcv.address(),
  17231                         .{ .immediate = src_info.bits },
  17232                     }, .{});
  17233                     return dst_mcv;
  17234                 },
  17235             },
  17236             else => {},
  17237         }
  17238         return self.fail(
  17239             "TODO implement genMulDivBinOp for {s} from {} to {}",
  17240             .{ @tagName(tag), src_ty.fmt(pt), dst_ty.fmt(pt) },
  17241         );
  17242     }
  17243     const ty = if (dst_abi_size <= 8) dst_ty else src_ty;
  17244     const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size;
  17245 
  17246     const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
  17247     defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
  17248 
  17249     const signedness = ty.intInfo(zcu).signedness;
  17250     switch (tag) {
  17251         .mul,
  17252         .mul_wrap,
  17253         .rem,
  17254         .div_trunc,
  17255         .div_exact,
  17256         => {
  17257             const track_inst_rax = switch (tag) {
  17258                 .mul, .mul_wrap => if (dst_abi_size <= 8) maybe_inst else null,
  17259                 .div_exact, .div_trunc => maybe_inst,
  17260                 else => null,
  17261             };
  17262             const track_inst_rdx = switch (tag) {
  17263                 .rem => maybe_inst,
  17264                 else => null,
  17265             };
  17266             try self.register_manager.getKnownReg(.rax, track_inst_rax);
  17267             try self.register_manager.getKnownReg(.rdx, track_inst_rdx);
  17268 
  17269             try self.genIntMulDivOpMir(switch (signedness) {
  17270                 .signed => switch (tag) {
  17271                     .mul, .mul_wrap => .{ .i_, .mul },
  17272                     .div_trunc, .div_exact, .rem => .{ .i_, .div },
  17273                     else => unreachable,
  17274                 },
  17275                 .unsigned => switch (tag) {
  17276                     .mul, .mul_wrap => .{ ._, .mul },
  17277                     .div_trunc, .div_exact, .rem => .{ ._, .div },
  17278                     else => unreachable,
  17279                 },
  17280             }, ty, lhs_mcv, rhs_mcv);
  17281 
  17282             if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
  17283                 .mul, .mul_wrap, .div_trunc, .div_exact => .rax,
  17284                 .rem => .rdx,
  17285                 else => unreachable,
  17286             }, dst_abi_size) };
  17287 
  17288             const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
  17289             try self.asmMemoryRegister(.{ ._, .mov }, .{
  17290                 .base = .{ .frame = dst_mcv.load_frame.index },
  17291                 .mod = .{ .rm = .{
  17292                     .size = .qword,
  17293                     .disp = dst_mcv.load_frame.off,
  17294                 } },
  17295             }, .rax);
  17296             try self.asmMemoryRegister(.{ ._, .mov }, .{
  17297                 .base = .{ .frame = dst_mcv.load_frame.index },
  17298                 .mod = .{ .rm = .{
  17299                     .size = .qword,
  17300                     .disp = dst_mcv.load_frame.off + 8,
  17301                 } },
  17302             }, .rdx);
  17303             return dst_mcv;
  17304         },
  17305 
  17306         .mod => {
  17307             try self.register_manager.getKnownReg(.rax, null);
  17308             try self.register_manager.getKnownReg(
  17309                 .rdx,
  17310                 if (signedness == .unsigned) maybe_inst else null,
  17311             );
  17312 
  17313             switch (signedness) {
  17314                 .signed => {
  17315                     const lhs_lock = switch (lhs_mcv) {
  17316                         .register => |reg| self.register_manager.lockReg(reg),
  17317                         else => null,
  17318                     };
  17319                     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  17320                     const rhs_lock = switch (rhs_mcv) {
  17321                         .register => |reg| self.register_manager.lockReg(reg),
  17322                         else => null,
  17323                     };
  17324                     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  17325 
  17326                     // hack around hazard between rhs and div_floor by copying rhs to another register
  17327                     const rhs_copy = try self.copyToTmpRegister(ty, rhs_mcv);
  17328                     const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy);
  17329                     defer self.register_manager.unlockReg(rhs_copy_lock);
  17330 
  17331                     const div_floor = try self.genInlineIntDivFloor(ty, lhs_mcv, rhs_mcv);
  17332                     try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy });
  17333                     const div_floor_lock = self.register_manager.lockReg(div_floor.register);
  17334                     defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock);
  17335 
  17336                     const result: MCValue = if (maybe_inst) |inst|
  17337                         try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv)
  17338                     else
  17339                         .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) };
  17340                     try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor);
  17341 
  17342                     return result;
  17343                 },
  17344                 .unsigned => {
  17345                     try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, rhs_mcv);
  17346                     return .{ .register = registerAlias(.rdx, abi_size) };
  17347                 },
  17348             }
  17349         },
  17350 
  17351         .div_floor => {
  17352             try self.register_manager.getKnownReg(
  17353                 .rax,
  17354                 if (signedness == .unsigned) maybe_inst else null,
  17355             );
  17356             try self.register_manager.getKnownReg(.rdx, null);
  17357 
  17358             const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
  17359                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  17360                 else => null,
  17361             };
  17362             defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  17363 
  17364             const actual_rhs_mcv: MCValue = blk: {
  17365                 switch (signedness) {
  17366                     .signed => {
  17367                         const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
  17368                             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  17369                             else => null,
  17370                         };
  17371                         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  17372 
  17373                         if (maybe_inst) |inst| {
  17374                             break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs_mcv);
  17375                         }
  17376                         break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs_mcv) };
  17377                     },
  17378                     .unsigned => break :blk rhs_mcv,
  17379                 }
  17380             };
  17381             const rhs_lock: ?RegisterLock = switch (actual_rhs_mcv) {
  17382                 .register => |reg| self.register_manager.lockReg(reg),
  17383                 else => null,
  17384             };
  17385             defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  17386 
  17387             switch (signedness) {
  17388                 .signed => return try self.genInlineIntDivFloor(ty, lhs_mcv, actual_rhs_mcv),
  17389                 .unsigned => {
  17390                     try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, actual_rhs_mcv);
  17391                     return .{ .register = registerAlias(.rax, abi_size) };
  17392                 },
  17393             }
  17394         },
  17395 
  17396         else => unreachable,
  17397     }
  17398 }
  17399 
  17400 fn genBinOp(
  17401     self: *CodeGen,
  17402     maybe_inst: ?Air.Inst.Index,
  17403     air_tag: Air.Inst.Tag,
  17404     lhs_air: Air.Inst.Ref,
  17405     rhs_air: Air.Inst.Ref,
  17406 ) !MCValue {
  17407     const pt = self.pt;
  17408     const zcu = pt.zcu;
  17409     const lhs_ty = self.typeOf(lhs_air);
  17410     const rhs_ty = self.typeOf(rhs_air);
  17411     const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
  17412 
  17413     if (lhs_ty.isRuntimeFloat()) libcall: {
  17414         const float_bits = lhs_ty.floatBits(self.target.*);
  17415         const type_needs_libcall = switch (float_bits) {
  17416             16 => !self.hasFeature(.f16c),
  17417             32, 64 => false,
  17418             80, 128 => true,
  17419             else => unreachable,
  17420         };
  17421         switch (air_tag) {
  17422             .rem, .mod => {},
  17423             else => if (!type_needs_libcall) break :libcall,
  17424         }
  17425         var callee_buf: ["__mod?f3".len]u8 = undefined;
  17426         const callee = switch (air_tag) {
  17427             .add,
  17428             .sub,
  17429             .mul,
  17430             .div_float,
  17431             .div_trunc,
  17432             .div_floor,
  17433             .div_exact,
  17434             => std.fmt.bufPrint(&callee_buf, "__{s}{c}f3", .{
  17435                 @tagName(air_tag)[0..3],
  17436                 floatCompilerRtAbiName(float_bits),
  17437             }),
  17438             .rem, .mod, .min, .max => std.fmt.bufPrint(&callee_buf, "{s}f{s}{s}", .{
  17439                 floatLibcAbiPrefix(lhs_ty),
  17440                 switch (air_tag) {
  17441                     .rem, .mod => "mod",
  17442                     .min => "min",
  17443                     .max => "max",
  17444                     else => unreachable,
  17445                 },
  17446                 floatLibcAbiSuffix(lhs_ty),
  17447             }),
  17448             else => return self.fail("TODO implement genBinOp for {s} {}", .{
  17449                 @tagName(air_tag), lhs_ty.fmt(pt),
  17450             }),
  17451         } catch unreachable;
  17452         const result = try self.genCall(.{ .lib = .{
  17453             .return_type = lhs_ty.toIntern(),
  17454             .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
  17455             .callee = callee,
  17456         } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } }, .{});
  17457         return switch (air_tag) {
  17458             .mod => result: {
  17459                 const adjusted: MCValue = if (type_needs_libcall) adjusted: {
  17460                     var add_callee_buf: ["__add?f3".len]u8 = undefined;
  17461                     break :adjusted try self.genCall(.{ .lib = .{
  17462                         .return_type = lhs_ty.toIntern(),
  17463                         .param_types = &.{
  17464                             lhs_ty.toIntern(),
  17465                             rhs_ty.toIntern(),
  17466                         },
  17467                         .callee = std.fmt.bufPrint(&add_callee_buf, "__add{c}f3", .{
  17468                             floatCompilerRtAbiName(float_bits),
  17469                         }) catch unreachable,
  17470                     } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } }, .{});
  17471                 } else switch (float_bits) {
  17472                     16, 32, 64 => adjusted: {
  17473                         const dst_reg = switch (result) {
  17474                             .register => |reg| reg,
  17475                             else => if (maybe_inst) |inst|
  17476                                 (try self.copyToRegisterWithInstTracking(inst, lhs_ty, result)).register
  17477                             else
  17478                                 try self.copyToTmpRegister(lhs_ty, result),
  17479                         };
  17480                         const dst_lock = self.register_manager.lockReg(dst_reg);
  17481                         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  17482 
  17483                         const rhs_mcv = try self.resolveInst(rhs_air);
  17484                         const src_mcv: MCValue = if (float_bits == 16) src: {
  17485                             assert(self.hasFeature(.f16c));
  17486                             const tmp_reg = (try self.register_manager.allocReg(
  17487                                 null,
  17488                                 abi.RegisterClass.sse,
  17489                             )).to128();
  17490                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  17491                             defer self.register_manager.unlockReg(tmp_lock);
  17492 
  17493                             if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  17494                                 .{ .vp_w, .insr },
  17495                                 dst_reg,
  17496                                 dst_reg,
  17497                                 try rhs_mcv.mem(self, .{ .size = .word }),
  17498                                 .u(1),
  17499                             ) else try self.asmRegisterRegisterRegister(
  17500                                 .{ .vp_, .unpcklwd },
  17501                                 dst_reg,
  17502                                 dst_reg,
  17503                                 (if (rhs_mcv.isRegister())
  17504                                     rhs_mcv.getReg().?
  17505                                 else
  17506                                     try self.copyToTmpRegister(rhs_ty, rhs_mcv)).to128(),
  17507                             );
  17508                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
  17509                             break :src .{ .register = tmp_reg };
  17510                         } else rhs_mcv;
  17511 
  17512                         if (self.hasFeature(.avx)) {
  17513                             const mir_tag: Mir.Inst.FixedTag = switch (float_bits) {
  17514                                 16, 32 => .{ .v_ss, .add },
  17515                                 64 => .{ .v_sd, .add },
  17516                                 else => unreachable,
  17517                             };
  17518                             if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
  17519                                 mir_tag,
  17520                                 dst_reg,
  17521                                 dst_reg,
  17522                                 try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }),
  17523                             ) else try self.asmRegisterRegisterRegister(
  17524                                 mir_tag,
  17525                                 dst_reg,
  17526                                 dst_reg,
  17527                                 (if (src_mcv.isRegister())
  17528                                     src_mcv.getReg().?
  17529                                 else
  17530                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  17531                             );
  17532                         } else {
  17533                             const mir_tag: Mir.Inst.FixedTag = switch (float_bits) {
  17534                                 32 => .{ ._ss, .add },
  17535                                 64 => .{ ._sd, .add },
  17536                                 else => unreachable,
  17537                             };
  17538                             if (src_mcv.isBase()) try self.asmRegisterMemory(
  17539                                 mir_tag,
  17540                                 dst_reg,
  17541                                 try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }),
  17542                             ) else try self.asmRegisterRegister(
  17543                                 mir_tag,
  17544                                 dst_reg,
  17545                                 (if (src_mcv.isRegister())
  17546                                     src_mcv.getReg().?
  17547                                 else
  17548                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  17549                             );
  17550                         }
  17551 
  17552                         if (float_bits == 16) try self.asmRegisterRegisterImmediate(
  17553                             .{ .v_, .cvtps2ph },
  17554                             dst_reg,
  17555                             dst_reg,
  17556                             .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  17557                         );
  17558                         break :adjusted .{ .register = dst_reg };
  17559                     },
  17560                     80, 128 => return self.fail("TODO implement genBinOp for {s} of {}", .{
  17561                         @tagName(air_tag), lhs_ty.fmt(pt),
  17562                     }),
  17563                     else => unreachable,
  17564                 };
  17565                 break :result try self.genCall(.{ .lib = .{
  17566                     .return_type = lhs_ty.toIntern(),
  17567                     .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
  17568                     .callee = callee,
  17569                 } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }, .{});
  17570             },
  17571             .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{
  17572                 .mode = switch (air_tag) {
  17573                     .div_trunc => .zero,
  17574                     .div_floor => .down,
  17575                     else => unreachable,
  17576                 },
  17577                 .precision = .inexact,
  17578             }),
  17579             else => result,
  17580         };
  17581     }
  17582 
  17583     const sse_op = switch (lhs_ty.zigTypeTag(zcu)) {
  17584         else => false,
  17585         .float => true,
  17586         .vector => switch (lhs_ty.childType(zcu).toIntern()) {
  17587             .bool_type, .u1_type => false,
  17588             else => true,
  17589         },
  17590     };
  17591     if (sse_op and ((lhs_ty.scalarType(zcu).isRuntimeFloat() and
  17592         lhs_ty.scalarType(zcu).floatBits(self.target.*) == 80) or
  17593         lhs_ty.abiSize(zcu) > self.vectorSize(.float)))
  17594         return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(pt) });
  17595 
  17596     const maybe_mask_reg = switch (air_tag) {
  17597         else => null,
  17598         .rem, .mod => unreachable,
  17599         .max, .min => if (lhs_ty.scalarType(zcu).isRuntimeFloat()) registerAlias(
  17600             if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
  17601                 try self.register_manager.getKnownReg(.xmm0, null);
  17602                 break :mask .xmm0;
  17603             } else try self.register_manager.allocReg(null, abi.RegisterClass.sse),
  17604             abi_size,
  17605         ) else null,
  17606     };
  17607     const mask_lock =
  17608         if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
  17609     defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
  17610 
  17611     const ordered_air: [2]Air.Inst.Ref = if (lhs_ty.isVector(zcu) and
  17612         switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  17613         .bool => false,
  17614         .int => switch (air_tag) {
  17615             .cmp_lt, .cmp_gte => true,
  17616             else => false,
  17617         },
  17618         .float => switch (air_tag) {
  17619             .cmp_gte, .cmp_gt => true,
  17620             else => false,
  17621         },
  17622         else => unreachable,
  17623     }) .{ rhs_air, lhs_air } else .{ lhs_air, rhs_air };
  17624 
  17625     if (lhs_ty.isAbiInt(zcu)) for (ordered_air) |op_air| {
  17626         switch (try self.resolveInst(op_air)) {
  17627             .register => |op_reg| switch (op_reg.class()) {
  17628                 .sse => try self.register_manager.getReg(op_reg, null),
  17629                 else => {},
  17630             },
  17631             else => {},
  17632         }
  17633     };
  17634 
  17635     const lhs_mcv = try self.resolveInst(ordered_air[0]);
  17636     var rhs_mcv = try self.resolveInst(ordered_air[1]);
  17637     switch (lhs_mcv) {
  17638         .immediate => |imm| switch (imm) {
  17639             0 => switch (air_tag) {
  17640                 .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air[1]),
  17641                 else => {},
  17642             },
  17643             else => {},
  17644         },
  17645         else => {},
  17646     }
  17647 
  17648     const is_commutative = switch (air_tag) {
  17649         .add,
  17650         .add_wrap,
  17651         .mul,
  17652         .bool_or,
  17653         .bit_or,
  17654         .bool_and,
  17655         .bit_and,
  17656         .xor,
  17657         .min,
  17658         .max,
  17659         .cmp_eq,
  17660         .cmp_neq,
  17661         => true,
  17662 
  17663         else => false,
  17664     };
  17665 
  17666     const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
  17667         .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
  17668         .register_pair => |lhs_regs| locks: {
  17669             const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
  17670             break :locks .{ locks[0], locks[1] };
  17671         },
  17672         else => @splat(null),
  17673     };
  17674     defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  17675 
  17676     const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
  17677         .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
  17678         .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
  17679         else => @splat(null),
  17680     };
  17681     defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  17682 
  17683     var flipped = false;
  17684     var copied_to_dst = true;
  17685     const dst_mcv: MCValue = dst: {
  17686         const tracked_inst = switch (air_tag) {
  17687             else => maybe_inst,
  17688             .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null,
  17689         };
  17690         if (maybe_inst) |inst| {
  17691             if ((!sse_op or lhs_mcv.isRegister()) and
  17692                 self.reuseOperandAdvanced(inst, ordered_air[0], 0, lhs_mcv, tracked_inst))
  17693                 break :dst lhs_mcv;
  17694             if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and
  17695                 self.reuseOperandAdvanced(inst, ordered_air[1], 1, rhs_mcv, tracked_inst))
  17696             {
  17697                 flipped = true;
  17698                 break :dst rhs_mcv;
  17699             }
  17700         }
  17701         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true);
  17702         if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
  17703             copied_to_dst = false
  17704         else
  17705             try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{});
  17706         rhs_mcv = try self.resolveInst(ordered_air[1]);
  17707         break :dst dst_mcv;
  17708     };
  17709     const dst_locks: [2]?RegisterLock = switch (dst_mcv) {
  17710         .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
  17711         .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs),
  17712         else => @splat(null),
  17713     };
  17714     defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  17715 
  17716     const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
  17717     const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
  17718         if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
  17719             self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
  17720             try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv, .{});
  17721             break :src .{ .register = mask_reg };
  17722         }
  17723     else
  17724         unmat_src_mcv;
  17725     const src_locks: [2]?RegisterLock = switch (src_mcv) {
  17726         .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null },
  17727         .register_pair => |src_regs| self.register_manager.lockRegs(2, src_regs),
  17728         else => @splat(null),
  17729     };
  17730     defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
  17731 
  17732     if (!sse_op) {
  17733         switch (air_tag) {
  17734             .add,
  17735             .add_wrap,
  17736             => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv),
  17737 
  17738             .sub,
  17739             .sub_wrap,
  17740             => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv),
  17741 
  17742             .ptr_add,
  17743             .ptr_sub,
  17744             => {
  17745                 const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
  17746                 const tmp_mcv = MCValue{ .register = tmp_reg };
  17747                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  17748                 defer self.register_manager.unlockReg(tmp_lock);
  17749 
  17750                 const elem_size = lhs_ty.elemType2(zcu).abiSize(zcu);
  17751                 try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
  17752                 try self.genBinOpMir(
  17753                     switch (air_tag) {
  17754                         .ptr_add => .{ ._, .add },
  17755                         .ptr_sub => .{ ._, .sub },
  17756                         else => unreachable,
  17757                     },
  17758                     lhs_ty,
  17759                     dst_mcv,
  17760                     tmp_mcv,
  17761                 );
  17762             },
  17763 
  17764             .bool_or,
  17765             .bit_or,
  17766             => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv),
  17767 
  17768             .bool_and,
  17769             .bit_and,
  17770             => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv),
  17771 
  17772             .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv),
  17773 
  17774             .min,
  17775             .max,
  17776             => {
  17777                 const resolved_src_mcv = switch (src_mcv) {
  17778                     else => src_mcv,
  17779                     .air_ref => |src_ref| try self.resolveInst(src_ref),
  17780                 };
  17781 
  17782                 if (abi_size > 8) {
  17783                     const dst_regs = switch (dst_mcv) {
  17784                         .register_pair => |dst_regs| dst_regs,
  17785                         else => dst: {
  17786                             const dst_regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
  17787                             const dst_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
  17788                             defer for (dst_regs_locks) |lock| self.register_manager.unlockReg(lock);
  17789 
  17790                             try self.genCopy(lhs_ty, .{ .register_pair = dst_regs }, dst_mcv, .{});
  17791                             break :dst dst_regs;
  17792                         },
  17793                     };
  17794                     const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs);
  17795                     defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
  17796                         self.register_manager.unlockReg(lock);
  17797 
  17798                     const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  17799                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  17800                     defer self.register_manager.unlockReg(tmp_lock);
  17801 
  17802                     const signed = lhs_ty.isSignedInt(zcu);
  17803                     const cc: Condition = switch (air_tag) {
  17804                         .min => if (signed) .nl else .nb,
  17805                         .max => if (signed) .nge else .nae,
  17806                         else => unreachable,
  17807                     };
  17808 
  17809                     try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
  17810                     if (src_mcv.isBase()) {
  17811                         try self.asmRegisterMemory(
  17812                             .{ ._, .cmp },
  17813                             dst_regs[0],
  17814                             try src_mcv.mem(self, .{ .size = .qword }),
  17815                         );
  17816                         try self.asmRegisterMemory(
  17817                             .{ ._, .sbb },
  17818                             tmp_reg,
  17819                             try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  17820                         );
  17821                         try self.asmCmovccRegisterMemory(
  17822                             cc,
  17823                             dst_regs[0],
  17824                             try src_mcv.mem(self, .{ .size = .qword }),
  17825                         );
  17826                         try self.asmCmovccRegisterMemory(
  17827                             cc,
  17828                             dst_regs[1],
  17829                             try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
  17830                         );
  17831                     } else {
  17832                         try self.asmRegisterRegister(
  17833                             .{ ._, .cmp },
  17834                             dst_regs[0],
  17835                             src_mcv.register_pair[0],
  17836                         );
  17837                         try self.asmRegisterRegister(
  17838                             .{ ._, .sbb },
  17839                             tmp_reg,
  17840                             src_mcv.register_pair[1],
  17841                         );
  17842                         try self.asmCmovccRegisterRegister(cc, dst_regs[0], src_mcv.register_pair[0]);
  17843                         try self.asmCmovccRegisterRegister(cc, dst_regs[1], src_mcv.register_pair[1]);
  17844                     }
  17845                     try self.genCopy(lhs_ty, dst_mcv, .{ .register_pair = dst_regs }, .{});
  17846                 } else {
  17847                     const mat_src_mcv: MCValue = if (switch (resolved_src_mcv) {
  17848                         .immediate,
  17849                         .eflags,
  17850                         .register_offset,
  17851                         .load_symbol,
  17852                         .lea_symbol,
  17853                         .load_direct,
  17854                         .lea_direct,
  17855                         .load_got,
  17856                         .lea_got,
  17857                         .load_tlv,
  17858                         .lea_tlv,
  17859                         .lea_frame,
  17860                         => true,
  17861                         .memory => |addr| std.math.cast(i32, @as(i64, @bitCast(addr))) == null,
  17862                         else => false,
  17863                         .register_pair,
  17864                         .register_overflow,
  17865                         => unreachable,
  17866                     })
  17867                         .{ .register = try self.copyToTmpRegister(rhs_ty, resolved_src_mcv) }
  17868                     else
  17869                         resolved_src_mcv;
  17870                     const mat_mcv_lock = switch (mat_src_mcv) {
  17871                         .register => |reg| self.register_manager.lockReg(reg),
  17872                         else => null,
  17873                     };
  17874                     defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock);
  17875 
  17876                     try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv);
  17877 
  17878                     const int_info = lhs_ty.intInfo(zcu);
  17879                     const cc: Condition = switch (int_info.signedness) {
  17880                         .unsigned => switch (air_tag) {
  17881                             .min => .a,
  17882                             .max => .b,
  17883                             else => unreachable,
  17884                         },
  17885                         .signed => switch (air_tag) {
  17886                             .min => .g,
  17887                             .max => .l,
  17888                             else => unreachable,
  17889                         },
  17890                     };
  17891 
  17892                     const cmov_abi_size = @max(@as(u32, @intCast(lhs_ty.abiSize(zcu))), 2);
  17893                     const tmp_reg = switch (dst_mcv) {
  17894                         .register => |reg| reg,
  17895                         else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
  17896                     };
  17897                     const tmp_lock = self.register_manager.lockReg(tmp_reg);
  17898                     defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
  17899                     switch (mat_src_mcv) {
  17900                         .none,
  17901                         .unreach,
  17902                         .dead,
  17903                         .undef,
  17904                         .immediate,
  17905                         .eflags,
  17906                         .register_pair,
  17907                         .register_triple,
  17908                         .register_quadruple,
  17909                         .register_offset,
  17910                         .register_overflow,
  17911                         .register_mask,
  17912                         .load_symbol,
  17913                         .lea_symbol,
  17914                         .load_direct,
  17915                         .lea_direct,
  17916                         .load_got,
  17917                         .lea_got,
  17918                         .load_tlv,
  17919                         .lea_tlv,
  17920                         .lea_frame,
  17921                         .elementwise_regs_then_frame,
  17922                         .reserved_frame,
  17923                         .air_ref,
  17924                         => unreachable,
  17925                         .register => |src_reg| try self.asmCmovccRegisterRegister(
  17926                             cc,
  17927                             registerAlias(tmp_reg, cmov_abi_size),
  17928                             registerAlias(src_reg, cmov_abi_size),
  17929                         ),
  17930                         .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
  17931                             cc,
  17932                             registerAlias(tmp_reg, cmov_abi_size),
  17933                             switch (mat_src_mcv) {
  17934                                 .memory => |addr| .{
  17935                                     .base = .{ .reg = .ds },
  17936                                     .mod = .{ .rm = .{
  17937                                         .size = .fromSize(cmov_abi_size),
  17938                                         .disp = @intCast(@as(i64, @bitCast(addr))),
  17939                                     } },
  17940                                 },
  17941                                 .indirect => |reg_off| .{
  17942                                     .base = .{ .reg = reg_off.reg },
  17943                                     .mod = .{ .rm = .{
  17944                                         .size = .fromSize(cmov_abi_size),
  17945                                         .disp = reg_off.off,
  17946                                     } },
  17947                                 },
  17948                                 .load_frame => |frame_addr| .{
  17949                                     .base = .{ .frame = frame_addr.index },
  17950                                     .mod = .{ .rm = .{
  17951                                         .size = .fromSize(cmov_abi_size),
  17952                                         .disp = frame_addr.off,
  17953                                     } },
  17954                                 },
  17955                                 else => unreachable,
  17956                             },
  17957                         ),
  17958                     }
  17959                     try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }, .{});
  17960                 }
  17961             },
  17962 
  17963             .cmp_eq, .cmp_neq => {
  17964                 assert(lhs_ty.isVector(zcu) and lhs_ty.childType(zcu).toIntern() == .bool_type);
  17965                 try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv);
  17966                 switch (air_tag) {
  17967                     .cmp_eq => try self.genUnOpMir(.{ ._, .not }, lhs_ty, dst_mcv),
  17968                     .cmp_neq => {},
  17969                     else => unreachable,
  17970                 }
  17971             },
  17972 
  17973             else => return self.fail("TODO implement genBinOp for {s} {}", .{
  17974                 @tagName(air_tag), lhs_ty.fmt(pt),
  17975             }),
  17976         }
  17977         return dst_mcv;
  17978     }
  17979 
  17980     const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
  17981     const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
  17982         else => unreachable,
  17983         .float => switch (lhs_ty.floatBits(self.target.*)) {
  17984             16 => {
  17985                 assert(self.hasFeature(.f16c));
  17986                 const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
  17987 
  17988                 const tmp_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
  17989                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  17990                 defer self.register_manager.unlockReg(tmp_lock);
  17991 
  17992                 if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  17993                     .{ .vp_w, .insr },
  17994                     dst_reg,
  17995                     lhs_reg,
  17996                     try src_mcv.mem(self, .{ .size = .word }),
  17997                     .u(1),
  17998                 ) else try self.asmRegisterRegisterRegister(
  17999                     .{ .vp_, .unpcklwd },
  18000                     dst_reg,
  18001                     lhs_reg,
  18002                     (if (src_mcv.isRegister())
  18003                         src_mcv.getReg().?
  18004                     else
  18005                         try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  18006                 );
  18007                 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
  18008                 try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
  18009                 try self.asmRegisterRegisterRegister(
  18010                     switch (air_tag) {
  18011                         .add => .{ .v_ss, .add },
  18012                         .sub => .{ .v_ss, .sub },
  18013                         .mul => .{ .v_ss, .mul },
  18014                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
  18015                         .max => .{ .v_ss, .max },
  18016                         .min => .{ .v_ss, .max },
  18017                         else => unreachable,
  18018                     },
  18019                     dst_reg,
  18020                     dst_reg,
  18021                     tmp_reg,
  18022                 );
  18023                 switch (air_tag) {
  18024                     .div_trunc, .div_floor => try self.asmRegisterRegisterRegisterImmediate(
  18025                         .{ .v_ss, .round },
  18026                         dst_reg,
  18027                         dst_reg,
  18028                         dst_reg,
  18029                         .u(@as(u5, @bitCast(RoundMode{
  18030                             .mode = switch (air_tag) {
  18031                                 .div_trunc => .zero,
  18032                                 .div_floor => .down,
  18033                                 else => unreachable,
  18034                             },
  18035                             .precision = .inexact,
  18036                         }))),
  18037                     ),
  18038                     else => {},
  18039                 }
  18040                 try self.asmRegisterRegisterImmediate(
  18041                     .{ .v_, .cvtps2ph },
  18042                     dst_reg,
  18043                     dst_reg,
  18044                     .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  18045                 );
  18046                 return dst_mcv;
  18047             },
  18048             32 => switch (air_tag) {
  18049                 .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
  18050                 .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
  18051                 .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
  18052                 .div_float,
  18053                 .div_trunc,
  18054                 .div_floor,
  18055                 .div_exact,
  18056                 => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
  18057                 .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
  18058                 .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
  18059                 else => unreachable,
  18060             },
  18061             64 => switch (air_tag) {
  18062                 .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
  18063                 .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
  18064                 .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
  18065                 .div_float,
  18066                 .div_trunc,
  18067                 .div_floor,
  18068                 .div_exact,
  18069                 => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
  18070                 .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
  18071                 .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
  18072                 else => unreachable,
  18073             },
  18074             80, 128 => null,
  18075             else => unreachable,
  18076         },
  18077         .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  18078             else => null,
  18079             .int => switch (lhs_ty.childType(zcu).intInfo(zcu).bits) {
  18080                 8 => switch (lhs_ty.vectorLen(zcu)) {
  18081                     1...16 => switch (air_tag) {
  18082                         .add,
  18083                         .add_wrap,
  18084                         => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add },
  18085                         .sub,
  18086                         .sub_wrap,
  18087                         => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
  18088                         .bit_and => if (self.hasFeature(.avx))
  18089                             .{ .vp_, .@"and" }
  18090                         else
  18091                             .{ .p_, .@"and" },
  18092                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
  18093                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
  18094                         .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18095                             .signed => if (self.hasFeature(.avx))
  18096                                 .{ .vp_b, .mins }
  18097                             else if (self.hasFeature(.sse4_1))
  18098                                 .{ .p_b, .mins }
  18099                             else
  18100                                 null,
  18101                             .unsigned => if (self.hasFeature(.avx))
  18102                                 .{ .vp_b, .minu }
  18103                             else if (self.hasFeature(.sse4_1))
  18104                                 .{ .p_b, .minu }
  18105                             else
  18106                                 null,
  18107                         },
  18108                         .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18109                             .signed => if (self.hasFeature(.avx))
  18110                                 .{ .vp_b, .maxs }
  18111                             else if (self.hasFeature(.sse4_1))
  18112                                 .{ .p_b, .maxs }
  18113                             else
  18114                                 null,
  18115                             .unsigned => if (self.hasFeature(.avx))
  18116                                 .{ .vp_b, .maxu }
  18117                             else if (self.hasFeature(.sse4_1))
  18118                                 .{ .p_b, .maxu }
  18119                             else
  18120                                 null,
  18121                         },
  18122                         .cmp_lt,
  18123                         .cmp_lte,
  18124                         .cmp_gte,
  18125                         .cmp_gt,
  18126                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18127                             .signed => if (self.hasFeature(.avx))
  18128                                 .{ .vp_b, .cmpgt }
  18129                             else
  18130                                 .{ .p_b, .cmpgt },
  18131                             .unsigned => null,
  18132                         },
  18133                         .cmp_eq,
  18134                         .cmp_neq,
  18135                         => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq },
  18136                         else => null,
  18137                     },
  18138                     17...32 => switch (air_tag) {
  18139                         .add,
  18140                         .add_wrap,
  18141                         => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
  18142                         .sub,
  18143                         .sub_wrap,
  18144                         => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
  18145                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
  18146                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
  18147                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
  18148                         .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18149                             .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
  18150                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
  18151                         },
  18152                         .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18153                             .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
  18154                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
  18155                         },
  18156                         .cmp_lt,
  18157                         .cmp_lte,
  18158                         .cmp_gte,
  18159                         .cmp_gt,
  18160                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18161                             .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null,
  18162                             .unsigned => null,
  18163                         },
  18164                         .cmp_eq,
  18165                         .cmp_neq,
  18166                         => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null,
  18167                         else => null,
  18168                     },
  18169                     else => null,
  18170                 },
  18171                 16 => switch (lhs_ty.vectorLen(zcu)) {
  18172                     1...8 => switch (air_tag) {
  18173                         .add,
  18174                         .add_wrap,
  18175                         => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add },
  18176                         .sub,
  18177                         .sub_wrap,
  18178                         => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
  18179                         .mul,
  18180                         .mul_wrap,
  18181                         => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull },
  18182                         .bit_and => if (self.hasFeature(.avx))
  18183                             .{ .vp_, .@"and" }
  18184                         else
  18185                             .{ .p_, .@"and" },
  18186                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
  18187                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
  18188                         .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18189                             .signed => if (self.hasFeature(.avx))
  18190                                 .{ .vp_w, .mins }
  18191                             else
  18192                                 .{ .p_w, .mins },
  18193                             .unsigned => if (self.hasFeature(.avx))
  18194                                 .{ .vp_w, .minu }
  18195                             else
  18196                                 .{ .p_w, .minu },
  18197                         },
  18198                         .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18199                             .signed => if (self.hasFeature(.avx))
  18200                                 .{ .vp_w, .maxs }
  18201                             else
  18202                                 .{ .p_w, .maxs },
  18203                             .unsigned => if (self.hasFeature(.avx))
  18204                                 .{ .vp_w, .maxu }
  18205                             else
  18206                                 .{ .p_w, .maxu },
  18207                         },
  18208                         .cmp_lt,
  18209                         .cmp_lte,
  18210                         .cmp_gte,
  18211                         .cmp_gt,
  18212                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18213                             .signed => if (self.hasFeature(.avx))
  18214                                 .{ .vp_w, .cmpgt }
  18215                             else
  18216                                 .{ .p_w, .cmpgt },
  18217                             .unsigned => null,
  18218                         },
  18219                         .cmp_eq,
  18220                         .cmp_neq,
  18221                         => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq },
  18222                         else => null,
  18223                     },
  18224                     9...16 => switch (air_tag) {
  18225                         .add,
  18226                         .add_wrap,
  18227                         => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
  18228                         .sub,
  18229                         .sub_wrap,
  18230                         => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
  18231                         .mul,
  18232                         .mul_wrap,
  18233                         => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
  18234                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
  18235                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
  18236                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
  18237                         .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18238                             .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
  18239                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
  18240                         },
  18241                         .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18242                             .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
  18243                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
  18244                         },
  18245                         .cmp_lt,
  18246                         .cmp_lte,
  18247                         .cmp_gte,
  18248                         .cmp_gt,
  18249                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18250                             .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null,
  18251                             .unsigned => null,
  18252                         },
  18253                         .cmp_eq,
  18254                         .cmp_neq,
  18255                         => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null,
  18256                         else => null,
  18257                     },
  18258                     else => null,
  18259                 },
  18260                 32 => switch (lhs_ty.vectorLen(zcu)) {
  18261                     1...4 => switch (air_tag) {
  18262                         .add,
  18263                         .add_wrap,
  18264                         => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add },
  18265                         .sub,
  18266                         .sub_wrap,
  18267                         => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
  18268                         .mul,
  18269                         .mul_wrap,
  18270                         => if (self.hasFeature(.avx))
  18271                             .{ .vp_d, .mull }
  18272                         else if (self.hasFeature(.sse4_1))
  18273                             .{ .p_d, .mull }
  18274                         else
  18275                             null,
  18276                         .bit_and => if (self.hasFeature(.avx))
  18277                             .{ .vp_, .@"and" }
  18278                         else
  18279                             .{ .p_, .@"and" },
  18280                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
  18281                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
  18282                         .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18283                             .signed => if (self.hasFeature(.avx))
  18284                                 .{ .vp_d, .mins }
  18285                             else if (self.hasFeature(.sse4_1))
  18286                                 .{ .p_d, .mins }
  18287                             else
  18288                                 null,
  18289                             .unsigned => if (self.hasFeature(.avx))
  18290                                 .{ .vp_d, .minu }
  18291                             else if (self.hasFeature(.sse4_1))
  18292                                 .{ .p_d, .minu }
  18293                             else
  18294                                 null,
  18295                         },
  18296                         .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18297                             .signed => if (self.hasFeature(.avx))
  18298                                 .{ .vp_d, .maxs }
  18299                             else if (self.hasFeature(.sse4_1))
  18300                                 .{ .p_d, .maxs }
  18301                             else
  18302                                 null,
  18303                             .unsigned => if (self.hasFeature(.avx))
  18304                                 .{ .vp_d, .maxu }
  18305                             else if (self.hasFeature(.sse4_1))
  18306                                 .{ .p_d, .maxu }
  18307                             else
  18308                                 null,
  18309                         },
  18310                         .cmp_lt,
  18311                         .cmp_lte,
  18312                         .cmp_gte,
  18313                         .cmp_gt,
  18314                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18315                             .signed => if (self.hasFeature(.avx))
  18316                                 .{ .vp_d, .cmpgt }
  18317                             else
  18318                                 .{ .p_d, .cmpgt },
  18319                             .unsigned => null,
  18320                         },
  18321                         .cmp_eq,
  18322                         .cmp_neq,
  18323                         => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq },
  18324                         else => null,
  18325                     },
  18326                     5...8 => switch (air_tag) {
  18327                         .add,
  18328                         .add_wrap,
  18329                         => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
  18330                         .sub,
  18331                         .sub_wrap,
  18332                         => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
  18333                         .mul,
  18334                         .mul_wrap,
  18335                         => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
  18336                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
  18337                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
  18338                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
  18339                         .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18340                             .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
  18341                             .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
  18342                         },
  18343                         .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18344                             .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
  18345                             .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
  18346                         },
  18347                         .cmp_lt,
  18348                         .cmp_lte,
  18349                         .cmp_gte,
  18350                         .cmp_gt,
  18351                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18352                             .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
  18353                             .unsigned => null,
  18354                         },
  18355                         .cmp_eq,
  18356                         .cmp_neq,
  18357                         => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
  18358                         else => null,
  18359                     },
  18360                     else => null,
  18361                 },
  18362                 64 => switch (lhs_ty.vectorLen(zcu)) {
  18363                     1...2 => switch (air_tag) {
  18364                         .add,
  18365                         .add_wrap,
  18366                         => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add },
  18367                         .sub,
  18368                         .sub_wrap,
  18369                         => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
  18370                         .bit_and => if (self.hasFeature(.avx))
  18371                             .{ .vp_, .@"and" }
  18372                         else
  18373                             .{ .p_, .@"and" },
  18374                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
  18375                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
  18376                         .cmp_lt,
  18377                         .cmp_lte,
  18378                         .cmp_gte,
  18379                         .cmp_gt,
  18380                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18381                             .signed => if (self.hasFeature(.avx))
  18382                                 .{ .vp_q, .cmpgt }
  18383                             else if (self.hasFeature(.sse4_2))
  18384                                 .{ .p_q, .cmpgt }
  18385                             else
  18386                                 null,
  18387                             .unsigned => null,
  18388                         },
  18389                         .cmp_eq,
  18390                         .cmp_neq,
  18391                         => if (self.hasFeature(.avx))
  18392                             .{ .vp_q, .cmpeq }
  18393                         else if (self.hasFeature(.sse4_1))
  18394                             .{ .p_q, .cmpeq }
  18395                         else
  18396                             null,
  18397                         else => null,
  18398                     },
  18399                     3...4 => switch (air_tag) {
  18400                         .add,
  18401                         .add_wrap,
  18402                         => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
  18403                         .sub,
  18404                         .sub_wrap,
  18405                         => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
  18406                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
  18407                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
  18408                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
  18409                         .cmp_eq,
  18410                         .cmp_neq,
  18411                         => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
  18412                         .cmp_lt,
  18413                         .cmp_lte,
  18414                         .cmp_gt,
  18415                         .cmp_gte,
  18416                         => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
  18417                             .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
  18418                             .unsigned => null,
  18419                         },
  18420                         else => null,
  18421                     },
  18422                     else => null,
  18423                 },
  18424                 else => null,
  18425             },
  18426             .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) {
  18427                 16 => tag: {
  18428                     assert(self.hasFeature(.f16c));
  18429                     const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
  18430                     switch (lhs_ty.vectorLen(zcu)) {
  18431                         1 => {
  18432                             const tmp_reg =
  18433                                 (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
  18434                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  18435                             defer self.register_manager.unlockReg(tmp_lock);
  18436 
  18437                             if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  18438                                 .{ .vp_w, .insr },
  18439                                 dst_reg,
  18440                                 lhs_reg,
  18441                                 try src_mcv.mem(self, .{ .size = .word }),
  18442                                 .u(1),
  18443                             ) else try self.asmRegisterRegisterRegister(
  18444                                 .{ .vp_, .unpcklwd },
  18445                                 dst_reg,
  18446                                 lhs_reg,
  18447                                 (if (src_mcv.isRegister())
  18448                                     src_mcv.getReg().?
  18449                                 else
  18450                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  18451                             );
  18452                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
  18453                             try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
  18454                             try self.asmRegisterRegisterRegister(
  18455                                 switch (air_tag) {
  18456                                     .add => .{ .v_ss, .add },
  18457                                     .sub => .{ .v_ss, .sub },
  18458                                     .mul => .{ .v_ss, .mul },
  18459                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
  18460                                     .max => .{ .v_ss, .max },
  18461                                     .min => .{ .v_ss, .max },
  18462                                     else => unreachable,
  18463                                 },
  18464                                 dst_reg,
  18465                                 dst_reg,
  18466                                 tmp_reg,
  18467                             );
  18468                             try self.asmRegisterRegisterImmediate(
  18469                                 .{ .v_, .cvtps2ph },
  18470                                 dst_reg,
  18471                                 dst_reg,
  18472                                 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  18473                             );
  18474                             return dst_mcv;
  18475                         },
  18476                         2 => {
  18477                             const tmp_reg = (try self.register_manager.allocReg(
  18478                                 null,
  18479                                 abi.RegisterClass.sse,
  18480                             )).to128();
  18481                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  18482                             defer self.register_manager.unlockReg(tmp_lock);
  18483 
  18484                             if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  18485                                 .{ .vp_d, .insr },
  18486                                 dst_reg,
  18487                                 lhs_reg,
  18488                                 try src_mcv.mem(self, .{ .size = .dword }),
  18489                                 .u(1),
  18490                             ) else try self.asmRegisterRegisterRegister(
  18491                                 .{ .v_ps, .unpckl },
  18492                                 dst_reg,
  18493                                 lhs_reg,
  18494                                 (if (src_mcv.isRegister())
  18495                                     src_mcv.getReg().?
  18496                                 else
  18497                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  18498                             );
  18499                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
  18500                             try self.asmRegisterRegisterRegister(
  18501                                 .{ .v_ps, .movhl },
  18502                                 tmp_reg,
  18503                                 dst_reg,
  18504                                 dst_reg,
  18505                             );
  18506                             try self.asmRegisterRegisterRegister(
  18507                                 switch (air_tag) {
  18508                                     .add => .{ .v_ps, .add },
  18509                                     .sub => .{ .v_ps, .sub },
  18510                                     .mul => .{ .v_ps, .mul },
  18511                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
  18512                                     .max => .{ .v_ps, .max },
  18513                                     .min => .{ .v_ps, .max },
  18514                                     else => unreachable,
  18515                                 },
  18516                                 dst_reg,
  18517                                 dst_reg,
  18518                                 tmp_reg,
  18519                             );
  18520                             try self.asmRegisterRegisterImmediate(
  18521                                 .{ .v_, .cvtps2ph },
  18522                                 dst_reg,
  18523                                 dst_reg,
  18524                                 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  18525                             );
  18526                             return dst_mcv;
  18527                         },
  18528                         3...4 => {
  18529                             const tmp_reg = (try self.register_manager.allocReg(
  18530                                 null,
  18531                                 abi.RegisterClass.sse,
  18532                             )).to128();
  18533                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  18534                             defer self.register_manager.unlockReg(tmp_lock);
  18535 
  18536                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, lhs_reg);
  18537                             if (src_mcv.isBase()) try self.asmRegisterMemory(
  18538                                 .{ .v_ps, .cvtph2 },
  18539                                 tmp_reg,
  18540                                 try src_mcv.mem(self, .{ .size = .qword }),
  18541                             ) else try self.asmRegisterRegister(
  18542                                 .{ .v_ps, .cvtph2 },
  18543                                 tmp_reg,
  18544                                 (if (src_mcv.isRegister())
  18545                                     src_mcv.getReg().?
  18546                                 else
  18547                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  18548                             );
  18549                             try self.asmRegisterRegisterRegister(
  18550                                 switch (air_tag) {
  18551                                     .add => .{ .v_ps, .add },
  18552                                     .sub => .{ .v_ps, .sub },
  18553                                     .mul => .{ .v_ps, .mul },
  18554                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
  18555                                     .max => .{ .v_ps, .max },
  18556                                     .min => .{ .v_ps, .max },
  18557                                     else => unreachable,
  18558                                 },
  18559                                 dst_reg,
  18560                                 dst_reg,
  18561                                 tmp_reg,
  18562                             );
  18563                             try self.asmRegisterRegisterImmediate(
  18564                                 .{ .v_, .cvtps2ph },
  18565                                 dst_reg,
  18566                                 dst_reg,
  18567                                 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  18568                             );
  18569                             return dst_mcv;
  18570                         },
  18571                         5...8 => {
  18572                             const tmp_reg = (try self.register_manager.allocReg(
  18573                                 null,
  18574                                 abi.RegisterClass.sse,
  18575                             )).to256();
  18576                             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  18577                             defer self.register_manager.unlockReg(tmp_lock);
  18578 
  18579                             try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), lhs_reg);
  18580                             if (src_mcv.isBase()) try self.asmRegisterMemory(
  18581                                 .{ .v_ps, .cvtph2 },
  18582                                 tmp_reg,
  18583                                 try src_mcv.mem(self, .{ .size = .xword }),
  18584                             ) else try self.asmRegisterRegister(
  18585                                 .{ .v_ps, .cvtph2 },
  18586                                 tmp_reg,
  18587                                 (if (src_mcv.isRegister())
  18588                                     src_mcv.getReg().?
  18589                                 else
  18590                                     try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
  18591                             );
  18592                             try self.asmRegisterRegisterRegister(
  18593                                 switch (air_tag) {
  18594                                     .add => .{ .v_ps, .add },
  18595                                     .sub => .{ .v_ps, .sub },
  18596                                     .mul => .{ .v_ps, .mul },
  18597                                     .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
  18598                                     .max => .{ .v_ps, .max },
  18599                                     .min => .{ .v_ps, .max },
  18600                                     else => unreachable,
  18601                                 },
  18602                                 dst_reg.to256(),
  18603                                 dst_reg.to256(),
  18604                                 tmp_reg,
  18605                             );
  18606                             try self.asmRegisterRegisterImmediate(
  18607                                 .{ .v_, .cvtps2ph },
  18608                                 dst_reg,
  18609                                 dst_reg.to256(),
  18610                                 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
  18611                             );
  18612                             return dst_mcv;
  18613                         },
  18614                         else => break :tag null,
  18615                     }
  18616                 },
  18617                 32 => switch (lhs_ty.vectorLen(zcu)) {
  18618                     1 => switch (air_tag) {
  18619                         .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
  18620                         .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
  18621                         .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
  18622                         .div_float,
  18623                         .div_trunc,
  18624                         .div_floor,
  18625                         .div_exact,
  18626                         => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
  18627                         .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
  18628                         .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
  18629                         .cmp_lt,
  18630                         .cmp_lte,
  18631                         .cmp_eq,
  18632                         .cmp_gte,
  18633                         .cmp_gt,
  18634                         .cmp_neq,
  18635                         => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp },
  18636                         else => unreachable,
  18637                     },
  18638                     2...4 => switch (air_tag) {
  18639                         .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
  18640                         .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
  18641                         .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
  18642                         .div_float,
  18643                         .div_trunc,
  18644                         .div_floor,
  18645                         .div_exact,
  18646                         => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
  18647                         .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
  18648                         .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
  18649                         .cmp_lt,
  18650                         .cmp_lte,
  18651                         .cmp_eq,
  18652                         .cmp_gte,
  18653                         .cmp_gt,
  18654                         .cmp_neq,
  18655                         => if (self.hasFeature(.avx)) .{ .v_ps, .cmp } else .{ ._ps, .cmp },
  18656                         else => unreachable,
  18657                     },
  18658                     5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
  18659                         .add => .{ .v_ps, .add },
  18660                         .sub => .{ .v_ps, .sub },
  18661                         .mul => .{ .v_ps, .mul },
  18662                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
  18663                         .max => .{ .v_ps, .max },
  18664                         .min => .{ .v_ps, .min },
  18665                         .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp },
  18666                         else => unreachable,
  18667                     } else null,
  18668                     else => null,
  18669                 },
  18670                 64 => switch (lhs_ty.vectorLen(zcu)) {
  18671                     1 => switch (air_tag) {
  18672                         .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
  18673                         .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
  18674                         .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
  18675                         .div_float,
  18676                         .div_trunc,
  18677                         .div_floor,
  18678                         .div_exact,
  18679                         => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
  18680                         .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
  18681                         .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
  18682                         .cmp_lt,
  18683                         .cmp_lte,
  18684                         .cmp_eq,
  18685                         .cmp_gte,
  18686                         .cmp_gt,
  18687                         .cmp_neq,
  18688                         => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp },
  18689                         else => unreachable,
  18690                     },
  18691                     2 => switch (air_tag) {
  18692                         .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
  18693                         .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
  18694                         .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
  18695                         .div_float,
  18696                         .div_trunc,
  18697                         .div_floor,
  18698                         .div_exact,
  18699                         => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
  18700                         .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
  18701                         .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
  18702                         .cmp_lt,
  18703                         .cmp_lte,
  18704                         .cmp_eq,
  18705                         .cmp_gte,
  18706                         .cmp_gt,
  18707                         .cmp_neq,
  18708                         => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp },
  18709                         else => unreachable,
  18710                     },
  18711                     3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
  18712                         .add => .{ .v_pd, .add },
  18713                         .sub => .{ .v_pd, .sub },
  18714                         .mul => .{ .v_pd, .mul },
  18715                         .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
  18716                         .max => .{ .v_pd, .max },
  18717                         .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp },
  18718                         .min => .{ .v_pd, .min },
  18719                         else => unreachable,
  18720                     } else null,
  18721                     else => null,
  18722                 },
  18723                 80, 128 => null,
  18724                 else => unreachable,
  18725             },
  18726         },
  18727     }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
  18728         @tagName(air_tag), lhs_ty.fmt(pt),
  18729     });
  18730 
  18731     const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
  18732         if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
  18733         abi_size,
  18734     ) else null;
  18735     const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
  18736     defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
  18737 
  18738     switch (mir_tag[1]) {
  18739         else => if (self.hasFeature(.avx)) {
  18740             const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
  18741             if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
  18742                 mir_tag,
  18743                 dst_reg,
  18744                 lhs_reg,
  18745                 try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
  18746                     else => .fromSize(abi_size),
  18747                     .vector => .fromBitSize(dst_reg.bitSize()),
  18748                 } }),
  18749             ) else try self.asmRegisterRegisterRegister(
  18750                 mir_tag,
  18751                 dst_reg,
  18752                 lhs_reg,
  18753                 registerAlias(if (src_mcv.isRegister())
  18754                     src_mcv.getReg().?
  18755                 else
  18756                     try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
  18757             );
  18758         } else {
  18759             assert(copied_to_dst);
  18760             if (src_mcv.isBase()) try self.asmRegisterMemory(
  18761                 mir_tag,
  18762                 dst_reg,
  18763                 try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
  18764                     else => .fromSize(abi_size),
  18765                     .vector => .fromBitSize(dst_reg.bitSize()),
  18766                 } }),
  18767             ) else try self.asmRegisterRegister(
  18768                 mir_tag,
  18769                 dst_reg,
  18770                 registerAlias(if (src_mcv.isRegister())
  18771                     src_mcv.getReg().?
  18772                 else
  18773                     try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
  18774             );
  18775         },
  18776         .cmp => {
  18777             const imm: Immediate = .u(switch (air_tag) {
  18778                 .cmp_eq => 0,
  18779                 .cmp_lt, .cmp_gt => 1,
  18780                 .cmp_lte, .cmp_gte => 2,
  18781                 .cmp_neq => 4,
  18782                 else => unreachable,
  18783             });
  18784             if (self.hasFeature(.avx)) {
  18785                 const lhs_reg =
  18786                     if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
  18787                 if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  18788                     mir_tag,
  18789                     dst_reg,
  18790                     lhs_reg,
  18791                     try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
  18792                         else => .fromSize(abi_size),
  18793                         .vector => .fromBitSize(dst_reg.bitSize()),
  18794                     } }),
  18795                     imm,
  18796                 ) else try self.asmRegisterRegisterRegisterImmediate(
  18797                     mir_tag,
  18798                     dst_reg,
  18799                     lhs_reg,
  18800                     registerAlias(if (src_mcv.isRegister())
  18801                         src_mcv.getReg().?
  18802                     else
  18803                         try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
  18804                     imm,
  18805                 );
  18806             } else {
  18807                 assert(copied_to_dst);
  18808                 if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  18809                     mir_tag,
  18810                     dst_reg,
  18811                     try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
  18812                         else => .fromSize(abi_size),
  18813                         .vector => .fromBitSize(dst_reg.bitSize()),
  18814                     } }),
  18815                     imm,
  18816                 ) else try self.asmRegisterRegisterImmediate(
  18817                     mir_tag,
  18818                     dst_reg,
  18819                     registerAlias(if (src_mcv.isRegister())
  18820                         src_mcv.getReg().?
  18821                     else
  18822                         try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
  18823                     imm,
  18824                 );
  18825             }
  18826         },
  18827     }
  18828 
  18829     switch (air_tag) {
  18830         .add, .add_wrap, .sub, .sub_wrap, .mul, .mul_wrap, .div_float, .div_exact => {},
  18831         .div_trunc, .div_floor => try self.genRound(lhs_ty, dst_reg, .{ .register = dst_reg }, .{
  18832             .mode = switch (air_tag) {
  18833                 .div_trunc => .zero,
  18834                 .div_floor => .down,
  18835                 else => unreachable,
  18836             },
  18837             .precision = .inexact,
  18838         }),
  18839         .bit_and, .bit_or, .xor => {},
  18840         .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
  18841             const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
  18842 
  18843             try self.asmRegisterRegisterRegisterImmediate(
  18844                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
  18845                     .float => switch (lhs_ty.floatBits(self.target.*)) {
  18846                         32 => .{ .v_ss, .cmp },
  18847                         64 => .{ .v_sd, .cmp },
  18848                         16, 80, 128 => null,
  18849                         else => unreachable,
  18850                     },
  18851                     .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  18852                         .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) {
  18853                             32 => switch (lhs_ty.vectorLen(zcu)) {
  18854                                 1 => .{ .v_ss, .cmp },
  18855                                 2...8 => .{ .v_ps, .cmp },
  18856                                 else => null,
  18857                             },
  18858                             64 => switch (lhs_ty.vectorLen(zcu)) {
  18859                                 1 => .{ .v_sd, .cmp },
  18860                                 2...4 => .{ .v_pd, .cmp },
  18861                                 else => null,
  18862                             },
  18863                             16, 80, 128 => null,
  18864                             else => unreachable,
  18865                         },
  18866                         else => unreachable,
  18867                     },
  18868                     else => unreachable,
  18869                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
  18870                     @tagName(air_tag), lhs_ty.fmt(pt),
  18871                 }),
  18872                 mask_reg,
  18873                 rhs_copy_reg,
  18874                 rhs_copy_reg,
  18875                 .u(3), // unord
  18876             );
  18877             try self.asmRegisterRegisterRegisterRegister(
  18878                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
  18879                     .float => switch (lhs_ty.floatBits(self.target.*)) {
  18880                         32 => .{ .v_ps, .blendv },
  18881                         64 => .{ .v_pd, .blendv },
  18882                         16, 80, 128 => null,
  18883                         else => unreachable,
  18884                     },
  18885                     .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  18886                         .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) {
  18887                             32 => switch (lhs_ty.vectorLen(zcu)) {
  18888                                 1...8 => .{ .v_ps, .blendv },
  18889                                 else => null,
  18890                             },
  18891                             64 => switch (lhs_ty.vectorLen(zcu)) {
  18892                                 1...4 => .{ .v_pd, .blendv },
  18893                                 else => null,
  18894                             },
  18895                             16, 80, 128 => null,
  18896                             else => unreachable,
  18897                         },
  18898                         else => unreachable,
  18899                     },
  18900                     else => unreachable,
  18901                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
  18902                     @tagName(air_tag), lhs_ty.fmt(pt),
  18903                 }),
  18904                 dst_reg,
  18905                 dst_reg,
  18906                 lhs_copy_reg.?,
  18907                 mask_reg,
  18908             );
  18909         } else {
  18910             const has_blend = self.hasFeature(.sse4_1);
  18911             try self.asmRegisterRegisterImmediate(
  18912                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
  18913                     .float => switch (lhs_ty.floatBits(self.target.*)) {
  18914                         32 => .{ ._ss, .cmp },
  18915                         64 => .{ ._sd, .cmp },
  18916                         16, 80, 128 => null,
  18917                         else => unreachable,
  18918                     },
  18919                     .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  18920                         .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) {
  18921                             32 => switch (lhs_ty.vectorLen(zcu)) {
  18922                                 1 => .{ ._ss, .cmp },
  18923                                 2...4 => .{ ._ps, .cmp },
  18924                                 else => null,
  18925                             },
  18926                             64 => switch (lhs_ty.vectorLen(zcu)) {
  18927                                 1 => .{ ._sd, .cmp },
  18928                                 2 => .{ ._pd, .cmp },
  18929                                 else => null,
  18930                             },
  18931                             16, 80, 128 => null,
  18932                             else => unreachable,
  18933                         },
  18934                         else => unreachable,
  18935                     },
  18936                     else => unreachable,
  18937                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
  18938                     @tagName(air_tag), lhs_ty.fmt(pt),
  18939                 }),
  18940                 mask_reg,
  18941                 mask_reg,
  18942                 .u(if (has_blend) 3 else 7), // unord, ord
  18943             );
  18944             if (has_blend) try self.asmRegisterRegisterRegister(
  18945                 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
  18946                     .float => switch (lhs_ty.floatBits(self.target.*)) {
  18947                         32 => .{ ._ps, .blendv },
  18948                         64 => .{ ._pd, .blendv },
  18949                         16, 80, 128 => null,
  18950                         else => unreachable,
  18951                     },
  18952                     .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  18953                         .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) {
  18954                             32 => switch (lhs_ty.vectorLen(zcu)) {
  18955                                 1...4 => .{ ._ps, .blendv },
  18956                                 else => null,
  18957                             },
  18958                             64 => switch (lhs_ty.vectorLen(zcu)) {
  18959                                 1...2 => .{ ._pd, .blendv },
  18960                                 else => null,
  18961                             },
  18962                             16, 80, 128 => null,
  18963                             else => unreachable,
  18964                         },
  18965                         else => unreachable,
  18966                     },
  18967                     else => unreachable,
  18968                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
  18969                     @tagName(air_tag), lhs_ty.fmt(pt),
  18970                 }),
  18971                 dst_reg,
  18972                 lhs_copy_reg.?,
  18973                 mask_reg,
  18974             ) else {
  18975                 const mir_fixes = @as(?Mir.Inst.Fixes, switch (lhs_ty.zigTypeTag(zcu)) {
  18976                     .float => switch (lhs_ty.floatBits(self.target.*)) {
  18977                         32 => ._ps,
  18978                         64 => ._pd,
  18979                         16, 80, 128 => null,
  18980                         else => unreachable,
  18981                     },
  18982                     .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  18983                         .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) {
  18984                             32 => switch (lhs_ty.vectorLen(zcu)) {
  18985                                 1...4 => ._ps,
  18986                                 else => null,
  18987                             },
  18988                             64 => switch (lhs_ty.vectorLen(zcu)) {
  18989                                 1...2 => ._pd,
  18990                                 else => null,
  18991                             },
  18992                             16, 80, 128 => null,
  18993                             else => unreachable,
  18994                         },
  18995                         else => unreachable,
  18996                     },
  18997                     else => unreachable,
  18998                 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
  18999                     @tagName(air_tag), lhs_ty.fmt(pt),
  19000                 });
  19001                 try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_reg, mask_reg);
  19002                 try self.asmRegisterRegister(.{ mir_fixes, .andn }, mask_reg, lhs_copy_reg.?);
  19003                 try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_reg, mask_reg);
  19004             }
  19005         },
  19006         .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => {
  19007             switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
  19008                 .int => switch (air_tag) {
  19009                     .cmp_lt,
  19010                     .cmp_eq,
  19011                     .cmp_gt,
  19012                     => {},
  19013                     .cmp_lte,
  19014                     .cmp_gte,
  19015                     .cmp_neq,
  19016                     => {
  19017                         const unsigned_ty = try lhs_ty.toUnsigned(pt);
  19018                         const not_mcv = try self.genTypedValue(try unsigned_ty.maxInt(pt, unsigned_ty));
  19019                         const not_mem: Memory = if (not_mcv.isBase())
  19020                             try not_mcv.mem(self, .{ .size = .fromSize(abi_size) })
  19021                         else
  19022                             .{ .base = .{
  19023                                 .reg = try self.copyToTmpRegister(.usize, not_mcv.address()),
  19024                             }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } } };
  19025                         switch (mir_tag[0]) {
  19026                             .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory(
  19027                                 .{ .vp_, .xor },
  19028                                 dst_reg,
  19029                                 dst_reg,
  19030                                 not_mem,
  19031                             ),
  19032                             .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory(
  19033                                 .{ .p_, .xor },
  19034                                 dst_reg,
  19035                                 not_mem,
  19036                             ),
  19037                             else => unreachable,
  19038                         }
  19039                     },
  19040                     else => unreachable,
  19041                 },
  19042                 .float => {},
  19043                 else => unreachable,
  19044             }
  19045 
  19046             const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp);
  19047             const gp_lock = self.register_manager.lockRegAssumeUnused(gp_reg);
  19048             defer self.register_manager.unlockReg(gp_lock);
  19049 
  19050             try self.asmRegisterRegister(switch (mir_tag[0]) {
  19051                 ._pd, ._sd, .p_q => .{ ._pd, .movmsk },
  19052                 ._ps, ._ss, .p_d => .{ ._ps, .movmsk },
  19053                 .p_b => .{ .p_b, .movmsk },
  19054                 .p_w => movmsk: {
  19055                     try self.asmRegisterRegister(.{ .p_b, .ackssw }, dst_reg, dst_reg);
  19056                     break :movmsk .{ .p_b, .movmsk };
  19057                 },
  19058                 .v_pd, .v_sd, .vp_q => .{ .v_pd, .movmsk },
  19059                 .v_ps, .v_ss, .vp_d => .{ .v_ps, .movmsk },
  19060                 .vp_b => .{ .vp_b, .movmsk },
  19061                 .vp_w => movmsk: {
  19062                     try self.asmRegisterRegisterRegister(
  19063                         .{ .vp_b, .ackssw },
  19064                         dst_reg,
  19065                         dst_reg,
  19066                         dst_reg,
  19067                     );
  19068                     break :movmsk .{ .vp_b, .movmsk };
  19069                 },
  19070                 else => unreachable,
  19071             }, gp_reg.to32(), dst_reg);
  19072             return .{ .register = gp_reg };
  19073         },
  19074         else => unreachable,
  19075     }
  19076 
  19077     return dst_mcv;
  19078 }
  19079 
  19080 fn genBinOpMir(
  19081     self: *CodeGen,
  19082     mir_tag: Mir.Inst.FixedTag,
  19083     ty: Type,
  19084     dst_mcv: MCValue,
  19085     src_mcv: MCValue,
  19086 ) !void {
  19087     const pt = self.pt;
  19088     const zcu = pt.zcu;
  19089     const abi_size: u32 = @intCast(ty.abiSize(zcu));
  19090     try self.spillEflagsIfOccupied();
  19091     switch (dst_mcv) {
  19092         .none,
  19093         .unreach,
  19094         .dead,
  19095         .undef,
  19096         .immediate,
  19097         .eflags,
  19098         .register_overflow,
  19099         .register_mask,
  19100         .lea_direct,
  19101         .lea_got,
  19102         .lea_tlv,
  19103         .lea_frame,
  19104         .lea_symbol,
  19105         .elementwise_regs_then_frame,
  19106         .reserved_frame,
  19107         .air_ref,
  19108         => unreachable, // unmodifiable destination
  19109         .register, .register_pair, .register_triple, .register_quadruple, .register_offset => {
  19110             switch (dst_mcv) {
  19111                 .register, .register_pair, .register_triple, .register_quadruple => {},
  19112                 .register_offset => |ro| assert(ro.off == 0),
  19113                 else => unreachable,
  19114             }
  19115             for (dst_mcv.getRegs(), 0..) |dst_reg, dst_reg_i| {
  19116                 const dst_reg_lock = self.register_manager.lockReg(dst_reg);
  19117                 defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
  19118 
  19119                 const mir_limb_tag: Mir.Inst.FixedTag = switch (dst_reg_i) {
  19120                     0 => mir_tag,
  19121                     1 => switch (mir_tag[1]) {
  19122                         .add => .{ ._, .adc },
  19123                         .sub, .cmp => .{ ._, .sbb },
  19124                         .@"or", .@"and", .xor => mir_tag,
  19125                         else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
  19126                             @tagName(mir_tag[1]),
  19127                         }),
  19128                     },
  19129                     else => unreachable,
  19130                 };
  19131                 const off: u4 = @intCast(dst_reg_i * 8);
  19132                 const limb_abi_size = @min(abi_size - off, 8);
  19133                 const dst_alias = registerAlias(dst_reg, limb_abi_size);
  19134                 switch (src_mcv) {
  19135                     .none,
  19136                     .unreach,
  19137                     .dead,
  19138                     .undef,
  19139                     .register_overflow,
  19140                     .register_mask,
  19141                     .elementwise_regs_then_frame,
  19142                     .reserved_frame,
  19143                     => unreachable,
  19144                     .register,
  19145                     .register_pair,
  19146                     .register_triple,
  19147                     .register_quadruple,
  19148                     => try self.asmRegisterRegister(
  19149                         mir_limb_tag,
  19150                         dst_alias,
  19151                         registerAlias(src_mcv.getRegs()[dst_reg_i], limb_abi_size),
  19152                     ),
  19153                     .immediate => |imm| {
  19154                         assert(off == 0);
  19155                         switch (self.regBitSize(ty)) {
  19156                             8 => try self.asmRegisterImmediate(
  19157                                 mir_limb_tag,
  19158                                 dst_alias,
  19159                                 if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |small|
  19160                                     .s(small)
  19161                                 else
  19162                                     .u(@as(u8, @intCast(imm))),
  19163                             ),
  19164                             16 => try self.asmRegisterImmediate(
  19165                                 mir_limb_tag,
  19166                                 dst_alias,
  19167                                 if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |small|
  19168                                     .s(small)
  19169                                 else
  19170                                     .u(@as(u16, @intCast(imm))),
  19171                             ),
  19172                             32 => try self.asmRegisterImmediate(
  19173                                 mir_limb_tag,
  19174                                 dst_alias,
  19175                                 if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  19176                                     .s(small)
  19177                                 else
  19178                                     .u(@as(u32, @intCast(imm))),
  19179                             ),
  19180                             64 => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  19181                                 try self.asmRegisterImmediate(mir_limb_tag, dst_alias, .s(small))
  19182                             else
  19183                                 try self.asmRegisterRegister(mir_limb_tag, dst_alias, registerAlias(
  19184                                     try self.copyToTmpRegister(ty, src_mcv),
  19185                                     limb_abi_size,
  19186                                 )),
  19187                             else => unreachable,
  19188                         }
  19189                     },
  19190                     .eflags,
  19191                     .register_offset,
  19192                     .memory,
  19193                     .indirect,
  19194                     .load_symbol,
  19195                     .lea_symbol,
  19196                     .load_direct,
  19197                     .lea_direct,
  19198                     .load_got,
  19199                     .lea_got,
  19200                     .load_tlv,
  19201                     .lea_tlv,
  19202                     .load_frame,
  19203                     .lea_frame,
  19204                     => {
  19205                         direct: {
  19206                             try self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) {
  19207                                 .memory => |addr| .{
  19208                                     .base = .{ .reg = .ds },
  19209                                     .mod = .{ .rm = .{
  19210                                         .size = .fromSize(limb_abi_size),
  19211                                         .disp = std.math.cast(i32, addr + off) orelse break :direct,
  19212                                     } },
  19213                                 },
  19214                                 .indirect => |reg_off| .{
  19215                                     .base = .{ .reg = reg_off.reg },
  19216                                     .mod = .{ .rm = .{
  19217                                         .size = .fromSize(limb_abi_size),
  19218                                         .disp = reg_off.off + off,
  19219                                     } },
  19220                                 },
  19221                                 .load_frame => |frame_addr| .{
  19222                                     .base = .{ .frame = frame_addr.index },
  19223                                     .mod = .{ .rm = .{
  19224                                         .size = .fromSize(limb_abi_size),
  19225                                         .disp = frame_addr.off + off,
  19226                                     } },
  19227                                 },
  19228                                 else => break :direct,
  19229                             });
  19230                             continue;
  19231                         }
  19232 
  19233                         switch (src_mcv) {
  19234                             .eflags,
  19235                             .register_offset,
  19236                             .lea_symbol,
  19237                             .lea_direct,
  19238                             .lea_got,
  19239                             .lea_tlv,
  19240                             .lea_frame,
  19241                             => {
  19242                                 assert(off == 0);
  19243                                 const reg = try self.copyToTmpRegister(ty, src_mcv);
  19244                                 return self.genBinOpMir(
  19245                                     mir_limb_tag,
  19246                                     ty,
  19247                                     dst_mcv,
  19248                                     .{ .register = reg },
  19249                                 );
  19250                             },
  19251                             .memory,
  19252                             .load_symbol,
  19253                             .load_direct,
  19254                             .load_got,
  19255                             .load_tlv,
  19256                             => {
  19257                                 const ptr_ty = try pt.singleConstPtrType(ty);
  19258                                 const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address());
  19259                                 return self.genBinOpMir(mir_limb_tag, ty, dst_mcv, .{
  19260                                     .indirect = .{ .reg = addr_reg, .off = off },
  19261                                 });
  19262                             },
  19263                             else => unreachable,
  19264                         }
  19265                     },
  19266                     .air_ref => |src_ref| return self.genBinOpMir(
  19267                         mir_tag,
  19268                         ty,
  19269                         dst_mcv,
  19270                         try self.resolveInst(src_ref),
  19271                     ),
  19272                 }
  19273             }
  19274         },
  19275         .memory, .indirect, .load_symbol, .load_got, .load_direct, .load_tlv, .load_frame => {
  19276             const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
  19277             const limb_abi_size: u32 = @min(abi_size, 8);
  19278 
  19279             const dst_info: OpInfo = switch (dst_mcv) {
  19280                 else => unreachable,
  19281                 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: {
  19282                     const dst_addr_reg =
  19283                         (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  19284                     const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
  19285                     errdefer self.register_manager.unlockReg(dst_addr_lock);
  19286 
  19287                     try self.genSetReg(dst_addr_reg, .usize, dst_mcv.address(), .{});
  19288                     break :dst .{ .addr_reg = dst_addr_reg, .addr_lock = dst_addr_lock };
  19289                 },
  19290                 .load_frame => null,
  19291             };
  19292             defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
  19293 
  19294             const resolved_src_mcv = switch (src_mcv) {
  19295                 else => src_mcv,
  19296                 .air_ref => |src_ref| try self.resolveInst(src_ref),
  19297             };
  19298             const src_info: OpInfo = switch (resolved_src_mcv) {
  19299                 .none,
  19300                 .unreach,
  19301                 .dead,
  19302                 .undef,
  19303                 .register_overflow,
  19304                 .register_mask,
  19305                 .elementwise_regs_then_frame,
  19306                 .reserved_frame,
  19307                 .air_ref,
  19308                 => unreachable,
  19309                 .immediate,
  19310                 .eflags,
  19311                 .register,
  19312                 .register_pair,
  19313                 .register_triple,
  19314                 .register_quadruple,
  19315                 .register_offset,
  19316                 .indirect,
  19317                 .lea_direct,
  19318                 .lea_got,
  19319                 .lea_tlv,
  19320                 .load_frame,
  19321                 .lea_frame,
  19322                 .lea_symbol,
  19323                 => null,
  19324                 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: {
  19325                     switch (resolved_src_mcv) {
  19326                         .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr))) != null and
  19327                             std.math.cast(i32, @as(i64, @bitCast(addr)) + abi_size - limb_abi_size) != null)
  19328                             break :src null,
  19329                         .load_symbol, .load_got, .load_direct, .load_tlv => {},
  19330                         else => unreachable,
  19331                     }
  19332 
  19333                     const src_addr_reg =
  19334                         (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  19335                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
  19336                     errdefer self.register_manager.unlockReg(src_addr_lock);
  19337 
  19338                     try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{});
  19339                     break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
  19340                 },
  19341             };
  19342             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
  19343 
  19344             const ty_signedness =
  19345                 if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned;
  19346             const limb_ty: Type = if (abi_size <= 8) ty else switch (ty_signedness) {
  19347                 .signed => .usize,
  19348                 .unsigned => .isize,
  19349             };
  19350             var limb_i: usize = 0;
  19351             var off: i32 = 0;
  19352             while (off < abi_size) : ({
  19353                 limb_i += 1;
  19354                 off += 8;
  19355             }) {
  19356                 const mir_limb_tag: Mir.Inst.FixedTag = switch (limb_i) {
  19357                     0 => mir_tag,
  19358                     else => switch (mir_tag[1]) {
  19359                         .add => .{ ._, .adc },
  19360                         .sub, .cmp => .{ ._, .sbb },
  19361                         .@"or", .@"and", .xor => mir_tag,
  19362                         else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
  19363                             @tagName(mir_tag[1]),
  19364                         }),
  19365                     },
  19366                 };
  19367                 const dst_limb_mem: Memory = switch (dst_mcv) {
  19368                     .memory,
  19369                     .load_symbol,
  19370                     .load_got,
  19371                     .load_direct,
  19372                     .load_tlv,
  19373                     => .{
  19374                         .base = .{ .reg = dst_info.?.addr_reg },
  19375                         .mod = .{ .rm = .{
  19376                             .size = .fromSize(limb_abi_size),
  19377                             .disp = off,
  19378                         } },
  19379                     },
  19380                     .indirect => |reg_off| .{
  19381                         .base = .{ .reg = reg_off.reg },
  19382                         .mod = .{ .rm = .{
  19383                             .size = .fromSize(limb_abi_size),
  19384                             .disp = reg_off.off + off,
  19385                         } },
  19386                     },
  19387                     .load_frame => |frame_addr| .{
  19388                         .base = .{ .frame = frame_addr.index },
  19389                         .mod = .{ .rm = .{
  19390                             .size = .fromSize(limb_abi_size),
  19391                             .disp = frame_addr.off + off,
  19392                         } },
  19393                     },
  19394                     else => unreachable,
  19395                 };
  19396                 switch (resolved_src_mcv) {
  19397                     .none,
  19398                     .unreach,
  19399                     .dead,
  19400                     .undef,
  19401                     .register_overflow,
  19402                     .register_mask,
  19403                     .elementwise_regs_then_frame,
  19404                     .reserved_frame,
  19405                     .air_ref,
  19406                     => unreachable,
  19407                     .immediate => |src_imm| {
  19408                         const imm: u64 = switch (limb_i) {
  19409                             0 => src_imm,
  19410                             else => switch (ty_signedness) {
  19411                                 .signed => @bitCast(@as(i64, @bitCast(src_imm)) >> 63),
  19412                                 .unsigned => 0,
  19413                             },
  19414                         };
  19415                         switch (self.regBitSize(limb_ty)) {
  19416                             8 => try self.asmMemoryImmediate(
  19417                                 mir_limb_tag,
  19418                                 dst_limb_mem,
  19419                                 if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |small|
  19420                                     .s(small)
  19421                                 else
  19422                                     .u(@as(u8, @intCast(imm))),
  19423                             ),
  19424                             16 => try self.asmMemoryImmediate(
  19425                                 mir_limb_tag,
  19426                                 dst_limb_mem,
  19427                                 if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |small|
  19428                                     .s(small)
  19429                                 else
  19430                                     .u(@as(u16, @intCast(imm))),
  19431                             ),
  19432                             32 => try self.asmMemoryImmediate(
  19433                                 mir_limb_tag,
  19434                                 dst_limb_mem,
  19435                                 if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  19436                                     .s(small)
  19437                                 else
  19438                                     .u(@as(u32, @intCast(imm))),
  19439                             ),
  19440                             64 => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
  19441                                 try self.asmMemoryImmediate(mir_limb_tag, dst_limb_mem, .s(small))
  19442                             else
  19443                                 try self.asmMemoryRegister(
  19444                                     mir_limb_tag,
  19445                                     dst_limb_mem,
  19446                                     registerAlias(
  19447                                         try self.copyToTmpRegister(limb_ty, .{ .immediate = imm }),
  19448                                         limb_abi_size,
  19449                                     ),
  19450                                 ),
  19451                             else => unreachable,
  19452                         }
  19453                     },
  19454                     .register,
  19455                     .register_pair,
  19456                     .register_triple,
  19457                     .register_quadruple,
  19458                     .register_offset,
  19459                     .eflags,
  19460                     .memory,
  19461                     .indirect,
  19462                     .load_symbol,
  19463                     .lea_symbol,
  19464                     .load_direct,
  19465                     .lea_direct,
  19466                     .load_got,
  19467                     .lea_got,
  19468                     .load_tlv,
  19469                     .lea_tlv,
  19470                     .load_frame,
  19471                     .lea_frame,
  19472                     => {
  19473                         const src_limb_mcv: MCValue = if (src_info) |info| .{
  19474                             .indirect = .{ .reg = info.addr_reg, .off = off },
  19475                         } else switch (resolved_src_mcv) {
  19476                             .register, .register_pair, .register_triple, .register_quadruple => .{
  19477                                 .register = resolved_src_mcv.getRegs()[limb_i],
  19478                             },
  19479                             .eflags,
  19480                             .register_offset,
  19481                             .lea_symbol,
  19482                             .lea_direct,
  19483                             .lea_got,
  19484                             .lea_tlv,
  19485                             .lea_frame,
  19486                             => switch (limb_i) {
  19487                                 0 => resolved_src_mcv,
  19488                                 else => .{ .immediate = 0 },
  19489                             },
  19490                             .memory => |addr| .{ .memory = @bitCast(@as(i64, @bitCast(addr)) + off) },
  19491                             .indirect => |reg_off| .{ .indirect = .{
  19492                                 .reg = reg_off.reg,
  19493                                 .off = reg_off.off + off,
  19494                             } },
  19495                             .load_frame => |frame_addr| .{ .load_frame = .{
  19496                                 .index = frame_addr.index,
  19497                                 .off = frame_addr.off + off,
  19498                             } },
  19499                             else => unreachable,
  19500                         };
  19501                         const src_limb_reg = if (src_limb_mcv.isRegister())
  19502                             src_limb_mcv.getReg().?
  19503                         else
  19504                             try self.copyToTmpRegister(limb_ty, src_limb_mcv);
  19505                         try self.asmMemoryRegister(
  19506                             mir_limb_tag,
  19507                             dst_limb_mem,
  19508                             registerAlias(src_limb_reg, limb_abi_size),
  19509                         );
  19510                     },
  19511                 }
  19512             }
  19513         },
  19514     }
  19515 }
  19516 
  19517 /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
  19518 /// Does not support byte-size operands.
  19519 fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
  19520     const pt = self.pt;
  19521     const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu));
  19522     try self.spillEflagsIfOccupied();
  19523     switch (dst_mcv) {
  19524         .none,
  19525         .unreach,
  19526         .dead,
  19527         .undef,
  19528         .immediate,
  19529         .eflags,
  19530         .register_offset,
  19531         .register_overflow,
  19532         .register_mask,
  19533         .lea_symbol,
  19534         .lea_direct,
  19535         .lea_got,
  19536         .lea_tlv,
  19537         .lea_frame,
  19538         .elementwise_regs_then_frame,
  19539         .reserved_frame,
  19540         .air_ref,
  19541         => unreachable, // unmodifiable destination
  19542         .register => |dst_reg| {
  19543             const dst_alias = registerAlias(dst_reg, abi_size);
  19544             const dst_lock = self.register_manager.lockReg(dst_reg);
  19545             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  19546 
  19547             const resolved_src_mcv = switch (src_mcv) {
  19548                 else => src_mcv,
  19549                 .air_ref => |src_ref| try self.resolveInst(src_ref),
  19550             };
  19551             switch (resolved_src_mcv) {
  19552                 .none,
  19553                 .unreach,
  19554                 .dead,
  19555                 .undef,
  19556                 .register_pair,
  19557                 .register_triple,
  19558                 .register_quadruple,
  19559                 .register_overflow,
  19560                 .register_mask,
  19561                 .elementwise_regs_then_frame,
  19562                 .reserved_frame,
  19563                 .air_ref,
  19564                 => unreachable,
  19565                 .register => |src_reg| try self.asmRegisterRegister(
  19566                     .{ .i_, .mul },
  19567                     dst_alias,
  19568                     registerAlias(src_reg, abi_size),
  19569                 ),
  19570                 .immediate => |imm| {
  19571                     if (std.math.cast(i32, imm)) |small| {
  19572                         try self.asmRegisterRegisterImmediate(
  19573                             .{ .i_, .mul },
  19574                             dst_alias,
  19575                             dst_alias,
  19576                             .s(small),
  19577                         );
  19578                     } else {
  19579                         const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv);
  19580                         return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
  19581                     }
  19582                 },
  19583                 .register_offset,
  19584                 .eflags,
  19585                 .load_symbol,
  19586                 .lea_symbol,
  19587                 .load_direct,
  19588                 .lea_direct,
  19589                 .load_got,
  19590                 .lea_got,
  19591                 .load_tlv,
  19592                 .lea_tlv,
  19593                 .lea_frame,
  19594                 => try self.asmRegisterRegister(
  19595                     .{ .i_, .mul },
  19596                     dst_alias,
  19597                     registerAlias(try self.copyToTmpRegister(dst_ty, resolved_src_mcv), abi_size),
  19598                 ),
  19599                 .memory, .indirect, .load_frame => try self.asmRegisterMemory(
  19600                     .{ .i_, .mul },
  19601                     dst_alias,
  19602                     switch (resolved_src_mcv) {
  19603                         .memory => |addr| .{
  19604                             .base = .{ .reg = .ds },
  19605                             .mod = .{ .rm = .{
  19606                                 .size = .fromSize(abi_size),
  19607                                 .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse
  19608                                     return self.asmRegisterRegister(
  19609                                     .{ .i_, .mul },
  19610                                     dst_alias,
  19611                                     registerAlias(
  19612                                         try self.copyToTmpRegister(dst_ty, resolved_src_mcv),
  19613                                         abi_size,
  19614                                     ),
  19615                                 ),
  19616                             } },
  19617                         },
  19618                         .indirect => |reg_off| .{
  19619                             .base = .{ .reg = reg_off.reg },
  19620                             .mod = .{ .rm = .{
  19621                                 .size = .fromSize(abi_size),
  19622                                 .disp = reg_off.off,
  19623                             } },
  19624                         },
  19625                         .load_frame => |frame_addr| .{
  19626                             .base = .{ .frame = frame_addr.index },
  19627                             .mod = .{ .rm = .{
  19628                                 .size = .fromSize(abi_size),
  19629                                 .disp = frame_addr.off,
  19630                             } },
  19631                         },
  19632                         else => unreachable,
  19633                     },
  19634                 ),
  19635             }
  19636         },
  19637         .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented
  19638         .memory, .indirect, .load_symbol, .load_direct, .load_got, .load_tlv, .load_frame => {
  19639             const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
  19640             const tmp_mcv = MCValue{ .register = tmp_reg };
  19641             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  19642             defer self.register_manager.unlockReg(tmp_lock);
  19643 
  19644             try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv);
  19645             try self.genCopy(dst_ty, dst_mcv, tmp_mcv, .{});
  19646         },
  19647     }
  19648 }
  19649 
  19650 fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void {
  19651     const pt = self.pt;
  19652     const zcu = pt.zcu;
  19653     // skip zero-bit arguments as they don't have a corresponding arg instruction
  19654     var arg_index = self.arg_index;
  19655     while (self.args[arg_index] == .none) arg_index += 1;
  19656     self.arg_index = arg_index + 1;
  19657 
  19658     const result: MCValue = if (self.debug_output == .none and self.liveness.isUnused(inst)) .unreach else result: {
  19659         const arg_ty = self.typeOfIndex(inst);
  19660         const src_mcv = self.args[arg_index];
  19661         switch (src_mcv) {
  19662             .register, .register_pair, .load_frame => {
  19663                 for (src_mcv.getRegs()) |reg| self.register_manager.getRegAssumeFree(reg, inst);
  19664                 break :result src_mcv;
  19665             },
  19666             .indirect => |reg_off| {
  19667                 self.register_manager.getRegAssumeFree(reg_off.reg, inst);
  19668                 const dst_mcv = try self.allocRegOrMem(inst, false);
  19669                 try self.genCopy(arg_ty, dst_mcv, src_mcv, .{});
  19670                 break :result dst_mcv;
  19671             },
  19672             .elementwise_regs_then_frame => |regs_frame_addr| {
  19673                 try self.spillEflagsIfOccupied();
  19674 
  19675                 const fn_info = zcu.typeToFunc(self.fn_type).?;
  19676                 const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc);
  19677                 var prev_reg: Register = undefined;
  19678                 for (
  19679                     param_int_regs[param_int_regs.len - regs_frame_addr.regs ..],
  19680                     0..,
  19681                 ) |dst_reg, elem_index| {
  19682                     assert(self.register_manager.isRegFree(dst_reg));
  19683                     if (elem_index > 0) {
  19684                         try self.asmRegisterImmediate(.{ ._l, .sh }, dst_reg.to8(), .u(elem_index));
  19685                         try self.asmRegisterRegister(
  19686                             .{ ._, .@"or" },
  19687                             dst_reg.to8(),
  19688                             prev_reg.to8(),
  19689                         );
  19690                     }
  19691                     prev_reg = dst_reg;
  19692                 }
  19693 
  19694                 const prev_lock = if (regs_frame_addr.regs > 0)
  19695                     self.register_manager.lockRegAssumeUnused(prev_reg)
  19696                 else
  19697                     null;
  19698                 defer if (prev_lock) |lock| self.register_manager.unlockReg(lock);
  19699 
  19700                 const dst_mcv = try self.allocRegOrMem(inst, false);
  19701                 if (regs_frame_addr.regs > 0) try self.asmMemoryRegister(
  19702                     .{ ._, .mov },
  19703                     try dst_mcv.mem(self, .{ .size = .byte }),
  19704                     prev_reg.to8(),
  19705                 );
  19706                 try self.genInlineMemset(
  19707                     dst_mcv.address().offset(@intFromBool(regs_frame_addr.regs > 0)),
  19708                     .{ .immediate = 0 },
  19709                     .{ .immediate = arg_ty.abiSize(zcu) - @intFromBool(regs_frame_addr.regs > 0) },
  19710                     .{},
  19711                 );
  19712 
  19713                 const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  19714                 const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
  19715                 defer self.register_manager.unlockReg(index_lock);
  19716 
  19717                 try self.asmRegisterImmediate(
  19718                     .{ ._, .mov },
  19719                     index_reg.to32(),
  19720                     .u(regs_frame_addr.regs),
  19721                 );
  19722                 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  19723                 try self.asmMemoryImmediate(.{ ._, .cmp }, .{
  19724                     .base = .{ .frame = regs_frame_addr.frame_index },
  19725                     .mod = .{ .rm = .{
  19726                         .size = .byte,
  19727                         .index = index_reg.to64(),
  19728                         .scale = .@"8",
  19729                         .disp = regs_frame_addr.frame_off - @as(u6, regs_frame_addr.regs) * 8,
  19730                     } },
  19731                 }, Immediate.u(0));
  19732                 const unset = try self.asmJccReloc(.e, undefined);
  19733                 try self.asmMemoryRegister(
  19734                     .{ ._s, .bt },
  19735                     try dst_mcv.mem(self, .{ .size = .dword }),
  19736                     index_reg.to32(),
  19737                 );
  19738                 self.performReloc(unset);
  19739                 if (self.hasFeature(.slow_incdec)) {
  19740                     try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
  19741                 } else {
  19742                     try self.asmRegister(.{ ._, .inc }, index_reg.to32());
  19743                 }
  19744                 try self.asmRegisterImmediate(
  19745                     .{ ._, .cmp },
  19746                     index_reg.to32(),
  19747                     .u(arg_ty.vectorLen(zcu)),
  19748                 );
  19749                 _ = try self.asmJccReloc(.b, loop);
  19750 
  19751                 break :result dst_mcv;
  19752             },
  19753             else => return self.fail("TODO implement arg for {}", .{src_mcv}),
  19754         }
  19755     };
  19756     return self.finishAir(inst, result, .{ .none, .none, .none });
  19757 }
  19758 
  19759 fn airDbgArg(self: *CodeGen, inst: Air.Inst.Index) !void {
  19760     // skip zero-bit arguments as they don't have a corresponding arg instruction
  19761     var arg_index = self.arg_index;
  19762     while (self.args[arg_index] == .none) arg_index += 1;
  19763     self.arg_index = arg_index + 1;
  19764 
  19765     if (self.debug_output != .none) {
  19766         const name = self.air.instructions.items(.data)[@intFromEnum(inst)].arg.name;
  19767         if (name != .none) try self.genLocalDebugInfo(inst, self.getResolvedInstValue(inst).short);
  19768         if (self.liveness.isUnused(inst)) try self.processDeath(inst);
  19769     }
  19770     for (self.args[self.arg_index..]) |arg| {
  19771         if (arg != .none) break;
  19772     } else try self.airDbgVarArgs();
  19773 }
  19774 
  19775 fn airDbgVarArgs(self: *CodeGen) !void {
  19776     if (self.pt.zcu.typeToFunc(self.fn_type).?.is_var_args) try self.asmPseudo(.pseudo_dbg_var_args_none);
  19777 }
  19778 
  19779 fn genLocalDebugInfo(
  19780     self: *CodeGen,
  19781     inst: Air.Inst.Index,
  19782     mcv: MCValue,
  19783 ) !void {
  19784     if (self.debug_output == .none) return;
  19785     switch (self.air.instructions.items(.tag)[@intFromEnum(inst)]) {
  19786         else => unreachable,
  19787         .arg, .dbg_arg_inline, .dbg_var_val => |tag| {
  19788             switch (mcv) {
  19789                 .none => try self.asmAir(.dbg_local, inst),
  19790                 .unreach, .dead, .elementwise_regs_then_frame, .reserved_frame, .air_ref => unreachable,
  19791                 .immediate => |imm| try self.asmAirImmediate(.dbg_local, inst, .u(imm)),
  19792                 .lea_frame => |frame_addr| try self.asmAirFrameAddress(.dbg_local, inst, frame_addr),
  19793                 .lea_symbol => |sym_off| try self.asmAirImmediate(.dbg_local, inst, .rel(sym_off)),
  19794                 else => {
  19795                     const ty = switch (tag) {
  19796                         else => unreachable,
  19797                         .arg => self.typeOfIndex(inst),
  19798                         .dbg_arg_inline, .dbg_var_val => self.typeOf(
  19799                             self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op.operand,
  19800                         ),
  19801                     };
  19802                     const frame_index = try self.allocFrameIndex(.initSpill(ty, self.pt.zcu));
  19803                     try self.genSetMem(.{ .frame = frame_index }, 0, ty, mcv, .{});
  19804                     try self.asmAirMemory(.dbg_local, inst, .{
  19805                         .base = .{ .frame = frame_index },
  19806                         .mod = .{ .rm = .{ .size = .qword } },
  19807                     });
  19808                 },
  19809             }
  19810         },
  19811         .dbg_var_ptr => switch (mcv) {
  19812             else => unreachable,
  19813             .unreach, .dead, .elementwise_regs_then_frame, .reserved_frame, .air_ref => unreachable,
  19814             .lea_frame => |frame_addr| try self.asmAirMemory(.dbg_local, inst, .{
  19815                 .base = .{ .frame = frame_addr.index },
  19816                 .mod = .{ .rm = .{
  19817                     .size = .qword,
  19818                     .disp = frame_addr.off,
  19819                 } },
  19820             }),
  19821             .lea_symbol => |sym_off| try self.asmAirMemory(.dbg_local, inst, .{
  19822                 .base = .{ .reloc = sym_off.sym_index },
  19823                 .mod = .{ .rm = .{
  19824                     .size = .qword,
  19825                     .disp = sym_off.off,
  19826                 } },
  19827             }),
  19828             .lea_direct, .lea_got, .lea_tlv => |sym_index| try self.asmAirMemory(.dbg_local, inst, .{
  19829                 .base = .{ .reloc = sym_index },
  19830                 .mod = .{ .rm = .{ .size = .qword } },
  19831             }),
  19832         },
  19833     }
  19834 }
  19835 
  19836 fn airRetAddr(self: *CodeGen, inst: Air.Inst.Index) !void {
  19837     const dst_mcv = try self.allocRegOrMem(inst, true);
  19838     try self.genCopy(.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } }, .{});
  19839     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
  19840 }
  19841 
  19842 fn airFrameAddress(self: *CodeGen, inst: Air.Inst.Index) !void {
  19843     const dst_mcv = try self.allocRegOrMem(inst, true);
  19844     try self.genCopy(.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } }, .{});
  19845     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
  19846 }
  19847 
  19848 fn airCall(self: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier, opts: CopyOptions) !void {
  19849     if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{});
  19850 
  19851     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  19852     const extra = self.air.extraData(Air.Call, pl_op.payload);
  19853     const arg_refs: []const Air.Inst.Ref =
  19854         @ptrCast(self.air.extra[extra.end..][0..extra.data.args_len]);
  19855 
  19856     const ExpectedContents = extern struct {
  19857         tys: [16][@sizeOf(Type)]u8 align(@alignOf(Type)),
  19858         vals: [16][@sizeOf(MCValue)]u8 align(@alignOf(MCValue)),
  19859     };
  19860     var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
  19861         std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
  19862     const allocator = stack.get();
  19863 
  19864     const arg_tys = try allocator.alloc(Type, arg_refs.len);
  19865     defer allocator.free(arg_tys);
  19866     for (arg_tys, arg_refs) |*arg_ty, arg_ref| arg_ty.* = self.typeOf(arg_ref);
  19867 
  19868     const arg_vals = try allocator.alloc(MCValue, arg_refs.len);
  19869     defer allocator.free(arg_vals);
  19870     for (arg_vals, arg_refs) |*arg_val, arg_ref| arg_val.* = .{ .air_ref = arg_ref };
  19871 
  19872     const ret = try self.genCall(.{ .air = pl_op.operand }, arg_tys, arg_vals, opts);
  19873 
  19874     var bt = self.liveness.iterateBigTomb(inst);
  19875     try self.feed(&bt, pl_op.operand);
  19876     for (arg_refs) |arg_ref| try self.feed(&bt, arg_ref);
  19877 
  19878     const result = if (self.liveness.isUnused(inst)) .unreach else ret;
  19879     return self.finishAirResult(inst, result);
  19880 }
  19881 
  19882 fn genCall(self: *CodeGen, info: union(enum) {
  19883     air: Air.Inst.Ref,
  19884     lib: struct {
  19885         return_type: InternPool.Index,
  19886         param_types: []const InternPool.Index,
  19887         lib: ?[]const u8 = null,
  19888         callee: []const u8,
  19889     },
  19890 }, arg_types: []const Type, args: []const MCValue, opts: CopyOptions) !MCValue {
  19891     const pt = self.pt;
  19892     const zcu = pt.zcu;
  19893     const ip = &zcu.intern_pool;
  19894 
  19895     const fn_ty = switch (info) {
  19896         .air => |callee| fn_info: {
  19897             const callee_ty = self.typeOf(callee);
  19898             break :fn_info switch (callee_ty.zigTypeTag(zcu)) {
  19899                 .@"fn" => callee_ty,
  19900                 .pointer => callee_ty.childType(zcu),
  19901                 else => unreachable,
  19902             };
  19903         },
  19904         .lib => |lib| try pt.funcType(.{
  19905             .param_types = lib.param_types,
  19906             .return_type = lib.return_type,
  19907             .cc = self.target.cCallingConvention().?,
  19908         }),
  19909     };
  19910     const fn_info = zcu.typeToFunc(fn_ty).?;
  19911 
  19912     const ExpectedContents = extern struct {
  19913         var_args: [16][@sizeOf(Type)]u8 align(@alignOf(Type)),
  19914         frame_indices: [16]FrameIndex,
  19915         reg_locks: [16][@sizeOf(?RegisterLock)]u8 align(@alignOf(?RegisterLock)),
  19916     };
  19917     var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
  19918         std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
  19919     const allocator = stack.get();
  19920 
  19921     const var_args = try allocator.alloc(Type, args.len - fn_info.param_types.len);
  19922     defer allocator.free(var_args);
  19923     for (var_args, arg_types[fn_info.param_types.len..]) |*var_arg, arg_ty| var_arg.* = arg_ty;
  19924 
  19925     const frame_indices = try allocator.alloc(FrameIndex, args.len);
  19926     defer allocator.free(frame_indices);
  19927 
  19928     var reg_locks: std.ArrayList(?RegisterLock) = .init(allocator);
  19929     defer reg_locks.deinit();
  19930     try reg_locks.ensureTotalCapacity(16);
  19931     defer for (reg_locks.items) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
  19932 
  19933     var call_info = try self.resolveCallingConventionValues(fn_info, var_args, .call_frame);
  19934     defer call_info.deinit(self);
  19935 
  19936     // We need a properly aligned and sized call frame to be able to call this function.
  19937     {
  19938         const needed_call_frame: FrameAlloc = .init(.{
  19939             .size = call_info.stack_byte_count,
  19940             .alignment = call_info.stack_align,
  19941         });
  19942         const frame_allocs_slice = self.frame_allocs.slice();
  19943         const stack_frame_size =
  19944             &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)];
  19945         stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
  19946         const stack_frame_align =
  19947             &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)];
  19948         stack_frame_align.* = stack_frame_align.max(needed_call_frame.abi_align);
  19949     }
  19950 
  19951     try self.spillEflagsIfOccupied();
  19952     try self.spillCallerPreservedRegs(fn_info.cc);
  19953 
  19954     // set stack arguments first because this can clobber registers
  19955     // also clobber spill arguments as we go
  19956     switch (call_info.return_value.long) {
  19957         .none, .unreach => {},
  19958         .indirect => |reg_off| try self.register_manager.getReg(reg_off.reg, null),
  19959         else => unreachable,
  19960     }
  19961     for (call_info.args, arg_types, args, frame_indices) |dst_arg, arg_ty, src_arg, *frame_index|
  19962         switch (dst_arg) {
  19963             .none => {},
  19964             .register => |reg| {
  19965                 try self.register_manager.getReg(reg, null);
  19966                 try reg_locks.append(self.register_manager.lockReg(reg));
  19967             },
  19968             .register_pair => |regs| {
  19969                 for (regs) |reg| try self.register_manager.getReg(reg, null);
  19970                 try reg_locks.appendSlice(&self.register_manager.lockRegs(2, regs));
  19971             },
  19972             .indirect => |reg_off| {
  19973                 frame_index.* = try self.allocFrameIndex(.initType(arg_ty, zcu));
  19974                 try self.genSetMem(.{ .frame = frame_index.* }, 0, arg_ty, src_arg, opts);
  19975                 try self.register_manager.getReg(reg_off.reg, null);
  19976                 try reg_locks.append(self.register_manager.lockReg(reg_off.reg));
  19977             },
  19978             .load_frame => {
  19979                 try self.genCopy(arg_ty, dst_arg, src_arg, opts);
  19980                 try self.freeValue(src_arg);
  19981             },
  19982             .elementwise_regs_then_frame => |regs_frame_addr| {
  19983                 const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  19984                 const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
  19985                 defer self.register_manager.unlockReg(index_lock);
  19986 
  19987                 const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
  19988                     .base = .{ .reg = try self.copyToTmpRegister(.usize, switch (src_arg) {
  19989                         else => src_arg,
  19990                         .air_ref => |src_ref| try self.resolveInst(src_ref),
  19991                     }.address()) },
  19992                     .mod = .{ .rm = .{ .size = .dword } },
  19993                 };
  19994                 const src_lock = switch (src_mem.base) {
  19995                     .reg => |src_reg| self.register_manager.lockReg(src_reg),
  19996                     else => null,
  19997                 };
  19998                 defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  19999 
  20000                 try self.asmRegisterImmediate(
  20001                     .{ ._, .mov },
  20002                     index_reg.to32(),
  20003                     .u(regs_frame_addr.regs),
  20004                 );
  20005                 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  20006                 try self.asmMemoryRegister(.{ ._, .bt }, src_mem, index_reg.to32());
  20007                 try self.asmSetccMemory(.c, .{
  20008                     .base = .{ .frame = regs_frame_addr.frame_index },
  20009                     .mod = .{ .rm = .{
  20010                         .size = .byte,
  20011                         .index = index_reg.to64(),
  20012                         .scale = .@"8",
  20013                         .disp = regs_frame_addr.frame_off - @as(u6, regs_frame_addr.regs) * 8,
  20014                     } },
  20015                 });
  20016                 if (self.hasFeature(.slow_incdec)) {
  20017                     try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
  20018                 } else {
  20019                     try self.asmRegister(.{ ._, .inc }, index_reg.to32());
  20020                 }
  20021                 try self.asmRegisterImmediate(
  20022                     .{ ._, .cmp },
  20023                     index_reg.to32(),
  20024                     .u(arg_ty.vectorLen(zcu)),
  20025                 );
  20026                 _ = try self.asmJccReloc(.b, loop);
  20027 
  20028                 const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc);
  20029                 for (param_int_regs[param_int_regs.len - regs_frame_addr.regs ..]) |dst_reg| {
  20030                     try self.register_manager.getReg(dst_reg, null);
  20031                     try reg_locks.append(self.register_manager.lockReg(dst_reg));
  20032                 }
  20033             },
  20034             else => unreachable,
  20035         };
  20036 
  20037     // now we are free to set register arguments
  20038     switch (call_info.return_value.long) {
  20039         .none, .unreach => {},
  20040         .indirect => |reg_off| {
  20041             const ret_ty: Type = .fromInterned(fn_info.return_type);
  20042             const frame_index = try self.allocFrameIndex(.initSpill(ret_ty, zcu));
  20043             try self.genSetReg(reg_off.reg, .usize, .{
  20044                 .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
  20045             }, .{});
  20046             call_info.return_value.short = .{ .load_frame = .{ .index = frame_index } };
  20047             try reg_locks.append(self.register_manager.lockReg(reg_off.reg));
  20048         },
  20049         else => unreachable,
  20050     }
  20051 
  20052     for (call_info.args, arg_types, args, frame_indices) |dst_arg, arg_ty, src_arg, frame_index|
  20053         switch (dst_arg) {
  20054             .none, .load_frame => {},
  20055             .register => |dst_reg| switch (fn_info.cc) {
  20056                 else => try self.genSetReg(registerAlias(
  20057                     dst_reg,
  20058                     @intCast(arg_ty.abiSize(zcu)),
  20059                 ), arg_ty, src_arg, opts),
  20060                 .x86_64_sysv, .x86_64_win => {
  20061                     const promoted_ty = self.promoteInt(arg_ty);
  20062                     const promoted_abi_size: u32 = @intCast(promoted_ty.abiSize(zcu));
  20063                     const dst_alias = registerAlias(dst_reg, promoted_abi_size);
  20064                     try self.genSetReg(dst_alias, promoted_ty, src_arg, opts);
  20065                     if (promoted_ty.toIntern() != arg_ty.toIntern())
  20066                         try self.truncateRegister(arg_ty, dst_alias);
  20067                 },
  20068             },
  20069             .register_pair => try self.genCopy(arg_ty, dst_arg, src_arg, opts),
  20070             .indirect => |reg_off| try self.genSetReg(reg_off.reg, .usize, .{
  20071                 .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
  20072             }, .{}),
  20073             .elementwise_regs_then_frame => |regs_frame_addr| {
  20074                 const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
  20075                     .base = .{ .reg = try self.copyToTmpRegister(
  20076                         .usize,
  20077                         switch (src_arg) {
  20078                             else => src_arg,
  20079                             .air_ref => |src_ref| try self.resolveInst(src_ref),
  20080                         }.address(),
  20081                     ) },
  20082                     .mod = .{ .rm = .{ .size = .dword } },
  20083                 };
  20084                 const src_lock = switch (src_mem.base) {
  20085                     .reg => |src_reg| self.register_manager.lockReg(src_reg),
  20086                     else => null,
  20087                 };
  20088                 defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  20089 
  20090                 const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc);
  20091                 for (
  20092                     param_int_regs[param_int_regs.len - regs_frame_addr.regs ..],
  20093                     0..,
  20094                 ) |dst_reg, elem_index| {
  20095                     try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
  20096                     try self.asmMemoryImmediate(.{ ._, .bt }, src_mem, .u(elem_index));
  20097                     try self.asmSetccRegister(.c, dst_reg.to8());
  20098                 }
  20099             },
  20100             else => unreachable,
  20101         };
  20102 
  20103     if (fn_info.is_var_args) try self.asmRegisterImmediate(.{ ._, .mov }, .al, .u(call_info.fp_count));
  20104 
  20105     // Due to incremental compilation, how function calls are generated depends
  20106     // on linking.
  20107     switch (info) {
  20108         .air => |callee| if (try self.air.value(callee, pt)) |func_value| {
  20109             const func_key = ip.indexToKey(func_value.ip_index);
  20110             switch (switch (func_key) {
  20111                 else => func_key,
  20112                 .ptr => |ptr| if (ptr.byte_offset == 0) switch (ptr.base_addr) {
  20113                     .nav => |nav| ip.indexToKey(zcu.navValue(nav).toIntern()),
  20114                     else => func_key,
  20115                 } else func_key,
  20116             }) {
  20117                 .func => |func| {
  20118                     if (self.bin_file.cast(.elf)) |elf_file| {
  20119                         const zo = elf_file.zigObjectPtr().?;
  20120                         const sym_index = try zo.getOrCreateMetadataForNav(zcu, func.owner_nav);
  20121                         try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym_index }));
  20122                     } else if (self.bin_file.cast(.coff)) |coff_file| {
  20123                         const atom = try coff_file.getOrCreateAtomForNav(func.owner_nav);
  20124                         const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
  20125                         const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc);
  20126                         try self.genSetReg(scratch_reg, .usize, .{ .lea_got = sym_index }, .{});
  20127                         try self.asmRegister(.{ ._, .call }, scratch_reg);
  20128                     } else if (self.bin_file.cast(.macho)) |macho_file| {
  20129                         const zo = macho_file.getZigObject().?;
  20130                         const sym_index = try zo.getOrCreateMetadataForNav(macho_file, func.owner_nav);
  20131                         const sym = zo.symbols.items[sym_index];
  20132                         try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym.nlist_idx }));
  20133                     } else if (self.bin_file.cast(.plan9)) |p9| {
  20134                         const atom_index = try p9.seeNav(pt, func.owner_nav);
  20135                         const atom = p9.getAtom(atom_index);
  20136                         try self.asmMemory(.{ ._, .call }, .{
  20137                             .base = .{ .reg = .ds },
  20138                             .mod = .{ .rm = .{
  20139                                 .size = .qword,
  20140                                 .disp = @intCast(atom.getOffsetTableAddress(p9)),
  20141                             } },
  20142                         });
  20143                     } else unreachable;
  20144                 },
  20145                 .@"extern" => |@"extern"| if (self.bin_file.cast(.elf)) |elf_file| {
  20146                     const target_sym_index = try elf_file.getGlobalSymbol(
  20147                         @"extern".name.toSlice(ip),
  20148                         @"extern".lib_name.toSlice(ip),
  20149                     );
  20150                     try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
  20151                 } else if (self.bin_file.cast(.macho)) |macho_file| {
  20152                     const target_sym_index = try macho_file.getGlobalSymbol(
  20153                         @"extern".name.toSlice(ip),
  20154                         @"extern".lib_name.toSlice(ip),
  20155                     );
  20156                     try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
  20157                 } else try self.genExternSymbolRef(
  20158                     .call,
  20159                     @"extern".lib_name.toSlice(ip),
  20160                     @"extern".name.toSlice(ip),
  20161                 ),
  20162                 else => return self.fail("TODO implement calling bitcasted functions", .{}),
  20163             }
  20164         } else {
  20165             assert(self.typeOf(callee).zigTypeTag(zcu) == .pointer);
  20166             const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc);
  20167             try self.genSetReg(scratch_reg, .usize, .{ .air_ref = callee }, .{});
  20168             try self.asmRegister(.{ ._, .call }, scratch_reg);
  20169         },
  20170         .lib => |lib| if (self.bin_file.cast(.elf)) |elf_file| {
  20171             const target_sym_index = try elf_file.getGlobalSymbol(lib.callee, lib.lib);
  20172             try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
  20173         } else if (self.bin_file.cast(.macho)) |macho_file| {
  20174             const target_sym_index = try macho_file.getGlobalSymbol(lib.callee, lib.lib);
  20175             try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
  20176         } else try self.genExternSymbolRef(.call, lib.lib, lib.callee),
  20177     }
  20178     return call_info.return_value.short;
  20179 }
  20180 
  20181 fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
            // Lowers an AIR return whose operand (`un_op`) is the value being returned:
            // the value is moved to the location described by `self.ret_mcv`, after which
            // a jump with a not-yet-known target is emitted and recorded in
            // `self.epilogue_relocs`.
            // `safety` is forwarded unchanged as the `.safety` option of the copy/store.
  20182     const pt = self.pt;
  20183     const zcu = pt.zcu;
  20184     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  20185 
  20186     const ret_ty = self.fn_type.fnReturnType(zcu);
  20187     switch (self.ret_mcv.short) {
                // No return location: nothing is copied.
  20188         .none => {},
                // Return location is one to four registers: copy the operand straight into it.
  20189         .register,
  20190         .register_pair,
  20191         .register_triple,
  20192         .register_quadruple,
  20193         => try self.genCopy(ret_ty, self.ret_mcv.short, .{ .air_ref = un_op }, .{ .safety = safety }),
                // Return location is memory addressed through `reg_off.reg`.
  20194         .indirect => |reg_off| {
                    // Claim the register and keep it locked until the end of this arm.
  20195             try self.register_manager.getReg(reg_off.reg, null);
  20196             const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg);
  20197             defer self.register_manager.unlockReg(lock);
  20198 
                    // Load `ret_mcv.long` into the register as a usize, then store the
                    // operand to memory at that register plus `reg_off.off`.
                    // NOTE(review): `ret_mcv.long` presumably holds the saved result pointer — confirm.
  20199             try self.genSetReg(reg_off.reg, .usize, self.ret_mcv.long, .{});
  20200             try self.genSetMem(
  20201                 .{ .reg = reg_off.reg },
  20202                 reg_off.off,
  20203                 ret_ty,
  20204                 .{ .air_ref = un_op },
  20205                 .{ .safety = safety },
  20206             );
  20207         },
  20208         else => unreachable,
  20209     }
  20210     self.ret_mcv.liveOut(self, inst);
            // The instruction itself produces no value (`.unreach`); `un_op` is its only operand.
  20211     try self.finishAir(inst, .unreach, .{ un_op, .none, .none });
  20212 
  20213     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
  20214     // which is available if the jump is 127 bytes or less forward.
            // The jump target is left `undefined` here; the relocation is queued in
            // `epilogue_relocs` (presumably patched to the function epilogue later — confirm).
  20215     const jmp_reloc = try self.asmJmpReloc(undefined);
  20216     try self.epilogue_relocs.append(self.gpa, jmp_reloc);
  20217 }
  20218 
  20219 fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers an AIR return whose operand (`un_op`) is resolved to `ptr` and typed
            // `ptr_ty`: depending on `self.ret_mcv.short`, either a load through `ptr` into
            // the return register(s) is emitted, or `ptr` itself is placed in the indirect
            // return register. A jump with a not-yet-known target is then emitted and
            // recorded in `self.epilogue_relocs`.
  20220     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  20221     const ptr = try self.resolveInst(un_op);
  20222 
  20223     const ptr_ty = self.typeOf(un_op);
  20224     switch (self.ret_mcv.short) {
                // No return location: nothing is loaded.
  20225         .none => {},
                // Load through `ptr` directly into the return register(s).
                // NOTE(review): `airRet` also accepts `.register_triple` and
                // `.register_quadruple`; here they reach `unreachable` — confirm this is intended.
  20226         .register, .register_pair => try self.load(self.ret_mcv.short, ptr_ty, ptr),
                // The pointer value itself (typed `ptr_ty`) is put in `reg_off.reg`;
                // no copy of the pointee is emitted in this arm.
  20227         .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr, .{}),
  20228         else => unreachable,
  20229     }
  20230     self.ret_mcv.liveOut(self, inst);
            // The instruction itself produces no value (`.unreach`); `un_op` is its only operand.
  20231     try self.finishAir(inst, .unreach, .{ un_op, .none, .none });
  20232 
  20233     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
  20234     // which is available if the jump is 127 bytes or less forward.
            // The jump target is left `undefined` here; the relocation is queued in
            // `epilogue_relocs` (presumably patched to the function epilogue later — confirm).
  20235     const jmp_reloc = try self.asmJmpReloc(undefined);
  20236     try self.epilogue_relocs.append(self.gpa, jmp_reloc);
  20237 }
  20238 
  20239 fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void {
            // Lowers the comparison `op` between the two operands of `inst` (`bin_op.lhs`,
            // `bin_op.rhs`). The emitted code leaves its outcome in the flags; the
            // instruction's result is recorded as `.{ .eflags = <Condition> }` and
            // `self.eflags_inst` is set to `inst`.
            //
            // Structure:
            //   1. A first switch on the operand type handles two special cases: floats that
            //      are compared through a library call, and optionals with a separate
            //      "has value" byte.
            //   2. A second switch emits the actual compare: integers and other non-float
            //      types (including a multi-limb path for sizes above 8 bytes), or floats
            //      via `ucomi`.
  20240     const pt = self.pt;
  20241     const zcu = pt.zcu;
  20242     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
            // `ty` is mutable because the optional path below replaces it with the payload type.
  20243     var ty = self.typeOf(bin_op.lhs);
            // Set only by the optional path: a forward jump that is resolved after the
            // `result` block, i.e. past the payload comparison.
  20244     var null_compare: ?Mir.Inst.Index = null;
  20245 
  20246     const result: Condition = result: {
                // The comparison writes the flags, so any value currently tracked there is spilled first.
  20247         try self.spillEflagsIfOccupied();
  20248 
                // Lock whichever registers hold lhs so later allocations cannot pick them.
  20249         const lhs_mcv = try self.resolveInst(bin_op.lhs);
  20250         const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
  20251             .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
  20252             .register_pair => |lhs_regs| locks: {
  20253                 const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
  20254                 break :locks .{ locks[0], locks[1] };
  20255             },
  20256             .register_offset => |lhs_ro| .{
  20257                 self.register_manager.lockRegAssumeUnused(lhs_ro.reg),
  20258                 null,
  20259             },
  20260             else => @splat(null),
  20261         };
  20262         defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  20263 
                // Same for rhs, but with the non-asserting `lockReg`/`lockRegs` variants
                // (presumably because rhs may live in a register lhs already locked — confirm).
  20264         const rhs_mcv = try self.resolveInst(bin_op.rhs);
  20265         const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
  20266             .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
  20267             .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
  20268             .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null },
  20269             else => @splat(null),
  20270         };
  20271         defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  20272 
                // Step 1: type-specific special cases.
  20273         switch (ty.zigTypeTag(zcu)) {
  20274             .float => {
  20275                 const float_bits = ty.floatBits(self.target.*);
                        // f16 without the `f16c` feature, f80 and f128 are compared by a library
                        // call; f32, f64 and f16 with `f16c` fall through to step 2.
  20276                 if (switch (float_bits) {
  20277                     16 => !self.hasFeature(.f16c),
  20278                     32, 64 => false,
  20279                     80, 128 => true,
  20280                     else => unreachable,
  20281                 }) {
                            // The callee is named `__<op><c>f2`, where <op> is the two-letter
                            // operator below and <c> comes from `floatCompilerRtAbiName`.
                            // The buffer is sized for exactly that pattern.
  20282                     var callee_buf: ["__???f2".len]u8 = undefined;
  20283                     const ret = try self.genCall(.{ .lib = .{
  20284                         .return_type = .i32_type,
  20285                         .param_types = &.{ ty.toIntern(), ty.toIntern() },
  20286                         .callee = std.fmt.bufPrint(&callee_buf, "__{s}{c}f2", .{
  20287                             switch (op) {
  20288                                 .eq => "eq",
  20289                                 .neq => "ne",
  20290                                 .lt => "lt",
  20291                                 .lte => "le",
  20292                                 .gt => "gt",
  20293                                 .gte => "ge",
  20294                             },
  20295                             floatCompilerRtAbiName(float_bits),
  20296                         }) catch unreachable,
  20297                     } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, .{});
                            // `test ret, ret` sets the flags from the i32 result; the condition
                            // selected below is then evaluated on those flags.
  20298                     try self.genBinOpMir(.{ ._, .@"test" }, .i32, ret, ret);
  20299                     break :result switch (op) {
  20300                         .eq => .e,
  20301                         .neq => .ne,
  20302                         .lt => .l,
  20303                         .lte => .le,
  20304                         .gt => .g,
  20305                         .gte => .ge,
  20306                     };
  20307                 }
  20308             },
                    // Optionals whose representation is not just the payload: compare the
                    // byte located `payload_abi_size` bytes into each operand first, and only
                    // compare payloads when the `test` of the two bytes is non-zero.
  20309             .optional => if (!ty.optionalReprIsPayload(zcu)) {
  20310                 const opt_ty = ty;
  20311                 const opt_abi_size: u31 = @intCast(opt_ty.abiSize(zcu));
                        // From here on `ty` is the payload type; step 2 compares payloads.
  20312                 ty = opt_ty.optionalChild(zcu);
  20313                 const payload_abi_size: u31 = @intCast(ty.abiSize(zcu));
  20314 
  20315                 const temp_lhs_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  20316                 const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg);
  20317                 defer self.register_manager.unlockReg(temp_lhs_lock);
  20318 
                        // Get the lhs byte into the low 8 bits of `temp_lhs_reg`: either a byte
                        // load from offset `payload_abi_size`, or a copy of the whole optional
                        // followed by a shift by `payload_abi_size * 8` bits
                        // (`.{ ._r, .sh }` is presumably a right shift — confirm).
  20319                 if (lhs_mcv.isBase()) try self.asmRegisterMemory(
  20320                     .{ ._, .mov },
  20321                     temp_lhs_reg.to8(),
  20322                     try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }),
  20323                 ) else {
  20324                     try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{});
  20325                     try self.asmRegisterImmediate(
  20326                         .{ ._r, .sh },
  20327                         registerAlias(temp_lhs_reg, opt_abi_size),
  20328                         .u(payload_abi_size * 8),
  20329                     );
  20330                 }
  20331 
                        // Emits `test` of the two bytes, a `jnz` whose reloc is returned from this
                        // block, and then a `cmp` of the two bytes for the fall-through path.
  20332                 const payload_compare = payload_compare: {
  20333                     if (rhs_mcv.isBase()) {
                                // rhs byte can be addressed in place.
  20334                         const rhs_mem =
  20335                             try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte });
  20336                         try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8());
  20337                         const payload_compare = try self.asmJccReloc(.nz, undefined);
  20338                         try self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem);
  20339                         break :payload_compare payload_compare;
  20340                     }
  20341 
                            // Otherwise copy rhs to a temporary register and shift its byte down
                            // the same way as for lhs.
  20342                     const temp_rhs_reg = try self.copyToTmpRegister(opt_ty, rhs_mcv);
  20343                     const temp_rhs_lock = self.register_manager.lockRegAssumeUnused(temp_rhs_reg);
  20344                     defer self.register_manager.unlockReg(temp_rhs_lock);
  20345 
  20346                     try self.asmRegisterImmediate(
  20347                         .{ ._r, .sh },
  20348                         registerAlias(temp_rhs_reg, opt_abi_size),
  20349                         .u(payload_abi_size * 8),
  20350                     );
  20351                     try self.asmRegisterRegister(
  20352                         .{ ._, .@"test" },
  20353                         temp_lhs_reg.to8(),
  20354                         temp_rhs_reg.to8(),
  20355                     );
  20356                     const payload_compare = try self.asmJccReloc(.nz, undefined);
  20357                     try self.asmRegisterRegister(
  20358                         .{ ._, .cmp },
  20359                         temp_lhs_reg.to8(),
  20360                         temp_rhs_reg.to8(),
  20361                     );
  20362                     break :payload_compare payload_compare;
  20363                 };
                        // Fall-through path: jump over the payload comparison (resolved at the
                        // end of this function). The `jnz` above is resolved to this point, where
                        // the payload comparison emitted by step 2 begins.
  20364                 null_compare = try self.asmJmpReloc(undefined);
  20365                 self.performReloc(payload_compare);
  20366             },
  20367             else => {},
  20368         }
  20369 
                // Step 2: emit the comparison itself for (the possibly rewritten) `ty`.
  20370         switch (ty.zigTypeTag(zcu)) {
  20371             else => {
  20372                 const abi_size: u16 = @intCast(ty.abiSize(zcu));
                        // For operands wider than 8 bytes only `lt`/`gte` are emitted as ordered
                        // compares (see the `unreachable` for `lte`/`gt` below), so `lte`/`gt`
                        // must swap operands and `lt`/`gte` must not. Everything else may swap.
  20373                 const may_flip: enum {
  20374                     may_flip,
  20375                     must_flip,
  20376                     must_not_flip,
  20377                 } = if (abi_size > 8) switch (op) {
  20378                     .lt, .gte => .must_not_flip,
  20379                     .lte, .gt => .must_flip,
  20380                     .eq, .neq => .may_flip,
  20381                 } else .may_flip;
  20382 
                        // When free to choose, swap only if lhs is neither a register nor `isBase()`.
  20383                 const flipped = switch (may_flip) {
  20384                     .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isBase(),
  20385                     .must_flip => true,
  20386                     .must_not_flip => false,
  20387                 };
                        // The destination operand is used as-is when it is a register, or when it
                        // is `isBase()` and at most 8 bytes; otherwise it is copied to a temporary.
  20388                 const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
  20389                 const dst_mcv = if (unmat_dst_mcv.isRegister() or
  20390                     (abi_size <= 8 and unmat_dst_mcv.isBase())) unmat_dst_mcv else dst: {
  20391                     const dst_mcv = try self.allocTempRegOrMem(ty, true);
  20392                     try self.genCopy(ty, dst_mcv, unmat_dst_mcv, .{});
  20393                     break :dst dst_mcv;
  20394                 };
  20395                 const dst_lock =
  20396                     if (dst_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  20397                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  20398 
  20399                 const src_mcv = try self.resolveInst(if (flipped) bin_op.lhs else bin_op.rhs);
  20400                 const src_lock =
  20401                     if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  20402                 defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  20403 
                        // The condition is derived from the (possibly reversed) operator and the
                        // signedness: the integer's own signedness for ABI integers, unsigned otherwise.
  20404                 break :result .fromCompareOperator(
  20405                     if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned,
  20406                     result_op: {
  20407                         const flipped_op = if (flipped) op.reverse() else op;
  20408                         if (abi_size > 8) switch (flipped_op) {
                                    // Ordered compares fall through to the generic `cmp` below.
  20409                             .lt, .gte => {},
                                    // Excluded by `may_flip` above.
  20410                             .lte, .gt => unreachable,
                                    // Equality of wide operands: xor corresponding 8-byte limbs and
                                    // or the results together; the flags of the last `or` decide.
  20411                             .eq, .neq => {
                                        // Present only when an operand's address had to be put in a register.
  20412                                 const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
  20413 
  20414                                 const resolved_dst_mcv = switch (dst_mcv) {
  20415                                     else => dst_mcv,
  20416                                     .air_ref => |dst_ref| try self.resolveInst(dst_ref),
  20417                                 };
  20418                                 const dst_info: OpInfo = switch (resolved_dst_mcv) {
  20419                                     .none,
  20420                                     .unreach,
  20421                                     .dead,
  20422                                     .undef,
  20423                                     .immediate,
  20424                                     .eflags,
  20425                                     .register_offset,
  20426                                     .register_overflow,
  20427                                     .register_mask,
  20428                                     .indirect,
  20429                                     .lea_direct,
  20430                                     .lea_got,
  20431                                     .lea_tlv,
  20432                                     .lea_frame,
  20433                                     .lea_symbol,
  20434                                     .elementwise_regs_then_frame,
  20435                                     .reserved_frame,
  20436                                     .air_ref,
  20437                                     => unreachable,
                                            // Registers and frame slots need no address register.
  20438                                     .register, .register_pair, .register_triple, .register_quadruple, .load_frame => null,
  20439                                     .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: {
  20440                                         switch (resolved_dst_mcv) {
                                                    // An absolute address needs no register when both the
                                                    // first and the last limb address fit in an i32.
  20441                                             .memory => |addr| if (std.math.cast(
  20442                                                 i32,
  20443                                                 @as(i64, @bitCast(addr)),
  20444                                             ) != null and std.math.cast(
  20445                                                 i32,
  20446                                                 @as(i64, @bitCast(addr)) + abi_size - 8,
  20447                                             ) != null) break :dst null,
  20448                                             .load_symbol, .load_got, .load_direct, .load_tlv => {},
  20449                                             else => unreachable,
  20450                                         }
  20451 
                                                // Materialize the operand's address in a locked GP register.
  20452                                         const dst_addr_reg = (try self.register_manager.allocReg(
  20453                                             null,
  20454                                             abi.RegisterClass.gp,
  20455                                         )).to64();
  20456                                         const dst_addr_lock =
  20457                                             self.register_manager.lockRegAssumeUnused(dst_addr_reg);
                                                // Released here only on failure; on success the `defer`
                                                // after this switch releases it.
  20458                                         errdefer self.register_manager.unlockReg(dst_addr_lock);
  20459 
  20460                                         try self.genSetReg(dst_addr_reg, .usize, resolved_dst_mcv.address(), .{});
  20461                                         break :dst .{
  20462                                             .addr_reg = dst_addr_reg,
  20463                                             .addr_lock = dst_addr_lock,
  20464                                         };
  20465                                     },
  20466                                 };
  20467                                 defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
  20468 
                                        // Same treatment for the source operand.
  20469                                 const resolved_src_mcv = switch (src_mcv) {
  20470                                     else => src_mcv,
  20471                                     .air_ref => |src_ref| try self.resolveInst(src_ref),
  20472                                 };
  20473                                 const src_info: OpInfo = switch (resolved_src_mcv) {
  20474                                     .none,
  20475                                     .unreach,
  20476                                     .dead,
  20477                                     .undef,
  20478                                     .immediate,
  20479                                     .eflags,
  20480                                     .register,
  20481                                     .register_offset,
  20482                                     .register_overflow,
  20483                                     .register_mask,
  20484                                     .indirect,
  20485                                     .lea_symbol,
  20486                                     .lea_direct,
  20487                                     .lea_got,
  20488                                     .lea_tlv,
  20489                                     .lea_frame,
  20490                                     .elementwise_regs_then_frame,
  20491                                     .reserved_frame,
  20492                                     .air_ref,
  20493                                     => unreachable,
  20494                                     .register_pair, .register_triple, .register_quadruple, .load_frame => null,
  20495                                     .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: {
  20496                                         switch (resolved_src_mcv) {
  20497                                             .memory => |addr| if (std.math.cast(
  20498                                                 i32,
  20499                                                 @as(i64, @bitCast(addr)),
  20500                                             ) != null and std.math.cast(
  20501                                                 i32,
  20502                                                 @as(i64, @bitCast(addr)) + abi_size - 8,
  20503                                             ) != null) break :src null,
  20504                                             .load_symbol, .load_got, .load_direct, .load_tlv => {},
  20505                                             else => unreachable,
  20506                                         }
  20507 
  20508                                         const src_addr_reg = (try self.register_manager.allocReg(
  20509                                             null,
  20510                                             abi.RegisterClass.gp,
  20511                                         )).to64();
  20512                                         const src_addr_lock =
  20513                                             self.register_manager.lockRegAssumeUnused(src_addr_reg);
  20514                                         errdefer self.register_manager.unlockReg(src_addr_lock);
  20515 
  20516                                         try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{});
  20517                                         break :src .{
  20518                                             .addr_reg = src_addr_reg,
  20519                                             .addr_lock = src_addr_lock,
  20520                                         };
  20521                                     },
  20522                                 };
  20523                                 defer if (src_info) |info|
  20524                                     self.register_manager.unlockReg(info.addr_lock);
  20525 
                                        // Two scratch registers: regs[0] is the accumulator and also
                                        // holds limb 0; regs[1] holds every later limb.
  20526                                 const regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
  20527                                 const acc_reg = regs[0].to64();
  20528                                 const locks = self.register_manager.lockRegsAssumeUnused(2, regs);
  20529                                 defer for (locks) |lock| self.register_manager.unlockReg(lock);
  20530 
  20531                                 const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable;
  20532                                 var limb_i: u16 = 0;
  20533                                 while (limb_i < limbs_len) : (limb_i += 1) {
  20534                                     const off = limb_i * 8;
  20535                                     const tmp_reg = regs[@min(limb_i, 1)].to64();
  20536 
                                            // tmp_reg = limb `limb_i` of the destination operand.
  20537                                     try self.genSetReg(tmp_reg, .usize, if (dst_info) |info| .{
  20538                                         .indirect = .{ .reg = info.addr_reg, .off = off },
  20539                                     } else switch (resolved_dst_mcv) {
  20540                                         inline .register_pair,
  20541                                         .register_triple,
  20542                                         .register_quadruple,
  20543                                         => |dst_regs| .{ .register = dst_regs[limb_i] },
  20544                                         .memory => |dst_addr| .{
  20545                                             .memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off),
  20546                                         },
  20547                                         .indirect => |reg_off| .{ .indirect = .{
  20548                                             .reg = reg_off.reg,
  20549                                             .off = reg_off.off + off,
  20550                                         } },
  20551                                         .load_frame => |frame_addr| .{ .load_frame = .{
  20552                                             .index = frame_addr.index,
  20553                                             .off = frame_addr.off + off,
  20554                                         } },
  20555                                         else => unreachable,
  20556                                     }, .{});
  20557 
                                            // tmp_reg ^= limb `limb_i` of the source operand.
  20558                                     try self.genBinOpMir(
  20559                                         .{ ._, .xor },
  20560                                         .usize,
  20561                                         .{ .register = tmp_reg },
  20562                                         if (src_info) |info| .{
  20563                                             .indirect = .{ .reg = info.addr_reg, .off = off },
  20564                                         } else switch (resolved_src_mcv) {
  20565                                             inline .register_pair,
  20566                                             .register_triple,
  20567                                             .register_quadruple,
  20568                                             => |src_regs| .{ .register = src_regs[limb_i] },
  20569                                             .memory => |src_addr| .{
  20570                                                 .memory = @bitCast(@as(i64, @bitCast(src_addr)) + off),
  20571                                             },
  20572                                             .indirect => |reg_off| .{ .indirect = .{
  20573                                                 .reg = reg_off.reg,
  20574                                                 .off = reg_off.off + off,
  20575                                             } },
  20576                                             .load_frame => |frame_addr| .{ .load_frame = .{
  20577                                                 .index = frame_addr.index,
  20578                                                 .off = frame_addr.off + off,
  20579                                             } },
  20580                                             else => unreachable,
  20581                                         },
  20582                                     );
  20583 
                                            // Fold every limb after the first into the accumulator.
  20584                                     if (limb_i > 0)
  20585                                         try self.asmRegisterRegister(.{ ._, .@"or" }, acc_reg, tmp_reg);
  20586                                 }
  20587                                 assert(limbs_len >= 2); // use flags from or
                                        // Skip the generic `cmp` below; the flags are already set.
  20588                                 break :result_op flipped_op;
  20589                             },
  20590                         };
                                // Single `cmp dst, src` for everything that did not take the limb path.
  20591                         try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv);
  20592                         break :result_op flipped_op;
  20593                     },
  20594                 );
  20595             },
                    // Floats not handled by the library call in step 1 (f16 with `f16c`, f32, f64).
  20596             .float => {
                        // `lt`/`lte` swap operands so that only `gt`/`gte`/`eq`/`neq` remain.
  20597                 const flipped = switch (op) {
  20598                     .lt, .lte => true,
  20599                     .eq, .gte, .gt, .neq => false,
  20600                 };
  20601 
                        // The destination operand must be in a register; copy it to a temporary if it is not.
  20602                 const dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
  20603                 const dst_reg = if (dst_mcv.isRegister())
  20604                     dst_mcv.getReg().?
  20605                 else
  20606                     try self.copyToTmpRegister(ty, dst_mcv);
  20607                 const dst_lock = self.register_manager.lockReg(dst_reg);
  20608                 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  20609                 const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
  20610 
  20611                 switch (ty.floatBits(self.target.*)) {
  20612                     16 => {
                                // Step 1 already diverted f16 without `f16c` to a library call.
  20613                         assert(self.hasFeature(.f16c));
  20614                         const tmp1_reg =
  20615                             (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
  20616                         const tmp1_mcv = MCValue{ .register = tmp1_reg };
  20617                         const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg);
  20618                         defer self.register_manager.unlockReg(tmp1_lock);
  20619 
  20620                         const tmp2_reg =
  20621                             (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
  20622                         const tmp2_mcv = MCValue{ .register = tmp2_reg };
  20623                         const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
  20624                         defer self.register_manager.unlockReg(tmp2_lock);
  20625 
                                // Combine dst and src into tmp1: from memory the src word is
                                // inserted at word index 1; from a register the low words are
                                // interleaved (mnemonics presumably `vpinsrw` / `vpunpcklwd` — confirm).
  20626                         if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  20627                             .{ .vp_w, .insr },
  20628                             tmp1_reg,
  20629                             dst_reg.to128(),
  20630                             try src_mcv.mem(self, .{ .size = .word }),
  20631                             .u(1),
  20632                         ) else try self.asmRegisterRegisterRegister(
  20633                             .{ .vp_, .unpcklwd },
  20634                             tmp1_reg,
  20635                             dst_reg.to128(),
  20636                             (if (src_mcv.isRegister())
  20637                                 src_mcv.getReg().?
  20638                             else
  20639                                 try self.copyToTmpRegister(ty, src_mcv)).to128(),
  20640                         );
                                // Convert tmp1 in place, copy it to tmp2 through `movshdup`, then
                                // compare tmp1 with tmp2 using the scalar-single `ucomi`.
                                // NOTE(review): presumably widens both halves to f32 and moves
                                // the src element to lane 0 of tmp2 — confirm.
  20641                         try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
  20642                         try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
  20643                         try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
  20644                     },
                            // f32 and f64 compare directly with the `_ss` / `_sd` form of `ucomi`.
  20645                     32 => try self.genBinOpMir(
  20646                         .{ ._ss, .ucomi },
  20647                         ty,
  20648                         .{ .register = dst_reg },
  20649                         src_mcv,
  20650                     ),
  20651                     64 => try self.genBinOpMir(
  20652                         .{ ._sd, .ucomi },
  20653                         ty,
  20654                         .{ .register = dst_reg },
  20655                         src_mcv,
  20656                     ),
  20657                     else => unreachable,
  20658                 }
  20659 
                        // Map the (possibly reversed) operator to a condition on the `ucomi` flags.
  20660                 break :result switch (if (flipped) op.reverse() else op) {
  20661                     .lt, .lte => unreachable, // required to have been canonicalized to gt(e)
  20662                     .gt => .a,
  20663                     .gte => .ae,
  20664                     .eq => .z_and_np,
  20665                     .neq => .nz_or_p,
  20666                 };
  20667             },
  20668         }
  20669     };
  20670 
            // Resolve the optional path's skip jump to this point, after all compare code.
  20671     if (null_compare) |reloc| self.performReloc(reloc);
            // Record that the flags now hold this instruction's result.
  20672     self.eflags_inst = inst;
  20673     return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none });
  20674 }
  20675 
  20676 fn airCmpVector(self: *CodeGen, inst: Air.Inst.Index) !void {
  20677     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  20678     const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
  20679     const dst_mcv = try self.genBinOp(
  20680         inst,
  20681         .fromCmpOp(extra.compareOperator(), false),
  20682         extra.lhs,
  20683         extra.rhs,
  20684     );
  20685     return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
  20686 }
  20687 
  20688 fn airCmpLtErrorsLen(self: *CodeGen, inst: Air.Inst.Index) !void {
  20689     const pt = self.pt;
  20690     const zcu = pt.zcu;
  20691     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  20692 
  20693     const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  20694     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  20695     defer self.register_manager.unlockReg(addr_lock);
  20696     const anyerror_lazy_sym: link.File.LazySymbol = .{ .kind = .const_data, .ty = .anyerror_type };
  20697     try self.genLazySymbolRef(.lea, addr_reg, anyerror_lazy_sym);
  20698 
  20699     try self.spillEflagsIfOccupied();
  20700 
  20701     const op_ty = self.typeOf(un_op);
  20702     const op_abi_size: u32 = @intCast(op_ty.abiSize(zcu));
  20703     const op_mcv = try self.resolveInst(un_op);
  20704     const dst_reg = switch (op_mcv) {
  20705         .register => |reg| reg,
  20706         else => try self.copyToTmpRegister(op_ty, op_mcv),
  20707     };
  20708     try self.asmRegisterMemory(
  20709         .{ ._, .cmp },
  20710         registerAlias(dst_reg, op_abi_size),
  20711         .{
  20712             .base = .{ .reg = addr_reg },
  20713             .mod = .{ .rm = .{ .size = .fromSize(op_abi_size) } },
  20714         },
  20715     );
  20716 
  20717     self.eflags_inst = inst;
  20718     return self.finishAir(inst, .{ .eflags = .b }, .{ un_op, .none, .none });
  20719 }
  20720 
  20721 fn airTry(self: *CodeGen, inst: Air.Inst.Index) !void {
  20722     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  20723     const extra = self.air.extraData(Air.Try, pl_op.payload);
  20724     const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]);
  20725     const operand_ty = self.typeOf(pl_op.operand);
  20726     const result = try self.genTry(inst, pl_op.operand, body, operand_ty, false);
  20727     return self.finishAir(inst, result, .{ .none, .none, .none });
  20728 }
  20729 
  20730 fn airTryPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  20731     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  20732     const extra = self.air.extraData(Air.TryPtr, ty_pl.payload);
  20733     const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]);
  20734     const operand_ty = self.typeOf(extra.data.ptr);
  20735     const result = try self.genTry(inst, extra.data.ptr, body, operand_ty, true);
  20736     return self.finishAir(inst, result, .{ .none, .none, .none });
  20737 }
  20738 
  20739 fn genTry(
  20740     self: *CodeGen,
  20741     inst: Air.Inst.Index,
  20742     operand: Air.Inst.Ref,
  20743     body: []const Air.Inst.Index,
  20744     operand_ty: Type,
  20745     operand_is_ptr: bool,
  20746 ) !MCValue {
  20747     const liveness_cond_br = self.liveness.getCondBr(inst);
  20748 
  20749     const operand_mcv = try self.resolveInst(operand);
  20750     const is_err_mcv = if (operand_is_ptr)
  20751         try self.isErrPtr(null, operand_ty, operand_mcv)
  20752     else
  20753         try self.isErr(null, operand_ty, operand_mcv);
  20754 
  20755     const reloc = try self.genCondBrMir(.anyerror, is_err_mcv);
  20756 
  20757     if (self.liveness.operandDies(inst, 0)) {
  20758         if (operand.toIndex()) |operand_inst| try self.processDeath(operand_inst);
  20759     }
  20760 
  20761     self.scope_generation += 1;
  20762     const state = try self.saveState();
  20763 
  20764     for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
  20765     try self.genBodyBlock(body);
  20766     try self.restoreState(state, &.{}, .{
  20767         .emit_instructions = false,
  20768         .update_tracking = true,
  20769         .resurrect = true,
  20770         .close_scope = true,
  20771     });
  20772 
  20773     self.performReloc(reloc);
  20774 
  20775     for (liveness_cond_br.then_deaths) |death| try self.processDeath(death);
  20776 
  20777     const result = if (self.liveness.isUnused(inst))
  20778         .unreach
  20779     else if (operand_is_ptr)
  20780         try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand_mcv)
  20781     else
  20782         try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand_mcv);
  20783     return result;
  20784 }
  20785 
  20786 fn airDbgStmt(self: *CodeGen, inst: Air.Inst.Index) !void {
  20787     const dbg_stmt = self.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt;
  20788     _ = try self.addInst(.{
  20789         .tag = .pseudo,
  20790         .ops = .pseudo_dbg_line_stmt_line_column,
  20791         .data = .{ .line_column = .{
  20792             .line = dbg_stmt.line,
  20793             .column = dbg_stmt.column,
  20794         } },
  20795     });
  20796 }
  20797 
  20798 fn airDbgEmptyStmt(self: *CodeGen) !void {
  20799     if (self.mir_instructions.len > 0 and
  20800         self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] == .pseudo_dbg_line_stmt_line_column)
  20801         self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] = .pseudo_dbg_line_line_column;
  20802     try self.asmOpOnly(.{ ._, .nop });
  20803 }
  20804 
  20805 fn airDbgInlineBlock(self: *CodeGen, inst: Air.Inst.Index) !void {
  20806     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  20807     const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
  20808     const old_inline_func = self.inline_func;
  20809     defer self.inline_func = old_inline_func;
  20810     self.inline_func = extra.data.func;
  20811     _ = try self.addInst(.{
  20812         .tag = .pseudo,
  20813         .ops = .pseudo_dbg_enter_inline_func,
  20814         .data = .{ .func = extra.data.func },
  20815     });
  20816     try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]));
  20817     _ = try self.addInst(.{
  20818         .tag = .pseudo,
  20819         .ops = .pseudo_dbg_leave_inline_func,
  20820         .data = .{ .func = old_inline_func },
  20821     });
  20822 }
  20823 
  20824 fn airDbgVar(self: *CodeGen, inst: Air.Inst.Index) !void {
  20825     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  20826     try self.genLocalDebugInfo(inst, try self.resolveInst(pl_op.operand));
  20827     return self.finishAir(inst, .unreach, .{ pl_op.operand, .none, .none });
  20828 }
  20829 
  20830 fn genCondBrMir(self: *CodeGen, ty: Type, mcv: MCValue) !Mir.Inst.Index {
  20831     const pt = self.pt;
  20832     const abi_size = ty.abiSize(pt.zcu);
  20833     switch (mcv) {
  20834         .eflags => |cc| {
  20835             // Here we map the opposites since the jump is to the false branch.
  20836             return self.asmJccReloc(cc.negate(), undefined);
  20837         },
  20838         .register => |reg| {
  20839             try self.spillEflagsIfOccupied();
  20840             try self.asmRegisterImmediate(.{ ._, .@"test" }, reg.to8(), .u(1));
  20841             return self.asmJccReloc(.z, undefined);
  20842         },
  20843         .immediate,
  20844         .load_frame,
  20845         => {
  20846             try self.spillEflagsIfOccupied();
  20847             if (abi_size <= 8) {
  20848                 const reg = try self.copyToTmpRegister(ty, mcv);
  20849                 return self.genCondBrMir(ty, .{ .register = reg });
  20850             }
  20851             return self.fail("TODO implement condbr when condition is {} with abi larger than 8 bytes", .{mcv});
  20852         },
  20853         else => return self.fail("TODO implement condbr when condition is {s}", .{@tagName(mcv)}),
  20854     }
  20855 }
  20856 
  20857 fn airCondBr(self: *CodeGen, inst: Air.Inst.Index) !void {
  20858     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  20859     const cond = try self.resolveInst(pl_op.operand);
  20860     const cond_ty = self.typeOf(pl_op.operand);
  20861     const extra = self.air.extraData(Air.CondBr, pl_op.payload);
  20862     const then_body: []const Air.Inst.Index =
  20863         @ptrCast(self.air.extra[extra.end..][0..extra.data.then_body_len]);
  20864     const else_body: []const Air.Inst.Index =
  20865         @ptrCast(self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]);
  20866     const liveness_cond_br = self.liveness.getCondBr(inst);
  20867 
  20868     // If the condition dies here in this condbr instruction, process
  20869     // that death now instead of later as this has an effect on
  20870     // whether it needs to be spilled in the branches
  20871     if (self.liveness.operandDies(inst, 0)) {
  20872         if (pl_op.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
  20873     }
  20874 
  20875     self.scope_generation += 1;
  20876     const state = try self.saveState();
  20877     const reloc = try self.genCondBrMir(cond_ty, cond);
  20878 
  20879     for (liveness_cond_br.then_deaths) |death| try self.processDeath(death);
  20880     try self.genBodyBlock(then_body);
  20881     try self.restoreState(state, &.{}, .{
  20882         .emit_instructions = false,
  20883         .update_tracking = true,
  20884         .resurrect = true,
  20885         .close_scope = true,
  20886     });
  20887 
  20888     self.performReloc(reloc);
  20889 
  20890     for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
  20891     try self.genBodyBlock(else_body);
  20892     try self.restoreState(state, &.{}, .{
  20893         .emit_instructions = false,
  20894         .update_tracking = true,
  20895         .resurrect = true,
  20896         .close_scope = true,
  20897     });
  20898 
  20899     // We already took care of pl_op.operand earlier, so there's nothing left to do.
  20900 }
  20901 
  20902 fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue {
  20903     const pt = self.pt;
  20904     const zcu = pt.zcu;
  20905     switch (opt_mcv) {
  20906         .register_overflow => |ro| return .{ .eflags = ro.eflags.negate() },
  20907         else => {},
  20908     }
  20909 
  20910     try self.spillEflagsIfOccupied();
  20911 
  20912     const pl_ty = opt_ty.optionalChild(zcu);
  20913 
  20914     const some_info: struct { off: u31, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu))
  20915         .{ .off = 0, .ty = if (pl_ty.isSlice(zcu)) pl_ty.slicePtrFieldType(zcu) else pl_ty }
  20916     else
  20917         .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = .bool };
  20918 
  20919     self.eflags_inst = inst;
  20920     switch (opt_mcv) {
  20921         .none,
  20922         .unreach,
  20923         .dead,
  20924         .undef,
  20925         .immediate,
  20926         .eflags,
  20927         .register_triple,
  20928         .register_quadruple,
  20929         .register_offset,
  20930         .register_overflow,
  20931         .register_mask,
  20932         .lea_direct,
  20933         .lea_got,
  20934         .lea_tlv,
  20935         .lea_symbol,
  20936         .elementwise_regs_then_frame,
  20937         .reserved_frame,
  20938         .air_ref,
  20939         => unreachable,
  20940 
  20941         .lea_frame => {
  20942             self.eflags_inst = null;
  20943             return .{ .immediate = @intFromBool(false) };
  20944         },
  20945 
  20946         .register => |opt_reg| {
  20947             if (some_info.off == 0) {
  20948                 const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu));
  20949                 const alias_reg = registerAlias(opt_reg, some_abi_size);
  20950                 assert(some_abi_size * 8 == alias_reg.bitSize());
  20951                 try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg);
  20952                 return .{ .eflags = .z };
  20953             }
  20954             assert(some_info.ty.ip_index == .bool_type);
  20955             const opt_abi_size: u32 = @intCast(opt_ty.abiSize(zcu));
  20956             try self.asmRegisterImmediate(
  20957                 .{ ._, .bt },
  20958                 registerAlias(opt_reg, opt_abi_size),
  20959                 .u(@as(u6, @intCast(some_info.off * 8))),
  20960             );
  20961             return .{ .eflags = .nc };
  20962         },
  20963 
  20964         .register_pair => |opt_regs| {
  20965             if (some_info.off == 0) {
  20966                 const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu));
  20967                 const alias_reg = registerAlias(opt_regs[0], some_abi_size);
  20968                 assert(some_abi_size * 8 == alias_reg.bitSize());
  20969                 try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg);
  20970                 return .{ .eflags = .z };
  20971             }
  20972             assert(some_info.ty.ip_index == .bool_type);
  20973             const opt_abi_size: u32 = @intCast(opt_ty.abiSize(zcu));
  20974             try self.asmRegisterImmediate(
  20975                 .{ ._, .bt },
  20976                 registerAlias(opt_regs[some_info.off / 8], opt_abi_size),
  20977                 .u(@as(u6, @truncate(some_info.off * 8))),
  20978             );
  20979             return .{ .eflags = .nc };
  20980         },
  20981 
  20982         .memory,
  20983         .load_symbol,
  20984         .load_got,
  20985         .load_direct,
  20986         .load_tlv,
  20987         => {
  20988             const addr_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  20989             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  20990             defer self.register_manager.unlockReg(addr_reg_lock);
  20991 
  20992             try self.genSetReg(addr_reg, .usize, opt_mcv.address(), .{});
  20993             const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu));
  20994             try self.asmMemoryImmediate(
  20995                 .{ ._, .cmp },
  20996                 .{
  20997                     .base = .{ .reg = addr_reg },
  20998                     .mod = .{ .rm = .{
  20999                         .size = .fromSize(some_abi_size),
  21000                         .disp = some_info.off,
  21001                     } },
  21002                 },
  21003                 .u(0),
  21004             );
  21005             return .{ .eflags = .e };
  21006         },
  21007 
  21008         .indirect, .load_frame => {
  21009             const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu));
  21010             try self.asmMemoryImmediate(
  21011                 .{ ._, .cmp },
  21012                 switch (opt_mcv) {
  21013                     .indirect => |reg_off| .{
  21014                         .base = .{ .reg = reg_off.reg },
  21015                         .mod = .{ .rm = .{
  21016                             .size = .fromSize(some_abi_size),
  21017                             .disp = reg_off.off + some_info.off,
  21018                         } },
  21019                     },
  21020                     .load_frame => |frame_addr| .{
  21021                         .base = .{ .frame = frame_addr.index },
  21022                         .mod = .{ .rm = .{
  21023                             .size = .fromSize(some_abi_size),
  21024                             .disp = frame_addr.off + some_info.off,
  21025                         } },
  21026                     },
  21027                     else => unreachable,
  21028                 },
  21029                 .u(0),
  21030             );
  21031             return .{ .eflags = .e };
  21032         },
  21033     }
  21034 }
  21035 
  21036 fn isNullPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
  21037     const pt = self.pt;
  21038     const zcu = pt.zcu;
  21039     const opt_ty = ptr_ty.childType(zcu);
  21040     const pl_ty = opt_ty.optionalChild(zcu);
  21041 
  21042     try self.spillEflagsIfOccupied();
  21043 
  21044     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu))
  21045         .{ .off = 0, .ty = if (pl_ty.isSlice(zcu)) pl_ty.slicePtrFieldType(zcu) else pl_ty }
  21046     else
  21047         .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = .bool };
  21048 
  21049     const ptr_reg = switch (ptr_mcv) {
  21050         .register => |reg| reg,
  21051         else => try self.copyToTmpRegister(ptr_ty, ptr_mcv),
  21052     };
  21053     const ptr_lock = self.register_manager.lockReg(ptr_reg);
  21054     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  21055 
  21056     const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu));
  21057     try self.asmMemoryImmediate(
  21058         .{ ._, .cmp },
  21059         .{
  21060             .base = .{ .reg = ptr_reg },
  21061             .mod = .{ .rm = .{
  21062                 .size = .fromSize(some_abi_size),
  21063                 .disp = some_info.off,
  21064             } },
  21065         },
  21066         .u(0),
  21067     );
  21068 
  21069     self.eflags_inst = inst;
  21070     return .{ .eflags = .e };
  21071 }
  21072 
  21073 fn isErr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
  21074     const pt = self.pt;
  21075     const zcu = pt.zcu;
  21076     const err_ty = eu_ty.errorUnionSet(zcu);
  21077     if (err_ty.errorSetIsEmpty(zcu)) return MCValue{ .immediate = 0 }; // always false
  21078 
  21079     try self.spillEflagsIfOccupied();
  21080 
  21081     const err_off: u31 = @intCast(codegen.errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu));
  21082     switch (eu_mcv) {
  21083         .register => |reg| {
  21084             const eu_lock = self.register_manager.lockReg(reg);
  21085             defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
  21086 
  21087             const tmp_reg = try self.copyToTmpRegister(eu_ty, eu_mcv);
  21088             if (err_off > 0) {
  21089                 try self.genShiftBinOpMir(
  21090                     .{ ._r, .sh },
  21091                     eu_ty,
  21092                     .{ .register = tmp_reg },
  21093                     .u8,
  21094                     .{ .immediate = @as(u6, @intCast(err_off * 8)) },
  21095                 );
  21096             } else {
  21097                 try self.truncateRegister(.anyerror, tmp_reg);
  21098             }
  21099             try self.genBinOpMir(.{ ._, .cmp }, .anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 });
  21100         },
  21101         .load_frame => |frame_addr| try self.genBinOpMir(
  21102             .{ ._, .cmp },
  21103             .anyerror,
  21104             .{ .load_frame = .{
  21105                 .index = frame_addr.index,
  21106                 .off = frame_addr.off + err_off,
  21107             } },
  21108             .{ .immediate = 0 },
  21109         ),
  21110         else => return self.fail("TODO implement isErr for {}", .{eu_mcv}),
  21111     }
  21112 
  21113     if (maybe_inst) |inst| self.eflags_inst = inst;
  21114     return MCValue{ .eflags = .a };
  21115 }
  21116 
  21117 fn isErrPtr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
  21118     const pt = self.pt;
  21119     const zcu = pt.zcu;
  21120     const eu_ty = ptr_ty.childType(zcu);
  21121     const err_ty = eu_ty.errorUnionSet(zcu);
  21122     if (err_ty.errorSetIsEmpty(zcu)) return MCValue{ .immediate = 0 }; // always false
  21123 
  21124     try self.spillEflagsIfOccupied();
  21125 
  21126     const ptr_reg = switch (ptr_mcv) {
  21127         .register => |reg| reg,
  21128         else => try self.copyToTmpRegister(ptr_ty, ptr_mcv),
  21129     };
  21130     const ptr_lock = self.register_manager.lockReg(ptr_reg);
  21131     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  21132 
  21133     const err_off: u31 = @intCast(codegen.errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu));
  21134     try self.asmMemoryImmediate(
  21135         .{ ._, .cmp },
  21136         .{
  21137             .base = .{ .reg = ptr_reg },
  21138             .mod = .{ .rm = .{
  21139                 .size = self.memSize(.anyerror),
  21140                 .disp = err_off,
  21141             } },
  21142         },
  21143         .u(0),
  21144     );
  21145 
  21146     if (maybe_inst) |inst| self.eflags_inst = inst;
  21147     return MCValue{ .eflags = .a };
  21148 }
  21149 
  21150 fn isNonErr(self: *CodeGen, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
  21151     const is_err_res = try self.isErr(inst, eu_ty, eu_mcv);
  21152     switch (is_err_res) {
  21153         .eflags => |cc| {
  21154             assert(cc == .a);
  21155             return MCValue{ .eflags = cc.negate() };
  21156         },
  21157         .immediate => |imm| {
  21158             assert(imm == 0);
  21159             return MCValue{ .immediate = @intFromBool(imm == 0) };
  21160         },
  21161         else => unreachable,
  21162     }
  21163 }
  21164 
  21165 fn isNonErrPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
  21166     const is_err_res = try self.isErrPtr(inst, ptr_ty, ptr_mcv);
  21167     switch (is_err_res) {
  21168         .eflags => |cc| {
  21169             assert(cc == .a);
  21170             return MCValue{ .eflags = cc.negate() };
  21171         },
  21172         .immediate => |imm| {
  21173             assert(imm == 0);
  21174             return MCValue{ .immediate = @intFromBool(imm == 0) };
  21175         },
  21176         else => unreachable,
  21177     }
  21178 }
  21179 
  21180 fn airIsNull(self: *CodeGen, inst: Air.Inst.Index) !void {
  21181     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21182     const operand = try self.resolveInst(un_op);
  21183     const ty = self.typeOf(un_op);
  21184     const result = try self.isNull(inst, ty, operand);
  21185     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21186 }
  21187 
  21188 fn airIsNullPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21189     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21190     const operand = try self.resolveInst(un_op);
  21191     const ty = self.typeOf(un_op);
  21192     const result = try self.isNullPtr(inst, ty, operand);
  21193     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21194 }
  21195 
  21196 fn airIsNonNull(self: *CodeGen, inst: Air.Inst.Index) !void {
  21197     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21198     const operand = try self.resolveInst(un_op);
  21199     const ty = self.typeOf(un_op);
  21200     const result: MCValue = switch (try self.isNull(inst, ty, operand)) {
  21201         .immediate => |imm| .{ .immediate = @intFromBool(imm == 0) },
  21202         .eflags => |cc| .{ .eflags = cc.negate() },
  21203         else => unreachable,
  21204     };
  21205     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21206 }
  21207 
  21208 fn airIsNonNullPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21209     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21210     const operand = try self.resolveInst(un_op);
  21211     const ty = self.typeOf(un_op);
  21212     const result: MCValue = switch (try self.isNullPtr(inst, ty, operand)) {
  21213         .eflags => |cc| .{ .eflags = cc.negate() },
  21214         else => unreachable,
  21215     };
  21216     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21217 }
  21218 
  21219 fn airIsErr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21220     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21221     const operand = try self.resolveInst(un_op);
  21222     const ty = self.typeOf(un_op);
  21223     const result = try self.isErr(inst, ty, operand);
  21224     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21225 }
  21226 
  21227 fn airIsErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21228     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21229     const operand = try self.resolveInst(un_op);
  21230     const ty = self.typeOf(un_op);
  21231     const result = try self.isErrPtr(inst, ty, operand);
  21232     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21233 }
  21234 
  21235 fn airIsNonErr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21236     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21237     const operand = try self.resolveInst(un_op);
  21238     const ty = self.typeOf(un_op);
  21239     const result = try self.isNonErr(inst, ty, operand);
  21240     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21241 }
  21242 
  21243 fn airIsNonErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21244     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  21245     const operand = try self.resolveInst(un_op);
  21246     const ty = self.typeOf(un_op);
  21247     const result = try self.isNonErrPtr(inst, ty, operand);
  21248     return self.finishAir(inst, result, .{ un_op, .none, .none });
  21249 }
  21250 
  21251 fn airLoop(self: *CodeGen, inst: Air.Inst.Index) !void {
  21252     // A loop is a setup to be able to jump back to the beginning.
  21253     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  21254     const loop = self.air.extraData(Air.Block, ty_pl.payload);
  21255     const body: []const Air.Inst.Index = @ptrCast(self.air.extra[loop.end..][0..loop.data.body_len]);
  21256 
  21257     self.scope_generation += 1;
  21258     const state = try self.saveState();
  21259 
  21260     try self.loops.putNoClobber(self.gpa, inst, .{
  21261         .state = state,
  21262         .target = @intCast(self.mir_instructions.len),
  21263     });
  21264     defer assert(self.loops.remove(inst));
  21265 
  21266     try self.genBodyBlock(body);
  21267 }
  21268 
  21269 fn airBlock(self: *CodeGen, inst: Air.Inst.Index) !void {
  21270     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  21271     const extra = self.air.extraData(Air.Block, ty_pl.payload);
  21272     try self.asmPseudo(.pseudo_dbg_enter_block_none);
  21273     try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]));
  21274     try self.asmPseudo(.pseudo_dbg_leave_block_none);
  21275 }
  21276 
  21277 fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index) !void {
  21278     // A block is a setup to be able to jump to the end.
  21279     const inst_tracking_i = self.inst_tracking.count();
  21280     self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(.unreach));
  21281 
  21282     self.scope_generation += 1;
  21283     try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() });
  21284     const liveness = self.liveness.getBlock(inst);
  21285 
  21286     try self.genBody(body);
  21287 
  21288     var block_data = self.blocks.fetchRemove(inst).?;
  21289     defer block_data.value.deinit(self.gpa);
  21290     if (block_data.value.relocs.items.len > 0) {
  21291         try self.restoreState(block_data.value.state, liveness.deaths, .{
  21292             .emit_instructions = false,
  21293             .update_tracking = true,
  21294             .resurrect = true,
  21295             .close_scope = true,
  21296         });
  21297         for (block_data.value.relocs.items) |reloc| self.performReloc(reloc);
  21298     }
  21299 
  21300     if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i);
  21301     const tracking = &self.inst_tracking.values()[inst_tracking_i];
  21302     if (self.liveness.isUnused(inst)) try tracking.die(self, inst);
  21303     self.getValueIfFree(tracking.short, inst);
  21304 }
  21305 
  /// Lowers the prongs of the switch instruction `inst`, whose operand has already been
  /// resolved to `condition`.
  ///
  /// Two strategies are visible here:
  ///  * A jump table, attempted only when `self.mod.pic` is false and the object format is
  ///    ELF, the prongs list between 4 and 256 items/ranges in total, `max - min` fits in a
  ///    `u10` (at most 1024 entries), and at least a quarter of the entries are real items.
  ///  * Otherwise one compare-and-jump sequence per item or range, tried case by case.
  ///
  /// `condition_dies` allows the jump table path to modify `condition` in place instead of
  /// copying it first. `is_loop` registers the table in `self.loop_switches` while the prongs
  /// are being lowered, which is what `airSwitchDispatch` looks up.
  21306 fn lowerSwitchBr(
  21307     self: *CodeGen,
  21308     inst: Air.Inst.Index,
  21309     switch_br: Air.UnwrappedSwitch,
  21310     condition: MCValue,
  21311     condition_dies: bool,
  21312     is_loop: bool,
  21313 ) !void {
  21314     const zcu = self.pt.zcu;
  21315     const condition_ty = self.typeOf(switch_br.operand);
  21316 
            // Sizes the stack buffer for the three kinds of scratch allocations made through
            // `allocator` below (liveness deaths, bigint limbs, relocs); larger requests fall
            // back to `self.gpa`.
  21317     const ExpectedContents = extern struct {
  21318         liveness_deaths: [1 << 8 | 1]Air.Inst.Index,
  21319         bigint_limbs: [std.math.big.int.calcTwosCompLimbCount(1 << 8)]std.math.big.Limb,
  21320         relocs: [1 << 6]Mir.Inst.Index,
  21321     };
  21322     var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
  21323         std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
  21324     const allocator = stack.get();
  21325 
            // Snapshot taken before any prong is lowered; every prong body below is followed by
            // a `restoreState` back to it.
  21326     self.scope_generation += 1;
  21327     const state = try self.saveState();
  21328 
            // One death list per case plus a final one, which the else prong reads through
            // `liveness.deaths.len - 1`.
  21329     const liveness = try self.liveness.getSwitchBr(allocator, inst, switch_br.cases_len + 1);
  21330     defer allocator.free(liveness.deaths);
  21331 
            // Jump table attempt. Every `break :table` abandons it and continues with the
            // compare-and-jump lowering after this block; a successful attempt returns.
  21332     if (!self.mod.pic and self.target.ofmt == .elf) table: {
                // Count the items and ranges and find the smallest and largest value mentioned
                // by any prong.
  21333         var prong_items: u32 = 0;
  21334         var min: ?Value = null;
  21335         var max: ?Value = null;
  21336         {
  21337             var cases_it = switch_br.iterateCases();
  21338             while (cases_it.next()) |case| {
  21339                 prong_items += @intCast(case.items.len + case.ranges.len);
  21340                 for (case.items) |item| {
  21341                     const val = Value.fromInterned(item.toInterned().?);
  21342                     if (min == null or val.compareHetero(.lt, min.?, zcu)) min = val;
  21343                     if (max == null or val.compareHetero(.gt, max.?, zcu)) max = val;
  21344                 }
  21345                 for (case.ranges) |range| {
  21346                     const low = Value.fromInterned(range[0].toInterned().?);
  21347                     if (min == null or low.compareHetero(.lt, min.?, zcu)) min = low;
  21348                     const high = Value.fromInterned(range[1].toInterned().?);
  21349                     if (max == null or high.compareHetero(.gt, max.?, zcu)) max = high;
  21350                 }
  21351             }
  21352         }
  21353         // This condition also triggers for switches with no non-else prongs and switches on bool.
  21354         if (prong_items < 1 << 2 or prong_items > 1 << 8) break :table;
  21355 
                // `limbs` is scratch storage reused by every bigint subtraction in this block.
  21356         var min_space: Value.BigIntSpace = undefined;
  21357         const min_bigint = min.?.toBigInt(&min_space, zcu);
  21358         var max_space: Value.BigIntSpace = undefined;
  21359         const max_bigint = max.?.toBigInt(&max_space, zcu);
  21360         const limbs = try allocator.alloc(
  21361             std.math.big.Limb,
  21362             @max(min_bigint.limbs.len, max_bigint.limbs.len) + 1,
  21363         );
  21364         defer allocator.free(limbs);
                // table_len = max - min + 1. If `max - min` does not fit in a `u10` the table
                // attempt is abandoned.
  21365         const table_len = table_len: {
  21366             var table_len_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
  21367             table_len_bigint.sub(max_bigint, min_bigint);
  21368             assert(table_len_bigint.positive); // min <= max
  21369             break :table_len @as(u11, table_len_bigint.toConst().to(u10) catch break :table) + 1; // no more than a 1024 entry table
  21370         };
  21371         assert(prong_items <= table_len); // each prong item introduces at least one unique integer to the range
  21372         if (prong_items < table_len >> 2) break :table; // no more than 75% waste
  21373 
                // The index is computed destructively, so work on `condition` itself only when
                // it dies here and is modifiable; otherwise work on a temporary copy.
  21374         const condition_index = if (condition_dies and condition.isModifiable()) condition else condition_index: {
  21375             const condition_index = try self.allocTempRegOrMem(condition_ty, true);
  21376             try self.genCopy(condition_ty, condition_index, condition, .{});
  21377             break :condition_index condition_index;
  21378         };
  21379         try self.spillEflagsIfOccupied();
                // Rebase the value so that `min` maps to table entry 0; skipped when `min` is zero.
  21380         if (min.?.orderAgainstZero(zcu).compare(.neq)) try self.genBinOpMir(
  21381             .{ ._, .sub },
  21382             condition_ty,
  21383             condition_index,
  21384             .{ .air_ref = Air.internedToRef(min.?.toIntern()) },
  21385         );
                // With an else prong, compare the index against the last valid entry and emit a
                // `.a` jump that is patched to the else body later. Without one, `else_reloc`
                // stays undefined; it is only read under the same `else_body_len > 0` check.
  21386         const else_reloc = if (switch_br.else_body_len > 0) else_reloc: {
  21387             try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table_len - 1 });
  21388             break :else_reloc try self.asmJccReloc(.a, undefined);
  21389         } else undefined;
                // This switch's entries start at the current end of `self.mir_table`.
  21390         const table_start: u31 = @intCast(self.mir_table.items.len);
  21391         {
                    // The index has to be in a register to be used as the index of the memory
                    // operand; the register is locked until the jump has been emitted.
  21392             const condition_index_reg = if (condition_index.isRegister())
  21393                 condition_index.getReg().?
  21394             else
  21395                 try self.copyToTmpRegister(.usize, condition_index);
  21396             const condition_index_lock = self.register_manager.lockReg(condition_index_reg);
  21397             defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock);
                    // NOTE(review): presumably clears the bits above the width of `condition_ty`
                    // before the register is used at pointer width - confirm in `truncateRegister`.
  21398             try self.truncateRegister(condition_ty, condition_index_reg);
  21399             const ptr_size = @divExact(self.target.ptrBitWidth(), 8);
                    // Indirect jump through a `.table`-based memory operand: the index register
                    // scaled by the pointer size, displaced by this switch's byte offset.
  21400             try self.asmMemory(.{ ._, .jmp }, .{
  21401                 .base = .table,
  21402                 .mod = .{ .rm = .{
  21403                     .size = .ptr,
  21404                     .index = registerAlias(condition_index_reg, ptr_size),
  21405                     .scale = .fromFactor(@intCast(ptr_size)),
  21406                     .disp = table_start * ptr_size,
  21407                 } },
  21408             });
  21409         }
                // Entries start out as the marker value 0 and are overwritten with the MIR index
                // at which a prong body begins. The assert guarantees that at least one MIR
                // instruction already exists, so no body can begin at index 0 and be mistaken
                // for the marker.
  21410         const else_reloc_marker: u32 = 0;
  21411         assert(self.mir_instructions.len > else_reloc_marker);
  21412         try self.mir_table.appendNTimes(self.gpa, else_reloc_marker, table_len);
                // For a looping switch, publish the table so that dispatches lowered inside the
                // prong bodies can find it. Else-jumps emitted before the else body exists are
                // collected in `forward`.
  21413         if (is_loop) try self.loop_switches.putNoClobber(self.gpa, inst, .{
  21414             .start = table_start,
  21415             .len = table_len,
  21416             .min = min.?,
  21417             .else_relocs = if (switch_br.else_body_len > 0) .{ .forward = .empty } else .@"unreachable",
  21418         });
                // Unregister on every exit path; only the `.forward` list owns memory.
  21419         defer if (is_loop) {
  21420             var loop_switch_data = self.loop_switches.fetchRemove(inst).?.value;
  21421             switch (loop_switch_data.else_relocs) {
  21422                 .@"unreachable", .backward => {},
  21423                 .forward => |*else_relocs| else_relocs.deinit(self.gpa),
  21424             }
  21425         };
  21426         var cases_it = switch_br.iterateCases();
  21427         while (cases_it.next()) |case| {
  21428             {
                        // Point every entry covered by this case at the MIR index where its
                        // body is about to be generated. The slice is re-taken per case rather
                        // than held across body generation.
  21429                 const table = self.mir_table.items[table_start..][0..table_len];
  21430                 for (case.items) |item| {
  21431                     const val = Value.fromInterned(item.toInterned().?);
  21432                     var val_space: Value.BigIntSpace = undefined;
  21433                     const val_bigint = val.toBigInt(&val_space, zcu);
  21434                     var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
  21435                     index_bigint.sub(val_bigint, min_bigint);
  21436                     table[index_bigint.toConst().to(u10) catch unreachable] = @intCast(self.mir_instructions.len);
  21437                 }
  21438                 for (case.ranges) |range| {
  21439                     var low_space: Value.BigIntSpace = undefined;
  21440                     const low_bigint = Value.fromInterned(range[0].toInterned().?).toBigInt(&low_space, zcu);
  21441                     var high_space: Value.BigIntSpace = undefined;
  21442                     const high_bigint = Value.fromInterned(range[1].toInterned().?).toBigInt(&high_space, zcu);
  21443                     var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
  21444                     index_bigint.sub(low_bigint, min_bigint);
  21445                     const start = index_bigint.toConst().to(u10) catch unreachable;
  21446                     index_bigint.sub(high_bigint, min_bigint);
                            // `end` is exclusive, hence the widening to `u11` before adding one.
  21447                     const end = @as(u11, index_bigint.toConst().to(u10) catch unreachable) + 1;
  21448                     @memset(table[start..end], @intCast(self.mir_instructions.len));
  21449                 }
  21450             }
  21451 
  21452             for (liveness.deaths[case.idx]) |operand| try self.processDeath(operand);
  21453 
  21454             try self.genBodyBlock(case.body);
  21455             try self.restoreState(state, &.{}, .{
  21456                 .emit_instructions = false,
  21457                 .update_tracking = true,
  21458                 .resurrect = true,
  21459                 .close_scope = true,
  21460             });
  21461         }
  21462         if (switch_br.else_body_len > 0) {
  21463             const else_body = cases_it.elseBody();
  21464 
  21465             const else_deaths = liveness.deaths.len - 1;
  21466             for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand);
  21467 
                    // The else body starts at the next MIR instruction: patch the range check
                    // emitted above and, for a looping switch, every else-jump collected so far.
                    // From here on the else target is known, so the entry switches to `.backward`.
  21468             self.performReloc(else_reloc);
  21469             if (is_loop) {
  21470                 const loop_switch_data = self.loop_switches.getPtr(inst).?;
  21471                 for (loop_switch_data.else_relocs.forward.items) |reloc| self.performReloc(reloc);
  21472                 loop_switch_data.else_relocs.forward.deinit(self.gpa);
  21473                 loop_switch_data.else_relocs = .{ .backward = @intCast(self.mir_instructions.len) };
  21474             }
                    // Entries that no case claimed still hold the marker; send them to the else body.
  21475             for (self.mir_table.items[table_start..][0..table_len]) |*entry| if (entry.* == else_reloc_marker) {
  21476                 entry.* = @intCast(self.mir_instructions.len);
  21477             };
  21478 
  21479             try self.genBodyBlock(else_body);
  21480             try self.restoreState(state, &.{}, .{
  21481                 .emit_instructions = false,
  21482                 .update_tracking = true,
  21483                 .resurrect = true,
  21484                 .close_scope = true,
  21485             });
  21486         }
  21487         return;
  21488     }
  21489 
            // Compare-and-jump lowering. Range bounds are compared signed or unsigned according
            // to the integer type of the condition; anything that is not an ABI integer is
            // treated as unsigned.
  21490     const signedness = if (condition_ty.isAbiInt(zcu)) condition_ty.intInfo(zcu).signedness else .unsigned;
  21491     var cases_it = switch_br.iterateCases();
  21492     while (cases_it.next()) |case| {
                // One "matched" jump per item and per range, all patched to the case body below.
  21493         var relocs = try allocator.alloc(Mir.Inst.Index, case.items.len + case.ranges.len);
  21494         defer allocator.free(relocs);
  21495 
  21496         try self.spillEflagsIfOccupied();
  21497         for (case.items, relocs[0..case.items.len]) |item, *reloc| {
  21498             const item_mcv = try self.resolveInst(item);
                    // When the condition is held as a condition code (`.eflags`) no compare is
                    // emitted: the item immediate 1 selects that condition code and 0 its
                    // negation. Any other condition is compared against the item and matched
                    // on equality.
  21499             const cc: Condition = switch (condition) {
  21500                 .eflags => |cc| switch (item_mcv.immediate) {
  21501                     0 => cc.negate(),
  21502                     1 => cc,
  21503                     else => unreachable,
  21504                 },
  21505                 else => cc: {
  21506                     try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv);
  21507                     break :cc .e;
  21508                 },
  21509             };
  21510             reloc.* = try self.asmJccReloc(cc, undefined);
  21511         }
  21512 
  21513         for (case.ranges, relocs[case.items.len..]) |range, *reloc| {
  21514             const min_mcv = try self.resolveInst(range[0]);
  21515             const max_mcv = try self.resolveInst(range[1]);
  21516             // `null` means always false.
  21517             const lt_min: ?Condition = switch (condition) {
  21518                 .eflags => |cc| switch (min_mcv.immediate) {
  21519                     0 => null, // condition never <0
  21520                     1 => cc.negate(),
  21521                     else => unreachable,
  21522                 },
  21523                 else => cc: {
  21524                     try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, min_mcv);
  21525                     break :cc switch (signedness) {
  21526                         .unsigned => .b,
  21527                         .signed => .l,
  21528                     };
  21529                 },
  21530             };
                    // Below the lower bound: jump past the upper-bound check. This jump is
                    // patched at the end of the loop iteration.
  21531             const lt_min_reloc = if (lt_min) |cc| r: {
  21532                 break :r try self.asmJccReloc(cc, undefined);
  21533             } else null;
  21534             // `null` means always true.
  21535             const lte_max: ?Condition = switch (condition) {
  21536                 .eflags => |cc| switch (max_mcv.immediate) {
  21537                     0 => cc.negate(),
  21538                     1 => null, // condition always >=1
  21539                     else => unreachable,
  21540                 },
  21541                 else => cc: {
  21542                     try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, max_mcv);
  21543                     break :cc switch (signedness) {
  21544                         .unsigned => .be,
  21545                         .signed => .le,
  21546                     };
  21547                 },
  21548             };
  21549             // "Success" case is in `reloc`....
  21550             if (lte_max) |cc| {
  21551                 reloc.* = try self.asmJccReloc(cc, undefined);
  21552             } else {
  21553                 reloc.* = try self.asmJmpReloc(undefined);
  21554             }
  21555             // ...and "fail" case falls through to next checks.
  21556             if (lt_min_reloc) |r| self.performReloc(r);
  21557         }
  21558 
  21559         // The jump to skip this case if the conditions all failed.
  21560         const skip_case_reloc = try self.asmJmpReloc(undefined);
  21561 
  21562         for (liveness.deaths[case.idx]) |operand| try self.processDeath(operand);
  21563 
  21564         // Relocate all success cases to the body we're about to generate.
  21565         for (relocs) |reloc| self.performReloc(reloc);
  21566         try self.genBodyBlock(case.body);
  21567         try self.restoreState(state, &.{}, .{
  21568             .emit_instructions = false,
  21569             .update_tracking = true,
  21570             .resurrect = true,
  21571             .close_scope = true,
  21572         });
  21573 
  21574         // Relocate the "skip" branch to fall through to the next case.
  21575         self.performReloc(skip_case_reloc);
  21576     }
            // Reached by falling through every case's skip jump, so the else body needs no
            // jump of its own.
  21577     if (switch_br.else_body_len > 0) {
  21578         const else_body = cases_it.elseBody();
  21579 
  21580         const else_deaths = liveness.deaths.len - 1;
  21581         for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand);
  21582 
  21583         try self.genBodyBlock(else_body);
  21584         try self.restoreState(state, &.{}, .{
  21585             .emit_instructions = false,
  21586             .update_tracking = true,
  21587             .resurrect = true,
  21588             .close_scope = true,
  21589         });
  21590     }
  21591 }
  21592 
  /// Lowers a non-looping switch: resolves the operand, records its death up front when
  /// liveness reports that it dies at this instruction, then lowers every prong through
  /// `lowerSwitchBr` with `is_loop` set to false.
  21593 fn airSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21594     const unwrapped = self.air.unwrapSwitch(inst);
  21595     const operand_mcv = try self.resolveInst(unwrapped.operand);
  21596 
  21597     // Handle the operand's death before any prong is lowered, because whether it is
  21598     // still live affects whether the prongs need to spill it.
  21599     const operand_dies = self.liveness.operandDies(inst, 0);
  21600     if (operand_dies) if (unwrapped.operand.toIndex()) |operand_inst| try self.processDeath(operand_inst);
  21601 
  21602     // The operand has been dealt with above, so lowering the prongs is all that remains.
  21603     try self.lowerSwitchBr(inst, unwrapped, operand_mcv, operand_dies, false);
  21604 }
  21608 
  /// Lowers a looping switch, i.e. one whose prongs may dispatch back into it.
  ///
  /// The operand is first materialized into a location tracked as the value of `inst`
  /// itself, either by reusing the operand's location or by copying it into a fresh one.
  /// The loop is then registered in `self.loops` with the saved state and the index of the
  /// next MIR instruction as its target, and the prongs are lowered through `lowerSwitchBr`
  /// with `is_loop` set to true.
  21609 fn airLoopSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21610     const switch_br = self.air.unwrapSwitch(inst);
  21611     const condition = try self.resolveInst(switch_br.operand);
  21612 
            // Take over the operand's location when it is modifiable and can be reused;
            // otherwise allocate a location for `inst` and copy the operand into it.
  21613     const mat_cond = if (condition.isModifiable() and
  21614         self.reuseOperand(inst, switch_br.operand, 0, condition))
  21615         condition
  21616     else mat_cond: {
  21617         const mat_cond = try self.allocRegOrMem(inst, true);
  21618         try self.genCopy(self.typeOf(switch_br.operand), mat_cond, condition, .{});
  21619         break :mat_cond mat_cond;
  21620     };
  21621     self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(mat_cond));
  21622 
  21623     // If the condition dies here in this switch instruction, process
  21624     // that death now instead of later as this has an effect on
  21625     // whether it needs to be spilled in the branches
  21626     if (self.liveness.operandDies(inst, 0)) {
  21627         if (switch_br.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
  21628     }
  21629 
  21630     self.scope_generation += 1;
  21631     const state = try self.saveState();
  21632 
            // `target` is the MIR index of the first instruction emitted after this point.
  21633     try self.loops.putNoClobber(self.gpa, inst, .{
  21634         .state = state,
  21635         .target = @intCast(self.mir_instructions.len),
  21636     });
            // `remove` is an argument to a function call, so it runs on every exit path; the
            // assert only checks that the entry was still registered.
  21637     defer assert(self.loops.remove(inst));
  21638 
  21639     // Stop tracking block result without forgetting tracking info
  21640     try self.freeValue(mat_cond);
  21641 
            // `mat_cond` is passed with `condition_dies = true`, so the jump table path may
            // modify it in place.
  21642     try self.lowerSwitchBr(inst, switch_br, mat_cond, true, true);
  21643 
  21644     try self.processDeath(inst);
  21645 }
  21646 
  /// Lowers a dispatch back into the looping switch `br.block_inst`, with `br.operand` as
  /// the new condition.
  ///
  /// If the switch is registered in `self.loop_switches` (which `lowerSwitchBr` does only on
  /// its jump table path), the table index computation and the indirect jump are emitted
  /// again right here. Otherwise the operand is moved into the location tracked for the
  /// switch and a jump to `loop_data.target` is emitted.
  21647 fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void {
  21648     const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br;
  21649 
  21650     const block_ty = self.typeOfIndex(br.block_inst);
  21651     const loop_data = self.loops.getPtr(br.block_inst).?;
  21652     if (self.loop_switches.getPtr(br.block_inst)) |table| {
  21653         // Only query whether the operand dies here; no `processDeath` call is made on this path.
                // NOTE(review): presumably the `finishAir` call at the end accounts for the
                // operand's death - confirm.
  21654         const condition_dies = self.liveness.operandDies(inst, 0);
  21655 
  21656         try self.restoreState(loop_data.state, &.{}, .{
  21657             .emit_instructions = true,
  21658             .update_tracking = false,
  21659             .resurrect = false,
  21660             .close_scope = false,
  21661         });
  21662 
                // From here on this mirrors the index computation in `lowerSwitchBr`, using the
                // `min`, `len` and `start` recorded for the table.
  21663         const condition_ty = self.typeOf(br.operand);
  21664         const condition = try self.resolveInst(br.operand);
  21665         const condition_index = if (condition_dies and condition.isModifiable()) condition else condition_index: {
  21666             const condition_index = try self.allocTempRegOrMem(condition_ty, true);
  21667             try self.genCopy(condition_ty, condition_index, condition, .{});
  21668             break :condition_index condition_index;
  21669         };
  21670         try self.spillEflagsIfOccupied();
  21671         if (table.min.orderAgainstZero(self.pt.zcu).compare(.neq)) try self.genBinOpMir(
  21672             .{ ._, .sub },
  21673             condition_ty,
  21674             condition_index,
  21675             .{ .air_ref = Air.internedToRef(table.min.toIntern()) },
  21676         );
                // Range check against the else prong:
                //  * `.unreachable`: the switch has no else prong, no check is emitted.
                //  * `.forward`: the else body has not been generated yet, so the jump is
                //    recorded and patched by `lowerSwitchBr` once it is.
                //  * `.backward`: the else body already exists, so its MIR index is used directly.
  21677         switch (table.else_relocs) {
  21678             .@"unreachable" => {},
  21679             .forward => |*else_relocs| {
  21680                 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table.len - 1 });
  21681                 try else_relocs.append(self.gpa, try self.asmJccReloc(.a, undefined));
  21682             },
  21683             .backward => |else_reloc| {
  21684                 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table.len - 1 });
  21685                 _ = try self.asmJccReloc(.a, else_reloc);
  21686             },
  21687         }
  21688         {
                    // The index must be in a register to serve as the index of the memory
                    // operand; the register is locked until the jump has been emitted.
  21689             const condition_index_reg = if (condition_index.isRegister())
  21690                 condition_index.getReg().?
  21691             else
  21692                 try self.copyToTmpRegister(.usize, condition_index);
  21693             const condition_index_lock = self.register_manager.lockReg(condition_index_reg);
  21694             defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock);
  21695             try self.truncateRegister(condition_ty, condition_index_reg);
  21696             const ptr_size = @divExact(self.target.ptrBitWidth(), 8);
  21697             try self.asmMemory(.{ ._, .jmp }, .{
  21698                 .base = .table,
  21699                 .mod = .{ .rm = .{
  21700                     .size = .ptr,
  21701                     .index = registerAlias(condition_index_reg, ptr_size),
  21702                     .scale = .fromFactor(@intCast(ptr_size)),
  21703                     .disp = @intCast(table.start * ptr_size),
  21704                 } },
  21705             });
  21706         }
  21707 
  21708         return self.finishAir(inst, .none, .{ br.operand, .none, .none });
  21709     }
  21710 
            // No jump table: move the operand into the location tracked for the switch, either
            // by materializing a reused operand into it or by copying.
  21711     const block_tracking = self.inst_tracking.getPtr(br.block_inst).?;
  21712     done: {
  21713         try self.getValue(block_tracking.short, null);
  21714         const src_mcv = try self.resolveInst(br.operand);
  21715 
  21716         if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) {
  21717             try self.getValue(block_tracking.short, br.block_inst);
  21718             // .long = .none to avoid merging operand and block result stack frames.
  21719             const current_tracking: InstTracking = .{ .long = .none, .short = src_mcv };
  21720             try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*);
  21721             for (current_tracking.getRegs()) |src_reg| self.register_manager.freeReg(src_reg);
  21722             break :done;
  21723         }
  21724 
  21725         try self.getValue(block_tracking.short, br.block_inst);
  21726         const dst_mcv = block_tracking.short;
  21727         try self.genCopy(block_ty, dst_mcv, try self.resolveInst(br.operand), .{});
  21728         break :done;
  21729     }
  21730 
  21731     // Process operand death so that it is properly accounted for in the State below.
  21732     if (self.liveness.operandDies(inst, 0)) {
  21733         if (br.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
  21734     }
  21735 
  21736     try self.restoreState(loop_data.state, &.{}, .{
  21737         .emit_instructions = true,
  21738         .update_tracking = false,
  21739         .resurrect = false,
  21740         .close_scope = false,
  21741     });
  21742 
  21743     // Jump straight to `loop_data.target`, which is already a concrete MIR index,
  21744     // so the returned instruction index is discarded and nothing is patched later.
  21745     _ = try self.asmJmpReloc(loop_data.target);
  21746 
  21747     // Stop tracking block result without forgetting tracking info
  21748     try self.freeValue(block_tracking.short);
  21749 }
  21750 
  /// Patches the jump at MIR index `reloc` so that it targets the next MIR instruction to
  /// be emitted. `reloc` must be a `.j` or `.jmp` instruction, or one of the two pseudo
  /// conditional jumps listed below; anything else is unreachable.
  21751 fn performReloc(self: *CodeGen, reloc: Mir.Inst.Index) void {
  21752     const is_patchable_jump = switch (self.mir_instructions.items(.tag)[reloc]) {
  21753         .j, .jmp => true,
  21754         .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) {
  21755             .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => true,
  21756             else => false,
  21757         },
  21758         else => false,
  21759     };
  21760     assert(is_patchable_jump);
  21761     self.mir_instructions.items(.data)[reloc].inst.inst = @intCast(self.mir_instructions.len);
  21762 }
  21763 
  /// Lowers a `br` to the block `br.block_inst`, carrying `br.operand` as the block result.
  ///
  /// The first `br` lowered for a block (no relocs recorded yet) decides where the block
  /// result lives and records the state through `saveRetroactiveState`. Every later `br`
  /// moves its operand into that location and restores to the recorded state. Each `br`
  /// ends with a jump whose target is left undefined and whose index is appended to
  /// `block_data.relocs`.
  21764 fn airBr(self: *CodeGen, inst: Air.Inst.Index) !void {
  21765     const zcu = self.pt.zcu;
  21766     const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br;
  21767 
  21768     const block_ty = self.typeOfIndex(br.block_inst);
            // No result is produced when the block type has no runtime bits or liveness marks
            // the block as unused.
  21769     const block_unused =
  21770         !block_ty.hasRuntimeBitsIgnoreComptime(zcu) or self.liveness.isUnused(br.block_inst);
  21771     const block_tracking = self.inst_tracking.getPtr(br.block_inst).?;
  21772     const block_data = self.blocks.getPtr(br.block_inst).?;
  21773     const first_br = block_data.relocs.items.len == 0;
  21774     const block_result = result: {
  21775         if (block_unused) break :result .none;
  21776 
  21777         if (!first_br) try self.getValue(block_tracking.short, null);
  21778         const src_mcv = try self.resolveInst(br.operand);
  21779 
  21780         if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) {
                    // First `br`: the operand's own location simply becomes the block result.
  21781             if (first_br) break :result src_mcv;
  21782 
                    // Later `br`: the result location is already fixed, so the reused operand
                    // is materialized into it and the operand's registers are released.
  21783             try self.getValue(block_tracking.short, br.block_inst);
  21784             // .long = .none to avoid merging operand and block result stack frames.
  21785             const current_tracking: InstTracking = .{ .long = .none, .short = src_mcv };
  21786             try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*);
  21787             for (current_tracking.getRegs()) |src_reg| self.register_manager.freeReg(src_reg);
  21788             break :result block_tracking.short;
  21789         }
  21790 
                // The operand could not be reused: copy it into a fresh location (first `br`)
                // or into the location chosen earlier.
  21791         const dst_mcv = if (first_br) try self.allocRegOrMem(br.block_inst, true) else dst: {
  21792             try self.getValue(block_tracking.short, br.block_inst);
  21793             break :dst block_tracking.short;
  21794         };
  21795         try self.genCopy(block_ty, dst_mcv, try self.resolveInst(br.operand), .{});
  21796         break :result dst_mcv;
  21797     };
  21798 
  21799     // Process operand death so that it is properly accounted for in the State below.
  21800     if (self.liveness.operandDies(inst, 0)) {
  21801         if (br.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
  21802     }
  21803 
            // The first `br` defines the block's tracking and recorded state; later ones emit
            // instructions to get back to that state without touching the tracking.
  21804     if (first_br) {
  21805         block_tracking.* = .init(block_result);
  21806         try self.saveRetroactiveState(&block_data.state);
  21807     } else try self.restoreState(block_data.state, &.{}, .{
  21808         .emit_instructions = true,
  21809         .update_tracking = false,
  21810         .resurrect = false,
  21811         .close_scope = false,
  21812     });
  21813 
  21814     // Emit a jump with a relocation. It will be patched up after the block ends.
  21815     // Leave the jump offset undefined
  21816     const jmp_reloc = try self.asmJmpReloc(undefined);
  21817     try block_data.relocs.append(self.gpa, jmp_reloc);
  21818 
  21819     // Stop tracking block result without forgetting tracking info
  21820     try self.freeValue(block_tracking.short);
  21821 }
  21822 
  /// Lowers `repeat`: emits the instructions needed to return to the state recorded in
  /// `self.loops` for the targeted loop (closing the scope, leaving tracking untouched),
  /// then jumps to the MIR index recorded as that loop's target.
  21823 fn airRepeat(self: *CodeGen, inst: Air.Inst.Index) !void {
  21824     const data = self.air.instructions.items(.data)[@intFromEnum(inst)];
  21825     const loop = self.loops.get(data.repeat.loop_inst).?;
  21826     try self.restoreState(loop.state, &.{}, .{
  21827         .emit_instructions = true,
  21828         .update_tracking = false,
  21829         .resurrect = false,
  21830         .close_scope = true,
  21831     });
  21832     _ = try self.asmJmpReloc(loop.target);
  21833 }
  21834 
  21835 fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
  21836     const pt = self.pt;
  21837     const zcu = pt.zcu;
  21838     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  21839     const extra = self.air.extraData(Air.Asm, ty_pl.payload);
  21840     const clobbers_len: u31 = @truncate(extra.data.flags);
  21841     var extra_i: usize = extra.end;
  21842     const outputs: []const Air.Inst.Ref =
  21843         @ptrCast(self.air.extra[extra_i..][0..extra.data.outputs_len]);
  21844     extra_i += outputs.len;
  21845     const inputs: []const Air.Inst.Ref = @ptrCast(self.air.extra[extra_i..][0..extra.data.inputs_len]);
  21846     extra_i += inputs.len;
  21847 
  21848     var result: MCValue = .none;
  21849     var args: std.ArrayList(MCValue) = .init(self.gpa);
  21850     try args.ensureTotalCapacity(outputs.len + inputs.len);
  21851     defer {
  21852         for (args.items) |arg| if (arg.getReg()) |reg| self.register_manager.unlockReg(.{
  21853             .tracked_index = RegisterManager.indexOfRegIntoTracked(reg) orelse continue,
  21854         });
  21855         args.deinit();
  21856     }
  21857     var arg_map: std.StringHashMap(u8) = .init(self.gpa);
  21858     try arg_map.ensureTotalCapacity(@intCast(outputs.len + inputs.len));
  21859     defer arg_map.deinit();
  21860 
  21861     var outputs_extra_i = extra_i;
  21862     for (outputs) |output| {
  21863         const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
  21864         const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
  21865         const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
  21866         // This equation accounts for the fact that even if we have exactly 4 bytes
  21867         // for the string, we still use the next u32 for the null terminator.
  21868         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
  21869 
  21870         const maybe_inst = switch (output) {
  21871             .none => inst,
  21872             else => null,
  21873         };
  21874         const ty = switch (output) {
  21875             .none => self.typeOfIndex(inst),
  21876             else => self.typeOf(output).childType(zcu),
  21877         };
  21878         const is_read = switch (constraint[0]) {
  21879             '=' => false,
  21880             '+' => read: {
  21881                 if (output == .none) return self.fail(
  21882                     "read-write constraint unsupported for asm result: '{s}'",
  21883                     .{constraint},
  21884                 );
  21885                 break :read true;
  21886             },
  21887             else => return self.fail("invalid constraint: '{s}'", .{constraint}),
  21888         };
  21889         const is_early_clobber = constraint[1] == '&';
  21890         const rest = constraint[@as(usize, 1) + @intFromBool(is_early_clobber) ..];
  21891         const arg_mcv: MCValue = arg_mcv: {
  21892             const arg_maybe_reg: ?Register = if (std.mem.eql(u8, rest, "r") or
  21893                 std.mem.eql(u8, rest, "f") or std.mem.eql(u8, rest, "x"))
  21894                 registerAlias(
  21895                     self.register_manager.tryAllocReg(maybe_inst, switch (rest[0]) {
  21896                         'r' => abi.RegisterClass.gp,
  21897                         'f' => abi.RegisterClass.x87,
  21898                         'x' => abi.RegisterClass.sse,
  21899                         else => unreachable,
  21900                     }) orelse return self.fail("ran out of registers lowering inline asm", .{}),
  21901                     @intCast(ty.abiSize(zcu)),
  21902                 )
  21903             else if (std.mem.eql(u8, rest, "m"))
  21904                 if (output != .none) null else return self.fail(
  21905                     "memory constraint unsupported for asm result: '{s}'",
  21906                     .{constraint},
  21907                 )
  21908             else if (std.mem.eql(u8, rest, "g") or
  21909                 std.mem.eql(u8, rest, "rm") or std.mem.eql(u8, rest, "mr") or
  21910                 std.mem.eql(u8, rest, "r,m") or std.mem.eql(u8, rest, "m,r"))
  21911                 self.register_manager.tryAllocReg(maybe_inst, abi.RegisterClass.gp) orelse
  21912                     if (output != .none)
  21913                     null
  21914                 else
  21915                     return self.fail("ran out of registers lowering inline asm", .{})
  21916             else if (std.mem.startsWith(u8, rest, "{") and std.mem.endsWith(u8, rest, "}"))
  21917                 parseRegName(rest["{".len .. rest.len - "}".len]) orelse
  21918                     return self.fail("invalid register constraint: '{s}'", .{constraint})
  21919             else if (rest.len == 1 and std.ascii.isDigit(rest[0])) {
  21920                 const index = std.fmt.charToDigit(rest[0], 10) catch unreachable;
  21921                 if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{
  21922                     constraint,
  21923                 });
  21924                 break :arg_mcv args.items[index];
  21925             } else return self.fail("invalid constraint: '{s}'", .{constraint});
  21926             break :arg_mcv if (arg_maybe_reg) |reg| .{ .register = reg } else arg: {
  21927                 const ptr_mcv = try self.resolveInst(output);
  21928                 switch (ptr_mcv) {
  21929                     .immediate => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_|
  21930                         break :arg ptr_mcv.deref(),
  21931                     .register, .register_offset, .lea_frame => break :arg ptr_mcv.deref(),
  21932                     else => {},
  21933                 }
  21934                 break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(.usize, ptr_mcv) } };
  21935             };
  21936         };
  21937         if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| {
  21938             _ = self.register_manager.lockReg(reg);
  21939         };
  21940         if (!std.mem.eql(u8, name, "_"))
  21941             arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len));
  21942         args.appendAssumeCapacity(arg_mcv);
  21943         if (output == .none) result = arg_mcv;
  21944         if (is_read) try self.load(arg_mcv, self.typeOf(output), .{ .air_ref = output });
  21945     }
  21946 
  21947     for (inputs) |input| {
  21948         const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
  21949         const constraint = std.mem.sliceTo(input_bytes, 0);
  21950         const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
  21951         // This equation accounts for the fact that even if we have exactly 4 bytes
  21952         // for the string, we still use the next u32 for the null terminator.
  21953         extra_i += (constraint.len + name.len + (2 + 3)) / 4;
  21954 
  21955         const ty = self.typeOf(input);
  21956         const input_mcv = try self.resolveInst(input);
  21957         const arg_mcv: MCValue = if (std.mem.eql(u8, constraint, "r") or
  21958             std.mem.eql(u8, constraint, "f") or std.mem.eql(u8, constraint, "x"))
  21959         arg: {
  21960             const rc = switch (constraint[0]) {
  21961                 'r' => abi.RegisterClass.gp,
  21962                 'f' => abi.RegisterClass.x87,
  21963                 'x' => abi.RegisterClass.sse,
  21964                 else => unreachable,
  21965             };
  21966             if (input_mcv.isRegister() and
  21967                 rc.isSet(RegisterManager.indexOfRegIntoTracked(input_mcv.getReg().?).?))
  21968                 break :arg input_mcv;
  21969             const reg = try self.register_manager.allocReg(null, rc);
  21970             try self.genSetReg(reg, ty, input_mcv, .{});
  21971             break :arg .{ .register = registerAlias(reg, @intCast(ty.abiSize(zcu))) };
  21972         } else if (std.mem.eql(u8, constraint, "i") or std.mem.eql(u8, constraint, "n"))
  21973             switch (input_mcv) {
  21974                 .immediate => |imm| .{ .immediate = imm },
  21975                 else => return self.fail("immediate operand requires comptime value: '{s}'", .{
  21976                     constraint,
  21977                 }),
  21978             }
  21979         else if (std.mem.eql(u8, constraint, "m")) arg: {
  21980             switch (input_mcv) {
  21981                 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_|
  21982                     break :arg input_mcv,
  21983                 .indirect, .load_frame => break :arg input_mcv,
  21984                 .load_symbol, .load_direct, .load_got, .load_tlv => {},
  21985                 else => {
  21986                     const temp_mcv = try self.allocTempRegOrMem(ty, false);
  21987                     try self.genCopy(ty, temp_mcv, input_mcv, .{});
  21988                     break :arg temp_mcv;
  21989                 },
  21990             }
  21991             const addr_reg = self.register_manager.tryAllocReg(null, abi.RegisterClass.gp) orelse {
  21992                 const temp_mcv = try self.allocTempRegOrMem(ty, false);
  21993                 try self.genCopy(ty, temp_mcv, input_mcv, .{});
  21994                 break :arg temp_mcv;
  21995             };
  21996             try self.genSetReg(addr_reg, .usize, input_mcv.address(), .{});
  21997             break :arg .{ .indirect = .{ .reg = addr_reg } };
  21998         } else if (std.mem.eql(u8, constraint, "g") or
  21999             std.mem.eql(u8, constraint, "rm") or std.mem.eql(u8, constraint, "mr") or
  22000             std.mem.eql(u8, constraint, "r,m") or std.mem.eql(u8, constraint, "m,r"))
  22001         arg: {
  22002             switch (input_mcv) {
  22003                 .register, .indirect, .load_frame => break :arg input_mcv,
  22004                 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_|
  22005                     break :arg input_mcv,
  22006                 else => {},
  22007             }
  22008             const temp_mcv = try self.allocTempRegOrMem(ty, true);
  22009             try self.genCopy(ty, temp_mcv, input_mcv, .{});
  22010             break :arg temp_mcv;
  22011         } else if (std.mem.eql(u8, constraint, "X"))
  22012             input_mcv
  22013         else if (std.mem.startsWith(u8, constraint, "{") and std.mem.endsWith(u8, constraint, "}")) arg: {
  22014             const reg = parseRegName(constraint["{".len .. constraint.len - "}".len]) orelse
  22015                 return self.fail("invalid register constraint: '{s}'", .{constraint});
  22016             try self.register_manager.getReg(reg, null);
  22017             try self.genSetReg(reg, ty, input_mcv, .{});
  22018             break :arg .{ .register = reg };
  22019         } else if (constraint.len == 1 and std.ascii.isDigit(constraint[0])) arg: {
  22020             const index = std.fmt.charToDigit(constraint[0], 10) catch unreachable;
  22021             if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{constraint});
  22022             try self.genCopy(ty, args.items[index], input_mcv, .{});
  22023             break :arg args.items[index];
  22024         } else return self.fail("invalid constraint: '{s}'", .{constraint});
  22025         if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| {
  22026             _ = self.register_manager.lockReg(reg);
  22027         };
  22028         if (!std.mem.eql(u8, name, "_"))
  22029             arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len));
  22030         args.appendAssumeCapacity(arg_mcv);
  22031     }
  22032 
  22033     {
  22034         var clobber_i: u32 = 0;
  22035         while (clobber_i < clobbers_len) : (clobber_i += 1) {
  22036             const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
  22037             // This equation accounts for the fact that even if we have exactly 4 bytes
  22038             // for the string, we still use the next u32 for the null terminator.
  22039             extra_i += clobber.len / 4 + 1;
  22040 
  22041             if (std.mem.eql(u8, clobber, "") or std.mem.eql(u8, clobber, "memory")) {
  22042                 // ok, sure
  22043             } else if (std.mem.eql(u8, clobber, "cc") or
  22044                 std.mem.eql(u8, clobber, "flags") or
  22045                 std.mem.eql(u8, clobber, "eflags") or
  22046                 std.mem.eql(u8, clobber, "rflags"))
  22047             {
  22048                 try self.spillEflagsIfOccupied();
  22049             } else {
  22050                 try self.register_manager.getReg(parseRegName(clobber) orelse
  22051                     return self.fail("invalid clobber: '{s}'", .{clobber}), null);
  22052             }
  22053         }
  22054     }
  22055 
  22056     const Label = struct {
                // Bookkeeping for one label name found in the inline assembly text:
                // the instruction index it resolves to, plus the instructions that
                // referenced it and still have to be patched.

                /// MIR instruction index that the label resolves to.
                /// Defaults to undefined; presumably only meaningful once a
                /// definition of the label has assigned it.
  22057         target: Mir.Inst.Index = undefined,
                /// MIR instruction indices recorded for references to this label
                /// that are waiting to be relocated when the label is defined.
  22058         pending_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
  22059 
                /// Whether a name is being checked as a label definition or as an
                /// operand that refers to a label.
  22060         const Kind = enum { definition, reference };
  22061 
                /// Reports whether name is an acceptable label of the given kind.
                ///
                /// Accepted bytes are ASCII letters, ASCII digits, dot, at sign,
                /// underscore and dollar sign; any other byte rejects the name,
                /// and so does an empty name. A dollar sign may not be the first
                /// byte. A name that starts with a digit must be exactly that one
                /// digit when defined, and exactly that digit followed by one of
                /// B, F, b or f when referenced.
  22062         fn isValid(kind: Kind, name: []const u8) bool {
  22063             for (name, 0..) |c, i| switch (c) {
                        // Anything outside the accepted byte set rejects the name.
  22064                 else => return false,
                        // A dollar sign is accepted anywhere except the first byte.
  22065                 '$' => if (i == 0) return false,
  22066                 '.' => {},
                        // Only a leading digit is constrained; digits in later
                        // positions pass through without further checks.
  22067                 '0'...'9' => if (i == 0) switch (kind) {
                            // A digit-led definition is that single digit and nothing else.
  22068                     .definition => if (name.len != 1) return false,
                            // A digit-led reference is the digit plus one suffix letter.
  22069                     .reference => {
  22070                         if (name.len != 2) return false;
  22071                         switch (name[1]) {
  22072                             else => return false,
  22073                             'B', 'F', 'b', 'f' => {},
  22074                         }
  22075                     },
  22076                 },
  22077                 '@', 'A'...'Z', '_', 'a'...'z' => {},
  22078             };
                    // The loop body never runs for an empty name, so reject it here.
  22079             return name.len > 0;
  22080         }
  22081     };
  22082     var labels: std.StringHashMapUnmanaged(Label) = .empty;
  22083     defer {
  22084         var label_it = labels.valueIterator();
  22085         while (label_it.next()) |label| label.pending_relocs.deinit(self.gpa);
  22086         labels.deinit(self.gpa);
  22087     }
  22088 
  22089     const asm_source = std.mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len];
  22090     var line_it = std.mem.tokenizeAny(u8, asm_source, "\n\r;");
  22091     next_line: while (line_it.next()) |line| {
  22092         var mnem_it = std.mem.tokenizeAny(u8, line, " \t");
  22093         var prefix: encoder.Instruction.Prefix = .none;
  22094         const mnem_str = while (mnem_it.next()) |mnem_str| {
  22095             if (mnem_str[0] == '#') continue :next_line;
  22096             if (std.mem.startsWith(u8, mnem_str, "//")) continue :next_line;
  22097             if (std.meta.stringToEnum(encoder.Instruction.Prefix, mnem_str)) |pre| {
  22098                 if (prefix != .none) return self.fail("extra prefix: '{s}'", .{mnem_str});
  22099                 prefix = pre;
  22100                 continue;
  22101             }
  22102             if (!std.mem.endsWith(u8, mnem_str, ":")) break mnem_str;
  22103             const label_name = mnem_str[0 .. mnem_str.len - ":".len];
  22104             if (!Label.isValid(.definition, label_name))
  22105                 return self.fail("invalid label: '{s}'", .{label_name});
  22106             const label_gop = try labels.getOrPut(self.gpa, label_name);
  22107             if (!label_gop.found_existing) label_gop.value_ptr.* = .{} else {
  22108                 const anon = std.ascii.isDigit(label_name[0]);
  22109                 if (!anon and label_gop.value_ptr.pending_relocs.items.len == 0)
  22110                     return self.fail("redefined label: '{s}'", .{label_name});
  22111                 for (label_gop.value_ptr.pending_relocs.items) |pending_reloc|
  22112                     self.performReloc(pending_reloc);
  22113                 if (anon)
  22114                     label_gop.value_ptr.pending_relocs.clearRetainingCapacity()
  22115                 else
  22116                     label_gop.value_ptr.pending_relocs.clearAndFree(self.gpa);
  22117             }
  22118             label_gop.value_ptr.target = @intCast(self.mir_instructions.len);
  22119         } else continue;
  22120         if (mnem_str[0] == '.') {
  22121             if (prefix != .none) return self.fail("prefixed directive: '{s} {s}'", .{ @tagName(prefix), mnem_str });
  22122             prefix = .directive;
  22123         }
  22124 
  22125         var mnem_size: ?Memory.Size = if (prefix == .directive)
  22126             null
  22127         else if (std.mem.endsWith(u8, mnem_str, "b"))
  22128             .byte
  22129         else if (std.mem.endsWith(u8, mnem_str, "w"))
  22130             .word
  22131         else if (std.mem.endsWith(u8, mnem_str, "l"))
  22132             .dword
  22133         else if (std.mem.endsWith(u8, mnem_str, "q") and
  22134             (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq")))
  22135             .qword
  22136         else if (std.mem.endsWith(u8, mnem_str, "t"))
  22137             .tbyte
  22138         else
  22139             null;
  22140         const mnem_tag = while (true) break std.meta.stringToEnum(
  22141             encoder.Instruction.Mnemonic,
  22142             mnem_str[0 .. mnem_str.len - @intFromBool(mnem_size != null)],
  22143         ) orelse if (mnem_size) |_| {
  22144             mnem_size = null;
  22145             continue;
  22146         } else return self.fail("invalid mnemonic: '{s}'", .{mnem_str});
  22147         if (@as(?Memory.Size, switch (mnem_tag) {
  22148             .clflush => .byte,
  22149             .fldenv, .fnstenv, .fstenv => .none,
  22150             .ldmxcsr, .stmxcsr, .vldmxcsr, .vstmxcsr => .dword,
  22151             else => null,
  22152         })) |fixed_mnem_size| {
  22153             if (mnem_size) |size| if (size != fixed_mnem_size)
  22154                 return self.fail("invalid size: '{s}'", .{mnem_str});
  22155             mnem_size = fixed_mnem_size;
  22156         }
  22157         const mnem_name = @tagName(mnem_tag);
  22158         const mnem_fixed_tag: Mir.Inst.FixedTag = if (prefix == .directive)
  22159             .{ ._, .pseudo }
  22160         else for (std.enums.values(Mir.Inst.Fixes)) |fixes| {
  22161             const fixes_name = @tagName(fixes);
  22162             const space_i = std.mem.indexOfScalar(u8, fixes_name, ' ');
  22163             const fixes_prefix = if (space_i) |i|
  22164                 std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).?
  22165             else
  22166                 .none;
  22167             if (fixes_prefix != prefix) continue;
  22168             const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..];
  22169             const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?;
  22170             const mnem_prefix = pattern[0..wildcard_i];
  22171             const mnem_suffix = pattern[wildcard_i + "_".len ..];
  22172             if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue;
  22173             if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue;
  22174             break .{ fixes, std.meta.stringToEnum(
  22175                 Mir.Inst.Tag,
  22176                 mnem_name[mnem_prefix.len .. mnem_name.len - mnem_suffix.len],
  22177             ) orelse continue };
  22178         } else {
  22179             assert(prefix != .none); // no combination of fixes produced a known mnemonic
  22180             return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{
  22181                 @tagName(prefix), mnem_name,
  22182             });
  22183         };
  22184 
  22185         var ops: [4]Operand = @splat(.none);
  22186         var ops_len: usize = 0;
  22187 
  22188         var last_op = false;
  22189         var op_it = std.mem.splitScalar(u8, mnem_it.rest(), ',');
  22190         next_op: for (&ops) |*op| {
  22191             const op_str = while (!last_op) {
  22192                 const full_str = op_it.next() orelse break :next_op;
  22193                 const code_str = if (std.mem.indexOfScalar(u8, full_str, '#') orelse
  22194                     std.mem.indexOf(u8, full_str, "//")) |comment|
  22195                 code: {
  22196                     last_op = true;
  22197                     break :code full_str[0..comment];
  22198                 } else full_str;
  22199                 const trim_str = std.mem.trim(u8, code_str, " \t*");
  22200                 if (trim_str.len > 0) break trim_str;
  22201             } else break;
  22202             if (std.mem.startsWith(u8, op_str, "%%")) {
  22203                 const colon = std.mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':');
  22204                 const reg = parseRegName(op_str["%%".len .. colon orelse op_str.len]) orelse
  22205                     return self.fail("invalid register: '{s}'", .{op_str});
  22206                 if (colon) |colon_pos| {
  22207                     const disp = std.fmt.parseInt(i32, op_str[colon_pos + ":".len ..], 0) catch
  22208                         return self.fail("invalid displacement: '{s}'", .{op_str});
  22209                     op.* = .{ .mem = .{
  22210                         .base = .{ .reg = reg },
  22211                         .mod = .{ .rm = .{
  22212                             .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}),
  22213                             .disp = disp,
  22214                         } },
  22215                     } };
  22216                 } else {
  22217                     if (mnem_size) |size| if (reg.bitSize() != size.bitSize(self.target))
  22218                         return self.fail("invalid register size: '{s}'", .{op_str});
  22219                     op.* = .{ .reg = reg };
  22220                 }
  22221             } else if (std.mem.startsWith(u8, op_str, "%[") and std.mem.endsWith(u8, op_str, "]")) {
  22222                 const colon = std.mem.indexOfScalarPos(u8, op_str, "%[".len, ':');
  22223                 const modifier = if (colon) |colon_pos|
  22224                     op_str[colon_pos + ":".len .. op_str.len - "]".len]
  22225                 else
  22226                     "";
  22227                 op.* = switch (args.items[
  22228                     arg_map.get(op_str["%[".len .. colon orelse op_str.len - "]".len]) orelse
  22229                         return self.fail("no matching constraint: '{s}'", .{op_str})
  22230                 ]) {
  22231                     .immediate => |imm| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "c"))
  22232                         .{ .imm = .u(imm) }
  22233                     else
  22234                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22235                     .register => |reg| if (std.mem.eql(u8, modifier, ""))
  22236                         .{ .reg = reg }
  22237                     else
  22238                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22239                     .memory => |addr| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "P"))
  22240                         .{ .mem = .{
  22241                             .base = .{ .reg = .ds },
  22242                             .mod = .{ .rm = .{
  22243                                 .size = mnem_size orelse
  22244                                     return self.fail("unknown size: '{s}'", .{op_str}),
  22245                                 .disp = @intCast(@as(i64, @bitCast(addr))),
  22246                             } },
  22247                         } }
  22248                     else
  22249                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22250                     .indirect => |reg_off| if (std.mem.eql(u8, modifier, ""))
  22251                         .{ .mem = .{
  22252                             .base = .{ .reg = reg_off.reg },
  22253                             .mod = .{ .rm = .{
  22254                                 .size = mnem_size orelse
  22255                                     return self.fail("unknown size: '{s}'", .{op_str}),
  22256                                 .disp = reg_off.off,
  22257                             } },
  22258                         } }
  22259                     else
  22260                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22261                     .load_frame => |frame_addr| if (std.mem.eql(u8, modifier, ""))
  22262                         .{ .mem = .{
  22263                             .base = .{ .frame = frame_addr.index },
  22264                             .mod = .{ .rm = .{
  22265                                 .size = mnem_size orelse
  22266                                     return self.fail("unknown size: '{s}'", .{op_str}),
  22267                                 .disp = frame_addr.off,
  22268                             } },
  22269                         } }
  22270                     else
  22271                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22272                     .lea_got => |sym_index| if (std.mem.eql(u8, modifier, "P"))
  22273                         .{ .reg = try self.copyToTmpRegister(.usize, .{ .lea_got = sym_index }) }
  22274                     else
  22275                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22276                     .lea_symbol => |sym_off| if (std.mem.eql(u8, modifier, "P"))
  22277                         .{ .reg = try self.copyToTmpRegister(.usize, .{ .lea_symbol = sym_off }) }
  22278                     else
  22279                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22280                     else => return self.fail("invalid constraint: '{s}'", .{op_str}),
  22281                 };
  22282             } else if (std.mem.startsWith(u8, op_str, "$")) {
  22283                 if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
  22284                     if (mnem_size) |size| {
  22285                         const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize(self.target) - 1));
  22286                         if ((if (s < 0) ~s else s) > max)
  22287                             return self.fail("invalid immediate size: '{s}'", .{op_str});
  22288                     }
  22289                     op.* = .{ .imm = .s(s) };
  22290                 } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
  22291                     if (mnem_size) |size| {
  22292                         const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize(self.target));
  22293                         if (u > max)
  22294                             return self.fail("invalid immediate size: '{s}'", .{op_str});
  22295                     }
  22296                     op.* = .{ .imm = .u(u) };
  22297                 } else |_| return self.fail("invalid immediate: '{s}'", .{op_str});
  22298             } else if (std.mem.endsWith(u8, op_str, ")")) {
  22299                 const open = std.mem.indexOfScalar(u8, op_str, '(') orelse
  22300                     return self.fail("invalid operand: '{s}'", .{op_str});
  22301                 var sib_it = std.mem.splitScalar(u8, op_str[open + "(".len .. op_str.len - ")".len], ',');
  22302                 const base_str = sib_it.next() orelse
  22303                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  22304                 if (base_str.len > 0 and !std.mem.startsWith(u8, base_str, "%%"))
  22305                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  22306                 const index_str = sib_it.next() orelse "";
  22307                 if (index_str.len > 0 and !std.mem.startsWith(u8, base_str, "%%"))
  22308                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  22309                 const scale_str = sib_it.next() orelse "";
  22310                 if (index_str.len == 0 and scale_str.len > 0)
  22311                     return self.fail("invalid memory operand: '{s}'", .{op_str});
  22312                 const scale: Memory.Scale = if (scale_str.len > 0)
  22313                     switch (std.fmt.parseInt(u4, scale_str, 10) catch
  22314                         return self.fail("invalid scale: '{s}'", .{op_str})) {
  22315                         1 => .@"1",
  22316                         2 => .@"2",
  22317                         4 => .@"4",
  22318                         8 => .@"8",
  22319                         else => return self.fail("invalid scale: '{s}'", .{op_str}),
  22320                     }
  22321                 else
  22322                     .@"1";
  22323                 if (sib_it.next()) |_| return self.fail("invalid memory operand: '{s}'", .{op_str});
  22324                 op.* = .{
  22325                     .mem = .{
  22326                         .base = if (base_str.len > 0)
  22327                             .{ .reg = parseRegName(base_str["%%".len..]) orelse
  22328                                 return self.fail("invalid base register: '{s}'", .{base_str}) }
  22329                         else
  22330                             .none,
  22331                         .mod = .{ .rm = .{
  22332                             .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}),
  22333                             .index = if (index_str.len > 0)
  22334                                 parseRegName(index_str["%%".len..]) orelse
  22335                                     return self.fail("invalid index register: '{s}'", .{op_str})
  22336                             else
  22337                                 .none,
  22338                             .scale = scale,
  22339                             .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and
  22340                                 std.mem.endsWith(u8, op_str[0..open], "]"))
  22341                             disp: {
  22342                                 const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':');
  22343                                 const modifier = if (colon) |colon_pos|
  22344                                     op_str[colon_pos + ":".len .. open - "]".len]
  22345                                 else
  22346                                     "";
  22347                                 break :disp switch (args.items[
  22348                                     arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse
  22349                                         return self.fail("no matching constraint: '{s}'", .{op_str})
  22350                                 ]) {
  22351                                     .immediate => |imm| if (std.mem.eql(u8, modifier, "") or
  22352                                         std.mem.eql(u8, modifier, "c"))
  22353                                         std.math.cast(i32, @as(i64, @bitCast(imm))) orelse
  22354                                             return self.fail("invalid displacement: '{s}'", .{op_str})
  22355                                     else
  22356                                         return self.fail("invalid modifier: '{s}'", .{modifier}),
  22357                                     else => return self.fail("invalid constraint: '{s}'", .{op_str}),
  22358                                 };
  22359                             } else if (open > 0)
  22360                                 std.fmt.parseInt(i32, op_str[0..open], 0) catch
  22361                                     return self.fail("invalid displacement: '{s}'", .{op_str})
  22362                             else
  22363                                 0,
  22364                         } },
  22365                     },
  22366                 };
  22367             } else if (Label.isValid(.reference, op_str)) {
  22368                 const anon = std.ascii.isDigit(op_str[0]);
  22369                 const label_gop = try labels.getOrPut(self.gpa, op_str[0..if (anon) 1 else op_str.len]);
  22370                 if (!label_gop.found_existing) label_gop.value_ptr.* = .{};
  22371                 if (anon and (op_str[1] == 'b' or op_str[1] == 'B') and !label_gop.found_existing)
  22372                     return self.fail("undefined label: '{s}'", .{op_str});
  22373                 const pending_relocs = &label_gop.value_ptr.pending_relocs;
  22374                 if (if (anon)
  22375                     op_str[1] == 'f' or op_str[1] == 'F'
  22376                 else
  22377                     !label_gop.found_existing or pending_relocs.items.len > 0)
  22378                     try pending_relocs.append(self.gpa, @intCast(self.mir_instructions.len));
  22379                 op.* = .{ .inst = label_gop.value_ptr.target };
  22380             } else return self.fail("invalid operand: '{s}'", .{op_str});
  22381             ops_len += 1;
  22382         } else if (op_it.next()) |op_str| return self.fail("extra operand: '{s}'", .{op_str});
  22383 
  22384         // convert from att syntax to intel syntax
  22385         std.mem.reverse(Operand, ops[0..ops_len]);
  22386 
  22387         (if (prefix == .directive) switch (mnem_tag) {
  22388             .@".cfi_def_cfa" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none)
  22389                 self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, ops[0].reg, ops[1].imm)
  22390             else
  22391                 error.InvalidInstruction,
  22392             .@".cfi_def_cfa_register" => if (ops[0] == .reg and ops[1] == .none)
  22393                 self.asmPseudoRegister(.pseudo_cfi_def_cfa_register_r, ops[0].reg)
  22394             else
  22395                 error.InvalidInstruction,
  22396             .@".cfi_def_cfa_offset" => if (ops[0] == .imm and ops[1] == .none)
  22397                 self.asmPseudoImmediate(.pseudo_cfi_def_cfa_offset_i_s, ops[0].imm)
  22398             else
  22399                 error.InvalidInstruction,
  22400             .@".cfi_adjust_cfa_offset" => if (ops[0] == .imm and ops[1] == .none)
  22401                 self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, ops[0].imm)
  22402             else
  22403                 error.InvalidInstruction,
  22404             .@".cfi_offset" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none)
  22405                 self.asmPseudoRegisterImmediate(.pseudo_cfi_offset_ri_s, ops[0].reg, ops[1].imm)
  22406             else
  22407                 error.InvalidInstruction,
  22408             .@".cfi_val_offset" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none)
  22409                 self.asmPseudoRegisterImmediate(.pseudo_cfi_val_offset_ri_s, ops[0].reg, ops[1].imm)
  22410             else
  22411                 error.InvalidInstruction,
  22412             .@".cfi_rel_offset" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none)
  22413                 self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, ops[0].reg, ops[1].imm)
  22414             else
  22415                 error.InvalidInstruction,
  22416             .@".cfi_register" => if (ops[0] == .reg and ops[1] == .reg and ops[2] == .none)
  22417                 self.asmPseudoRegisterRegister(.pseudo_cfi_register_rr, ops[0].reg, ops[1].reg)
  22418             else
  22419                 error.InvalidInstruction,
  22420             .@".cfi_restore" => if (ops[0] == .reg and ops[1] == .none)
  22421                 self.asmPseudoRegister(.pseudo_cfi_restore_r, ops[0].reg)
  22422             else
  22423                 error.InvalidInstruction,
  22424             .@".cfi_undefined" => if (ops[0] == .reg and ops[1] == .none)
  22425                 self.asmPseudoRegister(.pseudo_cfi_undefined_r, ops[0].reg)
  22426             else
  22427                 error.InvalidInstruction,
  22428             .@".cfi_same_value" => if (ops[0] == .reg and ops[1] == .none)
  22429                 self.asmPseudoRegister(.pseudo_cfi_same_value_r, ops[0].reg)
  22430             else
  22431                 error.InvalidInstruction,
  22432             .@".cfi_remember_state" => if (ops[0] == .none)
  22433                 self.asmPseudo(.pseudo_cfi_remember_state_none)
  22434             else
  22435                 error.InvalidInstruction,
  22436             .@".cfi_restore_state" => if (ops[0] == .none)
  22437                 self.asmPseudo(.pseudo_cfi_restore_state_none)
  22438             else
  22439                 error.InvalidInstruction,
  22440             .@".cfi_escape" => error.InvalidInstruction,
  22441             else => unreachable,
  22442         } else self.asmOps(mnem_fixed_tag, ops)) catch |err| switch (err) {
  22443             error.InvalidInstruction => return self.fail(
  22444                 "invalid instruction: '{s} {s} {s} {s} {s}'",
  22445                 .{
  22446                     mnem_str,
  22447                     @tagName(ops[0]),
  22448                     @tagName(ops[1]),
  22449                     @tagName(ops[2]),
  22450                     @tagName(ops[3]),
  22451                 },
  22452             ),
  22453             else => |e| return e,
  22454         };
  22455     }
  22456 
  22457     var label_it = labels.iterator();
  22458     while (label_it.next()) |label| if (label.value_ptr.pending_relocs.items.len > 0)
  22459         return self.fail("undefined label: '{s}'", .{label.key_ptr.*});
  22460 
  22461     for (outputs, args.items[0..outputs.len]) |output, arg_mcv| {
  22462         const extra_bytes = std.mem.sliceAsBytes(self.air.extra[outputs_extra_i..]);
  22463         const constraint =
  22464             std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[outputs_extra_i..]), 0);
  22465         const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
  22466         // This equation accounts for the fact that even if we have exactly 4 bytes
  22467         // for the string, we still use the next u32 for the null terminator.
  22468         outputs_extra_i += (constraint.len + name.len + (2 + 3)) / 4;
  22469 
  22470         if (output == .none) continue;
  22471         if (arg_mcv != .register) continue;
  22472         if (constraint.len == 2 and std.ascii.isDigit(constraint[1])) continue;
  22473         try self.store(self.typeOf(output), .{ .air_ref = output }, arg_mcv, .{});
  22474     }
  22475 
  22476     simple: {
  22477         var buf: [Liveness.bpi - 1]Air.Inst.Ref = @splat(.none);
  22478         var buf_index: usize = 0;
  22479         for (outputs) |output| {
  22480             if (output == .none) continue;
  22481 
  22482             if (buf_index >= buf.len) break :simple;
  22483             buf[buf_index] = output;
  22484             buf_index += 1;
  22485         }
  22486         if (buf_index + inputs.len > buf.len) break :simple;
  22487         @memcpy(buf[buf_index..][0..inputs.len], inputs);
  22488         return self.finishAir(inst, result, buf);
  22489     }
  22490     var bt = self.liveness.iterateBigTomb(inst);
  22491     for (outputs) |output| if (output != .none) try self.feed(&bt, output);
  22492     for (inputs) |input| try self.feed(&bt, input);
  22493     return self.finishAirResult(inst, result);
  22494 }
  22495 
  22496 const MoveStrategy = union(enum) {
  22497     move: Mir.Inst.FixedTag,
  22498     x87_load_store,
  22499     insert_extract: InsertExtract,
  22500     vex_insert_extract: InsertExtract,
  22501 
  22502     const InsertExtract = struct {
  22503         insert: Mir.Inst.FixedTag,
  22504         extract: Mir.Inst.FixedTag,
  22505     };
  22506 
  22507     pub fn read(strat: MoveStrategy, self: *CodeGen, dst_reg: Register, src_mem: Memory) !void {
  22508         switch (strat) {
  22509             .move => |tag| try self.asmRegisterMemory(tag, switch (tag[1]) {
  22510                 else => dst_reg,
  22511                 .lea => if (dst_reg.bitSize() >= 32) dst_reg else dst_reg.to32(),
  22512             }, src_mem),
  22513             .x87_load_store => {
  22514                 try self.asmMemory(.{ .f_, .ld }, src_mem);
  22515                 assert(dst_reg != .st7);
  22516                 try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1));
  22517             },
  22518             .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
  22519                 ie.insert,
  22520                 dst_reg,
  22521                 src_mem,
  22522                 .u(0),
  22523             ),
  22524             .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
  22525                 ie.insert,
  22526                 dst_reg,
  22527                 dst_reg,
  22528                 src_mem,
  22529                 .u(0),
  22530             ),
  22531         }
  22532     }
  22533     pub fn write(strat: MoveStrategy, self: *CodeGen, dst_mem: Memory, src_reg: Register) !void {
  22534         switch (strat) {
  22535             .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_reg),
  22536             .x87_load_store => {
  22537                 try self.asmRegister(.{ .f_, .ld }, src_reg);
  22538                 try self.asmMemory(.{ .f_p, .st }, dst_mem);
  22539             },
  22540             .insert_extract, .vex_insert_extract => |ie| if (ie.extract[0] != .p_w or self.hasFeature(.sse4_1))
  22541                 try self.asmMemoryRegisterImmediate(ie.extract, dst_mem, src_reg, .u(0))
  22542             else if (self.hasFeature(.sse2)) {
  22543                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  22544                 try self.asmRegisterRegisterImmediate(ie.extract, tmp_reg.to32(), src_reg.to128(), .u(0));
  22545                 try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16());
  22546             } else {
  22547                 const tmp_frame_index = try self.allocFrameIndex(.init(.{
  22548                     .size = 16,
  22549                     .alignment = .@"16",
  22550                 }));
  22551                 try self.asmMemoryRegister(.{ ._ps, .mova }, .{
  22552                     .base = .{ .frame = tmp_frame_index },
  22553                     .mod = .{ .rm = .{ .size = .xword } },
  22554                 }, src_reg.to128());
  22555                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  22556                 try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg.to16(), .{
  22557                     .base = .{ .frame = tmp_frame_index },
  22558                     .mod = .{ .rm = .{ .size = .word } },
  22559                 });
  22560                 try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16());
  22561             },
  22562         }
  22563     }
  22564 };
  22565 fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) !MoveStrategy {
  22566     const pt = self.pt;
  22567     const zcu = pt.zcu;
  22568     switch (class) {
  22569         .general_purpose, .segment => return .{ .move = .{ ._, .mov } },
  22570         .x87 => return .x87_load_store,
  22571         .mmx => {},
  22572         .sse => switch (ty.zigTypeTag(zcu)) {
  22573             else => {
  22574                 const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
  22575                 assert(std.mem.indexOfNone(abi.Class, classes, &.{
  22576                     .integer, .sse, .sseup, .memory, .float, .float_combine,
  22577                 }) == null);
  22578                 const abi_size = ty.abiSize(zcu);
  22579                 if (abi_size < 4 or
  22580                     std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) {
  22581                     1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
  22582                         .insert = .{ .vp_b, .insr },
  22583                         .extract = .{ .vp_b, .extr },
  22584                     } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
  22585                         .insert = .{ .p_b, .insr },
  22586                         .extract = .{ .p_b, .extr },
  22587                     } },
  22588                     2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  22589                         .insert = .{ .vp_w, .insr },
  22590                         .extract = .{ .vp_w, .extr },
  22591                     } } else .{ .insert_extract = .{
  22592                         .insert = .{ .p_w, .insr },
  22593                         .extract = .{ .p_w, .extr },
  22594                     } },
  22595                     3...4 => return .{ .move = if (self.hasFeature(.avx))
  22596                         .{ .v_d, .mov }
  22597                     else
  22598                         .{ ._d, .mov } },
  22599                     5...8 => return .{ .move = if (self.hasFeature(.avx))
  22600                         .{ .v_q, .mov }
  22601                     else
  22602                         .{ ._q, .mov } },
  22603                     9...16 => return .{ .move = if (self.hasFeature(.avx))
  22604                         .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22605                     else if (self.hasFeature(.sse2))
  22606                         .{ if (aligned) ._dqa else ._dqu, .mov }
  22607                     else
  22608                         .{ ._ps, if (aligned) .mova else .movu } },
  22609                     17...32 => if (self.hasFeature(.avx))
  22610                         return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22611                     else => {},
  22612                 } else switch (abi_size) {
  22613                     4 => return .{ .move = if (self.hasFeature(.avx))
  22614                         .{ .v_ss, .mov }
  22615                     else
  22616                         .{ ._ss, .mov } },
  22617                     5...8 => return .{ .move = if (self.hasFeature(.avx))
  22618                         .{ .v_sd, .mov }
  22619                     else if (self.hasFeature(.sse2))
  22620                         .{ ._sd, .mov }
  22621                     else
  22622                         .{ ._ps, .movl } },
  22623                     9...16 => return .{ .move = if (self.hasFeature(.avx))
  22624                         .{ .v_pd, if (aligned) .mova else .movu }
  22625                     else if (self.hasFeature(.sse2))
  22626                         .{ ._pd, if (aligned) .mova else .movu }
  22627                     else
  22628                         .{ ._ps, if (aligned) .mova else .movu } },
  22629                     17...32 => if (self.hasFeature(.avx))
  22630                         return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
  22631                     else => {},
  22632                 }
  22633             },
  22634             .float => switch (ty.floatBits(self.target.*)) {
  22635                 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
  22636                     .insert = .{ .vp_w, .insr },
  22637                     .extract = .{ .vp_w, .extr },
  22638                 } } else .{ .insert_extract = .{
  22639                     .insert = .{ .p_w, .insr },
  22640                     .extract = .{ .p_w, .extr },
  22641                 } },
  22642                 32 => return .{ .move = if (self.hasFeature(.avx))
  22643                     .{ .v_ss, .mov }
  22644                 else
  22645                     .{ ._ss, .mov } },
  22646                 64 => return .{ .move = if (self.hasFeature(.avx))
  22647                     .{ .v_sd, .mov }
  22648                 else if (self.hasFeature(.sse2))
  22649                     .{ ._sd, .mov }
  22650                 else
  22651                     .{ ._ps, .movl } },
  22652                 128 => return .{ .move = if (self.hasFeature(.avx))
  22653                     .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22654                 else if (self.hasFeature(.sse2))
  22655                     .{ if (aligned) ._dqa else ._dqu, .mov }
  22656                 else
  22657                     .{ ._ps, if (aligned) .mova else .movu } },
  22658                 else => {},
  22659             },
  22660             .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  22661                 .bool => switch (ty.vectorLen(zcu)) {
  22662                     33...64 => return .{ .move = if (self.hasFeature(.avx))
  22663                         .{ .v_q, .mov }
  22664                     else
  22665                         .{ ._q, .mov } },
  22666                     else => {},
  22667                 },
  22668                 .int => switch (ty.childType(zcu).intInfo(zcu).bits) {
  22669                     1...8 => switch (ty.vectorLen(zcu)) {
  22670                         1...16 => return .{ .move = if (self.hasFeature(.avx))
  22671                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22672                         else if (self.hasFeature(.sse2))
  22673                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22674                         else
  22675                             .{ ._ps, if (aligned) .mova else .movu } },
  22676                         17...32 => if (self.hasFeature(.avx))
  22677                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22678                         else => {},
  22679                     },
  22680                     9...16 => switch (ty.vectorLen(zcu)) {
  22681                         1...8 => return .{ .move = if (self.hasFeature(.avx))
  22682                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22683                         else if (self.hasFeature(.sse2))
  22684                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22685                         else
  22686                             .{ ._ps, if (aligned) .mova else .movu } },
  22687                         9...16 => if (self.hasFeature(.avx))
  22688                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22689                         else => {},
  22690                     },
  22691                     17...32 => switch (ty.vectorLen(zcu)) {
  22692                         1...4 => return .{ .move = if (self.hasFeature(.avx))
  22693                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22694                         else if (self.hasFeature(.sse2))
  22695                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22696                         else
  22697                             .{ ._ps, if (aligned) .mova else .movu } },
  22698                         5...8 => if (self.hasFeature(.avx))
  22699                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22700                         else => {},
  22701                     },
  22702                     33...64 => switch (ty.vectorLen(zcu)) {
  22703                         1...2 => return .{ .move = if (self.hasFeature(.avx))
  22704                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22705                         else if (self.hasFeature(.sse2))
  22706                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22707                         else
  22708                             .{ ._ps, if (aligned) .mova else .movu } },
  22709                         3...4 => if (self.hasFeature(.avx))
  22710                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22711                         else => {},
  22712                     },
  22713                     65...128 => switch (ty.vectorLen(zcu)) {
  22714                         1 => return .{ .move = if (self.hasFeature(.avx))
  22715                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22716                         else if (self.hasFeature(.sse2))
  22717                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22718                         else
  22719                             .{ ._ps, if (aligned) .mova else .movu } },
  22720                         2 => if (self.hasFeature(.avx))
  22721                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22722                         else => {},
  22723                     },
  22724                     129...256 => switch (ty.vectorLen(zcu)) {
  22725                         1 => if (self.hasFeature(.avx))
  22726                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22727                         else => {},
  22728                     },
  22729                     else => {},
  22730                 },
  22731                 .pointer, .optional => if (ty.childType(zcu).isPtrAtRuntime(zcu))
  22732                     switch (ty.vectorLen(zcu)) {
  22733                         1...2 => return .{ .move = if (self.hasFeature(.avx))
  22734                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22735                         else if (self.hasFeature(.sse2))
  22736                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22737                         else
  22738                             .{ ._ps, if (aligned) .mova else .movu } },
  22739                         3...4 => if (self.hasFeature(.avx))
  22740                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22741                         else => {},
  22742                     }
  22743                 else
  22744                     unreachable,
  22745                 .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  22746                     16 => switch (ty.vectorLen(zcu)) {
  22747                         1...8 => return .{ .move = if (self.hasFeature(.avx))
  22748                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22749                         else if (self.hasFeature(.sse2))
  22750                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22751                         else
  22752                             .{ ._ps, if (aligned) .mova else .movu } },
  22753                         9...16 => if (self.hasFeature(.avx))
  22754                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22755                         else => {},
  22756                     },
  22757                     32 => switch (ty.vectorLen(zcu)) {
  22758                         1...4 => return .{ .move = if (self.hasFeature(.avx))
  22759                             .{ .v_ps, if (aligned) .mova else .movu }
  22760                         else
  22761                             .{ ._ps, if (aligned) .mova else .movu } },
  22762                         5...8 => if (self.hasFeature(.avx))
  22763                             return .{ .move = .{ .v_ps, if (aligned) .mova else .movu } },
  22764                         else => {},
  22765                     },
  22766                     64 => switch (ty.vectorLen(zcu)) {
  22767                         1...2 => return .{ .move = if (self.hasFeature(.avx))
  22768                             .{ .v_pd, if (aligned) .mova else .movu }
  22769                         else
  22770                             .{ ._pd, if (aligned) .mova else .movu } },
  22771                         3...4 => if (self.hasFeature(.avx))
  22772                             return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
  22773                         else => {},
  22774                     },
  22775                     128 => switch (ty.vectorLen(zcu)) {
  22776                         1 => return .{ .move = if (self.hasFeature(.avx))
  22777                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
  22778                         else if (self.hasFeature(.sse2))
  22779                             .{ if (aligned) ._dqa else ._dqu, .mov }
  22780                         else
  22781                             .{ ._ps, if (aligned) .mova else .movu } },
  22782                         2 => if (self.hasFeature(.avx))
  22783                             return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
  22784                         else => {},
  22785                     },
  22786                     else => {},
  22787                 },
  22788                 else => {},
  22789             },
  22790         },
  22791         .ip => {},
  22792     }
  22793     return self.fail("TODO moveStrategy for {}", .{ty.fmt(pt)});
  22794 }
  22795 
/// Options that `genCopy` forwards to `genSetReg` and `genSetMem`.
const CopyOptions = struct {
    /// When true, copying an `.undef` source still emits code that fills the
    /// destination (`genSetReg` writes a repeating 0xAA byte pattern into general
    /// purpose registers); when false, `genSetReg` emits nothing for `.undef`.
    safety: bool = false,
};
  22799 
  22800 fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: CopyOptions) InnerError!void {
  22801     const pt = self.pt;
  22802 
  22803     const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
  22804     defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
  22805 
  22806     switch (dst_mcv) {
  22807         .none,
  22808         .unreach,
  22809         .dead,
  22810         .undef,
  22811         .immediate,
  22812         .eflags,
  22813         .register_overflow,
  22814         .register_mask,
  22815         .lea_direct,
  22816         .lea_got,
  22817         .lea_tlv,
  22818         .lea_frame,
  22819         .lea_symbol,
  22820         .elementwise_regs_then_frame,
  22821         .reserved_frame,
  22822         .air_ref,
  22823         => unreachable, // unmodifiable destination
  22824         .register => |reg| try self.genSetReg(reg, ty, src_mcv, opts),
  22825         .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) {
  22826             .none,
  22827             .unreach,
  22828             .dead,
  22829             .undef,
  22830             .register_overflow,
  22831             .elementwise_regs_then_frame,
  22832             .reserved_frame,
  22833             => unreachable,
  22834             .immediate,
  22835             .register,
  22836             .register_offset,
  22837             .lea_frame,
  22838             => src_mcv.offset(-dst_reg_off.off),
  22839             else => .{ .register_offset = .{
  22840                 .reg = try self.copyToTmpRegister(ty, src_mcv),
  22841                 .off = -dst_reg_off.off,
  22842             } },
  22843         }, opts),
  22844         inline .register_pair, .register_triple, .register_quadruple => |dst_regs, dst_tag| {
  22845             const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = src_info: switch (src_mcv) {
  22846                 .undef, .memory, .indirect, .load_frame => null,
  22847                 .register => |src_reg| switch (dst_regs[0].class()) {
  22848                     .general_purpose => switch (src_reg.class()) {
  22849                         else => unreachable,
  22850                         .sse => if (ty.abiSize(pt.zcu) <= 16) {
  22851                             if (self.hasFeature(.avx)) {
  22852                                 try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128());
  22853                                 try self.asmRegisterRegisterImmediate(.{ .vp_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1));
  22854                             } else if (self.hasFeature(.sse4_1)) {
  22855                                 try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
  22856                                 try self.asmRegisterRegisterImmediate(.{ .p_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1));
  22857                             } else {
  22858                                 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
  22859                                 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  22860                                 defer self.register_manager.unlockReg(tmp_lock);
  22861 
  22862                                 try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
  22863                                 try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128());
  22864                                 try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), src_reg.to128());
  22865                             }
  22866                             return;
  22867                         } else unreachable,
  22868                     },
  22869                     else => unreachable,
  22870                 },
  22871                 dst_tag => |src_regs| {
  22872                     var hazard_regs = src_regs;
  22873                     for (dst_regs, &hazard_regs, 1..) |dst_reg, src_reg, hazard_index| {
  22874                         const dst_id = dst_reg.id();
  22875                         if (dst_id == src_reg.id()) continue;
  22876                         var mir_tag: Mir.Inst.Tag = .mov;
  22877                         for (hazard_regs[hazard_index..]) |*hazard_reg| {
  22878                             if (dst_id != hazard_reg.id()) continue;
  22879                             mir_tag = .xchg;
  22880                             hazard_reg.* = src_reg;
  22881                         }
  22882                         try self.asmRegisterRegister(.{ ._, mir_tag }, dst_reg.to64(), src_reg.to64());
  22883                     }
  22884                     return;
  22885                 },
  22886                 .load_symbol, .load_direct, .load_got, .load_tlv => {
  22887                     const src_addr_reg =
  22888                         (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
  22889                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
  22890                     errdefer self.register_manager.unlockReg(src_addr_lock);
  22891 
  22892                     try self.genSetReg(src_addr_reg, .usize, src_mcv.address(), opts);
  22893                     break :src_info .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
  22894                 },
  22895                 .air_ref => |src_ref| return self.genCopy(ty, dst_mcv, try self.resolveInst(src_ref), opts),
  22896                 else => return self.fail("TODO implement genCopy for {s} of {}", .{
  22897                     @tagName(src_mcv), ty.fmt(pt),
  22898                 }),
  22899             };
  22900             defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
  22901 
  22902             for ([_]bool{ false, true }) |emit_hazard| {
  22903                 var hazard_count: u3 = 0;
  22904                 var part_disp: i32 = 0;
  22905                 for (dst_regs, try self.splitType(dst_regs.len, ty), 0..) |dst_reg, dst_ty, part_i| {
  22906                     defer part_disp += @intCast(dst_ty.abiSize(pt.zcu));
  22907                     const is_hazard = if (src_mcv.getReg()) |src_reg|
  22908                         dst_reg.id() == src_reg.id()
  22909                     else if (src_info) |info|
  22910                         dst_reg.id() == info.addr_reg.id()
  22911                     else
  22912                         false;
  22913                     if (is_hazard) hazard_count += 1;
  22914                     if (is_hazard != emit_hazard) continue;
  22915                     try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) {
  22916                         .undef => if (opts.safety and part_i > 0) .{ .register = dst_regs[0] } else .undef,
  22917                         dst_tag => |src_regs| .{ .register = src_regs[part_i] },
  22918                         .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(),
  22919                         .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{
  22920                             .reg = src_info.?.addr_reg,
  22921                             .off = part_disp,
  22922                         } },
  22923                         else => unreachable,
  22924                     }, opts);
  22925                 }
  22926                 switch (hazard_count) {
  22927                     0 => break,
  22928                     1 => continue,
  22929                     else => unreachable,
  22930                 }
  22931             }
  22932         },
  22933         .indirect => |reg_off| try self.genSetMem(
  22934             .{ .reg = reg_off.reg },
  22935             reg_off.off,
  22936             ty,
  22937             src_mcv,
  22938             opts,
  22939         ),
  22940         .memory, .load_symbol, .load_direct, .load_got, .load_tlv => {
  22941             switch (dst_mcv) {
  22942                 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
  22943                     return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv, opts),
  22944                 .load_symbol, .load_direct, .load_got, .load_tlv => {},
  22945                 else => unreachable,
  22946             }
  22947 
  22948             const addr_reg = try self.copyToTmpRegister(.usize, dst_mcv.address());
  22949             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  22950             defer self.register_manager.unlockReg(addr_lock);
  22951 
  22952             try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv, opts);
  22953         },
  22954         .load_frame => |frame_addr| try self.genSetMem(
  22955             .{ .frame = frame_addr.index },
  22956             frame_addr.off,
  22957             ty,
  22958             src_mcv,
  22959             opts,
  22960         ),
  22961     }
  22962 }
  22963 
  22964 fn genSetReg(
  22965     self: *CodeGen,
  22966     dst_reg: Register,
  22967     ty: Type,
  22968     src_mcv: MCValue,
  22969     opts: CopyOptions,
  22970 ) InnerError!void {
  22971     const pt = self.pt;
  22972     const zcu = pt.zcu;
  22973     const abi_size: u32 = @intCast(ty.abiSize(zcu));
  22974     if (ty.bitSize(zcu) > dst_reg.bitSize())
  22975         return self.fail("genSetReg called with a value larger than dst_reg", .{});
  22976     switch (src_mcv) {
  22977         .none,
  22978         .unreach,
  22979         .dead,
  22980         .register_overflow,
  22981         .elementwise_regs_then_frame,
  22982         .reserved_frame,
  22983         => unreachable,
  22984         .undef => if (opts.safety) switch (dst_reg.class()) {
  22985             .general_purpose => switch (abi_size) {
  22986                 1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), .u(0xAA)),
  22987                 2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), .u(0xAAAA)),
  22988                 3...4 => try self.asmRegisterImmediate(
  22989                     .{ ._, .mov },
  22990                     dst_reg.to32(),
  22991                     .s(@as(i32, @bitCast(@as(u32, 0xAAAAAAAA)))),
  22992                 ),
  22993                 5...8 => try self.asmRegisterImmediate(
  22994                     .{ ._, .mov },
  22995                     dst_reg.to64(),
  22996                     .u(0xAAAAAAAAAAAAAAAA),
  22997                 ),
  22998                 else => unreachable,
  22999             },
  23000             .segment, .x87, .mmx, .sse => try self.genSetReg(dst_reg, ty, try self.genTypedValue(try pt.undefValue(ty)), opts),
  23001             .ip => unreachable,
  23002         },
  23003         .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()),
  23004         .immediate => |imm| {
  23005             if (imm == 0) {
  23006                 // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
  23007                 // register is the fastest way to zero a register.
  23008                 try self.spillEflagsIfOccupied();
  23009                 try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
  23010             } else if (abi_size > 4 and std.math.cast(u32, imm) != null) {
  23011                 // 32-bit moves zero-extend to 64-bit.
  23012                 try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(imm));
  23013             } else if (abi_size <= 4 and @as(i64, @bitCast(imm)) < 0) {
  23014                 try self.asmRegisterImmediate(
  23015                     .{ ._, .mov },
  23016                     registerAlias(dst_reg, abi_size),
  23017                     .s(@intCast(@as(i64, @bitCast(imm)))),
  23018                 );
  23019             } else {
  23020                 try self.asmRegisterImmediate(
  23021                     .{ ._, .mov },
  23022                     registerAlias(dst_reg, abi_size),
  23023                     .u(imm),
  23024                 );
  23025             }
  23026         },
  23027         .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
  23028             .general_purpose => switch (src_reg.class()) {
  23029                 .general_purpose => try self.asmRegisterRegister(
  23030                     .{ ._, .mov },
  23031                     registerAlias(dst_reg, abi_size),
  23032                     registerAlias(src_reg, abi_size),
  23033                 ),
  23034                 .segment => try self.asmRegisterRegister(
  23035                     .{ ._, .mov },
  23036                     registerAlias(dst_reg, abi_size),
  23037                     src_reg,
  23038                 ),
  23039                 .x87, .mmx, .ip => unreachable,
  23040                 .sse => if (self.hasFeature(.sse2)) try self.asmRegisterRegister(
  23041                     switch (abi_size) {
  23042                         1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
  23043                         5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
  23044                         else => unreachable,
  23045                     },
  23046                     registerAlias(dst_reg, @max(abi_size, 4)),
  23047                     src_reg.to128(),
  23048                 ) else {
  23049                     const frame_index = try self.allocFrameIndex(.init(.{
  23050                         .size = 4,
  23051                         .alignment = .@"4",
  23052                     }));
  23053                     try self.asmMemoryRegister(.{ ._ss, .mov }, .{
  23054                         .base = .{ .frame = frame_index },
  23055                         .mod = .{ .rm = .{ .size = .dword } },
  23056                     }, src_reg.to128());
  23057                     try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{
  23058                         .base = .{ .frame = frame_index },
  23059                         .mod = .{ .rm = .{ .size = .fromSize(abi_size) } },
  23060                     });
  23061                 },
  23062             },
  23063             .segment => try self.asmRegisterRegister(
  23064                 .{ ._, .mov },
  23065                 dst_reg,
  23066                 switch (src_reg.class()) {
  23067                     .general_purpose, .segment => registerAlias(src_reg, abi_size),
  23068                     .x87, .mmx, .ip => unreachable,
  23069                     .sse => try self.copyToTmpRegister(ty, src_mcv),
  23070                 },
  23071             ),
  23072             .x87 => switch (src_reg.class()) {
  23073                 .general_purpose, .segment => unreachable,
  23074                 .x87 => switch (src_reg) {
  23075                     .st0 => try self.asmRegister(.{ .f_, .st }, dst_reg),
  23076                     .st1, .st2, .st3, .st4, .st5, .st6 => {
  23077                         try self.asmRegister(.{ .f_, .ld }, src_reg);
  23078                         assert(dst_reg != .st7);
  23079                         try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1));
  23080                     },
  23081                     else => unreachable,
  23082                 },
  23083                 .mmx, .sse, .ip => unreachable,
  23084             },
  23085             .mmx => unreachable,
  23086             .sse => switch (src_reg.class()) {
  23087                 .general_purpose => try self.asmRegisterRegister(
  23088                     switch (abi_size) {
  23089                         1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
  23090                         5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
  23091                         else => unreachable,
  23092                     },
  23093                     dst_reg.to128(),
  23094                     registerAlias(src_reg, @max(abi_size, 4)),
  23095                 ),
  23096                 .segment => try self.genSetReg(
  23097                     dst_reg,
  23098                     ty,
  23099                     .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
  23100                     opts,
  23101                 ),
  23102                 .x87, .mmx, .ip => unreachable,
  23103                 .sse => try self.asmRegisterRegister(
  23104                     @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) {
  23105                         else => switch (abi_size) {
  23106                             1...16 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else .{ ._dqa, .mov },
  23107                             17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null,
  23108                             else => null,
  23109                         },
  23110                         .float => switch (ty.scalarType(zcu).floatBits(self.target.*)) {
  23111                             16, 128 => switch (abi_size) {
  23112                                 2...16 => if (self.hasFeature(.avx))
  23113                                     .{ .v_dqa, .mov }
  23114                                 else
  23115                                     .{ ._dqa, .mov },
  23116                                 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null,
  23117                                 else => null,
  23118                             },
  23119                             32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
  23120                             64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
  23121                             80 => null,
  23122                             else => unreachable,
  23123                         },
  23124                     }) orelse return self.fail("TODO implement genSetReg for {}", .{ty.fmt(pt)}),
  23125                     registerAlias(dst_reg, abi_size),
  23126                     registerAlias(src_reg, abi_size),
  23127                 ),
  23128             },
  23129             .ip => unreachable,
  23130         },
  23131         inline .register_pair,
  23132         .register_triple,
  23133         .register_quadruple,
  23134         => |src_regs| switch (dst_reg.class()) {
  23135             .general_purpose => switch (src_regs[0].class()) {
  23136                 .general_purpose => try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts),
  23137                 else => unreachable,
  23138             },
  23139             .sse => switch (src_regs[0].class()) {
  23140                 .general_purpose => if (abi_size <= 16) {
  23141                     if (self.hasFeature(.avx)) {
  23142                         try self.asmRegisterRegister(.{ .v_q, .mov }, dst_reg.to128(), src_regs[0].to64());
  23143                         try self.asmRegisterRegisterRegisterImmediate(
  23144                             .{ .vp_q, .insr },
  23145                             dst_reg.to128(),
  23146                             dst_reg.to128(),
  23147                             src_regs[1].to64(),
  23148                             .u(1),
  23149                         );
  23150                     } else if (self.hasFeature(.sse4_1)) {
  23151                         try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64());
  23152                         try self.asmRegisterRegisterImmediate(.{ .p_q, .insr }, dst_reg.to128(), src_regs[1].to64(), .u(1));
  23153                     } else {
  23154                         const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
  23155                         const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  23156                         defer self.register_manager.unlockReg(tmp_lock);
  23157 
  23158                         try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64());
  23159                         try self.asmRegisterRegister(.{ ._q, .mov }, tmp_reg.to128(), src_regs[1].to64());
  23160                         try self.asmRegisterRegister(.{ ._ps, .movlh }, dst_reg.to128(), tmp_reg.to128());
  23161                     }
  23162                 } else unreachable,
  23163                 else => unreachable,
  23164             },
  23165             else => unreachable,
  23166         },
  23167         .register_offset,
  23168         .indirect,
  23169         .load_frame,
  23170         .lea_frame,
  23171         => try @as(MoveStrategy, switch (src_mcv) {
  23172             .register_offset => |reg_off| switch (reg_off.off) {
  23173                 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }, opts),
  23174                 else => .{ .move = .{ ._, .lea } },
  23175             },
  23176             .indirect => try self.moveStrategy(ty, dst_reg.class(), false),
  23177             .load_frame => |frame_addr| try self.moveStrategy(
  23178                 ty,
  23179                 dst_reg.class(),
  23180                 self.getFrameAddrAlignment(frame_addr).compare(.gte, .fromLog2Units(
  23181                     std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)),
  23182                 )),
  23183             ),
  23184             .lea_frame => .{ .move = .{ ._, .lea } },
  23185             else => unreachable,
  23186         }).read(self, registerAlias(dst_reg, abi_size), switch (src_mcv) {
  23187             .register_offset, .indirect => |reg_off| .{
  23188                 .base = .{ .reg = reg_off.reg.to64() },
  23189                 .mod = .{ .rm = .{
  23190                     .size = self.memSize(ty),
  23191                     .disp = reg_off.off,
  23192                 } },
  23193             },
  23194             .load_frame, .lea_frame => |frame_addr| .{
  23195                 .base = .{ .frame = frame_addr.index },
  23196                 .mod = .{ .rm = .{
  23197                     .size = self.memSize(ty),
  23198                     .disp = frame_addr.off,
  23199                 } },
  23200             },
  23201             else => unreachable,
  23202         }),
  23203         .register_mask => |src_reg_mask| {
  23204             assert(src_reg_mask.reg.class() == .sse);
  23205             const has_avx = self.hasFeature(.avx);
  23206             const bits_reg = switch (dst_reg.class()) {
  23207                 .general_purpose => dst_reg,
  23208                 else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
  23209             };
  23210             const bits_lock = self.register_manager.lockReg(bits_reg);
  23211             defer if (bits_lock) |lock| self.register_manager.unlockReg(lock);
  23212 
  23213             const pack_reg = switch (src_reg_mask.info.scalar) {
  23214                 else => src_reg_mask.reg,
  23215                 .word => try self.register_manager.allocReg(null, abi.RegisterClass.sse),
  23216             };
  23217             const pack_lock = self.register_manager.lockReg(pack_reg);
  23218             defer if (pack_lock) |lock| self.register_manager.unlockReg(lock);
  23219 
  23220             var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(self.target), 8));
  23221             switch (src_reg_mask.info.scalar) {
  23222                 else => {},
  23223                 .word => {
  23224                     const src_alias = registerAlias(src_reg_mask.reg, mask_size);
  23225                     const pack_alias = registerAlias(pack_reg, mask_size);
  23226                     if (has_avx) {
  23227                         try self.asmRegisterRegisterRegister(.{ .vp_b, .ackssw }, pack_alias, src_alias, src_alias);
  23228                     } else {
  23229                         try self.asmRegisterRegister(.{ ._dqa, .mov }, pack_alias, src_alias);
  23230                         try self.asmRegisterRegister(.{ .p_b, .ackssw }, pack_alias, pack_alias);
  23231                     }
  23232                     mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable;
  23233                 },
  23234             }
  23235             try self.asmRegisterRegister(.{ switch (src_reg_mask.info.scalar) {
  23236                 .byte, .word => if (has_avx) .vp_b else .p_b,
  23237                 .dword => if (has_avx) .v_ps else ._ps,
  23238                 .qword => if (has_avx) .v_pd else ._pd,
  23239                 else => unreachable,
  23240             }, .movmsk }, bits_reg.to32(), registerAlias(pack_reg, mask_size));
  23241             if (src_reg_mask.info.inverted) try self.asmRegister(.{ ._, .not }, registerAlias(bits_reg, abi_size));
  23242             try self.genSetReg(dst_reg, ty, .{ .register = bits_reg }, .{});
  23243         },
  23244         .memory, .load_symbol, .load_direct, .load_got, .load_tlv => {
  23245             switch (src_mcv) {
  23246                 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
  23247                     return (try self.moveStrategy(
  23248                         ty,
  23249                         dst_reg.class(),
  23250                         ty.abiAlignment(zcu).check(@as(u32, @bitCast(small_addr))),
  23251                     )).read(self, registerAlias(dst_reg, abi_size), .{
  23252                         .base = .{ .reg = .ds },
  23253                         .mod = .{ .rm = .{
  23254                             .size = self.memSize(ty),
  23255                             .disp = small_addr,
  23256                         } },
  23257                     }),
  23258                 .load_symbol => |sym_off| switch (dst_reg.class()) {
  23259                     .general_purpose => {
  23260                         assert(sym_off.off == 0);
  23261                         try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{
  23262                             .base = .{ .reloc = sym_off.sym_index },
  23263                             .mod = .{ .rm = .{
  23264                                 .size = self.memSize(ty),
  23265                                 .disp = sym_off.off,
  23266                             } },
  23267                         });
  23268                         return;
  23269                     },
  23270                     .segment, .mmx, .ip => unreachable,
  23271                     .x87, .sse => {},
  23272                 },
  23273                 .load_direct => |sym_index| switch (dst_reg.class()) {
  23274                     .general_purpose => {
  23275                         _ = try self.addInst(.{
  23276                             .tag = .mov,
  23277                             .ops = .direct_reloc,
  23278                             .data = .{ .rx = .{
  23279                                 .r1 = registerAlias(dst_reg, abi_size),
  23280                                 .payload = try self.addExtra(bits.SymbolOffset{ .sym_index = sym_index }),
  23281                             } },
  23282                         });
  23283                         return;
  23284                     },
  23285                     .segment, .mmx, .ip => unreachable,
  23286                     .x87, .sse => {},
  23287                 },
  23288                 .load_got, .load_tlv => {},
  23289                 else => unreachable,
  23290             }
  23291 
  23292             const addr_reg = try self.copyToTmpRegister(.usize, src_mcv.address());
  23293             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  23294             defer self.register_manager.unlockReg(addr_lock);
  23295 
  23296             try (try self.moveStrategy(ty, dst_reg.class(), false)).read(self, registerAlias(dst_reg, abi_size), .{
  23297                 .base = .{ .reg = addr_reg.to64() },
  23298                 .mod = .{ .rm = .{ .size = self.memSize(ty) } },
  23299             });
  23300         },
  23301         .lea_symbol => |sym_off| switch (self.bin_file.tag) {
  23302             .elf, .macho => try self.asmRegisterMemory(
  23303                 .{ ._, .lea },
  23304                 dst_reg.to64(),
  23305                 .{
  23306                     .base = .{ .reloc = sym_off.sym_index },
  23307                     .mod = .{ .rm = .{
  23308                         .size = .qword,
  23309                         .disp = sym_off.off,
  23310                     } },
  23311                 },
  23312             ),
  23313             else => return self.fail("TODO emit symbol sequence on {s}", .{
  23314                 @tagName(self.bin_file.tag),
  23315             }),
  23316         },
  23317         .lea_direct, .lea_got => |sym_index| _ = try self.addInst(.{
  23318             .tag = switch (src_mcv) {
  23319                 .lea_direct => .lea,
  23320                 .lea_got => .mov,
  23321                 else => unreachable,
  23322             },
  23323             .ops = switch (src_mcv) {
  23324                 .lea_direct => .direct_reloc,
  23325                 .lea_got => .got_reloc,
  23326                 else => unreachable,
  23327             },
  23328             .data = .{ .rx = .{
  23329                 .r1 = dst_reg.to64(),
  23330                 .payload = try self.addExtra(bits.SymbolOffset{ .sym_index = sym_index }),
  23331             } },
  23332         }),
  23333         .lea_tlv => unreachable, // TODO: remove this
  23334         .air_ref => |src_ref| try self.genSetReg(dst_reg, ty, try self.resolveInst(src_ref), opts),
  23335     }
  23336 }
  23337 
  23338 fn genSetMem(
  23339     self: *CodeGen,
  23340     base: Memory.Base,
  23341     disp: i32,
  23342     ty: Type,
  23343     src_mcv: MCValue,
  23344     opts: CopyOptions,
  23345 ) InnerError!void {
            // Emits code that stores a value of type `ty`, currently described by
            // `src_mcv`, to the memory operand formed from `base` and `disp`.
            //
            // The function dispatches on the kind of `src_mcv`. Several cases first
            // bring the value into a temporary register (or a temporary frame slot)
            // and then recurse with that simpler source. `opts` is forwarded to the
            // recursive calls and to `genInlineMemset`; `opts.safety` decides whether
            // an `.undef` source produces any code at all.
  23346     const pt = self.pt;
  23347     const zcu = pt.zcu;
  23348     const abi_size: u32 = @intCast(ty.abiSize(zcu));
            // The destination address expressed as an MCValue. It is only consumed
            // by the inline memset/memcpy paths below. A `.table` base is not
            // accepted by this function.
  23349     const dst_ptr_mcv: MCValue = switch (base) {
  23350         .none => .{ .immediate = @bitCast(@as(i64, disp)) },
  23351         .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } },
  23352         .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } },
  23353         .table => unreachable,
  23354         .reloc => |sym_index| .{ .lea_symbol = .{ .sym_index = sym_index, .off = disp } },
  23355     };
  23356     switch (src_mcv) {
                // These kinds do not describe a value that can be stored.
  23357         .none,
  23358         .unreach,
  23359         .dead,
  23360         .elementwise_regs_then_frame,
  23361         .reserved_frame,
  23362         => unreachable,
                // An undefined value is only written when safety is requested, by
                // memsetting `abi_size` bytes at the destination; otherwise nothing
                // is emitted.
  23363         .undef => if (opts.safety) try self.genInlineMemset(
  23364             dst_ptr_mcv,
  23365             src_mcv,
  23366             .{ .immediate = abi_size },
  23367             opts,
  23368         ),
  23369         .immediate => |imm| switch (abi_size) {
                    // Small power-of-two sizes take a single `mov` with an immediate.
                    // Signed ABI integers keep the low bits of `imm` via `@truncate`;
                    // every other type goes through `@intCast` to `u32`, which
                    // requires `imm` to already fit in 32 bits.
  23370             1, 2, 4 => {
  23371                 const immediate: Immediate = switch (if (ty.isAbiInt(zcu))
  23372                     ty.intInfo(zcu).signedness
  23373                 else
  23374                     .unsigned) {
  23375                     .signed => .s(@truncate(@as(i64, @bitCast(imm)))),
  23376                     .unsigned => .u(@as(u32, @intCast(imm))),
  23377                 };
  23378                 try self.asmMemoryImmediate(
  23379                     .{ ._, .mov },
  23380                     .{ .base = base, .mod = .{ .rm = .{
  23381                         .size = .fromSize(abi_size),
  23382                         .disp = disp,
  23383                     } } },
  23384                     immediate,
  23385                 );
  23386             },
                    // Immediates of these sizes are not expected to reach this function.
  23387             3, 5...7 => unreachable,
                    // Remaining sizes: when `imm`, reinterpreted as `i64`, fits in an
                    // `i32`, one `mov` of a signed 32-bit immediate is emitted at the
                    // operand size derived from `abi_size`.
                    // NOTE(review): presumably relies on the instruction sign-extending
                    // the immediate to the operand size - confirm against the encoder.
  23388             else => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| {
  23389                 try self.asmMemoryImmediate(
  23390                     .{ ._, .mov },
  23391                     .{ .base = base, .mod = .{ .rm = .{
  23392                         .size = .fromSize(abi_size),
  23393                         .disp = disp,
  23394                     } } },
  23395                     .s(small),
  23396                 );
  23397             } else {
                        // Otherwise the value is stored 32 bits at a time. For chunks at
                        // or beyond bit 64 the shift no longer fits in a `u6`: signed
                        // integers then shift by 63 (yielding only sign bits) and all
                        // other types store zero.
  23398                 var offset: i32 = 0;
  23399                 while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate(
  23400                     .{ ._, .mov },
  23401                     .{ .base = base, .mod = .{ .rm = .{
  23402                         .size = .dword,
  23403                         .disp = disp + offset,
  23404                     } } },
  23405                     if (ty.isSignedInt(zcu)) .s(
  23406                         @truncate(@as(i64, @bitCast(imm)) >> (std.math.cast(u6, offset * 8) orelse 63)),
  23407                     ) else .u(
  23408                         @as(u32, @truncate(if (std.math.cast(u6, offset * 8)) |shift| imm >> shift else 0)),
  23409                     ),
  23410                 );
  23411             },
  23412         },
                // A condition held in eflags is written as a single byte with setcc.
  23413         .eflags => |cc| try self.asmSetccMemory(cc, .{ .base = base, .mod = .{
  23414             .rm = .{ .size = .byte, .disp = disp },
  23415         } }),
  23416         .register => |src_reg| {
                    // `mem_size` is the number of bytes that may be written at the
                    // destination. For a frame slot, a store that ends exactly at the
                    // end of the slot's non-padding part is given the slot's whole
                    // `abi_size`; any other destination gets `abi_size` of `ty`.
                    // NOTE(review): presumably this lets a register wider than `ty`
                    // overwrite the slot's spill padding - confirm.
  23417             const mem_size = switch (base) {
  23418                 .frame => |base_fi| mem_size: {
  23419                     assert(disp >= 0);
  23420                     const frame_abi_size = self.frame_allocs.items(.abi_size)[@intFromEnum(base_fi)];
  23421                     const frame_spill_pad = self.frame_allocs.items(.spill_pad)[@intFromEnum(base_fi)];
  23422                     assert(frame_abi_size - frame_spill_pad - disp >= abi_size);
  23423                     break :mem_size if (frame_abi_size - frame_spill_pad - disp == abi_size)
  23424                         frame_abi_size
  23425                     else
  23426                         abi_size;
  23427                 },
  23428                 else => abi_size,
  23429             };
  23430             const src_alias = registerAlias(src_reg, abi_size);
                    // Bytes a store of the register would touch: the width of the
                    // aliased register for the non-vector classes, `abi_size` for
                    // mmx/sse registers.
  23431             const src_size: u32 = @intCast(switch (src_alias.class()) {
  23432                 .general_purpose, .segment, .x87, .ip => @divExact(src_alias.bitSize(), 8),
  23433                 .mmx, .sse => abi_size,
  23434             });
                    // Alignment equal to `src_size` rounded up to a power of two.
  23435             const src_align: InternPool.Alignment = .fromNonzeroByteUnits(
  23436                 std.math.ceilPowerOfTwoAssert(u32, src_size),
  23437             );
  23438             if (src_size > mem_size) {
                        // The register store is wider than the room at the destination:
                        // write the register to a fresh frame slot of `src_size` bytes,
                        // then copy from that slot to the destination with a recursive
                        // call and release the slot again.
  23439                 const frame_index = try self.allocFrameIndex(.init(.{
  23440                     .size = src_size,
  23441                     .alignment = src_align,
  23442                 }));
  23443                 const frame_mcv: MCValue = .{ .load_frame = .{ .index = frame_index } };
  23444                 try (try self.moveStrategy(ty, src_alias.class(), true)).write(
  23445                     self,
  23446                     .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{
  23447                         .size = .fromSize(src_size),
  23448                     } } },
  23449                     src_alias,
  23450                 );
  23451                 try self.genSetMem(base, disp, ty, frame_mcv, opts);
  23452                 try self.freeValue(frame_mcv);
                    // Otherwise store the register directly. The third argument to
                    // `moveStrategy` is derived from whether the destination address
                    // satisfies `src_align`; register bases other than es/cs/ss/ds and
                    // reloc bases always pass `false`.
                    // NOTE(review): presumably this is an "is aligned" flag - confirm
                    // in `moveStrategy`.
  23453             } else try (try self.moveStrategy(ty, src_alias.class(), switch (base) {
  23454                 .none => src_align.check(@as(u32, @bitCast(disp))),
  23455                 .reg => |reg| switch (reg) {
  23456                     .es, .cs, .ss, .ds => src_align.check(@as(u32, @bitCast(disp))),
  23457                     else => false,
  23458                 },
  23459                 .frame => |frame_index| self.getFrameAddrAlignment(.{
  23460                     .index = frame_index,
  23461                     .off = disp,
  23462                 }).compare(.gte, src_align),
  23463                 .table => unreachable,
  23464                 .reloc => false,
  23465             })).write(
  23466                 self,
  23467                 .{ .base = base, .mod = .{ .rm = .{
                            // The memory operand is never wider than the register alias.
  23468                     .size = .fromBitSize(@min(
  23469                         self.memSize(ty).bitSize(self.target),
  23470                         src_alias.bitSize(),
  23471                     )),
  23472                     .disp = disp,
  23473                 } } },
  23474                 src_alias,
  23475             );
  23476         },
                // A value spread over several registers: split `ty` into one part per
                // register and store the parts back to back, advancing the
                // displacement by each part's ABI size.
  23477         inline .register_pair, .register_triple, .register_quadruple => |src_regs| {
  23478             var part_disp: i32 = disp;
  23479             for (try self.splitType(src_regs.len, ty), src_regs) |src_ty, src_reg| {
  23480                 try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg }, opts);
  23481                 part_disp += @intCast(src_ty.abiSize(zcu));
  23482             }
  23483         },
                // A value held in a register together with a flag held in eflags.
  23484         .register_overflow => |ro| switch (ty.zigTypeTag(zcu)) {
                    // Struct: field 0 comes from the register and field 1 from eflags,
                    // each stored at its own field offset.
  23485             .@"struct" => {
  23486                 try self.genSetMem(
  23487                     base,
  23488                     disp + @as(i32, @intCast(ty.structFieldOffset(0, zcu))),
  23489                     ty.fieldType(0, zcu),
  23490                     .{ .register = ro.reg },
  23491                     opts,
  23492                 );
  23493                 try self.genSetMem(
  23494                     base,
  23495                     disp + @as(i32, @intCast(ty.structFieldOffset(1, zcu))),
  23496                     ty.fieldType(1, zcu),
  23497                     .{ .eflags = ro.eflags },
  23498                     opts,
  23499                 );
  23500             },
                    // Optional (not represented as its bare payload): the payload comes
                    // from the register at `disp`, followed by a bool from eflags placed
                    // right after the payload's ABI size.
  23501             .optional => {
  23502                 assert(!ty.optionalReprIsPayload(zcu));
  23503                 const child_ty = ty.optionalChild(zcu);
  23504                 try self.genSetMem(base, disp, child_ty, .{ .register = ro.reg }, opts);
  23505                 try self.genSetMem(
  23506                     base,
  23507                     disp + @as(i32, @intCast(child_ty.abiSize(zcu))),
  23508                     .bool,
  23509                     .{ .eflags = ro.eflags },
  23510                     opts,
  23511                 );
  23512             },
  23513             else => return self.fail("TODO implement genSetMem for {s} of {}", .{
  23514                 @tagName(src_mcv), ty.fmt(pt),
  23515             }),
  23516         },
                // The value is a register plus a constant offset. Normally it is
                // computed into a temporary register and stored from there.
  23517         .register_offset => |reg_off| {
  23518             const src_reg = self.copyToTmpRegister(ty, src_mcv) catch |err| switch (err) {
                        // No temporary register available: add the offset to the source
                        // register in place with `lea`, store it, then undo the
                        // adjustment with a second `lea` using the negated offset.
  23519                 error.OutOfRegisters => {
  23520                     const src_reg = registerAlias(reg_off.reg, abi_size);
  23521                     try self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
  23522                         .base = .{ .reg = src_reg },
  23523                         .mod = .{ .rm = .{
  23524                             .size = .qword,
  23525                             .disp = reg_off.off,
  23526                         } },
  23527                     });
  23528                     try self.genSetMem(base, disp, ty, .{ .register = reg_off.reg }, opts);
  23529                     return self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
  23530                         .base = .{ .reg = src_reg },
  23531                         .mod = .{ .rm = .{
  23532                             .size = .qword,
  23533                             .disp = -reg_off.off,
  23534                         } },
  23535                     });
  23536                 },
  23537                 else => |e| return e,
  23538             };
                    // Keep the temporary locked while the recursive store runs.
  23539             const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  23540             defer self.register_manager.unlockReg(src_lock);
  23541 
  23542             try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
  23543         },
                // A mask register value is first copied into a temporary register and
                // then stored from that register.
  23544         .register_mask => {
  23545             const src_reg = try self.copyToTmpRegister(ty, src_mcv);
  23546             const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  23547             defer self.register_manager.unlockReg(src_lock);
  23548 
  23549             try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
  23550         },
                // Sources that are themselves memory locations or addresses.
  23551         .memory,
  23552         .indirect,
  23553         .load_direct,
  23554         .lea_direct,
  23555         .load_got,
  23556         .lea_got,
  23557         .load_tlv,
  23558         .lea_tlv,
  23559         .load_frame,
  23560         .lea_frame,
  23561         .load_symbol,
  23562         .lea_symbol,
  23563         => switch (abi_size) {
                    // Zero-sized values need no code.
  23564             0 => {},
                    // Sizes 1, 2, 4 and 8 go through a temporary register.
  23565             1, 2, 4, 8 => {
  23566                 const src_reg = try self.copyToTmpRegister(ty, src_mcv);
  23567                 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  23568                 defer self.register_manager.unlockReg(src_lock);
  23569 
  23570                 try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
  23571             },
                    // Every other size is copied with an inline memcpy of `abi_size`
                    // bytes from the source's address, with `no_alias` set.
  23572             else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }, .{ .no_alias = true }),
  23573         },
                // Resolve the AIR reference to its MCValue and store that instead.
  23574         .air_ref => |src_ref| try self.genSetMem(base, disp, ty, try self.resolveInst(src_ref), opts),
  23575     }
  23576 }
  23577 
  23578 fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue, opts: struct {
  23579     no_alias: bool,
  23580 }) InnerError!void {
  23581     if (opts.no_alias and dst_ptr.isAddress() and src_ptr.isAddress()) switch (len) {
  23582         else => {},
  23583         .immediate => |len_imm| switch (len_imm) {
  23584             else => {},
  23585             1 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
  23586                 try self.asmRegisterMemory(.{ ._, .mov }, reg.to8(), try src_ptr.deref().mem(self, .{ .size = .byte }));
  23587                 try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .byte }), reg.to8());
  23588                 return;
  23589             },
  23590             2 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
  23591                 try self.asmRegisterMemory(.{ ._, .mov }, reg.to16(), try src_ptr.deref().mem(self, .{ .size = .word }));
  23592                 try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .word }), reg.to16());
  23593                 return;
  23594             },
  23595             4 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
  23596                 try self.asmRegisterMemory(.{ ._, .mov }, reg.to32(), try src_ptr.deref().mem(self, .{ .size = .dword }));
  23597                 try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .dword }), reg.to32());
  23598                 return;
  23599             },
  23600             8 => if (self.target.cpu.arch == .x86_64) {
  23601                 if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
  23602                     try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword }));
  23603                     try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword }), reg.to64());
  23604                     return;
  23605                 }
  23606             },
  23607             16 => if (self.hasFeature(.avx)) {
  23608                 if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
  23609                     try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword }));
  23610                     try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
  23611                     return;
  23612                 }
  23613             } else if (self.hasFeature(.sse2)) {
  23614                 if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
  23615                     try self.asmRegisterMemory(.{ ._dqu, .mov }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword }));
  23616                     try self.asmMemoryRegister(.{ ._dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
  23617                     return;
  23618                 }
  23619             } else if (self.hasFeature(.sse)) {
  23620                 if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
  23621                     try self.asmRegisterMemory(.{ ._ps, .movu }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword }));
  23622                     try self.asmMemoryRegister(.{ ._ps, .movu }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
  23623                     return;
  23624                 }
  23625             },
  23626             32 => if (self.hasFeature(.avx)) {
  23627                 if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
  23628                     try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to256(), try src_ptr.deref().mem(self, .{ .size = .yword }));
  23629                     try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .yword }), reg.to256());
  23630                     return;
  23631                 }
  23632             },
  23633         },
  23634     };
  23635     try self.spillRegisters(&.{ .rsi, .rdi, .rcx });
  23636     try self.genSetReg(.rsi, .usize, src_ptr, .{});
  23637     try self.genSetReg(.rdi, .usize, dst_ptr, .{});
  23638     try self.genSetReg(.rcx, .usize, len, .{});
  23639     try self.asmOpOnly(.{ .@"rep _sb", .mov });
  23640 }
  23641 
/// Emits an inline byte fill using the `.{ .@"rep _sb", .sto }` MIR op
/// (i.e. `rep stosb`).
///
/// `dst_ptr` is materialized in `rdi`, `value` in `al` and `len` in `rcx`;
/// those three registers are spilled before being overwritten. `opts` is
/// forwarded only to the load of `value`.
fn genInlineMemset(
    self: *CodeGen,
    dst_ptr: MCValue,
    value: MCValue,
    len: MCValue,
    opts: CopyOptions,
) InnerError!void {
    // Free up the fixed registers the string instruction implicitly uses.
    try self.spillRegisters(&.{ .rdi, .al, .rcx });
    // Destination pointer, fill byte and byte count, in that order.
    try self.genSetReg(.rdi, .usize, dst_ptr, .{});
    try self.genSetReg(.al, .u8, value, opts);
    try self.genSetReg(.rcx, .usize, len, .{});
    try self.asmOpOnly(.{ .@"rep _sb", .sto });
}
  23655 
  23656 fn genExternSymbolRef(
  23657     self: *CodeGen,
  23658     comptime tag: Mir.Inst.Tag,
  23659     lib: ?[]const u8,
  23660     callee: []const u8,
  23661 ) InnerError!void {
  23662     if (self.bin_file.cast(.coff)) |coff_file| {
  23663         const global_index = try coff_file.getGlobalSymbol(callee, lib);
  23664         const scratch_reg = abi.getCAbiLinkerScratchReg(self.fn_type.fnCallingConvention(self.pt.zcu));
  23665         _ = try self.addInst(.{
  23666             .tag = .mov,
  23667             .ops = .import_reloc,
  23668             .data = .{ .rx = .{
  23669                 .r1 = scratch_reg,
  23670                 .payload = try self.addExtra(bits.SymbolOffset{
  23671                     .sym_index = link.File.Coff.global_symbol_bit | global_index,
  23672                 }),
  23673             } },
  23674         });
  23675         switch (tag) {
  23676             .mov => {},
  23677             .call => try self.asmRegister(.{ ._, .call }, scratch_reg),
  23678             else => unreachable,
  23679         }
  23680     } else return self.fail("TODO implement calling extern functions", .{});
  23681 }
  23682 
  23683 fn genLazySymbolRef(
  23684     self: *CodeGen,
  23685     comptime tag: Mir.Inst.Tag,
  23686     reg: Register,
  23687     lazy_sym: link.File.LazySymbol,
  23688 ) InnerError!void {
  23689     const pt = self.pt;
  23690     if (self.bin_file.cast(.elf)) |elf_file| {
  23691         const zo = elf_file.zigObjectPtr().?;
  23692         const sym_index = zo.getOrCreateMetadataForLazySymbol(elf_file, pt, lazy_sym) catch |err|
  23693             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  23694         if (self.mod.pic) {
  23695             switch (tag) {
  23696                 .lea, .call => try self.genSetReg(reg, .usize, .{
  23697                     .lea_symbol = .{ .sym_index = sym_index },
  23698                 }, .{}),
  23699                 .mov => try self.genSetReg(reg, .usize, .{
  23700                     .load_symbol = .{ .sym_index = sym_index },
  23701                 }, .{}),
  23702                 else => unreachable,
  23703             }
  23704             switch (tag) {
  23705                 .lea, .mov => {},
  23706                 .call => try self.asmRegister(.{ ._, .call }, reg),
  23707                 else => unreachable,
  23708             }
  23709         } else switch (tag) {
  23710             .lea, .mov => try self.asmRegisterMemory(.{ ._, tag }, reg.to64(), .{
  23711                 .base = .{ .reloc = sym_index },
  23712                 .mod = .{ .rm = .{ .size = .qword } },
  23713             }),
  23714             .call => try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym_index })),
  23715             else => unreachable,
  23716         }
  23717     } else if (self.bin_file.cast(.plan9)) |p9_file| {
  23718         const atom_index = p9_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
  23719             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  23720         var atom = p9_file.getAtom(atom_index);
  23721         _ = atom.getOrCreateOffsetTableEntry(p9_file);
  23722         const got_addr = atom.getOffsetTableAddress(p9_file);
  23723         const got_mem: Memory = .{
  23724             .base = .{ .reg = .ds },
  23725             .mod = .{ .rm = .{
  23726                 .size = .qword,
  23727                 .disp = @intCast(got_addr),
  23728             } },
  23729         };
  23730         switch (tag) {
  23731             .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem),
  23732             .call => try self.asmMemory(.{ ._, .call }, got_mem),
  23733             else => unreachable,
  23734         }
  23735         switch (tag) {
  23736             .lea, .call => {},
  23737             .mov => try self.asmRegisterMemory(
  23738                 .{ ._, tag },
  23739                 reg.to64(),
  23740                 .initSib(.qword, .{ .base = .{ .reg = reg.to64() } }),
  23741             ),
  23742             else => unreachable,
  23743         }
  23744     } else if (self.bin_file.cast(.coff)) |coff_file| {
  23745         const atom_index = coff_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
  23746             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  23747         const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
  23748         switch (tag) {
  23749             .lea, .call => try self.genSetReg(reg, .usize, .{ .lea_got = sym_index }, .{}),
  23750             .mov => try self.genSetReg(reg, .usize, .{ .load_got = sym_index }, .{}),
  23751             else => unreachable,
  23752         }
  23753         switch (tag) {
  23754             .lea, .mov => {},
  23755             .call => try self.asmRegister(.{ ._, .call }, reg),
  23756             else => unreachable,
  23757         }
  23758     } else if (self.bin_file.cast(.macho)) |macho_file| {
  23759         const zo = macho_file.getZigObject().?;
  23760         const sym_index = zo.getOrCreateMetadataForLazySymbol(macho_file, pt, lazy_sym) catch |err|
  23761             return self.fail("{s} creating lazy symbol", .{@errorName(err)});
  23762         const sym = zo.symbols.items[sym_index];
  23763         switch (tag) {
  23764             .lea, .call => try self.genSetReg(reg, .usize, .{
  23765                 .lea_symbol = .{ .sym_index = sym.nlist_idx },
  23766             }, .{}),
  23767             .mov => try self.genSetReg(reg, .usize, .{
  23768                 .load_symbol = .{ .sym_index = sym.nlist_idx },
  23769             }, .{}),
  23770             else => unreachable,
  23771         }
  23772         switch (tag) {
  23773             .lea, .mov => {},
  23774             .call => try self.asmRegister(.{ ._, .call }, reg),
  23775             else => unreachable,
  23776         }
  23777     } else {
  23778         return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)});
  23779     }
  23780 }
  23781 
  23782 fn airIntFromPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
  23783     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  23784     const result = result: {
  23785         // TODO: handle case where the operand is a slice not a raw pointer
  23786         const src_mcv = try self.resolveInst(un_op);
  23787         if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv;
  23788 
  23789         const dst_mcv = try self.allocRegOrMem(inst, true);
  23790         const dst_ty = self.typeOfIndex(inst);
  23791         try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
  23792         break :result dst_mcv;
  23793     };
  23794     return self.finishAir(inst, result, .{ un_op, .none, .none });
  23795 }
  23796 
/// Lowers the AIR `bitcast` instruction from the operand's type to the
/// instruction's result type.
///
/// The operand's storage is reused when possible; otherwise the value is
/// copied into a newly allocated register or stack slot. When the
/// destination type has fewer value bits than its ABI size provides (and is
/// not a float or vector, and is not a same-signedness integer cast), the
/// highest limb is truncated to the destination type and any remaining limbs
/// are filled with the sign or with zero.
fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_ty = self.typeOfIndex(inst);
    const src_ty = self.typeOf(ty_op.operand);

    const result = result: {
        const src_mcv = try self.resolveInst(ty_op.operand);
        // Pointer-to-pointer casts: a frame address is returned as is, anything
        // else is returned directly only if the operand can be reused.
        if (dst_ty.isPtrAtRuntime(zcu) and src_ty.isPtrAtRuntime(zcu)) switch (src_mcv) {
            .lea_frame => break :result src_mcv,
            else => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv,
        };

        const dst_rc = self.regSetForType(dst_ty);
        const src_rc = self.regSetForType(src_ty);

        // Keep the source register locked while the destination is allocated.
        const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
        defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

        // Reuse the operand only if the destination register class covers the
        // source's and the destination is not larger than the source; otherwise
        // allocate a destination and copy into it.
        const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
            const dst_mcv = try self.allocRegOrMem(inst, true);
            // The copy is typed with the smaller of the two types; on equal
            // sizes the choice depends on `isBase()` of both values.
            try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) {
                .lt => dst_ty,
                .eq => if (!dst_mcv.isBase() or src_mcv.isBase()) dst_ty else src_ty,
                .gt => src_ty,
            }, dst_mcv, src_mcv, .{});
            break :dst dst_mcv;
        };

        // Float destinations need no further fix-up.
        if (dst_ty.isRuntimeFloat()) break :result dst_mcv;

        // Integer-to-integer casts with matching signedness need no fix-up.
        if (dst_ty.isAbiInt(zcu) and src_ty.isAbiInt(zcu) and
            dst_ty.intInfo(zcu).signedness == src_ty.intInfo(zcu).signedness) break :result dst_mcv;

        // No fix-up either when the value bits fill the whole ABI size, or for
        // vectors.
        const abi_size = dst_ty.abiSize(zcu);
        const bit_size = dst_ty.bitSize(zcu);
        if (abi_size * 8 <= bit_size or dst_ty.isVector(zcu)) break :result dst_mcv;

        // Locate the highest 64-bit limb that contains value bits.
        const dst_limbs_len = std.math.divCeil(u31, @intCast(bit_size), 64) catch unreachable;
        const high_mcv: MCValue = switch (dst_mcv) {
            .register => |dst_reg| .{ .register = dst_reg },
            .register_pair => |dst_regs| .{ .register = dst_regs[1] },
            else => dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
        };
        // Truncate the high limb in a register, loading it into a temporary
        // first if it does not already live in one.
        const high_reg = if (high_mcv.isRegister())
            high_mcv.getReg().?
        else
            try self.copyToTmpRegister(.usize, high_mcv);
        const high_lock = self.register_manager.lockReg(high_reg);
        defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
        try self.truncateRegister(dst_ty, high_reg);
        // Write the truncated limb back when it came from memory.
        if (!high_mcv.isRegister()) try self.genCopy(
            if (abi_size <= 8) dst_ty else .usize,
            high_mcv,
            .{ .register = high_reg },
            .{},
        );
        // Fill every limb past the value bits, up to the ABI size.
        var offset = dst_limbs_len * 8;
        if (offset < abi_size) {
            const dst_signedness: std.builtin.Signedness = if (dst_ty.isAbiInt(zcu))
                dst_ty.intInfo(zcu).signedness
            else
                .unsigned;
            // Signed: shift the high limb by 63 (presumably an arithmetic right
            // shift, yielding the sign fill) and use it; unsigned: use zero.
            const ext_mcv: MCValue = ext_mcv: switch (dst_signedness) {
                .signed => {
                    try self.asmRegisterImmediate(.{ ._r, .sa }, high_reg, .u(63));
                    break :ext_mcv .{ .register = high_reg };
                },
                .unsigned => .{ .immediate = 0 },
            };
            while (offset < abi_size) : (offset += 8) {
                const limb_mcv: MCValue = switch (dst_mcv) {
                    .register => |dst_reg| .{ .register = dst_reg },
                    .register_pair => |dst_regs| .{ .register = dst_regs[@divExact(offset, 8)] },
                    else => dst_mcv.address().offset(offset).deref(),
                };
                const limb_lock = if (limb_mcv.isRegister())
                    self.register_manager.lockReg(limb_mcv.getReg().?)
                else
                    null;
                defer if (limb_lock) |lock| self.register_manager.unlockReg(lock);
                try self.genCopy(.usize, limb_mcv, ext_mcv, .{});
            }
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
  23887 
  23888 fn airArrayToSlice(self: *CodeGen, inst: Air.Inst.Index) !void {
  23889     const pt = self.pt;
  23890     const zcu = pt.zcu;
  23891     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  23892 
  23893     const slice_ty = self.typeOfIndex(inst);
  23894     const ptr_ty = self.typeOf(ty_op.operand);
  23895     const ptr = try self.resolveInst(ty_op.operand);
  23896     const array_ty = ptr_ty.childType(zcu);
  23897     const array_len = array_ty.arrayLen(zcu);
  23898 
  23899     const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu));
  23900     try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr, .{});
  23901     try self.genSetMem(
  23902         .{ .frame = frame_index },
  23903         @intCast(ptr_ty.abiSize(zcu)),
  23904         .usize,
  23905         .{ .immediate = array_len },
  23906         .{},
  23907     );
  23908 
  23909     const result = MCValue{ .load_frame = .{ .index = frame_index } };
  23910     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  23911 }
  23912 
/// Lowers the AIR integer-to-float conversion instruction.
///
/// f16, f80 and f128 destinations, and f32/f64 destinations whose source
/// needs more than 8 bytes, are lowered to a library call named
/// `__float[un]{int}i{float}f`. Sources wider than 128 bits on that path fail
/// with a TODO error. Everything else uses a `cvtsi2ss`/`cvtsi2sd`
/// instruction, in its AVX form when AVX is available.
fn airFloatFromInt(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const dst_ty = self.typeOfIndex(inst);
    const dst_bits = dst_ty.floatBits(self.target.*);

    const src_ty = self.typeOf(ty_op.operand);
    const src_bits: u32 = @intCast(src_ty.bitSize(zcu));
    const src_signedness =
        if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned;
    // Size in bytes of the source operand for the conversion: an unsigned
    // source is given one extra bit, and at least 32 bits are always used.
    const src_size = std.math.divCeil(u32, @max(switch (src_signedness) {
        .signed => src_bits,
        .unsigned => src_bits + 1,
    }, 32), 8) catch unreachable;

    const result = result: {
        // Decide whether this conversion has to go through a library call.
        if (switch (dst_bits) {
            16, 80, 128 => true,
            32, 64 => src_size > 8,
            else => unreachable,
        }) {
            if (src_bits > 128) return self.fail("TODO implement airFloatFromInt from {} to {}", .{
                src_ty.fmt(pt), dst_ty.fmt(pt),
            });

            // The buffer is sized for the longest possible name (the "un" form).
            var callee_buf: ["__floatun?i?f".len]u8 = undefined;
            break :result try self.genCall(.{ .lib = .{
                .return_type = dst_ty.toIntern(),
                .param_types = &.{src_ty.toIntern()},
                .callee = std.fmt.bufPrint(&callee_buf, "__float{s}{c}i{c}f", .{
                    switch (src_signedness) {
                        .signed => "",
                        .unsigned => "un",
                    },
                    intCompilerRtAbiName(src_bits),
                    floatCompilerRtAbiName(dst_bits),
                }) catch unreachable,
            } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
        }

        // Inline path: the source must be in a register.
        const src_mcv = try self.resolveInst(ty_op.operand);
        const src_reg = if (src_mcv.isRegister())
            src_mcv.getReg().?
        else
            try self.copyToTmpRegister(src_ty, src_mcv);
        const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
        defer self.register_manager.unlockReg(src_lock);

        // The register is read at `src_size` bytes, which may be wider than the
        // source type; truncate it to the source type first in that case.
        if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);

        const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty));
        const dst_mcv = MCValue{ .register = dst_reg };
        const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
        defer self.register_manager.unlockReg(dst_lock);

        // Pick the conversion instruction; combinations without one fail below.
        const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(zcu)) {
            .float => switch (dst_ty.floatBits(self.target.*)) {
                32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 },
                64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 },
                16, 80, 128 => null,
                else => unreachable,
            },
            else => null,
        }) orelse return self.fail("TODO implement airFloatFromInt from {} to {}", .{
            src_ty.fmt(pt), dst_ty.fmt(pt),
        });
        const dst_alias = dst_reg.to128();
        const src_alias = registerAlias(src_reg, src_size);
        switch (mir_tag[0]) {
            // The AVX forms take three operands; the destination is passed twice.
            .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias),
            else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias),
        }

        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
  23992 
  23993 fn airIntFromFloat(self: *CodeGen, inst: Air.Inst.Index) !void {
  23994     const pt = self.pt;
  23995     const zcu = pt.zcu;
  23996     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  23997 
  23998     const dst_ty = self.typeOfIndex(inst);
  23999     const dst_bits: u32 = @intCast(dst_ty.bitSize(zcu));
  24000     const dst_signedness =
  24001         if (dst_ty.isAbiInt(zcu)) dst_ty.intInfo(zcu).signedness else .unsigned;
  24002     const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) {
  24003         .signed => dst_bits,
  24004         .unsigned => dst_bits + 1,
  24005     }, 32), 8) catch unreachable;
  24006 
  24007     const src_ty = self.typeOf(ty_op.operand);
  24008     const src_bits = src_ty.floatBits(self.target.*);
  24009 
  24010     const result = result: {
  24011         if (switch (src_bits) {
  24012             16, 80, 128 => true,
  24013             32, 64 => dst_size > 8,
  24014             else => unreachable,
  24015         }) {
  24016             if (dst_bits > 128) return self.fail("TODO implement airIntFromFloat from {} to {}", .{
  24017                 src_ty.fmt(pt), dst_ty.fmt(pt),
  24018             });
  24019 
  24020             var callee_buf: ["__fixuns?f?i".len]u8 = undefined;
  24021             break :result try self.genCall(.{ .lib = .{
  24022                 .return_type = dst_ty.toIntern(),
  24023                 .param_types = &.{src_ty.toIntern()},
  24024                 .callee = std.fmt.bufPrint(&callee_buf, "__fix{s}{c}f{c}i", .{
  24025                     switch (dst_signedness) {
  24026                         .signed => "",
  24027                         .unsigned => "uns",
  24028                     },
  24029                     floatCompilerRtAbiName(src_bits),
  24030                     intCompilerRtAbiName(dst_bits),
  24031                 }) catch unreachable,
  24032             } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
  24033         }
  24034 
  24035         const src_mcv = try self.resolveInst(ty_op.operand);
  24036         const src_reg = if (src_mcv.isRegister())
  24037             src_mcv.getReg().?
  24038         else
  24039             try self.copyToTmpRegister(src_ty, src_mcv);
  24040         const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
  24041         defer self.register_manager.unlockReg(src_lock);
  24042 
  24043         const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty));
  24044         const dst_mcv = MCValue{ .register = dst_reg };
  24045         const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  24046         defer self.register_manager.unlockReg(dst_lock);
  24047 
  24048         try self.asmRegisterRegister(
  24049             switch (src_bits) {
  24050                 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si },
  24051                 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si },
  24052                 else => unreachable,
  24053             },
  24054             registerAlias(dst_reg, dst_size),
  24055             src_reg.to128(),
  24056         );
  24057 
  24058         if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg);
  24059 
  24060         break :result dst_mcv;
  24061     };
  24062     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  24063 }
  24064 
/// Lowers the AIR `cmpxchg` instruction.
///
/// Values of at most 8 bytes use `lock cmpxchg` with the expected value in
/// `rax` and the new value in a temporary register. Larger values use the
/// `.{ .@"lock _16b", .cmpxchg }` op with the expected value in `rax`/`rdx`
/// and the new value in `rbx`/`rcx`. `rax`, `rdx`, `rbx` and `rcx` are spilled
/// and locked for the duration in both cases.
///
/// Result: for values of at most 8 bytes, `rax` paired with the `ne` flag
/// condition; for larger values, a memory location holding `rax` at offset 0,
/// `rdx` at offset 8 and a bool derived from the `ne` condition at offset 16.
/// If the instruction is unused the result is `.unreach`.
fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data;

    const ptr_ty = self.typeOf(extra.ptr);
    const val_ty = self.typeOf(extra.expected_value);
    const val_abi_size: u32 = @intCast(val_ty.abiSize(pt.zcu));

    // Reserve the fixed registers used by the instruction.
    try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
    const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
    defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);

    // Expected value: rax, plus rdx for the second 8 bytes of a wide value.
    const exp_mcv = try self.resolveInst(extra.expected_value);
    if (val_abi_size > 8) {
        // Wide values are read through their address, one 8-byte half at a time.
        const exp_addr_mcv: MCValue = switch (exp_mcv) {
            .memory, .indirect, .load_frame => exp_mcv.address(),
            else => .{ .register = try self.copyToTmpRegister(.usize, exp_mcv.address()) },
        };
        const exp_addr_lock =
            if (exp_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
        defer if (exp_addr_lock) |lock| self.register_manager.unlockReg(lock);

        try self.genSetReg(.rax, .usize, exp_addr_mcv.deref(), .{});
        try self.genSetReg(.rdx, .usize, exp_addr_mcv.offset(8).deref(), .{});
    } else try self.genSetReg(.rax, val_ty, exp_mcv, .{});

    // New value: rbx/rcx for a wide value, otherwise a temporary register.
    // `new_reg` is null in the wide case.
    const new_mcv = try self.resolveInst(extra.new_value);
    const new_reg = if (val_abi_size > 8) new: {
        const new_addr_mcv: MCValue = switch (new_mcv) {
            .memory, .indirect, .load_frame => new_mcv.address(),
            else => .{ .register = try self.copyToTmpRegister(.usize, new_mcv.address()) },
        };
        const new_addr_lock =
            if (new_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
        defer if (new_addr_lock) |lock| self.register_manager.unlockReg(lock);

        try self.genSetReg(.rbx, .usize, new_addr_mcv.deref(), .{});
        try self.genSetReg(.rcx, .usize, new_addr_mcv.offset(8).deref(), .{});
        break :new null;
    } else try self.copyToTmpRegister(val_ty, new_mcv);
    const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
    defer if (new_lock) |lock| self.register_manager.unlockReg(lock);

    // Build the memory operand for the pointee; pointer values that cannot be
    // dereferenced directly are first copied into a temporary register.
    const ptr_mcv = try self.resolveInst(extra.ptr);
    const mem_size: Memory.Size = .fromSize(val_abi_size);
    const ptr_mem: Memory = switch (ptr_mcv) {
        .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, .{ .size = mem_size }),
        else => .{
            .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
            .mod = .{ .rm = .{ .size = mem_size } },
        },
    };
    // Only `.rm` memory operands are supported here.
    switch (ptr_mem.mod) {
        .rm => {},
        .off => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
    }
    const ptr_lock = switch (ptr_mem.base) {
        .none, .frame, .reloc => null,
        .reg => |reg| self.register_manager.lockReg(reg),
        .table => unreachable,
    };
    defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);

    // Any value currently tracked in eflags is spilled before the compare-exchange.
    try self.spillEflagsIfOccupied();
    if (val_abi_size <= 8) try self.asmMemoryRegister(
        .{ .@"lock _", .cmpxchg },
        ptr_mem,
        registerAlias(new_reg.?, val_abi_size),
    ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);

    const result: MCValue = result: {
        if (self.liveness.isUnused(inst)) break :result .unreach;

        if (val_abi_size <= 8) {
            // The result stays in rax together with the `ne` flag condition;
            // record this instruction as the current owner of eflags.
            self.eflags_inst = inst;
            break :result .{ .register_overflow = .{ .reg = .rax, .eflags = .ne } };
        }

        // Wide result: store both value halves and the flag to memory.
        const dst_mcv = try self.allocRegOrMem(inst, false);
        try self.genCopy(.usize, dst_mcv, .{ .register = .rax }, .{});
        try self.genCopy(.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx }, .{});
        try self.genCopy(.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne }, .{});
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
}
  24152 
  24153 fn atomicOp(
  24154     self: *CodeGen,
  24155     ptr_mcv: MCValue,
  24156     val_mcv: MCValue,
  24157     ptr_ty: Type,
  24158     val_ty: Type,
  24159     unused: bool,
  24160     rmw_op: ?std.builtin.AtomicRmwOp,
  24161     order: std.builtin.AtomicOrder,
  24162 ) InnerError!MCValue {
  24163     const pt = self.pt;
  24164     const zcu = pt.zcu;
  24165     const ptr_lock = switch (ptr_mcv) {
  24166         .register => |reg| self.register_manager.lockReg(reg),
  24167         else => null,
  24168     };
  24169     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  24170 
  24171     const val_lock = switch (val_mcv) {
  24172         .register => |reg| self.register_manager.lockReg(reg),
  24173         else => null,
  24174     };
  24175     defer if (val_lock) |lock| self.register_manager.unlockReg(lock);
  24176 
  24177     const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu));
  24178     const mem_size: Memory.Size = .fromSize(val_abi_size);
  24179     const ptr_mem: Memory = switch (ptr_mcv) {
  24180         .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, .{ .size = mem_size }),
  24181         else => .{
  24182             .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
  24183             .mod = .{ .rm = .{ .size = mem_size } },
  24184         },
  24185     };
  24186     switch (ptr_mem.mod) {
  24187         .rm => {},
  24188         .off => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}),
  24189     }
  24190     const mem_lock = switch (ptr_mem.base) {
  24191         .none, .frame, .reloc => null,
  24192         .reg => |reg| self.register_manager.lockReg(reg),
  24193         .table => unreachable,
  24194     };
  24195     defer if (mem_lock) |lock| self.register_manager.unlockReg(lock);
  24196 
  24197     const use_sse = rmw_op orelse .Xchg != .Xchg and val_ty.isRuntimeFloat();
  24198     const strat: enum { lock, loop, libcall } = if (use_sse) .loop else switch (rmw_op orelse .Xchg) {
  24199         .Xchg,
  24200         .Add,
  24201         .Sub,
  24202         => if (val_abi_size <= 8) .lock else if (val_abi_size <= 16) .loop else .libcall,
  24203         .And,
  24204         .Or,
  24205         .Xor,
  24206         => if (val_abi_size <= 8 and unused) .lock else if (val_abi_size <= 16) .loop else .libcall,
  24207         .Nand,
  24208         .Max,
  24209         .Min,
  24210         => if (val_abi_size <= 16) .loop else .libcall,
  24211     };
  24212     switch (strat) {
  24213         .lock => {
  24214             const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) {
  24215                 .Xchg => if (unused) .mov else .xchg,
  24216                 .Add => if (unused) .add else .xadd,
  24217                 .Sub => if (unused) .sub else .xadd,
  24218                 .And => .@"and",
  24219                 .Or => .@"or",
  24220                 .Xor => .xor,
  24221                 else => unreachable,
  24222             } else switch (order) {
  24223                 .unordered, .monotonic, .release, .acq_rel => .mov,
  24224                 .acquire => unreachable,
  24225                 .seq_cst => .xchg,
  24226             };
  24227 
  24228             const dst_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24229             const dst_mcv = MCValue{ .register = dst_reg };
  24230             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  24231             defer self.register_manager.unlockReg(dst_lock);
  24232 
  24233             try self.genSetReg(dst_reg, val_ty, val_mcv, .{});
  24234             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
  24235                 try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv);
  24236             }
  24237             try self.asmMemoryRegister(
  24238                 switch (tag) {
  24239                     .mov, .xchg => .{ ._, tag },
  24240                     .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag },
  24241                     else => unreachable,
  24242                 },
  24243                 ptr_mem,
  24244                 registerAlias(dst_reg, val_abi_size),
  24245             );
  24246 
  24247             return if (unused) .unreach else dst_mcv;
  24248         },
  24249         .loop => _ = if (val_abi_size <= 8) {
  24250             const sse_reg: Register = if (use_sse)
  24251                 try self.register_manager.allocReg(null, abi.RegisterClass.sse)
  24252             else
  24253                 undefined;
  24254             const sse_lock =
  24255                 if (use_sse) self.register_manager.lockRegAssumeUnused(sse_reg) else undefined;
  24256             defer if (use_sse) self.register_manager.unlockReg(sse_lock);
  24257 
  24258             const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24259             const tmp_mcv = MCValue{ .register = tmp_reg };
  24260             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  24261             defer self.register_manager.unlockReg(tmp_lock);
  24262 
  24263             try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem);
  24264             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  24265             if (!use_sse and rmw_op orelse .Xchg != .Xchg) {
  24266                 try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }, .{});
  24267             }
  24268             if (rmw_op) |op| if (use_sse) {
  24269                 const mir_tag = @as(?Mir.Inst.FixedTag, switch (op) {
  24270                     .Add => switch (val_ty.floatBits(self.target.*)) {
  24271                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
  24272                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
  24273                         else => null,
  24274                     },
  24275                     .Sub => switch (val_ty.floatBits(self.target.*)) {
  24276                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
  24277                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
  24278                         else => null,
  24279                     },
  24280                     .Min => switch (val_ty.floatBits(self.target.*)) {
  24281                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
  24282                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
  24283                         else => null,
  24284                     },
  24285                     .Max => switch (val_ty.floatBits(self.target.*)) {
  24286                         32 => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
  24287                         64 => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
  24288                         else => null,
  24289                     },
  24290                     else => unreachable,
  24291                 }) orelse return self.fail("TODO implement atomicOp of {s} for {}", .{
  24292                     @tagName(op), val_ty.fmt(pt),
  24293                 });
  24294                 try self.genSetReg(sse_reg, val_ty, .{ .register = .rax }, .{});
  24295                 switch (mir_tag[0]) {
  24296                     .v_ss, .v_sd => if (val_mcv.isBase()) try self.asmRegisterRegisterMemory(
  24297                         mir_tag,
  24298                         sse_reg.to128(),
  24299                         sse_reg.to128(),
  24300                         try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }),
  24301                     ) else try self.asmRegisterRegisterRegister(
  24302                         mir_tag,
  24303                         sse_reg.to128(),
  24304                         sse_reg.to128(),
  24305                         (if (val_mcv.isRegister())
  24306                             val_mcv.getReg().?
  24307                         else
  24308                             try self.copyToTmpRegister(val_ty, val_mcv)).to128(),
  24309                     ),
  24310                     ._ss, ._sd => if (val_mcv.isBase()) try self.asmRegisterMemory(
  24311                         mir_tag,
  24312                         sse_reg.to128(),
  24313                         try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }),
  24314                     ) else try self.asmRegisterRegister(
  24315                         mir_tag,
  24316                         sse_reg.to128(),
  24317                         (if (val_mcv.isRegister())
  24318                             val_mcv.getReg().?
  24319                         else
  24320                             try self.copyToTmpRegister(val_ty, val_mcv)).to128(),
  24321                     ),
  24322                     else => unreachable,
  24323                 }
  24324                 try self.genSetReg(tmp_reg, val_ty, .{ .register = sse_reg }, .{});
  24325             } else switch (op) {
  24326                 .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv, .{}),
  24327                 .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv),
  24328                 .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv),
  24329                 .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv),
  24330                 .Nand => {
  24331                     try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv);
  24332                     try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv);
  24333                 },
  24334                 .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv),
  24335                 .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv),
  24336                 .Min, .Max => {
  24337                     const cc: Condition = switch (if (val_ty.isAbiInt(zcu))
  24338                         val_ty.intInfo(zcu).signedness
  24339                     else
  24340                         .unsigned) {
  24341                         .unsigned => switch (op) {
  24342                             .Min => .a,
  24343                             .Max => .b,
  24344                             else => unreachable,
  24345                         },
  24346                         .signed => switch (op) {
  24347                             .Min => .g,
  24348                             .Max => .l,
  24349                             else => unreachable,
  24350                         },
  24351                     };
  24352 
  24353                     const cmov_abi_size = @max(val_abi_size, 2);
  24354                     switch (val_mcv) {
  24355                         .register => |val_reg| {
  24356                             try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
  24357                             try self.asmCmovccRegisterRegister(
  24358                                 cc,
  24359                                 registerAlias(tmp_reg, cmov_abi_size),
  24360                                 registerAlias(val_reg, cmov_abi_size),
  24361                             );
  24362                         },
  24363                         .memory, .indirect, .load_frame => {
  24364                             try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
  24365                             try self.asmCmovccRegisterMemory(
  24366                                 cc,
  24367                                 registerAlias(tmp_reg, cmov_abi_size),
  24368                                 try val_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }),
  24369                             );
  24370                         },
  24371                         else => {
  24372                             const mat_reg = try self.copyToTmpRegister(val_ty, val_mcv);
  24373                             const mat_lock = self.register_manager.lockRegAssumeUnused(mat_reg);
  24374                             defer self.register_manager.unlockReg(mat_lock);
  24375 
  24376                             try self.genBinOpMir(
  24377                                 .{ ._, .cmp },
  24378                                 val_ty,
  24379                                 tmp_mcv,
  24380                                 .{ .register = mat_reg },
  24381                             );
  24382                             try self.asmCmovccRegisterRegister(
  24383                                 cc,
  24384                                 registerAlias(tmp_reg, cmov_abi_size),
  24385                                 registerAlias(mat_reg, cmov_abi_size),
  24386                             );
  24387                         },
  24388                     }
  24389                 },
  24390             };
  24391             try self.asmMemoryRegister(
  24392                 .{ .@"lock _", .cmpxchg },
  24393                 ptr_mem,
  24394                 registerAlias(tmp_reg, val_abi_size),
  24395             );
  24396             _ = try self.asmJccReloc(.ne, loop);
  24397             return if (unused) .unreach else .{ .register = .rax };
  24398         } else {
  24399             try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{
  24400                 .base = ptr_mem.base,
  24401                 .mod = .{ .rm = .{
  24402                     .size = .qword,
  24403                     .index = ptr_mem.mod.rm.index,
  24404                     .scale = ptr_mem.mod.rm.scale,
  24405                     .disp = ptr_mem.mod.rm.disp + 0,
  24406                 } },
  24407             });
  24408             try self.asmRegisterMemory(.{ ._, .mov }, .rdx, .{
  24409                 .base = ptr_mem.base,
  24410                 .mod = .{ .rm = .{
  24411                     .size = .qword,
  24412                     .index = ptr_mem.mod.rm.index,
  24413                     .scale = ptr_mem.mod.rm.scale,
  24414                     .disp = ptr_mem.mod.rm.disp + 8,
  24415                 } },
  24416             });
  24417             const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
  24418             const val_mem_mcv: MCValue = switch (val_mcv) {
  24419                 .memory, .indirect, .load_frame => val_mcv,
  24420                 else => .{ .indirect = .{
  24421                     .reg = try self.copyToTmpRegister(.usize, val_mcv.address()),
  24422                 } },
  24423             };
  24424             const val_lo_mem = try val_mem_mcv.mem(self, .{ .size = .qword });
  24425             const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .{ .size = .qword });
  24426             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
  24427                 try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax);
  24428                 try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx);
  24429             }
  24430             if (rmw_op) |op| switch (op) {
  24431                 .Xchg => {
  24432                     try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem);
  24433                     try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem);
  24434                 },
  24435                 .Add => {
  24436                     try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem);
  24437                     try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem);
  24438                 },
  24439                 .Sub => {
  24440                     try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem);
  24441                     try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem);
  24442                 },
  24443                 .And => {
  24444                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
  24445                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
  24446                 },
  24447                 .Nand => {
  24448                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem);
  24449                     try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem);
  24450                     try self.asmRegister(.{ ._, .not }, .rbx);
  24451                     try self.asmRegister(.{ ._, .not }, .rcx);
  24452                 },
  24453                 .Or => {
  24454                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem);
  24455                     try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem);
  24456                 },
  24457                 .Xor => {
  24458                     try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem);
  24459                     try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem);
  24460                 },
  24461                 .Min, .Max => {
  24462                     const cc: Condition = switch (if (val_ty.isAbiInt(zcu))
  24463                         val_ty.intInfo(zcu).signedness
  24464                     else
  24465                         .unsigned) {
  24466                         .unsigned => switch (op) {
  24467                             .Min => .a,
  24468                             .Max => .b,
  24469                             else => unreachable,
  24470                         },
  24471                         .signed => switch (op) {
  24472                             .Min => .g,
  24473                             .Max => .l,
  24474                             else => unreachable,
  24475                         },
  24476                     };
  24477 
  24478                     const tmp_reg = try self.copyToTmpRegister(.usize, .{ .register = .rcx });
  24479                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  24480                     defer self.register_manager.unlockReg(tmp_lock);
  24481 
  24482                     try self.asmRegisterMemory(.{ ._, .cmp }, .rbx, val_lo_mem);
  24483                     try self.asmRegisterMemory(.{ ._, .sbb }, tmp_reg, val_hi_mem);
  24484                     try self.asmCmovccRegisterMemory(cc, .rbx, val_lo_mem);
  24485                     try self.asmCmovccRegisterMemory(cc, .rcx, val_hi_mem);
  24486                 },
  24487             };
  24488             try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
  24489             _ = try self.asmJccReloc(.ne, loop);
  24490 
  24491             if (unused) return .unreach;
  24492             const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
  24493             try self.asmMemoryRegister(.{ ._, .mov }, .{
  24494                 .base = .{ .frame = dst_mcv.load_frame.index },
  24495                 .mod = .{ .rm = .{
  24496                     .size = .qword,
  24497                     .disp = dst_mcv.load_frame.off + 0,
  24498                 } },
  24499             }, .rax);
  24500             try self.asmMemoryRegister(.{ ._, .mov }, .{
  24501                 .base = .{ .frame = dst_mcv.load_frame.index },
  24502                 .mod = .{ .rm = .{
  24503                     .size = .qword,
  24504                     .disp = dst_mcv.load_frame.off + 8,
  24505                 } },
  24506             }, .rdx);
  24507             return dst_mcv;
  24508         },
  24509         .libcall => return self.fail("TODO implement x86 atomic libcall", .{}),
  24510     }
  24511 }
  24512 
  24513 fn airAtomicRmw(self: *CodeGen, inst: Air.Inst.Index) !void {
  24514     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  24515     const extra = self.air.extraData(Air.AtomicRmw, pl_op.payload).data;
  24516 
  24517     try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx });
  24518     const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx });
  24519     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
  24520 
  24521     const unused = self.liveness.isUnused(inst);
  24522 
  24523     const ptr_ty = self.typeOf(pl_op.operand);
  24524     const ptr_mcv = try self.resolveInst(pl_op.operand);
  24525 
  24526     const val_ty = self.typeOf(extra.operand);
  24527     const val_mcv = try self.resolveInst(extra.operand);
  24528 
  24529     const result =
  24530         try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
  24531     return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none });
  24532 }
  24533 
  24534 fn airAtomicLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
  24535     const atomic_load = self.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load;
  24536 
  24537     const ptr_ty = self.typeOf(atomic_load.ptr);
  24538     const ptr_mcv = try self.resolveInst(atomic_load.ptr);
  24539     const ptr_lock = switch (ptr_mcv) {
  24540         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  24541         else => null,
  24542     };
  24543     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
  24544 
  24545     const dst_mcv =
  24546         if (self.reuseOperand(inst, atomic_load.ptr, 0, ptr_mcv))
  24547         ptr_mcv
  24548     else
  24549         try self.allocRegOrMem(inst, true);
  24550 
  24551     try self.load(dst_mcv, ptr_ty, ptr_mcv);
  24552     return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none });
  24553 }
  24554 
  24555 fn airAtomicStore(self: *CodeGen, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
  24556     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  24557 
  24558     const ptr_ty = self.typeOf(bin_op.lhs);
  24559     const ptr_mcv = try self.resolveInst(bin_op.lhs);
  24560 
  24561     const val_ty = self.typeOf(bin_op.rhs);
  24562     const val_mcv = try self.resolveInst(bin_op.rhs);
  24563 
  24564     const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
  24565     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
  24566 }
  24567 
  24568 fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
  24569     const pt = self.pt;
  24570     const zcu = pt.zcu;
  24571     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  24572 
  24573     result: {
  24574         if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;
  24575 
  24576         try self.spillRegisters(&.{ .rax, .rdi, .rsi, .rcx });
  24577         const reg_locks = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdi, .rsi, .rcx });
  24578         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  24579 
  24580         const dst = try self.resolveInst(bin_op.lhs);
  24581         const dst_ty = self.typeOf(bin_op.lhs);
  24582         const dst_locks: [2]?RegisterLock = switch (dst) {
  24583             .register => |dst_reg| .{ self.register_manager.lockRegAssumeUnused(dst_reg), null },
  24584             .register_pair => |dst_regs| .{
  24585                 self.register_manager.lockRegAssumeUnused(dst_regs[0]),
  24586                 self.register_manager.lockRegAssumeUnused(dst_regs[1]),
  24587             },
  24588             else => @splat(null),
  24589         };
  24590         for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  24591 
  24592         const src_val = try self.resolveInst(bin_op.rhs);
  24593         const elem_ty = self.typeOf(bin_op.rhs);
  24594         const src_val_lock: ?RegisterLock = switch (src_val) {
  24595             .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  24596             else => null,
  24597         };
  24598         defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock);
  24599 
  24600         const elem_abi_size: u31 = @intCast(elem_ty.abiSize(zcu));
  24601 
  24602         if (elem_abi_size == 1) {
  24603             const dst_ptr: MCValue = switch (dst_ty.ptrSize(zcu)) {
  24604                 .slice => switch (dst) {
  24605                     .register_pair => |dst_regs| .{ .register = dst_regs[0] },
  24606                     else => dst,
  24607                 },
  24608                 .one => dst,
  24609                 .c, .many => unreachable,
  24610             };
  24611             const len: MCValue = switch (dst_ty.ptrSize(zcu)) {
  24612                 .slice => switch (dst) {
  24613                     .register_pair => |dst_regs| .{ .register = dst_regs[1] },
  24614                     else => dst.address().offset(8).deref(),
  24615                 },
  24616                 .one => .{ .immediate = dst_ty.childType(zcu).arrayLen(zcu) },
  24617                 .c, .many => unreachable,
  24618             };
  24619             const len_lock: ?RegisterLock = switch (len) {
  24620                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  24621                 else => null,
  24622             };
  24623             defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
  24624 
  24625             try self.genInlineMemset(dst_ptr, src_val, len, .{ .safety = safety });
  24626             break :result;
  24627         }
  24628 
  24629         // Store the first element, and then rely on memcpy copying forwards.
  24630         // Length zero requires a runtime check - so we handle arrays specially
  24631         // here to elide it.
  24632         switch (dst_ty.ptrSize(zcu)) {
  24633             .slice => {
  24634                 const slice_ptr_ty = dst_ty.slicePtrFieldType(zcu);
  24635 
  24636                 const dst_ptr: MCValue = switch (dst) {
  24637                     .register_pair => |dst_regs| .{ .register = dst_regs[0] },
  24638                     else => dst,
  24639                 };
  24640                 const len: MCValue = switch (dst) {
  24641                     .register_pair => |dst_regs| .{ .register = dst_regs[1] },
  24642                     else => dst.address().offset(8).deref(),
  24643                 };
  24644 
  24645                 // Used to store the number of elements for comparison.
  24646                 // After comparison, updated to store number of bytes needed to copy.
  24647                 const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24648                 const len_mcv: MCValue = .{ .register = len_reg };
  24649                 const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
  24650                 defer self.register_manager.unlockReg(len_lock);
  24651 
  24652                 try self.genSetReg(len_reg, .usize, len, .{});
  24653                 try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg);
  24654 
  24655                 const skip_reloc = try self.asmJccReloc(.z, undefined);
  24656                 try self.store(slice_ptr_ty, dst_ptr, src_val, .{ .safety = safety });
  24657 
  24658                 const second_elem_ptr_reg =
  24659                     try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24660                 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
  24661                 const second_elem_ptr_lock =
  24662                     self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
  24663                 defer self.register_manager.unlockReg(second_elem_ptr_lock);
  24664 
  24665                 try self.genSetReg(second_elem_ptr_reg, .usize, .{ .register_offset = .{
  24666                     .reg = try self.copyToTmpRegister(.usize, dst_ptr),
  24667                     .off = elem_abi_size,
  24668                 } }, .{});
  24669 
  24670                 try self.genBinOpMir(.{ ._, .sub }, .usize, len_mcv, .{ .immediate = 1 });
  24671                 try self.asmRegisterRegisterImmediate(
  24672                     .{ .i_, .mul },
  24673                     len_reg,
  24674                     len_reg,
  24675                     .s(elem_abi_size),
  24676                 );
  24677                 try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, len_mcv, .{ .no_alias = false });
  24678 
  24679                 self.performReloc(skip_reloc);
  24680             },
  24681             .one => {
  24682                 const elem_ptr_ty = try pt.singleMutPtrType(elem_ty);
  24683 
  24684                 const len = dst_ty.childType(zcu).arrayLen(zcu);
  24685 
  24686                 assert(len != 0); // prevented by Sema
  24687                 try self.store(elem_ptr_ty, dst, src_val, .{ .safety = safety });
  24688 
  24689                 const second_elem_ptr_reg =
  24690                     try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24691                 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
  24692                 const second_elem_ptr_lock =
  24693                     self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
  24694                 defer self.register_manager.unlockReg(second_elem_ptr_lock);
  24695 
  24696                 try self.genSetReg(second_elem_ptr_reg, .usize, .{ .register_offset = .{
  24697                     .reg = try self.copyToTmpRegister(.usize, dst),
  24698                     .off = elem_abi_size,
  24699                 } }, .{});
  24700 
  24701                 const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
  24702                 try self.genInlineMemcpy(second_elem_ptr_mcv, dst, bytes_to_copy, .{ .no_alias = false });
  24703             },
  24704             .c, .many => unreachable,
  24705         }
  24706     }
  24707     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  24708 }
  24709 
  24710 fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void {
  24711     const pt = self.pt;
  24712     const zcu = pt.zcu;
  24713     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
  24714 
  24715     try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
  24716     const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
  24717     defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  24718 
  24719     const dst = try self.resolveInst(bin_op.lhs);
  24720     const dst_ty = self.typeOf(bin_op.lhs);
  24721     const dst_locks: [2]?RegisterLock = switch (dst) {
  24722         .register => |dst_reg| .{ self.register_manager.lockRegAssumeUnused(dst_reg), null },
  24723         .register_pair => |dst_regs| .{
  24724             self.register_manager.lockRegAssumeUnused(dst_regs[0]),
  24725             self.register_manager.lockReg(dst_regs[1]),
  24726         },
  24727         else => @splat(null),
  24728     };
  24729     for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  24730 
  24731     const src = try self.resolveInst(bin_op.rhs);
  24732     const src_locks: [2]?RegisterLock = switch (src) {
  24733         .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null },
  24734         .register_pair => |src_regs| .{
  24735             self.register_manager.lockRegAssumeUnused(src_regs[0]),
  24736             self.register_manager.lockRegAssumeUnused(src_regs[1]),
  24737         },
  24738         else => @splat(null),
  24739     };
  24740     for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
  24741 
  24742     const len: MCValue = switch (dst_ty.ptrSize(zcu)) {
  24743         .slice => len: {
  24744             const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24745             const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
  24746             defer self.register_manager.unlockReg(len_lock);
  24747 
  24748             switch (dst) {
  24749                 .register_pair => |dst_regs| try self.asmRegisterRegisterImmediate(
  24750                     .{ .i_, .mul },
  24751                     len_reg,
  24752                     dst_regs[1],
  24753                     .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))),
  24754                 ),
  24755                 else => try self.asmRegisterMemoryImmediate(
  24756                     .{ .i_, .mul },
  24757                     len_reg,
  24758                     try dst.address().offset(8).deref().mem(self, .{ .size = .qword }),
  24759                     .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))),
  24760                 ),
  24761             }
  24762             break :len .{ .register = len_reg };
  24763         },
  24764         .one => len: {
  24765             const array_ty = dst_ty.childType(zcu);
  24766             break :len .{ .immediate = array_ty.arrayLen(zcu) * array_ty.childType(zcu).abiSize(zcu) };
  24767         },
  24768         .c, .many => unreachable,
  24769     };
  24770     const len_lock: ?RegisterLock = switch (len) {
  24771         .register => |reg| self.register_manager.lockReg(reg),
  24772         else => null,
  24773     };
  24774     defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
  24775 
  24776     const dst_ptr: MCValue = switch (dst) {
  24777         .register_pair => |dst_regs| .{ .register = dst_regs[0] },
  24778         else => dst,
  24779     };
  24780     const src_ptr: MCValue = switch (src) {
  24781         .register_pair => |src_regs| .{ .register = src_regs[0] },
  24782         else => src,
  24783     };
  24784 
  24785     try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{ .no_alias = true });
  24786 
  24787     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
  24788 }
  24789 
  24790 fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void {
  24791     const pt = self.pt;
  24792     const zcu = pt.zcu;
  24793     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  24794     const inst_ty = self.typeOfIndex(inst);
  24795     const enum_ty = self.typeOf(un_op);
  24796 
  24797     // We need a properly aligned and sized call frame to be able to call this function.
  24798     {
  24799         const needed_call_frame: FrameAlloc = .init(.{
  24800             .size = inst_ty.abiSize(zcu),
  24801             .alignment = inst_ty.abiAlignment(zcu),
  24802         });
  24803         const frame_allocs_slice = self.frame_allocs.slice();
  24804         const stack_frame_size =
  24805             &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)];
  24806         stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
  24807         const stack_frame_align =
  24808             &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)];
  24809         stack_frame_align.* = stack_frame_align.max(needed_call_frame.abi_align);
  24810     }
  24811 
  24812     try self.spillEflagsIfOccupied();
  24813     try self.spillCallerPreservedRegs(.auto);
  24814 
  24815     const param_regs = abi.getCAbiIntParamRegs(.auto);
  24816 
  24817     const dst_mcv = try self.allocRegOrMem(inst, false);
  24818     try self.genSetReg(param_regs[0], .usize, dst_mcv.address(), .{});
  24819 
  24820     const operand = try self.resolveInst(un_op);
  24821     try self.genSetReg(param_regs[1], enum_ty, operand, .{});
  24822 
  24823     const enum_lazy_sym: link.File.LazySymbol = .{ .kind = .code, .ty = enum_ty.toIntern() };
  24824     try self.genLazySymbolRef(.call, abi.getCAbiLinkerScratchReg(self.fn_type.fnCallingConvention(zcu)), enum_lazy_sym);
  24825 
  24826     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  24827 }
  24828 
  24829 fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void {
  24830     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  24831 
  24832     const err_ty = self.typeOf(un_op);
  24833     const err_mcv = try self.resolveInst(un_op);
  24834     const err_reg = try self.copyToTmpRegister(err_ty, err_mcv);
  24835     const err_lock = self.register_manager.lockRegAssumeUnused(err_reg);
  24836     defer self.register_manager.unlockReg(err_lock);
  24837 
  24838     const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24839     const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
  24840     defer self.register_manager.unlockReg(addr_lock);
  24841     const anyerror_lazy_sym: link.File.LazySymbol = .{ .kind = .const_data, .ty = .anyerror_type };
  24842     try self.genLazySymbolRef(.lea, addr_reg, anyerror_lazy_sym);
  24843 
  24844     const start_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24845     const start_lock = self.register_manager.lockRegAssumeUnused(start_reg);
  24846     defer self.register_manager.unlockReg(start_lock);
  24847 
  24848     const end_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  24849     const end_lock = self.register_manager.lockRegAssumeUnused(end_reg);
  24850     defer self.register_manager.unlockReg(end_lock);
  24851 
  24852     try self.truncateRegister(err_ty, err_reg.to32());
  24853 
  24854     try self.asmRegisterMemory(
  24855         .{ ._, .mov },
  24856         start_reg.to32(),
  24857         .{
  24858             .base = .{ .reg = addr_reg.to64() },
  24859             .mod = .{ .rm = .{
  24860                 .size = .dword,
  24861                 .index = err_reg.to64(),
  24862                 .scale = .@"4",
  24863                 .disp = (1 - 1) * 4,
  24864             } },
  24865         },
  24866     );
  24867     try self.asmRegisterMemory(
  24868         .{ ._, .mov },
  24869         end_reg.to32(),
  24870         .{
  24871             .base = .{ .reg = addr_reg.to64() },
  24872             .mod = .{ .rm = .{
  24873                 .size = .dword,
  24874                 .index = err_reg.to64(),
  24875                 .scale = .@"4",
  24876                 .disp = (2 - 1) * 4,
  24877             } },
  24878         },
  24879     );
  24880     try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32());
  24881     try self.asmRegisterMemory(
  24882         .{ ._, .lea },
  24883         start_reg.to64(),
  24884         .{
  24885             .base = .{ .reg = addr_reg.to64() },
  24886             .mod = .{ .rm = .{
  24887                 .size = .dword,
  24888                 .index = start_reg.to64(),
  24889             } },
  24890         },
  24891     );
  24892     try self.asmRegisterMemory(
  24893         .{ ._, .lea },
  24894         end_reg.to32(),
  24895         .{
  24896             .base = .{ .reg = end_reg.to64() },
  24897             .mod = .{ .rm = .{
  24898                 .size = .byte,
  24899                 .disp = -1,
  24900             } },
  24901         },
  24902     );
  24903 
  24904     const dst_mcv = try self.allocRegOrMem(inst, false);
  24905     try self.asmMemoryRegister(
  24906         .{ ._, .mov },
  24907         .{
  24908             .base = .{ .frame = dst_mcv.load_frame.index },
  24909             .mod = .{ .rm = .{
  24910                 .size = .qword,
  24911                 .disp = dst_mcv.load_frame.off,
  24912             } },
  24913         },
  24914         start_reg.to64(),
  24915     );
  24916     try self.asmMemoryRegister(
  24917         .{ ._, .mov },
  24918         .{
  24919             .base = .{ .frame = dst_mcv.load_frame.index },
  24920             .mod = .{ .rm = .{
  24921                 .size = .qword,
  24922                 .disp = dst_mcv.load_frame.off + 8,
  24923             } },
  24924         },
  24925         end_reg.to64(),
  24926     );
  24927 
  24928     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
  24929 }
  24930 
  24931 fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
  24932     const pt = self.pt;
  24933     const zcu = pt.zcu;
  24934     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  24935     const vector_ty = self.typeOfIndex(inst);
  24936     const vector_len = vector_ty.vectorLen(zcu);
  24937     const dst_rc = self.regSetForType(vector_ty);
  24938     const scalar_ty = self.typeOf(ty_op.operand);
  24939 
  24940     const result: MCValue = result: {
  24941         switch (scalar_ty.zigTypeTag(zcu)) {
  24942             else => {},
  24943             .bool => {
  24944                 const regs =
  24945                     try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
  24946                 const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
  24947                 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
  24948 
  24949                 try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 }, .{});
  24950                 try self.genSetReg(
  24951                     regs[1],
  24952                     vector_ty,
  24953                     .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) },
  24954                     .{},
  24955                 );
  24956                 const src_mcv = try self.resolveInst(ty_op.operand);
  24957                 const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4);
  24958                 try self.asmCmovccRegisterRegister(
  24959                     switch (src_mcv) {
  24960                         .eflags => |cc| cc,
  24961                         .register => |src_reg| cc: {
  24962                             try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1));
  24963                             break :cc .nz;
  24964                         },
  24965                         else => cc: {
  24966                             try self.asmMemoryImmediate(
  24967                                 .{ ._, .@"test" },
  24968                                 try src_mcv.mem(self, .{ .size = .byte }),
  24969                                 .u(1),
  24970                             );
  24971                             break :cc .nz;
  24972                         },
  24973                     },
  24974                     registerAlias(regs[0], abi_size),
  24975                     registerAlias(regs[1], abi_size),
  24976                 );
  24977                 break :result .{ .register = regs[0] };
  24978             },
  24979             .int => if (self.hasFeature(.avx2)) avx2: {
  24980                 const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(zcu).bits) {
  24981                     else => null,
  24982                     1...8 => switch (vector_len) {
  24983                         else => null,
  24984                         1...32 => .{ .vp_b, .broadcast },
  24985                     },
  24986                     9...16 => switch (vector_len) {
  24987                         else => null,
  24988                         1...16 => .{ .vp_w, .broadcast },
  24989                     },
  24990                     17...32 => switch (vector_len) {
  24991                         else => null,
  24992                         1...8 => .{ .vp_d, .broadcast },
  24993                     },
  24994                     33...64 => switch (vector_len) {
  24995                         else => null,
  24996                         1...4 => .{ .vp_q, .broadcast },
  24997                     },
  24998                     65...128 => switch (vector_len) {
  24999                         else => null,
  25000                         1...2 => .{ .v_i128, .broadcast },
  25001                     },
  25002                 }) orelse break :avx2;
  25003 
  25004                 const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
  25005                 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  25006                 defer self.register_manager.unlockReg(dst_lock);
  25007 
  25008                 const src_mcv = try self.resolveInst(ty_op.operand);
  25009                 if (src_mcv.isBase()) try self.asmRegisterMemory(
  25010                     mir_tag,
  25011                     registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
  25012                     try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }),
  25013                 ) else {
  25014                     if (mir_tag[0] == .v_i128) break :avx2;
  25015                     try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
  25016                     try self.asmRegisterRegister(
  25017                         mir_tag,
  25018                         registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
  25019                         registerAlias(dst_reg, @intCast(scalar_ty.abiSize(zcu))),
  25020                     );
  25021                 }
  25022                 break :result .{ .register = dst_reg };
  25023             } else {
  25024                 const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
  25025                 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
  25026                 defer self.register_manager.unlockReg(dst_lock);
  25027 
  25028                 try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand }, .{});
  25029                 if (vector_len == 1) break :result .{ .register = dst_reg };
  25030 
  25031                 const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu)));
  25032                 const scalar_bits = scalar_ty.intInfo(zcu).bits;
  25033                 if (switch (scalar_bits) {
  25034                     1...8 => true,
  25035                     9...128 => false,
  25036                     else => unreachable,
  25037                 }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister(
  25038                     .{ .vp_, .unpcklbw },
  25039                     dst_alias,
  25040                     dst_alias,
  25041                     dst_alias,
  25042                 ) else try self.asmRegisterRegister(
  25043                     .{ .p_, .unpcklbw },
  25044                     dst_alias,
  25045                     dst_alias,
  25046                 );
  25047                 if (switch (scalar_bits) {
  25048                     1...8 => vector_len > 2,
  25049                     9...16 => true,
  25050                     17...128 => false,
  25051                     else => unreachable,
  25052                 }) try self.asmRegisterRegisterImmediate(
  25053                     .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
  25054                     dst_alias,
  25055                     dst_alias,
  25056                     .u(0b00_00_00_00),
  25057                 );
  25058                 if (switch (scalar_bits) {
  25059                     1...8 => vector_len > 4,
  25060                     9...16 => vector_len > 2,
  25061                     17...64 => true,
  25062                     65...128 => false,
  25063                     else => unreachable,
  25064                 }) try self.asmRegisterRegisterImmediate(
  25065                     .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf },
  25066                     dst_alias,
  25067                     dst_alias,
  25068                     .u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00),
  25069                 );
  25070                 break :result .{ .register = dst_reg };
  25071             },
  25072             .float => switch (scalar_ty.floatBits(self.target.*)) {
  25073                 32 => switch (vector_len) {
  25074                     1 => {
  25075                         const src_mcv = try self.resolveInst(ty_op.operand);
  25076                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  25077                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25078                         try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
  25079                         break :result .{ .register = dst_reg };
  25080                     },
  25081                     2...4 => {
  25082                         const src_mcv = try self.resolveInst(ty_op.operand);
  25083                         if (self.hasFeature(.avx)) {
  25084                             const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25085                             if (src_mcv.isBase()) try self.asmRegisterMemory(
  25086                                 .{ .v_ss, .broadcast },
  25087                                 dst_reg.to128(),
  25088                                 try src_mcv.mem(self, .{ .size = .dword }),
  25089                             ) else {
  25090                                 const src_reg = if (src_mcv.isRegister())
  25091                                     src_mcv.getReg().?
  25092                                 else
  25093                                     try self.copyToTmpRegister(scalar_ty, src_mcv);
  25094                                 try self.asmRegisterRegisterRegisterImmediate(
  25095                                     .{ .v_ps, .shuf },
  25096                                     dst_reg.to128(),
  25097                                     src_reg.to128(),
  25098                                     src_reg.to128(),
  25099                                     .u(0),
  25100                                 );
  25101                             }
  25102                             break :result .{ .register = dst_reg };
  25103                         } else {
  25104                             const dst_mcv = if (src_mcv.isRegister() and
  25105                                 self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
  25106                                 src_mcv
  25107                             else
  25108                                 try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
  25109                             const dst_reg = dst_mcv.getReg().?;
  25110                             try self.asmRegisterRegisterImmediate(
  25111                                 .{ ._ps, .shuf },
  25112                                 dst_reg.to128(),
  25113                                 dst_reg.to128(),
  25114                                 .u(0),
  25115                             );
  25116                             break :result dst_mcv;
  25117                         }
  25118                     },
  25119                     5...8 => if (self.hasFeature(.avx)) {
  25120                         const src_mcv = try self.resolveInst(ty_op.operand);
  25121                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25122                         if (src_mcv.isBase()) try self.asmRegisterMemory(
  25123                             .{ .v_ss, .broadcast },
  25124                             dst_reg.to256(),
  25125                             try src_mcv.mem(self, .{ .size = .dword }),
  25126                         ) else {
  25127                             const src_reg = if (src_mcv.isRegister())
  25128                                 src_mcv.getReg().?
  25129                             else
  25130                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  25131                             if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
  25132                                 .{ .v_ss, .broadcast },
  25133                                 dst_reg.to256(),
  25134                                 src_reg.to128(),
  25135                             ) else {
  25136                                 try self.asmRegisterRegisterRegisterImmediate(
  25137                                     .{ .v_ps, .shuf },
  25138                                     dst_reg.to128(),
  25139                                     src_reg.to128(),
  25140                                     src_reg.to128(),
  25141                                     .u(0),
  25142                                 );
  25143                                 try self.asmRegisterRegisterRegisterImmediate(
  25144                                     .{ .v_f128, .insert },
  25145                                     dst_reg.to256(),
  25146                                     dst_reg.to256(),
  25147                                     dst_reg.to128(),
  25148                                     .u(1),
  25149                                 );
  25150                             }
  25151                         }
  25152                         break :result .{ .register = dst_reg };
  25153                     },
  25154                     else => {},
  25155                 },
  25156                 64 => switch (vector_len) {
  25157                     1 => {
  25158                         const src_mcv = try self.resolveInst(ty_op.operand);
  25159                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  25160                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25161                         try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
  25162                         break :result .{ .register = dst_reg };
  25163                     },
  25164                     2 => {
  25165                         const src_mcv = try self.resolveInst(ty_op.operand);
  25166                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25167                         if (self.hasFeature(.sse3)) {
  25168                             if (src_mcv.isBase()) try self.asmRegisterMemory(
  25169                                 if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
  25170                                 dst_reg.to128(),
  25171                                 try src_mcv.mem(self, .{ .size = .qword }),
  25172                             ) else try self.asmRegisterRegister(
  25173                                 if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
  25174                                 dst_reg.to128(),
  25175                                 (if (src_mcv.isRegister())
  25176                                     src_mcv.getReg().?
  25177                                 else
  25178                                     try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
  25179                             );
  25180                             break :result .{ .register = dst_reg };
  25181                         } else try self.asmRegisterRegister(
  25182                             .{ ._ps, .movlh },
  25183                             dst_reg.to128(),
  25184                             (if (src_mcv.isRegister())
  25185                                 src_mcv.getReg().?
  25186                             else
  25187                                 try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
  25188                         );
  25189                     },
  25190                     3...4 => if (self.hasFeature(.avx)) {
  25191                         const src_mcv = try self.resolveInst(ty_op.operand);
  25192                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25193                         if (src_mcv.isBase()) try self.asmRegisterMemory(
  25194                             .{ .v_sd, .broadcast },
  25195                             dst_reg.to256(),
  25196                             try src_mcv.mem(self, .{ .size = .qword }),
  25197                         ) else {
  25198                             const src_reg = if (src_mcv.isRegister())
  25199                                 src_mcv.getReg().?
  25200                             else
  25201                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  25202                             if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
  25203                                 .{ .v_sd, .broadcast },
  25204                                 dst_reg.to256(),
  25205                                 src_reg.to128(),
  25206                             ) else {
  25207                                 try self.asmRegisterRegister(
  25208                                     .{ .v_, .movddup },
  25209                                     dst_reg.to128(),
  25210                                     src_reg.to128(),
  25211                                 );
  25212                                 try self.asmRegisterRegisterRegisterImmediate(
  25213                                     .{ .v_f128, .insert },
  25214                                     dst_reg.to256(),
  25215                                     dst_reg.to256(),
  25216                                     dst_reg.to128(),
  25217                                     .u(1),
  25218                                 );
  25219                             }
  25220                         }
  25221                         break :result .{ .register = dst_reg };
  25222                     },
  25223                     else => {},
  25224                 },
  25225                 128 => switch (vector_len) {
  25226                     1 => {
  25227                         const src_mcv = try self.resolveInst(ty_op.operand);
  25228                         if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
  25229                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25230                         try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
  25231                         break :result .{ .register = dst_reg };
  25232                     },
  25233                     2 => if (self.hasFeature(.avx)) {
  25234                         const src_mcv = try self.resolveInst(ty_op.operand);
  25235                         const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
  25236                         if (src_mcv.isBase()) try self.asmRegisterMemory(
  25237                             .{ .v_f128, .broadcast },
  25238                             dst_reg.to256(),
  25239                             try src_mcv.mem(self, .{ .size = .xword }),
  25240                         ) else {
  25241                             const src_reg = if (src_mcv.isRegister())
  25242                                 src_mcv.getReg().?
  25243                             else
  25244                                 try self.copyToTmpRegister(scalar_ty, src_mcv);
  25245                             try self.asmRegisterRegisterRegisterImmediate(
  25246                                 .{ .v_f128, .insert },
  25247                                 dst_reg.to256(),
  25248                                 src_reg.to256(),
  25249                                 src_reg.to128(),
  25250                                 .u(1),
  25251                             );
  25252                         }
  25253                         break :result .{ .register = dst_reg };
  25254                     },
  25255                     else => {},
  25256                 },
  25257                 16, 80 => {},
  25258                 else => unreachable,
  25259             },
  25260         }
  25261         return self.fail("TODO implement airSplat for {}", .{vector_ty.fmt(pt)});
  25262     };
  25263     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  25264 }
  25265 
  25266 fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void {
  25267     const pt = self.pt;
  25268     const zcu = pt.zcu;
  25269     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  25270     const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
  25271     const ty = self.typeOfIndex(inst);
  25272     const vec_len = ty.vectorLen(zcu);
  25273     const elem_ty = ty.childType(zcu);
  25274     const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
  25275     const abi_size: u32 = @intCast(ty.abiSize(zcu));
  25276     const pred_ty = self.typeOf(pl_op.operand);
  25277 
  25278     const result = result: {
  25279         const has_blend = self.hasFeature(.sse4_1);
  25280         const has_avx = self.hasFeature(.avx);
  25281         const need_xmm0 = has_blend and !has_avx;
  25282         const pred_mcv = try self.resolveInst(pl_op.operand);
  25283         const mask_reg = mask: {
  25284             switch (pred_mcv) {
  25285                 .register => |pred_reg| switch (pred_reg.class()) {
  25286                     .general_purpose => {},
  25287                     .sse => if (need_xmm0 and pred_reg.id() != comptime Register.xmm0.id()) {
  25288                         try self.register_manager.getKnownReg(.xmm0, null);
  25289                         try self.genSetReg(.xmm0, pred_ty, pred_mcv, .{});
  25290                         break :mask .xmm0;
  25291                     } else break :mask if (has_blend)
  25292                         pred_reg
  25293                     else
  25294                         try self.copyToTmpRegister(pred_ty, pred_mcv),
  25295                     else => unreachable,
  25296                 },
  25297                 else => {},
  25298             }
  25299             const mask_reg: Register = if (need_xmm0) mask_reg: {
  25300                 try self.register_manager.getKnownReg(.xmm0, null);
  25301                 break :mask_reg .xmm0;
  25302             } else try self.register_manager.allocReg(null, abi.RegisterClass.sse);
  25303             const mask_alias = registerAlias(mask_reg, abi_size);
  25304             const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
  25305             defer self.register_manager.unlockReg(mask_lock);
  25306 
  25307             const pred_fits_in_elem = vec_len <= elem_abi_size;
  25308             if (self.hasFeature(.avx2) and abi_size <= 32) {
  25309                 if (pred_mcv.isRegister()) broadcast: {
  25310                     try self.asmRegisterRegister(
  25311                         .{ .v_d, .mov },
  25312                         mask_reg.to128(),
  25313                         pred_mcv.getReg().?.to32(),
  25314                     );
  25315                     if (pred_fits_in_elem and vec_len > 1) try self.asmRegisterRegister(
  25316                         .{ switch (elem_abi_size) {
  25317                             1 => .vp_b,
  25318                             2 => .vp_w,
  25319                             3...4 => .vp_d,
  25320                             5...8 => .vp_q,
  25321                             9...16 => {
  25322                                 try self.asmRegisterRegisterRegisterImmediate(
  25323                                     .{ .v_f128, .insert },
  25324                                     mask_alias,
  25325                                     mask_alias,
  25326                                     mask_reg.to128(),
  25327                                     .u(1),
  25328                                 );
  25329                                 break :broadcast;
  25330                             },
  25331                             17...32 => break :broadcast,
  25332                             else => unreachable,
  25333                         }, .broadcast },
  25334                         mask_alias,
  25335                         mask_reg.to128(),
  25336                     );
  25337                 } else try self.asmRegisterMemory(
  25338                     .{ switch (vec_len) {
  25339                         1...8 => .vp_b,
  25340                         9...16 => .vp_w,
  25341                         17...32 => .vp_d,
  25342                         else => unreachable,
  25343                     }, .broadcast },
  25344                     mask_alias,
  25345                     if (pred_mcv.isBase()) try pred_mcv.mem(self, .{ .size = .byte }) else .{
  25346                         .base = .{ .reg = (try self.copyToTmpRegister(
  25347                             .usize,
  25348                             pred_mcv.address(),
  25349                         )).to64() },
  25350                         .mod = .{ .rm = .{ .size = .byte } },
  25351                     },
  25352                 );
  25353             } else if (abi_size <= 16) broadcast: {
  25354                 try self.asmRegisterRegister(
  25355                     .{ if (has_avx) .v_d else ._d, .mov },
  25356                     mask_alias,
  25357                     (if (pred_mcv.isRegister())
  25358                         pred_mcv.getReg().?
  25359                     else
  25360                         try self.copyToTmpRegister(pred_ty, pred_mcv.address())).to32(),
  25361                 );
  25362                 if (!pred_fits_in_elem or vec_len == 1) break :broadcast;
  25363                 if (elem_abi_size <= 1) {
  25364                     if (has_avx) try self.asmRegisterRegisterRegister(
  25365                         .{ .vp_, .unpcklbw },
  25366                         mask_alias,
  25367                         mask_alias,
  25368                         mask_alias,
  25369                     ) else try self.asmRegisterRegister(
  25370                         .{ .p_, .unpcklbw },
  25371                         mask_alias,
  25372                         mask_alias,
  25373                     );
  25374                     if (abi_size <= 2) break :broadcast;
  25375                 }
  25376                 if (elem_abi_size <= 2) {
  25377                     try self.asmRegisterRegisterImmediate(
  25378                         .{ if (has_avx) .vp_w else .p_w, .shufl },
  25379                         mask_alias,
  25380                         mask_alias,
  25381                         .u(0b00_00_00_00),
  25382                     );
  25383                     if (abi_size <= 8) break :broadcast;
  25384                 }
  25385                 try self.asmRegisterRegisterImmediate(
  25386                     .{ if (has_avx) .vp_d else .p_d, .shuf },
  25387                     mask_alias,
  25388                     mask_alias,
  25389                     .u(switch (elem_abi_size) {
  25390                         1...2, 5...8 => 0b01_00_01_00,
  25391                         3...4 => 0b00_00_00_00,
  25392                         else => unreachable,
  25393                     }),
  25394                 );
  25395             } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)});
  25396             const elem_bits: u16 = @intCast(elem_abi_size * 8);
  25397             const mask_elem_ty = try pt.intType(.unsigned, elem_bits);
  25398             const mask_ty = try pt.vectorType(.{ .len = vec_len, .child = mask_elem_ty.toIntern() });
  25399             if (!pred_fits_in_elem) if (self.hasFeature(.ssse3)) {
  25400                 var mask_elems: [32]InternPool.Index = undefined;
  25401                 for (mask_elems[0..vec_len], 0..) |*elem, bit| elem.* = try pt.intern(.{ .int = .{
  25402                     .ty = mask_elem_ty.toIntern(),
  25403                     .storage = .{ .u64 = bit / elem_bits },
  25404                 } });
  25405                 const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
  25406                     .ty = mask_ty.toIntern(),
  25407                     .storage = .{ .elems = mask_elems[0..vec_len] },
  25408                 } })));
  25409                 const mask_mem: Memory = .{
  25410                     .base = .{ .reg = try self.copyToTmpRegister(.usize, mask_mcv.address()) },
  25411                     .mod = .{ .rm = .{ .size = self.memSize(ty) } },
  25412                 };
  25413                 if (has_avx) try self.asmRegisterRegisterMemory(
  25414                     .{ .vp_b, .shuf },
  25415                     mask_alias,
  25416                     mask_alias,
  25417                     mask_mem,
  25418                 ) else try self.asmRegisterMemory(
  25419                     .{ .p_b, .shuf },
  25420                     mask_alias,
  25421                     mask_mem,
  25422                 );
  25423             } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)});
  25424             {
  25425                 var mask_elems: [32]InternPool.Index = undefined;
  25426                 for (mask_elems[0..vec_len], 0..) |*elem, bit| elem.* = try pt.intern(.{ .int = .{
  25427                     .ty = mask_elem_ty.toIntern(),
  25428                     .storage = .{ .u64 = @as(u32, 1) << @intCast(bit & (elem_bits - 1)) },
  25429                 } });
  25430                 const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
  25431                     .ty = mask_ty.toIntern(),
  25432                     .storage = .{ .elems = mask_elems[0..vec_len] },
  25433                 } })));
  25434                 const mask_mem: Memory = .{
  25435                     .base = .{ .reg = try self.copyToTmpRegister(.usize, mask_mcv.address()) },
  25436                     .mod = .{ .rm = .{ .size = self.memSize(ty) } },
  25437                 };
  25438                 if (has_avx) {
  25439                     try self.asmRegisterRegisterMemory(
  25440                         .{ .vp_, .@"and" },
  25441                         mask_alias,
  25442                         mask_alias,
  25443                         mask_mem,
  25444                     );
  25445                     try self.asmRegisterRegisterMemory(
  25446                         .{ .vp_d, .cmpeq },
  25447                         mask_alias,
  25448                         mask_alias,
  25449                         mask_mem,
  25450                     );
  25451                 } else {
  25452                     try self.asmRegisterMemory(
  25453                         .{ .p_, .@"and" },
  25454                         mask_alias,
  25455                         mask_mem,
  25456                     );
  25457                     try self.asmRegisterMemory(
  25458                         .{ .p_d, .cmpeq },
  25459                         mask_alias,
  25460                         mask_mem,
  25461                     );
  25462                 }
  25463             }
  25464             break :mask mask_reg;
  25465         };
  25466         const mask_alias = registerAlias(mask_reg, abi_size);
  25467         const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
  25468         defer self.register_manager.unlockReg(mask_lock);
  25469 
  25470         const lhs_mcv = try self.resolveInst(extra.lhs);
  25471         const lhs_lock = switch (lhs_mcv) {
  25472             .register => |lhs_reg| self.register_manager.lockRegAssumeUnused(lhs_reg),
  25473             else => null,
  25474         };
  25475         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  25476 
  25477         const rhs_mcv = try self.resolveInst(extra.rhs);
  25478         const rhs_lock = switch (rhs_mcv) {
  25479             .register => |rhs_reg| self.register_manager.lockReg(rhs_reg),
  25480             else => null,
  25481         };
  25482         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
  25483 
  25484         const reuse_mcv = if (has_blend) rhs_mcv else lhs_mcv;
  25485         const dst_mcv: MCValue = if (reuse_mcv.isRegister() and self.reuseOperand(
  25486             inst,
  25487             if (has_blend) extra.rhs else extra.lhs,
  25488             @intFromBool(has_blend),
  25489             reuse_mcv,
  25490         )) reuse_mcv else if (has_avx)
  25491             .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  25492         else
  25493             try self.copyToRegisterWithInstTracking(inst, ty, reuse_mcv);
  25494         const dst_reg = dst_mcv.getReg().?;
  25495         const dst_alias = registerAlias(dst_reg, abi_size);
  25496         const dst_lock = self.register_manager.lockReg(dst_reg);
  25497         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  25498 
  25499         const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.childType(zcu).zigTypeTag(zcu)) {
  25500             else => null,
  25501             .int => switch (abi_size) {
  25502                 0 => unreachable,
  25503                 1...16 => if (has_avx)
  25504                     .{ .vp_b, .blendv }
  25505                 else if (has_blend)
  25506                     .{ .p_b, .blendv }
  25507                 else
  25508                     .{ .p_, undefined },
  25509                 17...32 => if (self.hasFeature(.avx2))
  25510                     .{ .vp_b, .blendv }
  25511                 else
  25512                     null,
  25513                 else => null,
  25514             },
  25515             .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  25516                 else => unreachable,
  25517                 16, 80, 128 => null,
  25518                 32 => switch (vec_len) {
  25519                     0 => unreachable,
  25520                     1...4 => if (has_avx) .{ .v_ps, .blendv } else .{ ._ps, .blendv },
  25521                     5...8 => if (has_avx) .{ .v_ps, .blendv } else null,
  25522                     else => null,
  25523                 },
  25524                 64 => switch (vec_len) {
  25525                     0 => unreachable,
  25526                     1...2 => if (has_avx) .{ .v_pd, .blendv } else .{ ._pd, .blendv },
  25527                     3...4 => if (has_avx) .{ .v_pd, .blendv } else null,
  25528                     else => null,
  25529                 },
  25530             },
  25531         }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)});
  25532         if (has_avx) {
  25533             const rhs_alias = if (rhs_mcv.isRegister())
  25534                 registerAlias(rhs_mcv.getReg().?, abi_size)
  25535             else rhs: {
  25536                 try self.genSetReg(dst_reg, ty, rhs_mcv, .{});
  25537                 break :rhs dst_alias;
  25538             };
  25539             if (lhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister(
  25540                 mir_tag,
  25541                 dst_alias,
  25542                 rhs_alias,
  25543                 try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }),
  25544                 mask_alias,
  25545             ) else try self.asmRegisterRegisterRegisterRegister(
  25546                 mir_tag,
  25547                 dst_alias,
  25548                 rhs_alias,
  25549                 registerAlias(if (lhs_mcv.isRegister())
  25550                     lhs_mcv.getReg().?
  25551                 else
  25552                     try self.copyToTmpRegister(ty, lhs_mcv), abi_size),
  25553                 mask_alias,
  25554             );
  25555         } else if (has_blend) if (lhs_mcv.isBase()) try self.asmRegisterMemoryRegister(
  25556             mir_tag,
  25557             dst_alias,
  25558             try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }),
  25559             mask_alias,
  25560         ) else try self.asmRegisterRegisterRegister(
  25561             mir_tag,
  25562             dst_alias,
  25563             registerAlias(if (lhs_mcv.isRegister())
  25564                 lhs_mcv.getReg().?
  25565             else
  25566                 try self.copyToTmpRegister(ty, lhs_mcv), abi_size),
  25567             mask_alias,
  25568         ) else {
  25569             const mir_fixes = @as(?Mir.Inst.Fixes, switch (elem_ty.zigTypeTag(zcu)) {
  25570                 else => null,
  25571                 .int => .p_,
  25572                 .float => switch (elem_ty.floatBits(self.target.*)) {
  25573                     32 => ._ps,
  25574                     64 => ._pd,
  25575                     16, 80, 128 => null,
  25576                     else => unreachable,
  25577                 },
  25578             }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)});
  25579             try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
  25580             if (rhs_mcv.isBase()) try self.asmRegisterMemory(
  25581                 .{ mir_fixes, .andn },
  25582                 mask_alias,
  25583                 try rhs_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  25584             ) else try self.asmRegisterRegister(
  25585                 .{ mir_fixes, .andn },
  25586                 mask_alias,
  25587                 if (rhs_mcv.isRegister())
  25588                     rhs_mcv.getReg().?
  25589                 else
  25590                     try self.copyToTmpRegister(ty, rhs_mcv),
  25591             );
  25592             try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias);
  25593         }
  25594         break :result dst_mcv;
  25595     };
  25596     return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
  25597 }
  25598 
  25599 fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
  25600     const pt = self.pt;
  25601     const zcu = pt.zcu;
  25602     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  25603     const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
  25604 
  25605     const dst_ty = self.typeOfIndex(inst);
  25606     const elem_ty = dst_ty.childType(zcu);
  25607     const elem_abi_size: u16 = @intCast(elem_ty.abiSize(zcu));
  25608     const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
  25609     const lhs_ty = self.typeOf(extra.a);
  25610     const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
  25611     const rhs_ty = self.typeOf(extra.b);
  25612     const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu));
  25613     const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size);
  25614 
  25615     const ExpectedContents = [32]?i32;
  25616     var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
  25617         std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
  25618     const allocator = stack.get();
  25619 
  25620     const mask_elems = try allocator.alloc(?i32, extra.mask_len);
  25621     defer allocator.free(mask_elems);
  25622     for (mask_elems, 0..) |*mask_elem, elem_index| {
  25623         const mask_elem_val =
  25624             Value.fromInterned(extra.mask).elemValue(pt, elem_index) catch unreachable;
  25625         mask_elem.* = if (mask_elem_val.isUndef(zcu))
  25626             null
  25627         else
  25628             @intCast(mask_elem_val.toSignedInt(zcu));
  25629     }
  25630 
  25631     const has_avx = self.hasFeature(.avx);
  25632     const result = @as(?MCValue, result: {
  25633         for (mask_elems) |mask_elem| {
  25634             if (mask_elem) |_| break;
  25635         } else break :result try self.allocRegOrMem(inst, true);
  25636 
  25637         for (mask_elems, 0..) |mask_elem, elem_index| {
  25638             if (mask_elem orelse continue != elem_index) break;
  25639         } else {
  25640             const lhs_mcv = try self.resolveInst(extra.a);
  25641             if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv;
  25642             const dst_mcv = try self.allocRegOrMem(inst, true);
  25643             try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{});
  25644             break :result dst_mcv;
  25645         }
  25646 
  25647         for (mask_elems, 0..) |mask_elem, elem_index| {
  25648             if (~(mask_elem orelse continue) != elem_index) break;
  25649         } else {
  25650             const rhs_mcv = try self.resolveInst(extra.b);
  25651             if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv;
  25652             const dst_mcv = try self.allocRegOrMem(inst, true);
  25653             try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{});
  25654             break :result dst_mcv;
  25655         }
  25656 
  25657         for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: {
  25658             if (elem_abi_size > 8) break :unpck;
  25659             if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck;
  25660 
  25661             var sources: [2]?u1 = @splat(null);
  25662             for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
  25663                 const mask_elem = maybe_mask_elem orelse continue;
  25664                 const mask_elem_index =
  25665                     std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck;
  25666                 const elem_byte = (elem_index >> 1) * elem_abi_size;
  25667                 if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) {
  25668                     .unpckl => 0b0000,
  25669                     .unpckh => 0b1000,
  25670                     else => unreachable,
  25671                 }) | (elem_byte << 1 & 0b10000)) break :unpck;
  25672 
  25673                 const source = @intFromBool(mask_elem < 0);
  25674                 if (sources[elem_index & 0b00001]) |prev_source| {
  25675                     if (source != prev_source) break :unpck;
  25676                 } else sources[elem_index & 0b00001] = source;
  25677             }
  25678             if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck;
  25679 
  25680             const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
  25681             const operand_tys = [2]Type{ lhs_ty, rhs_ty };
  25682             const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
  25683             const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
  25684 
  25685             const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
  25686                 self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
  25687                 lhs_mcv
  25688             else if (has_avx and lhs_mcv.isRegister())
  25689                 .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  25690             else
  25691                 try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
  25692             const dst_reg = dst_mcv.getReg().?;
  25693             const dst_alias = registerAlias(dst_reg, max_abi_size);
  25694 
  25695             const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
  25696                 (dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) {
  25697                 4 => if (has_avx) .v_ps else ._ps,
  25698                 8 => if (has_avx) .v_pd else ._pd,
  25699                 else => unreachable,
  25700             }, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) {
  25701                 .unpckl => switch (elem_abi_size) {
  25702                     1 => .unpcklbw,
  25703                     2 => .unpcklwd,
  25704                     4 => .unpckldq,
  25705                     8 => .unpcklqdq,
  25706                     else => unreachable,
  25707                 },
  25708                 .unpckh => switch (elem_abi_size) {
  25709                     1 => .unpckhbw,
  25710                     2 => .unpckhwd,
  25711                     4 => .unpckhdq,
  25712                     8 => .unpckhqdq,
  25713                     else => unreachable,
  25714                 },
  25715                 else => unreachable,
  25716             } };
  25717             if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory(
  25718                 mir_tag,
  25719                 dst_alias,
  25720                 registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
  25721                 try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25722             ) else try self.asmRegisterRegisterRegister(
  25723                 mir_tag,
  25724                 dst_alias,
  25725                 registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
  25726                 registerAlias(if (rhs_mcv.isRegister())
  25727                     rhs_mcv.getReg().?
  25728                 else
  25729                     try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
  25730             ) else if (rhs_mcv.isBase()) try self.asmRegisterMemory(
  25731                 mir_tag,
  25732                 dst_alias,
  25733                 try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25734             ) else try self.asmRegisterRegister(
  25735                 mir_tag,
  25736                 dst_alias,
  25737                 registerAlias(if (rhs_mcv.isRegister())
  25738                     rhs_mcv.getReg().?
  25739                 else
  25740                     try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
  25741             );
  25742             break :result dst_mcv;
  25743         }
  25744 
  25745         pshufd: {
  25746             if (elem_abi_size != 4) break :pshufd;
  25747             if (max_abi_size > self.vectorSize(.float)) break :pshufd;
  25748 
  25749             var control: u8 = 0b00_00_00_00;
  25750             var sources: [1]?u1 = @splat(null);
  25751             for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
  25752                 const mask_elem = maybe_mask_elem orelse continue;
  25753                 const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
  25754                 if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd;
  25755 
  25756                 const source = @intFromBool(mask_elem < 0);
  25757                 if (sources[0]) |prev_source| {
  25758                     if (source != prev_source) break :pshufd;
  25759                 } else sources[(elem_index & 0b010) >> 1] = source;
  25760 
  25761                 const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
  25762                 const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
  25763                 if (elem_index & 0b100 == 0)
  25764                     control |= select_mask
  25765                 else if (control & @as(u8, 0b11) << select_bit != select_mask) break :pshufd;
  25766             }
  25767 
  25768             const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
  25769             const operand_tys = [2]Type{ lhs_ty, rhs_ty };
  25770             const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]);
  25771 
  25772             const dst_reg = if (src_mcv.isRegister() and
  25773                 self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv))
  25774                 src_mcv.getReg().?
  25775             else
  25776                 try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
  25777             const dst_alias = registerAlias(dst_reg, max_abi_size);
  25778 
  25779             if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  25780                 .{ if (has_avx) .vp_d else .p_d, .shuf },
  25781                 dst_alias,
  25782                 try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25783                 .u(control),
  25784             ) else try self.asmRegisterRegisterImmediate(
  25785                 .{ if (has_avx) .vp_d else .p_d, .shuf },
  25786                 dst_alias,
  25787                 registerAlias(if (src_mcv.isRegister())
  25788                     src_mcv.getReg().?
  25789                 else
  25790                     try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size),
  25791                 .u(control),
  25792             );
  25793             break :result .{ .register = dst_reg };
  25794         }
  25795 
  25796         shufps: {
  25797             if (elem_abi_size != 4) break :shufps;
  25798             if (max_abi_size > self.vectorSize(.float)) break :shufps;
  25799 
  25800             var control: u8 = 0b00_00_00_00;
  25801             var sources: [2]?u1 = @splat(null);
  25802             for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
  25803                 const mask_elem = maybe_mask_elem orelse continue;
  25804                 const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
  25805                 if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps;
  25806 
  25807                 const source = @intFromBool(mask_elem < 0);
  25808                 if (sources[(elem_index & 0b010) >> 1]) |prev_source| {
  25809                     if (source != prev_source) break :shufps;
  25810                 } else sources[(elem_index & 0b010) >> 1] = source;
  25811 
  25812                 const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
  25813                 const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
  25814                 if (elem_index & 0b100 == 0)
  25815                     control |= select_mask
  25816                 else if (control & @as(u8, 0b11) << select_bit != select_mask) break :shufps;
  25817             }
  25818             if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps;
  25819 
  25820             const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
  25821             const operand_tys = [2]Type{ lhs_ty, rhs_ty };
  25822             const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
  25823             const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
  25824 
  25825             const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
  25826                 self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
  25827                 lhs_mcv
  25828             else if (has_avx and lhs_mcv.isRegister())
  25829                 .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  25830             else
  25831                 try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
  25832             const dst_reg = dst_mcv.getReg().?;
  25833             const dst_alias = registerAlias(dst_reg, max_abi_size);
  25834 
  25835             if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  25836                 .{ .v_ps, .shuf },
  25837                 dst_alias,
  25838                 registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
  25839                 try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25840                 .u(control),
  25841             ) else try self.asmRegisterRegisterRegisterImmediate(
  25842                 .{ .v_ps, .shuf },
  25843                 dst_alias,
  25844                 registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
  25845                 registerAlias(if (rhs_mcv.isRegister())
  25846                     rhs_mcv.getReg().?
  25847                 else
  25848                     try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
  25849                 .u(control),
  25850             ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  25851                 .{ ._ps, .shuf },
  25852                 dst_alias,
  25853                 try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25854                 .u(control),
  25855             ) else try self.asmRegisterRegisterImmediate(
  25856                 .{ ._ps, .shuf },
  25857                 dst_alias,
  25858                 registerAlias(if (rhs_mcv.isRegister())
  25859                     rhs_mcv.getReg().?
  25860                 else
  25861                     try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
  25862                 .u(control),
  25863             );
  25864             break :result dst_mcv;
  25865         }
  25866 
  25867         shufpd: {
  25868             if (elem_abi_size != 8) break :shufpd;
  25869             if (max_abi_size > self.vectorSize(.float)) break :shufpd;
  25870 
  25871             var control: u4 = 0b0_0_0_0;
  25872             var sources: [2]?u1 = @splat(null);
  25873             for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
  25874                 const mask_elem = maybe_mask_elem orelse continue;
  25875                 const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
  25876                 if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd;
  25877 
  25878                 const source = @intFromBool(mask_elem < 0);
  25879                 if (sources[elem_index & 0b01]) |prev_source| {
  25880                     if (source != prev_source) break :shufpd;
  25881                 } else sources[elem_index & 0b01] = source;
  25882 
  25883                 control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index);
  25884             }
  25885             if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd;
  25886 
  25887             const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b };
  25888             const operand_tys: [2]Type = .{ lhs_ty, rhs_ty };
  25889             const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
  25890             const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
  25891 
  25892             const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
  25893                 self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
  25894                 lhs_mcv
  25895             else if (has_avx and lhs_mcv.isRegister())
  25896                 .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  25897             else
  25898                 try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
  25899             const dst_reg = dst_mcv.getReg().?;
  25900             const dst_alias = registerAlias(dst_reg, max_abi_size);
  25901 
  25902             if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  25903                 .{ .v_pd, .shuf },
  25904                 dst_alias,
  25905                 registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
  25906                 try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25907                 .u(control),
  25908             ) else try self.asmRegisterRegisterRegisterImmediate(
  25909                 .{ .v_pd, .shuf },
  25910                 dst_alias,
  25911                 registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
  25912                 registerAlias(if (rhs_mcv.isRegister())
  25913                     rhs_mcv.getReg().?
  25914                 else
  25915                     try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
  25916                 .u(control),
  25917             ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  25918                 .{ ._pd, .shuf },
  25919                 dst_alias,
  25920                 try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
  25921                 .u(control),
  25922             ) else try self.asmRegisterRegisterImmediate(
  25923                 .{ ._pd, .shuf },
  25924                 dst_alias,
  25925                 registerAlias(if (rhs_mcv.isRegister())
  25926                     rhs_mcv.getReg().?
  25927                 else
  25928                     try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
  25929                 .u(control),
  25930             );
  25931             break :result dst_mcv;
  25932         }
  25933 
  25934         blend: {
  25935             if (elem_abi_size < 2) break :blend;
  25936             if (dst_abi_size > self.vectorSize(.float)) break :blend;
  25937             if (!self.hasFeature(.sse4_1)) break :blend;
  25938 
  25939             var control: u8 = 0b0_0_0_0_0_0_0_0;
  25940             for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
  25941                 const mask_elem = maybe_mask_elem orelse continue;
  25942                 const mask_elem_index =
  25943                     std.math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend;
  25944                 if (mask_elem_index != elem_index) break :blend;
  25945 
  25946                 const select_mask = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index);
  25947                 if (elem_index & 0b1000 == 0)
  25948                     control |= select_mask
  25949                 else if (control & @as(u8, 0b1) << @truncate(elem_index) != select_mask) break :blend;
  25950             }
  25951 
  25952             if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: {
  25953                 const expanded_control = switch (elem_abi_size) {
  25954                     4 => control,
  25955                     8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
  25956                         @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
  25957                         @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
  25958                         @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00),
  25959                     else => break :vpblendd,
  25960                 };
  25961 
  25962                 const lhs_mcv = try self.resolveInst(extra.a);
  25963                 const lhs_reg = if (lhs_mcv.isRegister())
  25964                     lhs_mcv.getReg().?
  25965                 else
  25966                     try self.copyToTmpRegister(dst_ty, lhs_mcv);
  25967                 const lhs_lock = self.register_manager.lockReg(lhs_reg);
  25968                 defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
  25969 
  25970                 const rhs_mcv = try self.resolveInst(extra.b);
  25971                 const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
  25972                 if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  25973                     .{ .vp_d, .blend },
  25974                     registerAlias(dst_reg, dst_abi_size),
  25975                     registerAlias(lhs_reg, dst_abi_size),
  25976                     try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  25977                     .u(expanded_control),
  25978                 ) else try self.asmRegisterRegisterRegisterImmediate(
  25979                     .{ .vp_d, .blend },
  25980                     registerAlias(dst_reg, dst_abi_size),
  25981                     registerAlias(lhs_reg, dst_abi_size),
  25982                     registerAlias(if (rhs_mcv.isRegister())
  25983                         rhs_mcv.getReg().?
  25984                     else
  25985                         try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  25986                     .u(expanded_control),
  25987                 );
  25988                 break :result .{ .register = dst_reg };
  25989             }
  25990 
  25991             if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: {
  25992                 const expanded_control = switch (elem_abi_size) {
  25993                     2 => control,
  25994                     4 => if (dst_abi_size <= 16 or
  25995                         @as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0)))
  25996                         @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
  25997                             @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
  25998                             @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
  25999                             @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00)
  26000                     else
  26001                         break :pblendw,
  26002                     8 => if (dst_abi_size <= 16 or
  26003                         @as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0)))
  26004                         @as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) |
  26005                             @as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000)
  26006                     else
  26007                         break :pblendw,
  26008                     16 => break :pblendw,
  26009                     else => unreachable,
  26010                 };
  26011 
  26012                 const lhs_mcv = try self.resolveInst(extra.a);
  26013                 const rhs_mcv = try self.resolveInst(extra.b);
  26014 
  26015                 const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
  26016                     self.reuseOperand(inst, extra.a, 0, lhs_mcv))
  26017                     lhs_mcv
  26018                 else if (has_avx and lhs_mcv.isRegister())
  26019                     .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  26020                 else
  26021                     try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
  26022                 const dst_reg = dst_mcv.getReg().?;
  26023 
  26024                 if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  26025                     .{ .vp_w, .blend },
  26026                     registerAlias(dst_reg, dst_abi_size),
  26027                     registerAlias(if (lhs_mcv.isRegister())
  26028                         lhs_mcv.getReg().?
  26029                     else
  26030                         dst_reg, dst_abi_size),
  26031                     try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26032                     .u(expanded_control),
  26033                 ) else try self.asmRegisterRegisterRegisterImmediate(
  26034                     .{ .vp_w, .blend },
  26035                     registerAlias(dst_reg, dst_abi_size),
  26036                     registerAlias(if (lhs_mcv.isRegister())
  26037                         lhs_mcv.getReg().?
  26038                     else
  26039                         dst_reg, dst_abi_size),
  26040                     registerAlias(if (rhs_mcv.isRegister())
  26041                         rhs_mcv.getReg().?
  26042                     else
  26043                         try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  26044                     .u(expanded_control),
  26045                 ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  26046                     .{ .p_w, .blend },
  26047                     registerAlias(dst_reg, dst_abi_size),
  26048                     try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26049                     .u(expanded_control),
  26050                 ) else try self.asmRegisterRegisterImmediate(
  26051                     .{ .p_w, .blend },
  26052                     registerAlias(dst_reg, dst_abi_size),
  26053                     registerAlias(if (rhs_mcv.isRegister())
  26054                         rhs_mcv.getReg().?
  26055                     else
  26056                         try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  26057                     .u(expanded_control),
  26058                 );
  26059                 break :result .{ .register = dst_reg };
  26060             }
  26061 
  26062             const expanded_control = switch (elem_abi_size) {
  26063                 4, 8 => control,
  26064                 16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) |
  26065                     @as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00),
  26066                 else => unreachable,
  26067             };
  26068 
  26069             const lhs_mcv = try self.resolveInst(extra.a);
  26070             const rhs_mcv = try self.resolveInst(extra.b);
  26071 
  26072             const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
  26073                 self.reuseOperand(inst, extra.a, 0, lhs_mcv))
  26074                 lhs_mcv
  26075             else if (has_avx and lhs_mcv.isRegister())
  26076                 .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  26077             else
  26078                 try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
  26079             const dst_reg = dst_mcv.getReg().?;
  26080 
  26081             if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
  26082                 switch (elem_abi_size) {
  26083                     4 => .{ .v_ps, .blend },
  26084                     8, 16 => .{ .v_pd, .blend },
  26085                     else => unreachable,
  26086                 },
  26087                 registerAlias(dst_reg, dst_abi_size),
  26088                 registerAlias(if (lhs_mcv.isRegister())
  26089                     lhs_mcv.getReg().?
  26090                 else
  26091                     dst_reg, dst_abi_size),
  26092                 try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26093                 .u(expanded_control),
  26094             ) else try self.asmRegisterRegisterRegisterImmediate(
  26095                 switch (elem_abi_size) {
  26096                     4 => .{ .v_ps, .blend },
  26097                     8, 16 => .{ .v_pd, .blend },
  26098                     else => unreachable,
  26099                 },
  26100                 registerAlias(dst_reg, dst_abi_size),
  26101                 registerAlias(if (lhs_mcv.isRegister())
  26102                     lhs_mcv.getReg().?
  26103                 else
  26104                     dst_reg, dst_abi_size),
  26105                 registerAlias(if (rhs_mcv.isRegister())
  26106                     rhs_mcv.getReg().?
  26107                 else
  26108                     try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  26109                 .u(expanded_control),
  26110             ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
  26111                 switch (elem_abi_size) {
  26112                     4 => .{ ._ps, .blend },
  26113                     8, 16 => .{ ._pd, .blend },
  26114                     else => unreachable,
  26115                 },
  26116                 registerAlias(dst_reg, dst_abi_size),
  26117                 try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26118                 .u(expanded_control),
  26119             ) else try self.asmRegisterRegisterImmediate(
  26120                 switch (elem_abi_size) {
  26121                     4 => .{ ._ps, .blend },
  26122                     8, 16 => .{ ._pd, .blend },
  26123                     else => unreachable,
  26124                 },
  26125                 registerAlias(dst_reg, dst_abi_size),
  26126                 registerAlias(if (rhs_mcv.isRegister())
  26127                     rhs_mcv.getReg().?
  26128                 else
  26129                     try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  26130                 .u(expanded_control),
  26131             );
  26132             break :result .{ .register = dst_reg };
  26133         }
  26134 
  26135         blendv: {
  26136             if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv;
  26137 
  26138             const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8);
  26139             const select_mask_ty = try pt.vectorType(.{
  26140                 .len = @intCast(mask_elems.len),
  26141                 .child = select_mask_elem_ty.toIntern(),
  26142             });
  26143             var select_mask_elems: [32]InternPool.Index = undefined;
  26144             for (
  26145                 select_mask_elems[0..mask_elems.len],
  26146                 mask_elems,
  26147                 0..,
  26148             ) |*select_mask_elem, maybe_mask_elem, elem_index| {
  26149                 const mask_elem = maybe_mask_elem orelse continue;
  26150                 const mask_elem_index =
  26151                     std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv;
  26152                 if (mask_elem_index != elem_index) break :blendv;
  26153 
  26154                 select_mask_elem.* = (if (mask_elem < 0)
  26155                     try select_mask_elem_ty.maxIntScalar(pt, select_mask_elem_ty)
  26156                 else
  26157                     try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern();
  26158             }
  26159             const select_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
  26160                 .ty = select_mask_ty.toIntern(),
  26161                 .storage = .{ .elems = select_mask_elems[0..mask_elems.len] },
  26162             } })));
  26163 
  26164             if (self.hasFeature(.sse4_1)) {
  26165                 const mir_tag: Mir.Inst.FixedTag = .{
  26166                     if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
  26167                         (dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) {
  26168                         4 => if (has_avx) .v_ps else ._ps,
  26169                         8 => if (has_avx) .v_pd else ._pd,
  26170                         else => unreachable,
  26171                     } else if (has_avx) .vp_b else .p_b,
  26172                     .blendv,
  26173                 };
  26174 
  26175                 const select_mask_reg = if (!has_avx) reg: {
  26176                     try self.register_manager.getKnownReg(.xmm0, null);
  26177                     try self.genSetReg(.xmm0, select_mask_elem_ty, select_mask_mcv, .{});
  26178                     break :reg .xmm0;
  26179                 } else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
  26180                 const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size);
  26181                 const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg);
  26182                 defer self.register_manager.unlockReg(select_mask_lock);
  26183 
  26184                 const lhs_mcv = try self.resolveInst(extra.a);
  26185                 const rhs_mcv = try self.resolveInst(extra.b);
  26186 
  26187                 const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
  26188                     self.reuseOperand(inst, extra.a, 0, lhs_mcv))
  26189                     lhs_mcv
  26190                 else if (has_avx and lhs_mcv.isRegister())
  26191                     .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
  26192                 else
  26193                     try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
  26194                 const dst_reg = dst_mcv.getReg().?;
  26195                 const dst_alias = registerAlias(dst_reg, dst_abi_size);
  26196 
  26197                 if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister(
  26198                     mir_tag,
  26199                     dst_alias,
  26200                     if (lhs_mcv.isRegister())
  26201                         registerAlias(lhs_mcv.getReg().?, dst_abi_size)
  26202                     else
  26203                         dst_alias,
  26204                     try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26205                     select_mask_alias,
  26206                 ) else try self.asmRegisterRegisterRegisterRegister(
  26207                     mir_tag,
  26208                     dst_alias,
  26209                     if (lhs_mcv.isRegister())
  26210                         registerAlias(lhs_mcv.getReg().?, dst_abi_size)
  26211                     else
  26212                         dst_alias,
  26213                     registerAlias(if (rhs_mcv.isRegister())
  26214                         rhs_mcv.getReg().?
  26215                     else
  26216                         try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  26217                     select_mask_alias,
  26218                 ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister(
  26219                     mir_tag,
  26220                     dst_alias,
  26221                     try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26222                     select_mask_alias,
  26223                 ) else try self.asmRegisterRegisterRegister(
  26224                     mir_tag,
  26225                     dst_alias,
  26226                     registerAlias(if (rhs_mcv.isRegister())
  26227                         rhs_mcv.getReg().?
  26228                     else
  26229                         try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
  26230                     select_mask_alias,
  26231                 );
  26232                 break :result dst_mcv;
  26233             }
  26234 
  26235             const lhs_mcv = try self.resolveInst(extra.a);
  26236             const rhs_mcv = try self.resolveInst(extra.b);
  26237 
  26238             const dst_mcv: MCValue = if (rhs_mcv.isRegister() and
  26239                 self.reuseOperand(inst, extra.b, 1, rhs_mcv))
  26240                 rhs_mcv
  26241             else
  26242                 try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv);
  26243             const dst_reg = dst_mcv.getReg().?;
  26244             const dst_alias = registerAlias(dst_reg, dst_abi_size);
  26245 
  26246             const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
  26247             const mask_alias = registerAlias(mask_reg, dst_abi_size);
  26248             const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
  26249             defer self.register_manager.unlockReg(mask_lock);
  26250 
  26251             const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat())
  26252                 switch (elem_ty.floatBits(self.target.*)) {
  26253                     16, 80, 128 => .p_,
  26254                     32 => ._ps,
  26255                     64 => ._pd,
  26256                     else => unreachable,
  26257                 }
  26258             else
  26259                 .p_;
  26260             try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
  26261             if (lhs_mcv.isBase()) try self.asmRegisterMemory(
  26262                 .{ mir_fixes, .andn },
  26263                 mask_alias,
  26264                 try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
  26265             ) else try self.asmRegisterRegister(
  26266                 .{ mir_fixes, .andn },
  26267                 mask_alias,
  26268                 if (lhs_mcv.isRegister())
  26269                     lhs_mcv.getReg().?
  26270                 else
  26271                     try self.copyToTmpRegister(dst_ty, lhs_mcv),
  26272             );
  26273             try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias);
  26274             break :result dst_mcv;
  26275         }
  26276 
  26277         pshufb: {
  26278             if (max_abi_size > 16) break :pshufb;
  26279             if (!self.hasFeature(.ssse3)) break :pshufb;
  26280 
  26281             const temp_regs =
  26282                 try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse);
  26283             const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs);
  26284             defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
  26285 
  26286             const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size);
  26287             try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{});
  26288 
  26289             const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size);
  26290             try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{});
  26291 
  26292             var lhs_mask_elems: [16]InternPool.Index = undefined;
  26293             for (lhs_mask_elems[0..max_abi_size], 0..) |*lhs_mask_elem, byte_index| {
  26294                 const elem_index = byte_index / elem_abi_size;
  26295                 lhs_mask_elem.* = try pt.intern(.{ .int = .{
  26296                     .ty = .u8_type,
  26297                     .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
  26298                         const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
  26299                         if (mask_elem < 0) break :elem 0b1_00_00000;
  26300                         const mask_elem_index: u31 = @intCast(mask_elem);
  26301                         const byte_off: u32 = @intCast(byte_index % elem_abi_size);
  26302                         break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
  26303                     } },
  26304                 } });
  26305             }
  26306             const lhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type });
  26307             const lhs_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
  26308                 .ty = lhs_mask_ty.toIntern(),
  26309                 .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] },
  26310             } })));
  26311             const lhs_mask_mem: Memory = .{
  26312                 .base = .{ .reg = try self.copyToTmpRegister(.usize, lhs_mask_mcv.address()) },
  26313                 .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } },
  26314             };
  26315             if (has_avx) try self.asmRegisterRegisterMemory(
  26316                 .{ .vp_b, .shuf },
  26317                 lhs_temp_alias,
  26318                 lhs_temp_alias,
  26319                 lhs_mask_mem,
  26320             ) else try self.asmRegisterMemory(
  26321                 .{ .p_b, .shuf },
  26322                 lhs_temp_alias,
  26323                 lhs_mask_mem,
  26324             );
  26325 
  26326             var rhs_mask_elems: [16]InternPool.Index = undefined;
  26327             for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| {
  26328                 const elem_index = byte_index / elem_abi_size;
  26329                 rhs_mask_elem.* = try pt.intern(.{ .int = .{
  26330                     .ty = .u8_type,
  26331                     .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
  26332                         const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
  26333                         if (mask_elem >= 0) break :elem 0b1_00_00000;
  26334                         const mask_elem_index: u31 = @intCast(~mask_elem);
  26335                         const byte_off: u32 = @intCast(byte_index % elem_abi_size);
  26336                         break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
  26337                     } },
  26338                 } });
  26339             }
  26340             const rhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type });
  26341             const rhs_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
  26342                 .ty = rhs_mask_ty.toIntern(),
  26343                 .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] },
  26344             } })));
  26345             const rhs_mask_mem: Memory = .{
  26346                 .base = .{ .reg = try self.copyToTmpRegister(.usize, rhs_mask_mcv.address()) },
  26347                 .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } },
  26348             };
  26349             if (has_avx) try self.asmRegisterRegisterMemory(
  26350                 .{ .vp_b, .shuf },
  26351                 rhs_temp_alias,
  26352                 rhs_temp_alias,
  26353                 rhs_mask_mem,
  26354             ) else try self.asmRegisterMemory(
  26355                 .{ .p_b, .shuf },
  26356                 rhs_temp_alias,
  26357                 rhs_mask_mem,
  26358             );
  26359 
  26360             if (has_avx) try self.asmRegisterRegisterRegister(
  26361                 .{ switch (elem_ty.zigTypeTag(zcu)) {
  26362                     else => break :result null,
  26363                     .int => .vp_,
  26364                     .float => switch (elem_ty.floatBits(self.target.*)) {
  26365                         32 => .v_ps,
  26366                         64 => .v_pd,
  26367                         16, 80, 128 => break :result null,
  26368                         else => unreachable,
  26369                     },
  26370                 }, .@"or" },
  26371                 lhs_temp_alias,
  26372                 lhs_temp_alias,
  26373                 rhs_temp_alias,
  26374             ) else try self.asmRegisterRegister(
  26375                 .{ switch (elem_ty.zigTypeTag(zcu)) {
  26376                     else => break :result null,
  26377                     .int => .p_,
  26378                     .float => switch (elem_ty.floatBits(self.target.*)) {
  26379                         32 => ._ps,
  26380                         64 => ._pd,
  26381                         16, 80, 128 => break :result null,
  26382                         else => unreachable,
  26383                     },
  26384                 }, .@"or" },
  26385                 lhs_temp_alias,
  26386                 rhs_temp_alias,
  26387             );
  26388             break :result .{ .register = temp_regs[0] };
  26389         }
  26390 
  26391         break :result null;
  26392     }) orelse return self.fail("TODO implement airShuffle from {} and {} to {} with {}", .{
  26393         lhs_ty.fmt(pt),
  26394         rhs_ty.fmt(pt),
  26395         dst_ty.fmt(pt),
  26396         Value.fromInterned(extra.mask).fmtValue(pt),
  26397     });
  26398     return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
  26399 }
  26400 
  26401 fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void {
            // Lowers the AIR `reduce` instruction `inst`. Only `.Or` and `.And` over a vector of
            // `bool` are handled; any other operand type or operation fails with a
            // "TODO implement airReduce" error. The answer is not materialized into a register:
            // it is left in the flags and reported as an `.eflags` condition (`.nz` for `.Or`,
            // `.z` for `.And`).
  26402     const pt = self.pt;
  26403     const zcu = pt.zcu;
  26404     const reduce = self.air.instructions.items(.data)[@intFromEnum(inst)].reduce;
  26405 
  26406     const result: MCValue = result: {
  26407         const operand_ty = self.typeOf(reduce.operand);
  26408         if (operand_ty.isVector(zcu) and operand_ty.childType(zcu).toIntern() == .bool_type) {
                    // Every successful path below returns an `.eflags` result, so (going by the
                    // helper's name) anything currently tracked in EFLAGS is spilled first.
  26409             try self.spillEflagsIfOccupied();
  26410 
  26411             const abi_size: u32 = @intCast(operand_ty.abiSize(zcu));
  26412             const operand_mcv = try self.resolveInst(reduce.operand);
  26413             const mask_len = operand_ty.vectorLen(zcu);
                    // `mask_len - 1` fits in a u6 only when the vector has at most 64 elements.
                    // Longer vectors take the `orelse` block, which never falls through: it either
                    // fails or breaks out of `result` with the final condition.
                    // NOTE(review): `mask_len - 1` assumes a non-empty vector - confirm that
                    // zero-length vectors never reach this point.
  26414             const mask_len_minus_one = (std.math.cast(u6, mask_len - 1) orelse {
                        // Wide path: fold the operand, one qword limb at a time, into a scratch
                        // general-purpose register that stays locked until this block exits.
  26415                 const acc_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
  26416                 const acc_lock = self.register_manager.lockRegAssumeUnused(acc_reg);
  26417                 defer self.register_manager.unlockReg(acc_lock);
  26418                 var limb_offset: i31 = 0;
  26419                 while (limb_offset < abi_size) : (limb_offset += 8) {
                            // The first limb is loaded with `mov`; each later limb is combined into
                            // the accumulator with `or` / `and`. Unsupported operations are only
                            // rejected once the second limb is reached.
                            // `operand_mcv.mem` is used unconditionally, so this path presumably
                            // expects the operand to be memory-backed - TODO confirm.
  26420                     try self.asmRegisterMemory(
  26421                         .{ ._, if (limb_offset == 0) .mov else switch (reduce.operation) {
  26422                             .Or => .@"or",
  26423                             .And => .@"and",
  26424                             else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}),
  26425                         } },
  26426                         acc_reg.to64(),
  26427                         try operand_mcv.mem(self, .{
  26428                             .size = .qword,
  26429                             .disp = limb_offset,
  26430                         }),
  26431                     );
  26432                 }
                        // NOTE(review): unlike the narrow path below, no mask is applied here, so
                        // bits above `mask_len` in the last limb take part in the result - confirm
                        // they are guaranteed to hold a neutral value.
  26433                 switch (reduce.operation) {
  26434                     .Or => {
                                // Any set bit in the accumulator clears ZF, hence `.nz`.
  26435                         try self.asmRegisterRegister(.{ ._, .@"test" }, acc_reg.to64(), acc_reg.to64());
  26436                         break :result .{ .eflags = .nz };
  26437                     },
  26438                     .And => {
                                // ZF is set only when all 64 accumulated bits are ones, hence `.z`.
  26439                         try self.asmRegisterImmediate(.{ ._, .cmp }, acc_reg.to64(), .s(-1));
  26440                         break :result .{ .eflags = .z };
  26441                     },
  26442                     else => unreachable,
  26443                 }
  26444             });
                    // Shifting all-ones right by `~(mask_len - 1)`, which as a u6 equals
                    // `64 - mask_len`, leaves exactly the low `mask_len` bits set.
  26445             const mask = @as(u64, std.math.maxInt(u64)) >> ~mask_len_minus_one;
  26446             switch (reduce.operation) {
  26447                 .Or => {
                            // Memory operand: `test` it in place against an immediate. The mask is
                            // used only when the vector has fewer elements than the operand has
                            // bits (`mask_len < abi_size * 8`); otherwise -1 tests every bit.
  26448                     if (operand_mcv.isBase()) try self.asmMemoryImmediate(
  26449                         .{ ._, .@"test" },
  26450                         try operand_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
  26451                         if (mask_len < abi_size * 8)
  26452                             .u(mask)
  26453                         else
  26454                             .s(-1),
  26455                     ) else {
                                // Otherwise test from a general-purpose register: reuse the operand's
                                // register when it already is one, else copy it into a temporary.
  26456                         const operand_reg = registerAlias(operand_reg: {
  26457                             if (operand_mcv.isRegister()) {
  26458                                 const operand_reg = operand_mcv.getReg().?;
  26459                                 if (operand_reg.class() == .general_purpose) break :operand_reg operand_reg;
  26460                             }
  26461                             break :operand_reg try self.copyToTmpRegister(operand_ty, operand_mcv);
  26462                         }, abi_size);
                                // `lockReg` yields an optional lock; release it only if one was taken.
  26463                         const operand_lock = self.register_manager.lockReg(operand_reg);
  26464                         defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
  26465 
                                // Same mask-or-everything choice as the memory form above, except that
                                // the unmasked case tests the register against itself.
  26466                         if (mask_len < abi_size * 8) try self.asmRegisterImmediate(
  26467                             .{ ._, .@"test" },
  26468                             operand_reg,
  26469                             .u(mask),
  26470                         ) else try self.asmRegisterRegister(
  26471                             .{ ._, .@"test" },
  26472                             operand_reg,
  26473                             operand_reg,
  26474                         );
  26475                     }
  26476                     break :result .{ .eflags = .nz };
  26477                 },
  26478                 .And => {
                            // The operand is always copied into a temporary, which is then inverted
                            // in place with `not`.
  26479                     const tmp_reg = registerAlias(
  26480                         try self.copyToTmpRegister(operand_ty, operand_mcv),
  26481                         abi_size,
  26482                     );
  26483                     const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
  26484                     defer self.register_manager.unlockReg(tmp_lock);
  26485 
                            // After the inversion a set bit marks a `false` element, so ZF from the
                            // (optionally masked) `test` means every tested element was true.
  26486                     try self.asmRegister(.{ ._, .not }, tmp_reg);
  26487                     if (mask_len < abi_size * 8)
  26488                         try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_reg, .u(mask))
  26489                     else
  26490                         try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_reg, tmp_reg);
  26491                     break :result .{ .eflags = .z };
  26492                 },
  26493                 else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}),
  26494             }
  26495         }
                // Anything other than a vector of `bool` is not implemented.
  26496         return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)});
  26497     };
            // Hand the result to `finishAir` together with the instruction's single operand.
  26498     return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
  26499 }
  26500 
  26501 fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void {
  26502     const pt = self.pt;
  26503     const zcu = pt.zcu;
  26504     const result_ty = self.typeOfIndex(inst);
  26505     const len: usize = @intCast(result_ty.arrayLen(zcu));
  26506     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  26507     const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra[ty_pl.payload..][0..len]);
  26508     const result: MCValue = result: {
  26509         switch (result_ty.zigTypeTag(zcu)) {
  26510             .@"struct" => {
  26511                 const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu));
  26512                 if (result_ty.containerLayout(zcu) == .@"packed") {
  26513                     const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern());
  26514                     try self.genInlineMemset(
  26515                         .{ .lea_frame = .{ .index = frame_index } },
  26516                         .{ .immediate = 0 },
  26517                         .{ .immediate = result_ty.abiSize(zcu) },
  26518                         .{},
  26519                     );
  26520                     for (elements, 0..) |elem, elem_i_usize| {
  26521                         const elem_i: u32 = @intCast(elem_i_usize);
  26522                         if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue;
  26523 
  26524                         const elem_ty = result_ty.fieldType(elem_i, zcu);
  26525                         const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu));
  26526                         if (elem_bit_size > 64) {
  26527                             return self.fail(
  26528                                 "TODO airAggregateInit implement packed structs with large fields",
  26529                                 .{},
  26530                             );
  26531                         }
  26532                         const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
  26533                         const elem_abi_bits = elem_abi_size * 8;
  26534                         const elem_off = pt.structPackedFieldBitOffset(loaded_struct, elem_i);
  26535                         const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
  26536                         const elem_bit_off = elem_off % elem_abi_bits;
  26537                         const elem_mcv = try self.resolveInst(elem);
  26538                         const mat_elem_mcv = switch (elem_mcv) {
  26539                             .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  26540                             else => elem_mcv,
  26541                         };
  26542                         const elem_lock = switch (mat_elem_mcv) {
  26543                             .register => |reg| self.register_manager.lockReg(reg),
  26544                             .immediate => |imm| lock: {
  26545                                 if (imm == 0) continue;
  26546                                 break :lock null;
  26547                             },
  26548                             else => null,
  26549                         };
  26550                         defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
  26551 
  26552                         const elem_extra_bits = self.regExtraBits(elem_ty);
  26553                         {
  26554                             const temp_reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv);
  26555                             const temp_alias = registerAlias(temp_reg, elem_abi_size);
  26556                             const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
  26557                             defer self.register_manager.unlockReg(temp_lock);
  26558 
  26559                             if (elem_bit_off < elem_extra_bits) {
  26560                                 try self.truncateRegister(elem_ty, temp_alias);
  26561                             }
  26562                             if (elem_bit_off > 0) try self.genShiftBinOpMir(
  26563                                 .{ ._l, .sh },
  26564                                 elem_ty,
  26565                                 .{ .register = temp_alias },
  26566                                 .u8,
  26567                                 .{ .immediate = elem_bit_off },
  26568                             );
  26569                             try self.genBinOpMir(
  26570                                 .{ ._, .@"or" },
  26571                                 elem_ty,
  26572                                 .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
  26573                                 .{ .register = temp_alias },
  26574                             );
  26575                         }
  26576                         if (elem_bit_off > elem_extra_bits) {
  26577                             const temp_reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv);
  26578                             const temp_alias = registerAlias(temp_reg, elem_abi_size);
  26579                             const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
  26580                             defer self.register_manager.unlockReg(temp_lock);
  26581 
  26582                             if (elem_extra_bits > 0) {
  26583                                 try self.truncateRegister(elem_ty, temp_alias);
  26584                             }
  26585                             try self.genShiftBinOpMir(
  26586                                 .{ ._r, .sh },
  26587                                 elem_ty,
  26588                                 .{ .register = temp_reg },
  26589                                 .u8,
  26590                                 .{ .immediate = elem_abi_bits - elem_bit_off },
  26591                             );
  26592                             try self.genBinOpMir(
  26593                                 .{ ._, .@"or" },
  26594                                 elem_ty,
  26595                                 .{ .load_frame = .{
  26596                                     .index = frame_index,
  26597                                     .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)),
  26598                                 } },
  26599                                 .{ .register = temp_alias },
  26600                             );
  26601                         }
  26602                     }
  26603                 } else for (elements, 0..) |elem, elem_i| {
  26604                     if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue;
  26605 
  26606                     const elem_ty = result_ty.fieldType(elem_i, zcu);
  26607                     const elem_off: i32 = @intCast(result_ty.structFieldOffset(elem_i, zcu));
  26608                     const elem_mcv = try self.resolveInst(elem);
  26609                     const mat_elem_mcv = switch (elem_mcv) {
  26610                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  26611                         else => elem_mcv,
  26612                     };
  26613                     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv, .{});
  26614                 }
  26615                 break :result .{ .load_frame = .{ .index = frame_index } };
  26616             },
  26617             .array, .vector => {
  26618                 const elem_ty = result_ty.childType(zcu);
  26619                 if (result_ty.isVector(zcu) and elem_ty.toIntern() == .bool_type) {
  26620                     const result_size: u32 = @intCast(result_ty.abiSize(zcu));
  26621                     const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
  26622                     try self.asmRegisterRegister(
  26623                         .{ ._, .xor },
  26624                         registerAlias(dst_reg, @min(result_size, 4)),
  26625                         registerAlias(dst_reg, @min(result_size, 4)),
  26626                     );
  26627 
  26628                     for (elements, 0..) |elem, elem_i| {
  26629                         const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
  26630                         const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
  26631                         defer self.register_manager.unlockReg(elem_lock);
  26632 
  26633                         try self.asmRegisterImmediate(
  26634                             .{ ._, .@"and" },
  26635                             registerAlias(elem_reg, @min(result_size, 4)),
  26636                             .u(1),
  26637                         );
  26638                         if (elem_i > 0) try self.asmRegisterImmediate(
  26639                             .{ ._l, .sh },
  26640                             registerAlias(elem_reg, result_size),
  26641                             .u(@intCast(elem_i)),
  26642                         );
  26643                         try self.asmRegisterRegister(
  26644                             .{ ._, .@"or" },
  26645                             registerAlias(dst_reg, result_size),
  26646                             registerAlias(elem_reg, result_size),
  26647                         );
  26648                     }
  26649                     break :result .{ .register = dst_reg };
  26650                 } else {
  26651                     const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu));
  26652                     const elem_size: u32 = @intCast(elem_ty.abiSize(zcu));
  26653 
  26654                     for (elements, 0..) |elem, elem_i| {
  26655                         const elem_mcv = try self.resolveInst(elem);
  26656                         const mat_elem_mcv = switch (elem_mcv) {
  26657                             .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
  26658                             else => elem_mcv,
  26659                         };
  26660                         const elem_off: i32 = @intCast(elem_size * elem_i);
  26661                         try self.genSetMem(
  26662                             .{ .frame = frame_index },
  26663                             elem_off,
  26664                             elem_ty,
  26665                             mat_elem_mcv,
  26666                             .{},
  26667                         );
  26668                     }
  26669                     if (result_ty.sentinel(zcu)) |sentinel| try self.genSetMem(
  26670                         .{ .frame = frame_index },
  26671                         @intCast(elem_size * elements.len),
  26672                         elem_ty,
  26673                         try self.genTypedValue(sentinel),
  26674                         .{},
  26675                     );
  26676                     break :result .{ .load_frame = .{ .index = frame_index } };
  26677                 }
  26678             },
  26679             else => unreachable,
  26680         }
  26681     };
  26682 
  26683     if (elements.len <= Liveness.bpi - 1) {
  26684         var buf: [Liveness.bpi - 1]Air.Inst.Ref = @splat(.none);
  26685         @memcpy(buf[0..elements.len], elements);
  26686         return self.finishAir(inst, result, buf);
  26687     }
  26688     var bt = self.liveness.iterateBigTomb(inst);
  26689     for (elements) |elem| try self.feed(&bt, elem);
  26690     return self.finishAirResult(inst, result);
  26691 }
  26692 
  26693 fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void {
  26694     const pt = self.pt;
  26695     const zcu = pt.zcu;
  26696     const ip = &zcu.intern_pool;
  26697     const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
  26698     const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data;
  26699     const result: MCValue = result: {
  26700         const union_ty = self.typeOfIndex(inst);
  26701         const layout = union_ty.unionGetLayout(zcu);
  26702 
  26703         const src_ty = self.typeOf(extra.init);
  26704         const src_mcv = try self.resolveInst(extra.init);
  26705         if (layout.tag_size == 0) {
  26706             if (layout.abi_size <= src_ty.abiSize(zcu) and
  26707                 self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
  26708 
  26709             const dst_mcv = try self.allocRegOrMem(inst, true);
  26710             try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
  26711             break :result dst_mcv;
  26712         }
  26713 
  26714         const dst_mcv = try self.allocRegOrMem(inst, false);
  26715 
  26716         const loaded_union = zcu.typeToUnion(union_ty).?;
  26717         const field_name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index];
  26718         const tag_ty: Type = .fromInterned(loaded_union.enum_tag_ty);
  26719         const field_index = tag_ty.enumFieldIndex(field_name, zcu).?;
  26720         const tag_val = try pt.enumValueFieldIndex(tag_ty, field_index);
  26721         const tag_int_val = try tag_val.intFromEnum(tag_ty, pt);
  26722         const tag_int = tag_int_val.toUnsignedInt(zcu);
  26723         const tag_off: i32 = @intCast(layout.tagOffset());
  26724         try self.genCopy(
  26725             tag_ty,
  26726             dst_mcv.address().offset(tag_off).deref(),
  26727             .{ .immediate = tag_int },
  26728             .{},
  26729         );
  26730 
  26731         const pl_off: i32 = @intCast(layout.payloadOffset());
  26732         try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv, .{});
  26733 
  26734         break :result dst_mcv;
  26735     };
  26736     return self.finishAir(inst, result, .{ extra.init, .none, .none });
  26737 }
  26738 
  26739 fn airPrefetch(self: *CodeGen, inst: Air.Inst.Index) !void {
  26740     const prefetch = self.air.instructions.items(.data)[@intFromEnum(inst)].prefetch;
  26741     return self.finishAir(inst, .unreach, .{ prefetch.ptr, .none, .none });
  26742 }
  26743 
  26744 fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void {
  26745     const pt = self.pt;
  26746     const zcu = pt.zcu;
  26747     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
  26748     const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
  26749     const ty = self.typeOfIndex(inst);
  26750 
  26751     const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand };
  26752     const result = result: {
  26753         if (switch (ty.scalarType(zcu).floatBits(self.target.*)) {
  26754             16, 80, 128 => true,
  26755             32, 64 => !self.hasFeature(.fma),
  26756             else => unreachable,
  26757         }) {
  26758             if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement airMulAdd for {}", .{
  26759                 ty.fmt(pt),
  26760             });
  26761 
  26762             var callee_buf: ["__fma?".len]u8 = undefined;
  26763             break :result try self.genCall(.{ .lib = .{
  26764                 .return_type = ty.toIntern(),
  26765                 .param_types = &.{ ty.toIntern(), ty.toIntern(), ty.toIntern() },
  26766                 .callee = std.fmt.bufPrint(&callee_buf, "{s}fma{s}", .{
  26767                     floatLibcAbiPrefix(ty),
  26768                     floatLibcAbiSuffix(ty),
  26769                 }) catch unreachable,
  26770             } }, &.{ ty, ty, ty }, &.{
  26771                 .{ .air_ref = extra.lhs }, .{ .air_ref = extra.rhs }, .{ .air_ref = pl_op.operand },
  26772             }, .{});
  26773         }
  26774 
  26775         var mcvs: [3]MCValue = undefined;
  26776         var locks: [3]?RegisterManager.RegisterLock = @splat(null);
  26777         defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
  26778         var order: [3]u2 = @splat(0);
  26779         var unused: std.StaticBitSet(3) = .initFull();
  26780         for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| {
  26781             const op_index: u2 = @intCast(op_i);
  26782             mcv.* = try self.resolveInst(op);
  26783             if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) {
  26784                 order[op_index] = 1;
  26785                 unused.unset(0);
  26786             } else if (unused.isSet(2) and mcv.isBase()) {
  26787                 order[op_index] = 3;
  26788                 unused.unset(2);
  26789             }
  26790             switch (mcv.*) {
  26791                 .register => |reg| lock.* = self.register_manager.lockReg(reg),
  26792                 else => {},
  26793             }
  26794         }
  26795         for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| {
  26796             if (mop_index.* != 0) continue;
  26797             mop_index.* = 1 + @as(u2, @intCast(unused.toggleFirstSet().?));
  26798             if (mop_index.* > 1 and mcv.isRegister()) continue;
  26799             const reg = try self.copyToTmpRegister(ty, mcv.*);
  26800             mcv.* = .{ .register = reg };
  26801             if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock);
  26802             lock.* = self.register_manager.lockRegAssumeUnused(reg);
  26803         }
  26804 
  26805         const mir_tag = @as(?Mir.Inst.FixedTag, if (std.mem.eql(u2, &order, &.{ 1, 3, 2 }) or
  26806             std.mem.eql(u2, &order, &.{ 3, 1, 2 }))
  26807             switch (ty.zigTypeTag(zcu)) {
  26808                 .float => switch (ty.floatBits(self.target.*)) {
  26809                     32 => .{ .v_ss, .fmadd132 },
  26810                     64 => .{ .v_sd, .fmadd132 },
  26811                     16, 80, 128 => null,
  26812                     else => unreachable,
  26813                 },
  26814                 .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  26815                     .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  26816                         32 => switch (ty.vectorLen(zcu)) {
  26817                             1 => .{ .v_ss, .fmadd132 },
  26818                             2...8 => .{ .v_ps, .fmadd132 },
  26819                             else => null,
  26820                         },
  26821                         64 => switch (ty.vectorLen(zcu)) {
  26822                             1 => .{ .v_sd, .fmadd132 },
  26823                             2...4 => .{ .v_pd, .fmadd132 },
  26824                             else => null,
  26825                         },
  26826                         16, 80, 128 => null,
  26827                         else => unreachable,
  26828                     },
  26829                     else => unreachable,
  26830                 },
  26831                 else => unreachable,
  26832             }
  26833         else if (std.mem.eql(u2, &order, &.{ 2, 1, 3 }) or std.mem.eql(u2, &order, &.{ 1, 2, 3 }))
  26834             switch (ty.zigTypeTag(zcu)) {
  26835                 .float => switch (ty.floatBits(self.target.*)) {
  26836                     32 => .{ .v_ss, .fmadd213 },
  26837                     64 => .{ .v_sd, .fmadd213 },
  26838                     16, 80, 128 => null,
  26839                     else => unreachable,
  26840                 },
  26841                 .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  26842                     .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  26843                         32 => switch (ty.vectorLen(zcu)) {
  26844                             1 => .{ .v_ss, .fmadd213 },
  26845                             2...8 => .{ .v_ps, .fmadd213 },
  26846                             else => null,
  26847                         },
  26848                         64 => switch (ty.vectorLen(zcu)) {
  26849                             1 => .{ .v_sd, .fmadd213 },
  26850                             2...4 => .{ .v_pd, .fmadd213 },
  26851                             else => null,
  26852                         },
  26853                         16, 80, 128 => null,
  26854                         else => unreachable,
  26855                     },
  26856                     else => unreachable,
  26857                 },
  26858                 else => unreachable,
  26859             }
  26860         else if (std.mem.eql(u2, &order, &.{ 2, 3, 1 }) or std.mem.eql(u2, &order, &.{ 3, 2, 1 }))
  26861             switch (ty.zigTypeTag(zcu)) {
  26862                 .float => switch (ty.floatBits(self.target.*)) {
  26863                     32 => .{ .v_ss, .fmadd231 },
  26864                     64 => .{ .v_sd, .fmadd231 },
  26865                     16, 80, 128 => null,
  26866                     else => unreachable,
  26867                 },
  26868                 .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
  26869                     .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
  26870                         32 => switch (ty.vectorLen(zcu)) {
  26871                             1 => .{ .v_ss, .fmadd231 },
  26872                             2...8 => .{ .v_ps, .fmadd231 },
  26873                             else => null,
  26874                         },
  26875                         64 => switch (ty.vectorLen(zcu)) {
  26876                             1 => .{ .v_sd, .fmadd231 },
  26877                             2...4 => .{ .v_pd, .fmadd231 },
  26878                             else => null,
  26879                         },
  26880                         16, 80, 128 => null,
  26881                         else => unreachable,
  26882                     },
  26883                     else => unreachable,
  26884                 },
  26885                 else => unreachable,
  26886             }
  26887         else
  26888             unreachable) orelse return self.fail("TODO implement airMulAdd for {}", .{ty.fmt(pt)});
  26889 
  26890         var mops: [3]MCValue = undefined;
  26891         for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv;
  26892 
  26893         const abi_size: u32 = @intCast(ty.abiSize(zcu));
  26894         const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
  26895         const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
  26896         if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
  26897             mir_tag,
  26898             mop1_reg,
  26899             mop2_reg,
  26900             registerAlias(mops[2].getReg().?, abi_size),
  26901         ) else try self.asmRegisterRegisterMemory(
  26902             mir_tag,
  26903             mop1_reg,
  26904             mop2_reg,
  26905             try mops[2].mem(self, .{ .size = .fromSize(abi_size) }),
  26906         );
  26907         break :result mops[0];
  26908     };
  26909     return self.finishAir(inst, result, ops);
  26910 }
  26911 
  26912 fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void {
  26913     const pt = self.pt;
  26914     const zcu = pt.zcu;
  26915     const va_list_ty = self.air.instructions.items(.data)[@intFromEnum(inst)].ty;
  26916     const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque);
  26917 
  26918     const result: MCValue = switch (self.fn_type.fnCallingConvention(zcu)) {
  26919         .x86_64_sysv => result: {
  26920             const info = self.va_info.sysv;
  26921             const dst_fi = try self.allocFrameIndex(.initSpill(va_list_ty, zcu));
  26922             var field_off: u31 = 0;
  26923             // gp_offset: c_uint,
  26924             try self.genSetMem(
  26925                 .{ .frame = dst_fi },
  26926                 field_off,
  26927                 .c_uint,
  26928                 .{ .immediate = info.gp_count * 8 },
  26929                 .{},
  26930             );
  26931             field_off += @intCast(Type.c_uint.abiSize(zcu));
  26932             // fp_offset: c_uint,
  26933             try self.genSetMem(
  26934                 .{ .frame = dst_fi },
  26935                 field_off,
  26936                 .c_uint,
  26937                 .{ .immediate = abi.SysV.c_abi_int_param_regs.len * 8 + info.fp_count * 16 },
  26938                 .{},
  26939             );
  26940             field_off += @intCast(Type.c_uint.abiSize(zcu));
  26941             // overflow_arg_area: *anyopaque,
  26942             try self.genSetMem(
  26943                 .{ .frame = dst_fi },
  26944                 field_off,
  26945                 ptr_anyopaque_ty,
  26946                 .{ .lea_frame = info.overflow_arg_area },
  26947                 .{},
  26948             );
  26949             field_off += @intCast(ptr_anyopaque_ty.abiSize(zcu));
  26950             // reg_save_area: *anyopaque,
  26951             try self.genSetMem(
  26952                 .{ .frame = dst_fi },
  26953                 field_off,
  26954                 ptr_anyopaque_ty,
  26955                 .{ .lea_frame = info.reg_save_area },
  26956                 .{},
  26957             );
  26958             field_off += @intCast(ptr_anyopaque_ty.abiSize(zcu));
  26959             break :result .{ .load_frame = .{ .index = dst_fi } };
  26960         },
  26961         .x86_64_win => return self.fail("TODO implement c_va_start for Win64", .{}),
  26962         else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}),
  26963     };
  26964     return self.finishAir(inst, result, .{ .none, .none, .none });
  26965 }
  26966 
  26967 fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
  26968     const pt = self.pt;
  26969     const zcu = pt.zcu;
  26970     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  26971     const ty = self.typeOfIndex(inst);
  26972     const promote_ty = self.promoteVarArg(ty);
  26973     const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque);
  26974     const unused = self.liveness.isUnused(inst);
  26975 
  26976     const result: MCValue = switch (self.fn_type.fnCallingConvention(zcu)) {
  26977         .x86_64_sysv => result: {
  26978             try self.spillEflagsIfOccupied();
  26979 
  26980             const tmp_regs =
  26981                 try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
  26982             const offset_reg = tmp_regs[0].to32();
  26983             const addr_reg = tmp_regs[1].to64();
  26984             const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
  26985             defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
  26986 
  26987             const promote_mcv = try self.allocTempRegOrMem(promote_ty, true);
  26988             const promote_lock = switch (promote_mcv) {
  26989                 .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
  26990                 else => null,
  26991             };
  26992             defer if (promote_lock) |lock| self.register_manager.unlockReg(lock);
  26993 
  26994             const ptr_arg_list_reg =
  26995                 try self.copyToTmpRegister(self.typeOf(ty_op.operand), .{ .air_ref = ty_op.operand });
  26996             const ptr_arg_list_lock = self.register_manager.lockRegAssumeUnused(ptr_arg_list_reg);
  26997             defer self.register_manager.unlockReg(ptr_arg_list_lock);
  26998 
  26999             const gp_offset: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 0 } };
  27000             const fp_offset: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 4 } };
  27001             const overflow_arg_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 8 } };
  27002             const reg_save_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 16 } };
  27003 
  27004             const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target.*, .arg), .none);
  27005             switch (classes[0]) {
  27006                 .integer => {
  27007                     assert(classes.len == 1);
  27008 
  27009                     try self.genSetReg(offset_reg, .c_uint, gp_offset, .{});
  27010                     try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u(
  27011                         abi.SysV.c_abi_int_param_regs.len * 8,
  27012                     ));
  27013                     const mem_reloc = try self.asmJccReloc(.ae, undefined);
  27014 
  27015                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{});
  27016                     if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
  27017                         .base = .{ .reg = addr_reg },
  27018                         .mod = .{ .rm = .{
  27019                             .size = .qword,
  27020                             .index = offset_reg.to64(),
  27021                         } },
  27022                     });
  27023                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
  27024                         .base = .{ .reg = offset_reg.to64() },
  27025                         .mod = .{ .rm = .{
  27026                             .size = .qword,
  27027                             .disp = 8,
  27028                         } },
  27029                     });
  27030                     try self.genCopy(.c_uint, gp_offset, .{ .register = offset_reg }, .{});
  27031                     const done_reloc = try self.asmJmpReloc(undefined);
  27032 
  27033                     self.performReloc(mem_reloc);
  27034                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{});
  27035                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
  27036                         .base = .{ .reg = addr_reg },
  27037                         .mod = .{ .rm = .{
  27038                             .size = .qword,
  27039                             .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)),
  27040                         } },
  27041                     });
  27042                     try self.genCopy(
  27043                         ptr_anyopaque_ty,
  27044                         overflow_arg_area,
  27045                         .{ .register = offset_reg.to64() },
  27046                         .{},
  27047                     );
  27048 
  27049                     self.performReloc(done_reloc);
  27050                     if (!unused) try self.genCopy(promote_ty, promote_mcv, .{
  27051                         .indirect = .{ .reg = addr_reg },
  27052                     }, .{});
  27053                 },
  27054                 .sse => {
  27055                     assert(classes.len == 1);
  27056 
  27057                     try self.genSetReg(offset_reg, .c_uint, fp_offset, .{});
  27058                     try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u(
  27059                         abi.SysV.c_abi_int_param_regs.len * 8 + abi.SysV.c_abi_sse_param_regs.len * 16,
  27060                     ));
  27061                     const mem_reloc = try self.asmJccReloc(.ae, undefined);
  27062 
  27063                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{});
  27064                     if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{
  27065                         .base = .{ .reg = addr_reg },
  27066                         .mod = .{ .rm = .{
  27067                             .size = .qword,
  27068                             .index = offset_reg.to64(),
  27069                         } },
  27070                     });
  27071                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{
  27072                         .base = .{ .reg = offset_reg.to64() },
  27073                         .mod = .{ .rm = .{
  27074                             .size = .qword,
  27075                             .disp = 16,
  27076                         } },
  27077                     });
  27078                     try self.genCopy(.c_uint, fp_offset, .{ .register = offset_reg }, .{});
  27079                     const done_reloc = try self.asmJmpReloc(undefined);
  27080 
  27081                     self.performReloc(mem_reloc);
  27082                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{});
  27083                     try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{
  27084                         .base = .{ .reg = addr_reg },
  27085                         .mod = .{ .rm = .{
  27086                             .size = .qword,
  27087                             .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)),
  27088                         } },
  27089                     });
  27090                     try self.genCopy(
  27091                         ptr_anyopaque_ty,
  27092                         overflow_arg_area,
  27093                         .{ .register = offset_reg.to64() },
  27094                         .{},
  27095                     );
  27096 
  27097                     self.performReloc(done_reloc);
  27098                     if (!unused) try self.genCopy(promote_ty, promote_mcv, .{
  27099                         .indirect = .{ .reg = addr_reg },
  27100                     }, .{});
  27101                 },
  27102                 .memory => {
  27103                     assert(classes.len == 1);
  27104                     unreachable;
  27105                 },
  27106                 else => return self.fail("TODO implement c_va_arg for {} on SysV", .{promote_ty.fmt(pt)}),
  27107             }
  27108 
  27109             if (unused) break :result .unreach;
  27110             if (ty.toIntern() == promote_ty.toIntern()) break :result promote_mcv;
  27111 
  27112             if (!promote_ty.isRuntimeFloat()) {
  27113                 const dst_mcv = try self.allocRegOrMem(inst, true);
  27114                 try self.genCopy(ty, dst_mcv, promote_mcv, .{});
  27115                 break :result dst_mcv;
  27116             }
  27117 
  27118             assert(ty.toIntern() == .f32_type and promote_ty.toIntern() == .f64_type);
  27119             const dst_mcv = if (promote_mcv.isRegister())
  27120                 promote_mcv
  27121             else
  27122                 try self.copyToRegisterWithInstTracking(inst, ty, promote_mcv);
  27123             const dst_reg = dst_mcv.getReg().?.to128();
  27124             const dst_lock = self.register_manager.lockReg(dst_reg);
  27125             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
  27126 
  27127             if (self.hasFeature(.avx)) if (promote_mcv.isBase()) try self.asmRegisterRegisterMemory(
  27128                 .{ .v_ss, .cvtsd2 },
  27129                 dst_reg,
  27130                 dst_reg,
  27131                 try promote_mcv.mem(self, .{ .size = .qword }),
  27132             ) else try self.asmRegisterRegisterRegister(
  27133                 .{ .v_ss, .cvtsd2 },
  27134                 dst_reg,
  27135                 dst_reg,
  27136                 (if (promote_mcv.isRegister())
  27137                     promote_mcv.getReg().?
  27138                 else
  27139                     try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(),
  27140             ) else if (promote_mcv.isBase()) try self.asmRegisterMemory(
  27141                 .{ ._ss, .cvtsd2 },
  27142                 dst_reg,
  27143                 try promote_mcv.mem(self, .{ .size = .qword }),
  27144             ) else try self.asmRegisterRegister(
  27145                 .{ ._ss, .cvtsd2 },
  27146                 dst_reg,
  27147                 (if (promote_mcv.isRegister())
  27148                     promote_mcv.getReg().?
  27149                 else
  27150                     try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(),
  27151             );
  27152             break :result promote_mcv;
  27153         },
  27154         .x86_64_win => return self.fail("TODO implement c_va_arg for Win64", .{}),
  27155         else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}),
  27156     };
  27157     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
  27158 }
  27159 
  27160 fn airVaCopy(self: *CodeGen, inst: Air.Inst.Index) !void {
  27161     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
  27162     const ptr_va_list_ty = self.typeOf(ty_op.operand);
  27163 
  27164     const dst_mcv = try self.allocRegOrMem(inst, true);
  27165     try self.load(dst_mcv, ptr_va_list_ty, .{ .air_ref = ty_op.operand });
  27166     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
  27167 }
  27168 
  27169 fn airVaEnd(self: *CodeGen, inst: Air.Inst.Index) !void {
  27170     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
  27171     return self.finishAir(inst, .unreach, .{ un_op, .none, .none });
  27172 }
  27173 
  27174 fn resolveInst(self: *CodeGen, ref: Air.Inst.Ref) InnerError!MCValue {
  27175     const zcu = self.pt.zcu;
  27176     const ty = self.typeOf(ref);
  27177 
  27178     // If the type has no codegen bits, no need to store it.
  27179     if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return .none;
  27180 
  27181     const mcv = if (ref.toIndex()) |inst| mcv: {
  27182         break :mcv self.inst_tracking.getPtr(inst).?.short;
  27183     } else mcv: {
  27184         const ip_index = ref.toInterned().?;
  27185         const gop = try self.const_tracking.getOrPut(self.gpa, ip_index);
  27186         if (!gop.found_existing) gop.value_ptr.* = .init(init: {
  27187             const const_mcv = try self.genTypedValue(.fromInterned(ip_index));
  27188             switch (const_mcv) {
  27189                 .lea_tlv => |tlv_sym| switch (self.bin_file.tag) {
  27190                     .elf, .macho => {
  27191                         if (self.mod.pic) {
  27192                             try self.spillRegisters(&.{ .rdi, .rax });
  27193                         } else {
  27194                             try self.spillRegisters(&.{.rax});
  27195                         }
  27196                         const frame_index = try self.allocFrameIndex(.init(.{
  27197                             .size = 8,
  27198                             .alignment = .@"8",
  27199                         }));
  27200                         try self.genSetMem(
  27201                             .{ .frame = frame_index },
  27202                             0,
  27203                             .usize,
  27204                             .{ .lea_symbol = .{ .sym_index = tlv_sym } },
  27205                             .{},
  27206                         );
  27207                         break :init .{ .load_frame = .{ .index = frame_index } };
  27208                     },
  27209                     else => break :init const_mcv,
  27210                 },
  27211                 else => break :init const_mcv,
  27212             }
  27213         });
  27214         break :mcv gop.value_ptr.short;
  27215     };
  27216 
  27217     switch (mcv) {
  27218         .none, .unreach, .dead => unreachable,
  27219         else => return mcv,
  27220     }
  27221 }
  27222 
  27223 fn getResolvedInstValue(self: *CodeGen, inst: Air.Inst.Index) *InstTracking {
  27224     const tracking = self.inst_tracking.getPtr(inst).?;
  27225     return switch (tracking.short) {
  27226         .none, .unreach, .dead => unreachable,
  27227         else => tracking,
  27228     };
  27229 }
  27230 
  27231 /// If the MCValue is an immediate, and it does not fit within this type,
  27232 /// we put it in a register.
  27233 /// A potential opportunity for future optimization here would be keeping track
  27234 /// of the fact that the instruction is available both as an immediate
  27235 /// and as a register.
  27236 fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) !MCValue {
  27237     const mcv = try self.resolveInst(operand);
  27238     const ti = @typeInfo(T).int;
  27239     switch (mcv) {
  27240         .immediate => |imm| {
  27241             // This immediate is unsigned.
  27242             const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed));
  27243             if (imm >= std.math.maxInt(U)) {
  27244                 return MCValue{ .register = try self.copyToTmpRegister(.usize, mcv) };
  27245             }
  27246         },
  27247         else => {},
  27248     }
  27249     return mcv;
  27250 }
  27251 
  27252 fn genTypedValue(self: *CodeGen, val: Value) InnerError!MCValue {
  27253     const pt = self.pt;
  27254     return switch (try codegen.genTypedValue(self.bin_file, pt, self.src_loc, val, self.target.*)) {
  27255         .mcv => |mcv| switch (mcv) {
  27256             .none => .none,
  27257             .undef => .undef,
  27258             .immediate => |imm| .{ .immediate = imm },
  27259             .memory => |addr| .{ .memory = addr },
  27260             .load_symbol => |sym_index| .{ .load_symbol = .{ .sym_index = sym_index } },
  27261             .lea_symbol => |sym_index| .{ .lea_symbol = .{ .sym_index = sym_index } },
  27262             .load_direct => |sym_index| .{ .load_direct = sym_index },
  27263             .lea_direct => |sym_index| .{ .lea_direct = sym_index },
  27264             .load_got => |sym_index| .{ .lea_got = sym_index },
  27265             .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
  27266         },
  27267         .fail => |msg| return self.failMsg(msg),
  27268     };
  27269 }
  27270 
  27271 const CallMCValues = struct {
  27272     args: []MCValue,
  27273     return_value: InstTracking,
  27274     stack_byte_count: u31,
  27275     stack_align: InternPool.Alignment,
  27276     gp_count: u32,
  27277     fp_count: u32,
  27278 
  27279     fn deinit(self: *CallMCValues, func: *CodeGen) void {
  27280         func.gpa.free(self.args);
  27281         self.* = undefined;
  27282     }
  27283 };
  27284 
/// Computes where each argument and the return value of a call with signature
/// `fn_info` live, according to the calling convention `fn_info.cc`.
///
/// `var_args` holds the types of extra variadic arguments; each one is passed
/// through `promoteVarArg` and appended after the declared parameters.
/// Arguments that are not assigned registers become `.load_frame` values
/// relative to `stack_frame_base`.
///
/// On success, `stack_byte_count` has been rounded up to `stack_align`.
/// Calling conventions not handled here produce a codegen failure via `fail`.
///
/// Caller must call `CallMCValues.deinit`.
fn resolveCallingConventionValues(
    self: *CodeGen,
    fn_info: InternPool.Key.FuncType,
    var_args: []const Type,
    stack_frame_base: FrameIndex,
) !CallMCValues {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ip = &zcu.intern_pool;
    const cc = fn_info.cc;
    // Scratch list of all parameter types: declared ones first, then the
    // promoted variadic ones.
    const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len);
    defer self.gpa.free(param_types);

    for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src|
        dest.* = .fromInterned(src);
    for (param_types[fn_info.param_types.len..], var_args) |*param_ty, arg_ty|
        param_ty.* = self.promoteVarArg(arg_ty);

    var result: CallMCValues = .{
        .args = try self.gpa.alloc(MCValue, param_types.len),
        // These undefined values must be populated before returning from this function.
        .return_value = undefined,
        .stack_byte_count = 0,
        .stack_align = undefined,
        .gp_count = 0,
        .fp_count = 0,
    };
    errdefer self.gpa.free(result.args);

    const ret_ty: Type = .fromInterned(fn_info.return_type);
    switch (cc) {
        .naked => {
            assert(result.args.len == 0);
            result.return_value = .init(.unreach);
            result.stack_align = switch (self.target.cpu.arch) {
                else => unreachable,
                .x86 => .@"4",
                .x86_64 => .@"8",
            };
        },
        .x86_64_sysv, .x86_64_win => |cc_opts| {
            var ret_int_reg_i: u32 = 0;
            var ret_sse_reg_i: u32 = 0;
            var param_int_reg_i: u32 = 0;
            var param_sse_reg_i: u32 = 0;
            result.stack_align = .fromByteUnits(cc_opts.incoming_stack_alignment orelse 16);

            switch (cc) {
                .x86_64_sysv => {},
                // Win64 starts with 4 * 8 bytes of stack already accounted for
                // (presumably the register home area -- confirm against the ABI).
                .x86_64_win => result.stack_byte_count += @intCast(4 * 8),
                else => unreachable,
            }

            // Return values
            if (ret_ty.isNoReturn(zcu)) {
                result.return_value = .init(.unreach);
            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
                // TODO: is this even possible for C calling convention?
                result.return_value = .init(.none);
            } else {
                // One tracking entry is collected per register-carrying class
                // and combined into a single value after the loop.
                var ret_tracking: [4]InstTracking = undefined;
                var ret_tracking_i: usize = 0;

                const classes = switch (cc) {
                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none),
                    .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu)},
                    else => unreachable,
                };
                for (classes) |class| switch (class) {
                    .integer => {
                        const ret_int_reg = registerAlias(
                            abi.getCAbiIntReturnRegs(cc)[ret_int_reg_i],
                            @intCast(@min(ret_ty.abiSize(zcu), 8)),
                        );
                        ret_int_reg_i += 1;

                        ret_tracking[ret_tracking_i] = .init(.{ .register = ret_int_reg });
                        ret_tracking_i += 1;
                    },
                    .sse, .float, .float_combine, .win_i128 => {
                        const ret_sse_regs = abi.getCAbiSseReturnRegs(cc);
                        const abi_size: u32 = @intCast(ret_ty.abiSize(zcu));
                        const reg_size = @min(abi_size, self.vectorSize(.float));
                        var byte_offset: u32 = 0;
                        while (byte_offset < abi_size) : (byte_offset += reg_size) {
                            const ret_sse_reg = registerAlias(ret_sse_regs[ret_sse_reg_i], reg_size);
                            ret_sse_reg_i += 1;

                            ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg });
                            ret_tracking_i += 1;
                        }
                    },
                    // `up` classes add no register; they only check the class of
                    // the register recorded for the preceding entry.
                    .sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse),
                    .x87 => {
                        ret_tracking[ret_tracking_i] = .init(.{ .register = abi.getCAbiX87ReturnRegs(cc)[0] });
                        ret_tracking_i += 1;
                    },
                    .x87up => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .x87),
                    .complex_x87 => {
                        ret_tracking[ret_tracking_i] = .init(.{ .register_pair = abi.getCAbiX87ReturnRegs(cc)[0..2].* });
                        ret_tracking_i += 1;
                    },
                    .memory => {
                        // Indirect return: uses one integer return register and
                        // also consumes one integer parameter register.
                        const ret_int_reg = abi.getCAbiIntReturnRegs(cc)[ret_int_reg_i].to64();
                        ret_int_reg_i += 1;
                        const ret_indirect_reg = abi.getCAbiIntParamRegs(cc)[param_int_reg_i];
                        param_int_reg_i += 1;

                        ret_tracking[ret_tracking_i] = .{
                            .short = .{ .indirect = .{ .reg = ret_int_reg } },
                            .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                        };
                        ret_tracking_i += 1;
                    },
                    .none, .integer_per_element => unreachable,
                };
                // A single entry is used as-is; several are merged into a
                // register pair / triple / quadruple.
                result.return_value = switch (ret_tracking_i) {
                    else => unreachable,
                    1 => ret_tracking[0],
                    2 => .init(.{ .register_pair = .{
                        ret_tracking[0].short.register,
                        ret_tracking[1].short.register,
                    } }),
                    3 => .init(.{ .register_triple = .{
                        ret_tracking[0].short.register,
                        ret_tracking[1].short.register,
                        ret_tracking[2].short.register,
                    } }),
                    4 => .init(.{ .register_quadruple = .{
                        ret_tracking[0].short.register,
                        ret_tracking[1].short.register,
                        ret_tracking[2].short.register,
                        ret_tracking[3].short.register,
                    } }),
                };
            }

            // Input params
            for (param_types, result.args) |ty, *arg| {
                assert(ty.hasRuntimeBitsIgnoreComptime(zcu));
                switch (cc) {
                    .x86_64_sysv => {},
                    .x86_64_win => {
                        // On Win64 the integer and SSE register indices are kept
                        // in lockstep: both advance to the larger of the two.
                        param_int_reg_i = @max(param_int_reg_i, param_sse_reg_i);
                        param_sse_reg_i = param_int_reg_i;
                    },
                    else => unreachable,
                }

                var arg_mcv: [4]MCValue = undefined;
                var arg_mcv_i: usize = 0;

                const classes = switch (cc) {
                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none),
                    .x86_64_win => &.{abi.classifyWindows(ty, zcu)},
                    else => unreachable,
                };
                // Any `break` out of this loop skips the `else` below and falls
                // through to the stack-passing code after it.
                classes: for (classes) |class| switch (class) {
                    .integer => {
                        const param_int_regs = abi.getCAbiIntParamRegs(cc);
                        if (param_int_reg_i >= param_int_regs.len) break;

                        const param_int_reg =
                            registerAlias(param_int_regs[param_int_reg_i], @intCast(@min(ty.abiSize(zcu), 8)));
                        param_int_reg_i += 1;

                        arg_mcv[arg_mcv_i] = .{ .register = param_int_reg };
                        arg_mcv_i += 1;
                    },
                    .sse, .float, .float_combine => {
                        const param_sse_regs = abi.getCAbiSseParamRegs(cc);
                        const abi_size: u32 = @intCast(ty.abiSize(zcu));
                        const reg_size = @min(abi_size, self.vectorSize(.float));
                        var byte_offset: u32 = 0;
                        while (byte_offset < abi_size) : (byte_offset += reg_size) {
                            if (param_sse_reg_i >= param_sse_regs.len) break :classes;

                            const param_sse_reg = registerAlias(param_sse_regs[param_sse_reg_i], reg_size);
                            param_sse_reg_i += 1;

                            arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg };
                            arg_mcv_i += 1;
                        }
                    },
                    .sseup => assert(arg_mcv[arg_mcv_i - 1].register.class() == .sse),
                    .x87, .x87up, .complex_x87, .memory, .win_i128 => switch (cc) {
                        .x86_64_sysv => switch (class) {
                            .x87, .x87up, .complex_x87, .memory => break,
                            else => unreachable,
                        },
                        // Win64: values larger than 8 bytes are referenced
                        // indirectly through an integer parameter register.
                        .x86_64_win => if (ty.abiSize(zcu) > 8) {
                            const param_int_reg = abi.getCAbiIntParamRegs(cc)[param_int_reg_i].to64();
                            param_int_reg_i += 1;

                            arg_mcv[arg_mcv_i] = .{ .indirect = .{ .reg = param_int_reg } };
                            arg_mcv_i += 1;
                        } else break,
                        else => unreachable,
                    },
                    .none => unreachable,
                    .integer_per_element => {
                        // Claims every remaining integer parameter register and
                        // reserves frame space for the elements that do not fit.
                        const param_int_regs_len: u32 =
                            @intCast(abi.getCAbiIntParamRegs(cc).len);
                        const remaining_param_int_regs: u3 =
                            @intCast(param_int_regs_len - param_int_reg_i);
                        param_int_reg_i = param_int_regs_len;

                        const frame_elem_align = 8;
                        const frame_elems_len = ty.vectorLen(zcu) - remaining_param_int_regs;
                        const frame_elem_size = std.mem.alignForward(
                            u64,
                            ty.childType(zcu).abiSize(zcu),
                            frame_elem_align,
                        );
                        const frame_size: u31 = @intCast(frame_elems_len * frame_elem_size);

                        result.stack_byte_count =
                            std.mem.alignForward(u31, result.stack_byte_count, frame_elem_align);
                        arg_mcv[arg_mcv_i] = .{ .elementwise_regs_then_frame = .{
                            .regs = remaining_param_int_regs,
                            .frame_off = @intCast(result.stack_byte_count),
                            .frame_index = stack_frame_base,
                        } };
                        arg_mcv_i += 1;
                        result.stack_byte_count += frame_size;
                    },
                } else {
                    // Loop completed without `break`: combine the collected
                    // values into the final argument location.
                    arg.* = switch (arg_mcv_i) {
                        else => unreachable,
                        1 => arg_mcv[0],
                        2 => .{ .register_pair = .{
                            arg_mcv[0].register,
                            arg_mcv[1].register,
                        } },
                        3 => .{ .register_triple = .{
                            arg_mcv[0].register,
                            arg_mcv[1].register,
                            arg_mcv[2].register,
                        } },
                        4 => .{ .register_quadruple = .{
                            arg_mcv[0].register,
                            arg_mcv[1].register,
                            arg_mcv[2].register,
                            arg_mcv[3].register,
                        } },
                    };
                    continue;
                }

                // Passed on the stack: at least 8-byte aligned, at the next
                // suitably aligned offset from `stack_frame_base`.
                const param_align = ty.abiAlignment(zcu).max(.@"8");
                result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count));
                result.stack_align = result.stack_align.max(param_align);
                arg.* = .{ .load_frame = .{
                    .index = stack_frame_base,
                    .off = result.stack_byte_count,
                } };
                result.stack_byte_count += @intCast(ty.abiSize(zcu));
            }
            assert(param_int_reg_i <= 6);
            result.gp_count = param_int_reg_i;
            assert(param_sse_reg_i <= 16);
            result.fp_count = param_sse_reg_i;
        },
        .auto => {
            result.stack_align = abi.zigcc.stack_align orelse .fromByteUnits(self.vectorSize(.float));

            // Remaining parameter registers of each class; entries are removed
            // from the front as they are assigned.
            var param_gpr = abi.getCAbiIntParamRegs(cc);
            var param_x87 = abi.getCAbiX87ParamRegs(cc);
            var param_sse = abi.getCAbiSseParamRegs(cc);

            // Return values
            result.return_value = if (ret_ty.isNoReturn(zcu))
                .init(.unreach)
            else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu))
                .init(.none)
            else return_value: {
                const ret_gpr = abi.getCAbiIntReturnRegs(cc);
                const ret_size: u31 = @intCast(ret_ty.abiSize(zcu));
                if (abi.zigcc.return_in_regs) switch (self.regClassForType(ret_ty)) {
                    .general_purpose => if (ret_size <= @as(u4, switch (self.target.cpu.arch) {
                        else => unreachable,
                        .x86 => 4,
                        .x86_64 => 8,
                    }))
                        break :return_value .init(.{ .register = registerAlias(ret_gpr[0], ret_size) })
                    else if (ret_gpr.len >= 2 and ret_ty.isSliceAtRuntime(zcu))
                        break :return_value .init(.{ .register_pair = ret_gpr[0..2].* }),
                    .segment, .mmx, .ip => unreachable,
                    .x87 => break :return_value .init(.{ .register = .st0 }),
                    .sse => if (ret_size <= self.vectorSize(.float)) break :return_value .init(.{
                        .register = registerAlias(abi.getCAbiSseReturnRegs(cc)[0], @max(ret_size, 16)),
                    }),
                };
                // No register return applied: return indirectly, which uses up
                // the first integer parameter register.
                const ret_indirect_reg = param_gpr[0];
                param_gpr = param_gpr[1..];
                break :return_value .{
                    .short = .{ .indirect = .{ .reg = ret_gpr[0] } },
                    .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                };
            };

            // Input params
            for (param_types, result.args) |param_ty, *arg| {
                if (!param_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
                    arg.* = .none;
                    continue;
                }
                const param_size: u31 = @intCast(param_ty.abiSize(zcu));
                // Each prong `continue`s when a register was assigned; otherwise
                // control falls through to the stack-passing code below.
                if (abi.zigcc.params_in_regs) switch (self.regClassForType(param_ty)) {
                    .general_purpose => if (param_gpr.len >= 1 and param_size <= @as(u4, switch (self.target.cpu.arch) {
                        else => unreachable,
                        .x86 => 4,
                        .x86_64 => 8,
                    })) {
                        arg.* = .{ .register = registerAlias(param_gpr[0], param_size) };
                        param_gpr = param_gpr[1..];
                        continue;
                    } else if (param_gpr.len >= 2 and param_ty.isSliceAtRuntime(zcu)) {
                        arg.* = .{ .register_pair = param_gpr[0..2].* };
                        param_gpr = param_gpr[2..];
                        continue;
                    },
                    .segment, .mmx, .ip => unreachable,
                    .x87 => if (param_x87.len >= 1) {
                        arg.* = .{ .register = param_x87[0] };
                        param_x87 = param_x87[1..];
                        continue;
                    },
                    .sse => if (param_sse.len >= 1 and param_size <= self.vectorSize(.float)) {
                        arg.* = .{
                            .register = registerAlias(param_sse[0], @max(param_size, 16)),
                        };
                        param_sse = param_sse[1..];
                        continue;
                    },
                };
                const param_align = param_ty.abiAlignment(zcu);
                result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count));
                result.stack_align = result.stack_align.max(param_align);
                arg.* = .{ .load_frame = .{
                    .index = stack_frame_base,
                    .off = result.stack_byte_count,
                } };
                result.stack_byte_count += param_size;
            }
        },
        else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}),
    }

    // Round the total stack usage up to the final stack alignment.
    result.stack_byte_count = @intCast(result.stack_align.forward(result.stack_byte_count));
    return result;
}
  27638 
  27639 fn fail(self: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } {
  27640     @branchHint(.cold);
  27641     const zcu = self.pt.zcu;
  27642     switch (self.owner) {
  27643         .nav_index => |i| return zcu.codegenFail(i, format, args),
  27644         .lazy_sym => |s| return zcu.codegenFailType(s.ty, format, args),
  27645     }
  27646     return error.CodegenFail;
  27647 }
  27648 
  27649 fn failMsg(self: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } {
  27650     @branchHint(.cold);
  27651     const zcu = self.pt.zcu;
  27652     switch (self.owner) {
  27653         .nav_index => |i| return zcu.codegenFailMsg(i, msg),
  27654         .lazy_sym => |s| return zcu.codegenFailTypeMsg(s.ty, msg),
  27655     }
  27656     return error.CodegenFail;
  27657 }
  27658 
  27659 fn parseRegName(name: []const u8) ?Register {
  27660     if (@hasDecl(Register, "parseRegName")) {
  27661         return Register.parseRegName(name);
  27662     }
  27663     return std.meta.stringToEnum(Register, name);
  27664 }
  27665 
  27666 /// Returns register wide enough to hold at least `size_bytes`.
  27667 fn registerAlias(reg: Register, size_bytes: u32) Register {
  27668     return switch (reg.class()) {
  27669         .general_purpose => if (size_bytes == 0)
  27670             unreachable // should be comptime-known
  27671         else if (size_bytes <= 1)
  27672             reg.to8()
  27673         else if (size_bytes <= 2)
  27674             reg.to16()
  27675         else if (size_bytes <= 4)
  27676             reg.to32()
  27677         else if (size_bytes <= 8)
  27678             reg.to64()
  27679         else
  27680             unreachable,
  27681         .segment => if (size_bytes <= 2)
  27682             reg
  27683         else
  27684             unreachable,
  27685         .x87 => if (size_bytes == 16)
  27686             reg
  27687         else
  27688             unreachable,
  27689         .mmx => if (size_bytes <= 8)
  27690             reg
  27691         else
  27692             unreachable,
  27693         .sse => if (size_bytes <= 16)
  27694             reg.to128()
  27695         else if (size_bytes <= 32)
  27696             reg.to256()
  27697         else
  27698             unreachable,
  27699         .ip => if (size_bytes <= 2)
  27700             .ip
  27701         else if (size_bytes <= 4)
  27702             .eip
  27703         else if (size_bytes <= 8)
  27704             .rip
  27705         else
  27706             unreachable,
  27707     };
  27708 }
  27709 
  27710 fn memSize(self: *CodeGen, ty: Type) Memory.Size {
  27711     const zcu = self.pt.zcu;
  27712     return switch (ty.zigTypeTag(zcu)) {
  27713         .float => .fromBitSize(ty.floatBits(self.target.*)),
  27714         else => .fromSize(@intCast(ty.abiSize(zcu))),
  27715     };
  27716 }
  27717 
/// Splits `ty` into exactly `parts_len` component types.
///
/// A vector whose length is evenly divisible by `parts_len` is split into
/// `parts_len` identical vectors of the same scalar type. Otherwise the
/// System V classification of `ty` is used, one part per class, and the split
/// is accepted only when the ABI sizes of the parts add up to the ABI size of
/// `ty`. Every other case fails with a "TODO implement splitType" error.
fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Type {
    const pt = self.pt;
    const zcu = pt.zcu;
    var parts: [parts_len]Type = undefined;
    if (ty.isVector(zcu)) if (std.math.divExact(u32, ty.vectorLen(zcu), parts_len)) |vec_len| return .{
        try pt.vectorType(.{ .len = vec_len, .child = ty.scalarType(zcu).toIntern() }),
    } ** parts_len else |err| switch (err) {
        error.DivisionByZero => unreachable,
        // Length not divisible: fall back to the classification below.
        error.UnexpectedRemainder => {},
    };
    const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
    if (classes.len == parts_len) for (&parts, classes, 0..) |*part, class, part_i| {
        part.* = switch (class) {
            // Every integer part except the last is a full `u64`; the last one
            // covers the remaining bytes using elements of at most 8 bytes.
            .integer => if (part_i < parts_len - 1)
                .u64
            else part: {
                const elem_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?;
                const elem_ty = try pt.intType(.unsigned, @intCast(elem_size * 8));
                break :part switch (@divExact(ty.abiSize(zcu) - part_i * 8, elem_size)) {
                    1 => elem_ty,
                    else => |array_len| try pt.arrayType(.{ .len = array_len, .child = elem_ty.toIntern() }),
                };
            },
            .float => .f32,
            .float_combine => try pt.arrayType(.{ .len = 2, .child = .f32_type }),
            .sse => .f64,
            // Unsupported class: leave the loop without running its `else`,
            // which ends in the failure at the bottom.
            else => break,
        };
    } else {
        // Runs only when every class was converted: accept the split if the
        // part sizes add up to the size of `ty`.
        var part_sizes: u64 = 0;
        for (parts) |part| part_sizes += part.abiSize(zcu);
        if (part_sizes == ty.abiSize(zcu)) return parts;
    };
    return self.fail("TODO implement splitType({d}, {})", .{ parts_len, ty.fmt(pt) });
}
  27753 
  27754 /// Truncates the value in the register in place.
  27755 /// Clobbers any remaining bits.
  27756 fn truncateRegister(self: *CodeGen, ty: Type, reg: Register) !void {
  27757     const pt = self.pt;
  27758     const zcu = pt.zcu;
  27759     const int_info: InternPool.Key.IntType = if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else .{
  27760         .signedness = .unsigned,
  27761         .bits = @intCast(ty.bitSize(zcu)),
  27762     };
  27763     const shift = std.math.cast(u6, 64 - int_info.bits % 64) orelse return;
  27764     try self.spillEflagsIfOccupied();
  27765     switch (int_info.signedness) {
  27766         .signed => {
  27767             try self.genShiftBinOpMir(.{ ._l, .sa }, .isize, .{ .register = reg }, .u8, .{ .immediate = shift });
  27768             try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, .{ .register = reg }, .u8, .{ .immediate = shift });
  27769         },
  27770         .unsigned => {
  27771             const mask = ~@as(u64, 0) >> shift;
  27772             if (int_info.bits <= 32) {
  27773                 try self.genBinOpMir(.{ ._, .@"and" }, .u32, .{ .register = reg }, .{ .immediate = mask });
  27774             } else {
  27775                 const tmp_reg = try self.copyToTmpRegister(.usize, .{ .immediate = mask });
  27776                 try self.genBinOpMir(.{ ._, .@"and" }, .usize, .{ .register = reg }, .{ .register = tmp_reg });
  27777             }
  27778         },
  27779     }
  27780 }
  27781 
  27782 fn regBitSize(self: *CodeGen, ty: Type) u64 {
  27783     const zcu = self.pt.zcu;
  27784     const abi_size = ty.abiSize(zcu);
  27785     return switch (ty.zigTypeTag(zcu)) {
  27786         else => switch (abi_size) {
  27787             1 => 8,
  27788             2 => 16,
  27789             3...4 => 32,
  27790             5...8 => 64,
  27791             else => unreachable,
  27792         },
  27793         .float => switch (abi_size) {
  27794             1...16 => 128,
  27795             17...32 => 256,
  27796             else => unreachable,
  27797         },
  27798     };
  27799 }
  27800 
  27801 fn regExtraBits(self: *CodeGen, ty: Type) u64 {
  27802     return self.regBitSize(ty) - ty.bitSize(self.pt.zcu);
  27803 }
  27804 
  27805 fn hasFeature(cg: *CodeGen, feature: std.Target.x86.Feature) bool {
  27806     return switch (feature) {
  27807         .@"64bit" => switch (cg.target.cpu.arch) {
  27808             else => unreachable,
  27809             .x86 => false,
  27810             .x86_64 => true,
  27811         },
  27812         .false_deps_getmant,
  27813         .false_deps_lzcnt_tzcnt,
  27814         .false_deps_mulc,
  27815         .false_deps_mullq,
  27816         .false_deps_perm,
  27817         .false_deps_popcnt,
  27818         .false_deps_range,
  27819         .slow_3ops_lea,
  27820         .slow_incdec,
  27821         .slow_lea,
  27822         .slow_pmaddwd,
  27823         .slow_pmulld,
  27824         .slow_shld,
  27825         .slow_two_mem_ops,
  27826         .slow_unaligned_mem_16,
  27827         .slow_unaligned_mem_32,
  27828         => switch (cg.mod.optimize_mode) {
  27829             .Debug, .ReleaseSafe, .ReleaseFast => null,
  27830             .ReleaseSmall => false,
  27831         },
  27832         .fast_11bytenop,
  27833         .fast_15bytenop,
  27834         .fast_7bytenop,
  27835         .fast_bextr,
  27836         .fast_dpwssd,
  27837         .fast_gather,
  27838         .fast_hops,
  27839         .fast_imm16,
  27840         .fast_lzcnt,
  27841         .fast_movbe,
  27842         .fast_scalar_fsqrt,
  27843         .fast_scalar_shift_masks,
  27844         .fast_shld_rotate,
  27845         .fast_variable_crosslane_shuffle,
  27846         .fast_variable_perlane_shuffle,
  27847         .fast_vector_fsqrt,
  27848         .fast_vector_shift_masks,
  27849         => switch (cg.mod.optimize_mode) {
  27850             .Debug, .ReleaseSafe, .ReleaseFast => null,
  27851             .ReleaseSmall => true,
  27852         },
  27853         .mmx => false,
  27854         else => null,
  27855     } orelse std.Target.x86.featureSetHas(cg.target.cpu.features, feature);
  27856 }
  27857 
  27858 fn typeOf(self: *CodeGen, inst: Air.Inst.Ref) Type {
  27859     const pt = self.pt;
  27860     const zcu = pt.zcu;
  27861     return self.air.typeOf(inst, &zcu.intern_pool);
  27862 }
  27863 
  27864 fn typeOfIndex(self: *CodeGen, inst: Air.Inst.Index) Type {
  27865     const pt = self.pt;
  27866     const zcu = pt.zcu;
  27867     const temp: Temp = .{ .index = inst };
  27868     return switch (temp.unwrap(self)) {
  27869         .ref => switch (self.air.instructions.items(.tag)[@intFromEnum(inst)]) {
  27870             .loop_switch_br => self.typeOf(self.air.unwrapSwitch(inst).operand),
  27871             else => self.air.typeOfIndex(inst, &zcu.intern_pool),
  27872         },
  27873         .temp => temp.typeOf(self),
  27874     };
  27875 }
  27876 
  27877 fn intCompilerRtAbiName(int_bits: u32) u8 {
  27878     return switch (int_bits) {
  27879         1...32 => 's',
  27880         33...64 => 'd',
  27881         65...128 => 't',
  27882         else => unreachable,
  27883     };
  27884 }
  27885 
  27886 fn floatCompilerRtAbiName(float_bits: u32) u8 {
  27887     return switch (float_bits) {
  27888         16 => 'h',
  27889         32 => 's',
  27890         64 => 'd',
  27891         80 => 'x',
  27892         128 => 't',
  27893         else => unreachable,
  27894     };
  27895 }
  27896 
  27897 fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type {
  27898     if (ty.toIntern() == .f16_type and
  27899         (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and
  27900         self.target.isDarwin()) return .u16;
  27901     return ty;
  27902 }
  27903 
  27904 fn floatLibcAbiPrefix(ty: Type) []const u8 {
  27905     return switch (ty.toIntern()) {
  27906         .f16_type, .f80_type => "__",
  27907         .f32_type, .f64_type, .f128_type, .c_longdouble_type => "",
  27908         else => unreachable,
  27909     };
  27910 }
  27911 
  27912 fn floatLibcAbiSuffix(ty: Type) []const u8 {
  27913     return switch (ty.toIntern()) {
  27914         .f16_type => "h",
  27915         .f32_type => "f",
  27916         .f64_type => "",
  27917         .f80_type => "x",
  27918         .f128_type => "q",
  27919         .c_longdouble_type => "l",
  27920         else => unreachable,
  27921     };
  27922 }
  27923 
  27924 fn promoteInt(self: *CodeGen, ty: Type) Type {
  27925     const pt = self.pt;
  27926     const zcu = pt.zcu;
  27927     const int_info: InternPool.Key.IntType = switch (ty.toIntern()) {
  27928         .bool_type => .{ .signedness = .unsigned, .bits = 1 },
  27929         else => if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else return ty,
  27930     };
  27931     for ([_]Type{
  27932         .c_int,      .c_uint,
  27933         .c_long,     .c_ulong,
  27934         .c_longlong, .c_ulonglong,
  27935     }) |promote_ty| {
  27936         const promote_info = promote_ty.intInfo(zcu);
  27937         if (int_info.signedness == .signed and promote_info.signedness == .unsigned) continue;
  27938         if (int_info.bits + @intFromBool(int_info.signedness == .unsigned and
  27939             promote_info.signedness == .signed) <= promote_info.bits) return promote_ty;
  27940     }
  27941     return ty;
  27942 }
  27943 
  27944 fn promoteVarArg(self: *CodeGen, ty: Type) Type {
  27945     if (!ty.isRuntimeFloat()) return self.promoteInt(ty);
  27946     switch (ty.floatBits(self.target.*)) {
  27947         32, 64 => return .f64,
  27948         else => |float_bits| {
  27949             assert(float_bits == self.target.cTypeBitSize(.longdouble));
  27950             return .c_longdouble;
  27951         },
  27952     }
  27953 }
  27954 
  27955 const Temp = struct {
  27956     index: Air.Inst.Index,
  27957 
  27958     fn unwrap(temp: Temp, cg: *CodeGen) union(enum) {
  27959         ref: Air.Inst.Ref,
  27960         temp: Index,
  27961     } {
  27962         switch (temp.index.unwrap()) {
  27963             .ref => |ref| return .{ .ref = ref },
  27964             .target => |target_index| {
  27965                 const temp_index: Index = @enumFromInt(target_index);
  27966                 assert(temp_index.isValid(cg));
  27967                 return .{ .temp = temp_index };
  27968             },
  27969         }
  27970     }
  27971 
  27972     fn typeOf(temp: Temp, cg: *CodeGen) Type {
  27973         return switch (temp.unwrap(cg)) {
  27974             .ref => |ref| cg.typeOf(ref),
  27975             .temp => |temp_index| temp_index.typeOf(cg),
  27976         };
  27977     }
  27978 
  27979     fn isMut(temp: Temp, cg: *CodeGen) bool {
  27980         return switch (temp.unwrap(cg)) {
  27981             .ref => false,
  27982             .temp => |temp_index| switch (temp_index.tracking(cg).short) {
  27983                 .none,
  27984                 .unreach,
  27985                 .dead,
  27986                 .undef,
  27987                 .immediate,
  27988                 .eflags,
  27989                 .register_offset,
  27990                 .register_mask,
  27991                 .memory,
  27992                 .load_symbol,
  27993                 .lea_symbol,
  27994                 .indirect,
  27995                 .load_direct,
  27996                 .lea_direct,
  27997                 .load_got,
  27998                 .lea_got,
  27999                 .load_tlv,
  28000                 .lea_tlv,
  28001                 .lea_frame,
  28002                 .elementwise_regs_then_frame,
  28003                 .reserved_frame,
  28004                 .air_ref,
  28005                 => false,
  28006                 .register,
  28007                 .register_pair,
  28008                 .register_triple,
  28009                 .register_quadruple,
  28010                 .register_overflow,
  28011                 => true,
  28012                 .load_frame => |frame_addr| !frame_addr.index.isNamed(),
  28013             },
  28014         };
  28015     }
  28016 
  28017     fn tracking(temp: Temp, cg: *CodeGen) InstTracking {
  28018         return cg.inst_tracking.get(temp.index).?;
  28019     }
  28020 
    /// Creates a new `usize` temp whose value is the value of `temp` plus `off`.
    ///
    /// The tracking of `temp` itself is only read. Depending on its short tracking value:
    /// * `register` / `register_offset`: a gp register is allocated and an `lea` computes
    ///   the base register plus the combined displacement;
    /// * `lea_symbol` / `lea_frame`: nothing is emitted, and the new temp is tracked as the
    ///   same symbol/frame with `off` added to its offset;
    /// * `load_frame`: a qword is loaded from the frame slot into a new gp register, and
    ///   the new temp is tracked as that register with `off` as a pending offset.
    /// Any other tracking value panics.
    fn getOffset(temp: Temp, off: i32, cg: *CodeGen) !Temp {
        // Reserve the next temp index up front and record its type.
        const new_temp_index = cg.next_temp_index;
        cg.temp_type[@intFromEnum(new_temp_index)] = .usize;
        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
        switch (temp.tracking(cg).short) {
            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
            .register => |reg| {
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                // new_reg = reg + off
                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
                    .base = .{ .reg = reg.to64() },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .disp = off,
                    } },
                });
            },
            .register_offset => |reg_off| {
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                // new_reg = reg + (existing offset + off)
                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
                    .base = .{ .reg = reg_off.reg.to64() },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .disp = reg_off.off + off,
                    } },
                });
            },
            // Purely symbolic: only the tracked offset changes.
            .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{
                .sym_index = sym_off.sym_index,
                .off = sym_off.off + off,
            } }),
            .load_frame => |frame_addr| {
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                // `off` is kept in the tracking instead of being added by an instruction.
                new_temp_index.tracking(cg).* = .init(.{ .register_offset = .{
                    .reg = new_reg,
                    .off = off,
                } });
                // Load the qword stored in the frame slot.
                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
                    .base = .{ .frame = frame_addr.index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .disp = frame_addr.off,
                    } },
                });
            },
            // Purely symbolic: only the tracked offset changes.
            .lea_frame => |frame_addr| new_temp_index.tracking(cg).* = .init(.{ .lea_frame = .{
                .index = frame_addr.index,
                .off = frame_addr.off + off,
            } }),
        }
        return .{ .index = new_temp_index.toIndex() };
    }
  28077 
    /// Adds `off` to the value of `temp`, updating `temp` in place.
    ///
    /// Does nothing when `off` is zero. When `temp` is a temp index tracked as `register`,
    /// `register_offset`, `lea_symbol` or `lea_frame`, only its tracking is rewritten with
    /// the adjusted offset. In every other case (including AIR references) a new temp is
    /// created with `getOffset`, the old one is killed with `die`, and `temp.*` is replaced.
    fn toOffset(temp: *Temp, off: i32, cg: *CodeGen) !void {
        if (off == 0) return;
        switch (temp.unwrap(cg)) {
            // AIR references fall through to the `getOffset` path below.
            .ref => {},
            .temp => |temp_index| {
                const temp_tracking = temp_index.tracking(cg);
                switch (temp_tracking.short) {
                    // Not adjustable in place: fall through to the `getOffset` path below.
                    else => {},
                    .register => |reg| {
                        // Release the long value before the tracking is replaced.
                        try cg.freeValue(temp_tracking.long);
                        temp_tracking.* = .init(.{ .register_offset = .{
                            .reg = reg,
                            .off = off,
                        } });
                        return;
                    },
                    .register_offset => |reg_off| {
                        try cg.freeValue(temp_tracking.long);
                        temp_tracking.* = .init(.{ .register_offset = .{
                            .reg = reg_off.reg,
                            .off = reg_off.off + off,
                        } });
                        return;
                    },
                    .lea_symbol => |sym_off| {
                        // The long value is expected to be the same `lea_symbol`.
                        assert(std.meta.eql(temp_tracking.long.lea_symbol, sym_off));
                        temp_tracking.* = .init(.{ .lea_symbol = .{
                            .sym_index = sym_off.sym_index,
                            .off = sym_off.off + off,
                        } });
                        return;
                    },
                    .lea_frame => |frame_addr| {
                        // The long value is expected to be the same `lea_frame`.
                        assert(std.meta.eql(temp_tracking.long.lea_frame, frame_addr));
                        temp_tracking.* = .init(.{ .lea_frame = .{
                            .index = frame_addr.index,
                            .off = frame_addr.off + off,
                        } });
                        return;
                    },
                }
            },
        }
        // Slow path: build a new temp, then retire the old one.
        const new_temp = try temp.getOffset(off, cg);
        try temp.die(cg);
        temp.* = new_temp;
    }
  28125 
    /// Creates a new temp of type `limb_ty` holding limb `limb_index` of `temp`.
    ///
    /// The tracking of `temp` itself is only read. For `load_symbol` and `load_frame` the
    /// limb is read as a qword at `limb_index * 8` bytes past the tracked offset, and for
    /// `register_pair` it is copied from `regs[limb_index]`. The `immediate`, `register`,
    /// `register_offset`, `lea_symbol` and `lea_frame` cases assert that `limb_index` is
    /// zero. Any other tracking value panics.
    fn getLimb(temp: Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !Temp {
        // The new index is only committed (`next_temp_index` advanced) after the switch.
        const new_temp_index = cg.next_temp_index;
        cg.temp_type[@intFromEnum(new_temp_index)] = limb_ty;
        switch (temp.tracking(cg).short) {
            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
            .immediate => |imm| {
                assert(limb_index == 0);
                new_temp_index.tracking(cg).* = .init(.{ .immediate = imm });
            },
            .register => |reg| {
                assert(limb_index == 0);
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64());
            },
            .register_pair => |regs| {
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                // Copy the selected register of the pair.
                try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64());
            },
            .register_offset => |reg_off| {
                assert(limb_index == 0);
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                // Materialize register + offset with `lea`; the limb term is zero here
                // because of the assertion above.
                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
                    .base = .{ .reg = reg_off.reg.to64() },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .disp = reg_off.off + @as(u31, limb_index) * 8,
                    } },
                });
            },
            .load_symbol => |sym_off| {
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                // Load the qword at symbol + offset + limb_index * 8.
                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
                    .base = .{ .reloc = sym_off.sym_index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .disp = sym_off.off + @as(u31, limb_index) * 8,
                    } },
                });
            },
            .lea_symbol => |sym_off| {
                assert(limb_index == 0);
                new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = sym_off });
            },
            .load_frame => |frame_addr| {
                const new_reg =
                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
                // Load the qword at frame slot + offset + limb_index * 8.
                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
                    .base = .{ .frame = frame_addr.index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .disp = frame_addr.off + @as(u31, limb_index) * 8,
                    } },
                });
            },
            .lea_frame => |frame_addr| {
                assert(limb_index == 0);
                new_temp_index.tracking(cg).* = .init(.{ .lea_frame = frame_addr });
            },
        }
        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
        return .{ .index = new_temp_index.toIndex() };
    }
  28197 
    /// Narrows `temp` in place to its limb `limb_index`, retyping it as `limb_ty`.
    ///
    /// When `temp` is a temp index tracked as `register`, `lea_symbol`, `lea_frame`,
    /// `register_pair`, `load_symbol`, or a `load_frame` with an unnamed frame index, the
    /// existing temp is reused: its tracking and recorded type are rewritten and no new temp
    /// is created. In every other case (including AIR references and named frame indices)
    /// a new temp is created with `getLimb`, the old one is killed with `die`, and `temp.*`
    /// is replaced.
    fn toLimb(temp: *Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !void {
        switch (temp.unwrap(cg)) {
            // AIR references fall through to the `getLimb` path below.
            .ref => {},
            .temp => |temp_index| {
                const temp_tracking = temp_index.tracking(cg);
                switch (temp_tracking.short) {
                    // Not reusable in place: fall through to the `getLimb` path below.
                    else => {},
                    // Single-value kinds: only limb 0 exists, so just retype the temp.
                    .register, .lea_symbol, .lea_frame => {
                        assert(limb_index == 0);
                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
                        return;
                    },
                    .register_pair => |regs| {
                        // NOTE(review): the `long` value adjusted here is replaced again by the
                        // whole-struct assignment `temp_tracking.* = .init(...)` a few lines
                        // below; confirm whether the adjustment is meant to survive.
                        switch (temp_tracking.long) {
                            .none, .reserved_frame => {},
                            else => temp_tracking.long =
                                temp_tracking.long.address().offset(@as(u31, limb_index) * 8).deref(),
                        }
                        // Free every register of the pair except the selected one.
                        for (regs, 0..) |reg, reg_index| if (reg_index != limb_index)
                            cg.register_manager.freeReg(reg);
                        temp_tracking.* = .init(.{ .register = regs[limb_index] });
                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
                        return;
                    },
                    .load_symbol => |sym_off| {
                        // The long value is expected to be the same `load_symbol`.
                        assert(std.meta.eql(temp_tracking.long.load_symbol, sym_off));
                        temp_tracking.* = .init(.{ .load_symbol = .{
                            .sym_index = sym_off.sym_index,
                            .off = sym_off.off + @as(u31, limb_index) * 8,
                        } });
                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
                        return;
                    },
                    // Named frame indices are not rewritten in place; they take the
                    // `getLimb` path below.
                    .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) {
                        assert(std.meta.eql(temp_tracking.long.load_frame, frame_addr));
                        temp_tracking.* = .init(.{ .load_frame = .{
                            .index = frame_addr.index,
                            .off = frame_addr.off + @as(u31, limb_index) * 8,
                        } });
                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
                        return;
                    },
                }
            },
        }
        // Slow path: build a new temp, then retire the old one.
        const new_temp = try temp.getLimb(limb_ty, limb_index, cg);
        try temp.die(cg);
        temp.* = new_temp;
    }
  28247 
  28248     fn toSlicePtr(temp: *Temp, cg: *CodeGen) !void {
  28249         const temp_ty = temp.typeOf(cg);
  28250         if (temp_ty.isSlice(cg.pt.zcu)) try temp.toLimb(temp_ty.slicePtrFieldType(cg.pt.zcu), 0, cg);
  28251     }
  28252 
  28253     fn toSliceLen(temp: *Temp, cg: *CodeGen) !void {
  28254         try temp.toLimb(.usize, 1, cg);
  28255     }
  28256 
  28257     fn toReg(temp: *Temp, new_reg: Register, cg: *CodeGen) !bool {
  28258         const val, const ty = val_ty: switch (temp.unwrap(cg)) {
  28259             .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) },
  28260             .temp => |temp_index| {
  28261                 const temp_tracking = temp_index.tracking(cg);
  28262                 if (temp_tracking.short == .register and
  28263                     temp_tracking.short.register == new_reg) return false;
  28264                 break :val_ty .{ temp_tracking.short, temp_index.typeOf(cg) };
  28265             },
  28266         };
  28267         const new_temp_index = cg.next_temp_index;
  28268         try cg.register_manager.getReg(new_reg, new_temp_index.toIndex());
  28269         cg.temp_type[@intFromEnum(new_temp_index)] = ty;
  28270         try cg.genSetReg(new_reg, ty, val, .{});
  28271         new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
  28272         try temp.die(cg);
  28273         cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
  28274         temp.* = .{ .index = new_temp_index.toIndex() };
  28275         return true;
  28276     }
  28277 
  28278     fn toRegClass(temp: *Temp, mut: bool, rc: Register.Class, cg: *CodeGen) !bool {
  28279         const val = temp.tracking(cg).short;
  28280         if (!mut or temp.isMut(cg)) switch (val) {
  28281             else => {},
  28282             .register => |reg| if (reg.class() == rc) return false,
  28283             .register_offset => |reg_off| if (reg_off.reg.class() == rc and reg_off.off == 0) return false,
  28284         };
  28285         const ty = temp.typeOf(cg);
  28286         const new_temp_index = cg.next_temp_index;
  28287         cg.temp_type[@intFromEnum(new_temp_index)] = ty;
  28288         const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc));
  28289         try cg.genSetReg(new_reg, ty, val, .{});
  28290         new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
  28291         try temp.die(cg);
  28292         cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
  28293         temp.* = .{ .index = new_temp_index.toIndex() };
  28294         return true;
  28295     }
  28296 
    /// Combines `first_temp` and `second_temp` into a single temp tracked as a register
    /// pair, and stores that temp in `first_temp.*`.
    ///
    /// Both inputs are first moved into mutable general purpose registers. The resulting
    /// temp is typed `slice_const_u8`. `second_temp.*` is not reassigned by this function.
    fn toPair(first_temp: *Temp, second_temp: *Temp, cg: *CodeGen) !void {
        // Retry loop: whenever `toRegClass` reports that it moved a temp, the inner `break`
        // skips the `for`'s `else` and the `while` scans both temps again. The `else`
        // branch, reached after a full pass in which nothing moved, leaves the `while`.
        while (true) for ([_]*Temp{ first_temp, second_temp }) |part_temp| {
            if (try part_temp.toRegClass(true, .general_purpose, cg)) break;
        } else break;
        // Both temps are accessed as temp indices tracked in a `register` from here on.
        const first_temp_tracking = first_temp.unwrap(cg).temp.tracking(cg);
        const second_temp_tracking = second_temp.unwrap(cg).temp.tracking(cg);
        const result: MCValue = .{ .register_pair = .{
            first_temp_tracking.short.register,
            second_temp_tracking.short.register,
        } };
        const result_temp_index = cg.next_temp_index;
        const result_temp: Temp = .{ .index = result_temp_index.toIndex() };
        // `reuseTemp` is asserted to succeed for both inputs.
        assert(cg.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking));
        assert(cg.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking));
        cg.temp_type[@intFromEnum(result_temp_index)] = .slice_const_u8;
        result_temp_index.tracking(cg).* = .init(result);
        first_temp.* = result_temp;
    }
  28315 
  28316     fn asMask(temp: Temp, info: MaskInfo, cg: *CodeGen) void {
  28317         assert(info.scalar != .none);
  28318         const mcv = &temp.unwrap(cg).temp.tracking(cg).short;
  28319         const reg = mcv.register;
  28320         mcv.* = .{ .register_mask = .{ .reg = reg, .info = info } };
  28321     }
  28322 
  28323     fn toLea(temp: *Temp, cg: *CodeGen) !bool {
  28324         switch (temp.tracking(cg).short) {
  28325             .none,
  28326             .unreach,
  28327             .dead,
  28328             .undef,
  28329             .eflags,
  28330             .register_pair,
  28331             .register_triple,
  28332             .register_quadruple,
  28333             .register_overflow,
  28334             .register_mask,
  28335             .elementwise_regs_then_frame,
  28336             .reserved_frame,
  28337             .air_ref,
  28338             => unreachable, // not a valid pointer
  28339             .immediate,
  28340             .register,
  28341             .register_offset,
  28342             .lea_direct,
  28343             .lea_got,
  28344             .lea_tlv,
  28345             .lea_frame,
  28346             => return false,
  28347             .memory,
  28348             .indirect,
  28349             .load_symbol,
  28350             .load_direct,
  28351             .load_got,
  28352             .load_tlv,
  28353             .load_frame,
  28354             => return temp.toRegClass(true, .general_purpose, cg),
  28355             .lea_symbol => |sym_off| {
  28356                 const off = sym_off.off;
  28357                 if (off == 0) return false;
  28358                 try temp.toOffset(-off, cg);
  28359                 while (try temp.toRegClass(true, .general_purpose, cg)) {}
  28360                 try temp.toOffset(off, cg);
  28361                 return true;
  28362             },
  28363         }
  28364     }
  28365 
  28366     fn toMemory(temp: *Temp, cg: *CodeGen) !bool {
  28367         const temp_tracking = temp.tracking(cg);
  28368         if (temp_tracking.short.isMemory()) return false;
  28369         const new_temp_index = cg.next_temp_index;
  28370         const ty = temp.typeOf(cg);
  28371         cg.temp_type[@intFromEnum(new_temp_index)] = ty;
  28372         const new_frame_index = try cg.allocFrameIndex(.initSpill(ty, cg.pt.zcu));
  28373         try cg.genSetMem(.{ .frame = new_frame_index }, 0, ty, temp_tracking.short, .{});
  28374         new_temp_index.tracking(cg).* = .init(.{ .load_frame = .{ .index = new_frame_index } });
  28375         try temp.die(cg);
  28376         cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
  28377         temp.* = .{ .index = new_temp_index.toIndex() };
  28378         return true;
  28379     }
  28380 
  28381     // hack around linker relocation bugs
  28382     fn toBase(temp: *Temp, cg: *CodeGen) !bool {
  28383         const temp_tracking = temp.tracking(cg);
  28384         if (temp_tracking.short.isBase()) return false;
  28385         if (try temp.toMemory(cg)) return true;
  28386         const new_temp_index = cg.next_temp_index;
  28387         cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg);
  28388         const new_reg =
  28389             try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
  28390         try cg.genSetReg(new_reg, .usize, temp_tracking.short.address(), .{});
  28391         new_temp_index.tracking(cg).* = .init(.{ .indirect = .{ .reg = new_reg } });
  28392         try temp.die(cg);
  28393         cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
  28394         temp.* = .{ .index = new_temp_index.toIndex() };
  28395         return true;
  28396     }
  28397 
    /// Options accepted by `load` and `store`.
    const AccessOptions = struct {
        /// Byte displacement added to the pointer (through `toOffset`) before the access.
        disp: i32 = 0,
        /// When set, `store` of an `undef` value fills the destination with the byte
        /// pattern `0xaa`; when clear, such a store writes nothing.
        safe: bool = false,
    };
  28402 
    /// Loads a value of type `val_ty` through the pointer `ptr` into a newly allocated temp.
    ///
    /// `ptr` is modified: `opts.disp` is added to it, it is rewritten by `toLea`, and for
    /// multi-register results it is advanced past each loaded register. Only `opts.disp`
    /// is read from `opts`.
    ///
    /// How the value is transferred depends on where `tempAlloc` placed the result:
    /// * a single register (or a register offset of zero) is loaded with `loadReg`, using
    ///   the register alias sized to the ABI size of `val_ty`;
    /// * a register pair/triple/quadruple is loaded one register at a time;
    /// * a memory-like result is filled with `memcpy` of `val_ty.abiSize` bytes.
    /// Any other placement panics.
    fn load(ptr: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp {
        const val = try cg.tempAlloc(val_ty);
        try ptr.toOffset(opts.disp, cg);
        // Repeat until `toLea` reports no further change.
        while (try ptr.toLea(cg)) {}
        const val_mcv = val.tracking(cg).short;
        switch (val_mcv) {
            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
            .register => |val_reg| try ptr.loadReg(val_ty, registerAlias(
                val_reg,
                @intCast(val_ty.abiSize(cg.pt.zcu)),
            ), cg),
            // After each register, advance `ptr` by that register's size in bytes and
            // rewrite it with `toLea` again for the next load.
            inline .register_pair,
            .register_triple,
            .register_quadruple,
            => |val_regs| for (val_regs) |val_reg| {
                try ptr.loadReg(val_ty, val_reg, cg);
                try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg);
                while (try ptr.toLea(cg)) {}
            },
            // Only a zero offset is expected for a freshly allocated result.
            .register_offset => |val_reg_off| switch (val_reg_off.off) {
                0 => try ptr.loadReg(val_ty, registerAlias(
                    val_reg_off.reg,
                    @intCast(val_ty.abiSize(cg.pt.zcu)),
                ), cg),
                else => unreachable,
            },
            .memory, .indirect, .load_frame, .load_symbol => {
                // Copy `abiSize` bytes from `ptr` to the address of the result.
                var val_ptr = try cg.tempInit(.usize, val_mcv.address());
                var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
                try val_ptr.memcpy(ptr, &len, cg);
                try val_ptr.die(cg);
                try len.die(cg);
            },
        }
        return val;
    }
  28439 
  28440     fn store(ptr: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void {
                // Stores `val` to the memory that `ptr` points at, after adding `opts.disp` to
                // `ptr`. Both `ptr` and `val` may be modified in place (offset, moved to a
                // register). Panics on a value location this function does not handle.
  28441         const val_ty = val.typeOf(cg);
  28442         try ptr.toOffset(opts.disp, cg);
  28443         while (try ptr.toLea(cg)) {}
                // The loop body normally runs once and exits through the trailing `break`.
                // A case that cannot store the value as-is does `continue :val_to_gpr`, which
                // runs the continue expression (repeat while `ptr.toLea` or moving `val` to a
                // general purpose register reports a change) and then retries the switch.
  28444         val_to_gpr: while (true) : (while (try ptr.toLea(cg) or
  28445             try val.toRegClass(false, .general_purpose, cg))
  28446         {}) {
  28447             const val_mcv = val.tracking(cg).short;
  28448             switch (val_mcv) {
  28449                 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
                        // Undefined value: only written (as 0xaa bytes via `memset`) when
                        // `opts.safe` is set.
  28450                 .undef => if (opts.safe) {
  28451                     var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa });
  28452                     var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
  28453                     try ptr.memset(&pat, &len, cg);
  28454                     try pat.die(cg);
  28455                     try len.die(cg);
  28456                 },
                        // Immediate: use it directly when it fits in u31, or in i32 after
                        // reinterpreting the bits as i64; otherwise retry via a register.
  28457                 .immediate => |val_imm| {
  28458                     const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31|
  28459                         .u(val_uimm31)
  28460                     else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32|
  28461                         .s(val_simm32)
  28462                     else
  28463                         continue :val_to_gpr;
  28464                     // hack around linker relocation bugs
  28465                     switch (ptr.tracking(cg).short) {
  28466                         else => {},
  28467                         .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
  28468                     }
  28469                     try cg.asmMemoryImmediate(
  28470                         .{ ._, .mov },
  28471                         try ptr.tracking(cg).short.deref().mem(cg, .{
  28472                             .size = cg.memSize(val_ty),
  28473                         }),
  28474                         val_op,
  28475                     );
  28476                 },
                        // Value is a condition code: materialize it with setcc into one byte.
  28477                 .eflags => |cc| {
  28478                     // hack around linker relocation bugs
  28479                     switch (ptr.tracking(cg).short) {
  28480                         else => {},
  28481                         .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
  28482                     }
  28483                     try cg.asmSetccMemory(
  28484                         cc,
  28485                         try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
  28486                     );
  28487                 },
  28488                 .register => |val_reg| try ptr.storeReg(val_ty, registerAlias(
  28489                     val_reg,
  28490                     @intCast(val_ty.abiSize(cg.pt.zcu)),
  28491                 ), cg),
                        // Several registers: store each in turn, advancing `ptr` by the
                        // register's size in bytes after every store.
  28492                 inline .register_pair,
  28493                 .register_triple,
  28494                 .register_quadruple,
  28495                 => |val_regs| for (val_regs) |val_reg| {
  28496                     try ptr.storeReg(val_ty, val_reg, cg);
  28497                     try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg);
  28498                     while (try ptr.toLea(cg)) {}
  28499                 },
                        // Zero offset behaves like a plain register; a non-zero offset is
                        // first resolved by retrying through the continue expression.
  28500                 .register_offset => |val_reg_off| switch (val_reg_off.off) {
  28501                     0 => try ptr.storeReg(val_ty, registerAlias(
  28502                         val_reg_off.reg,
  28503                         @intCast(val_ty.abiSize(cg.pt.zcu)),
  28504                     ), cg),
  28505                     else => continue :val_to_gpr,
  28506                 },
                        // Register plus condition code. The type must be a two-field tuple whose
                        // second field is u1, or an optional that is not payload-represented;
                        // the first field / child type selects how the register is stored.
  28507                 .register_overflow => |val_reg_ov| {
  28508                     const ip = &cg.pt.zcu.intern_pool;
  28509                     const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) {
  28510                         .tuple_type => |tuple_type| {
  28511                             const tuple_field_types = tuple_type.types.get(ip);
  28512                             assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type);
  28513                             break :first_ty tuple_field_types[0];
  28514                         },
  28515                         .opt_type => |opt_child| {
  28516                             assert(!val_ty.optionalReprIsPayload(cg.pt.zcu));
  28517                             break :first_ty opt_child;
  28518                         },
  28519                         else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
  28520                     });
                            // Store the register part, then setcc the flag into the byte that
                            // follows it (`ptr` is advanced by the first part's ABI size).
  28521                     const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
  28522                     try ptr.storeReg(first_ty, registerAlias(val_reg_ov.reg, first_size), cg);
  28523                     try ptr.toOffset(first_size, cg);
  28524                     try cg.asmSetccMemory(
  28525                         val_reg_ov.eflags,
  28526                         try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
  28527                     );
  28528                 },
                        // Address values are not stored directly; retry after the continue
                        // expression has moved `val` into a general purpose register.
  28529                 .lea_frame, .lea_symbol => continue :val_to_gpr,
                        // Value lives in memory: copy `abiSize` bytes from its address to `ptr`.
  28530                 .memory, .indirect, .load_frame, .load_symbol => {
  28531                     var val_ptr = try cg.tempInit(.usize, val_mcv.address());
  28532                     var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
  28533                     try ptr.memcpy(&val_ptr, &len, cg);
  28534                     try val_ptr.die(cg);
  28535                     try len.die(cg);
  28536                 },
  28537             }
  28538             break;
  28539         }
  28540     }
  28541 
  28542     fn read(src: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp {
                // Allocates a fresh temp of type `val_ty` and fills it from `src`'s own memory
                // location at displacement `opts.disp`. Unlike `load`, `src` is not dereferenced:
                // `readReg` addresses `src.tracking(cg).short.mem(...)` directly.
                // Returns the new temp; panics on a temp location this function does not handle.
  28543         var val = try cg.tempAlloc(val_ty);
                // Repeat until `toBase` reports that it made no further change.
  28544         while (try src.toBase(cg)) {}
  28545         const val_mcv = val.tracking(cg).short;
  28546         switch (val_mcv) {
  28547             else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
  28548             .register => |val_reg| try src.readReg(opts.disp, val_ty, registerAlias(
  28549                 val_reg,
  28550                 @intCast(val_ty.abiSize(cg.pt.zcu)),
  28551             ), cg),
                    // Several registers: read each at a running displacement that grows by the
                    // register's size in bytes.
  28552             inline .register_pair, .register_triple, .register_quadruple => |val_regs| {
  28553                 var disp = opts.disp;
  28554                 for (val_regs) |val_reg| {
  28555                     try src.readReg(disp, val_ty, val_reg, cg);
  28556                     disp += @divExact(val_reg.bitSize(), 8);
  28557                 }
  28558             },
                    // Zero offset behaves like a plain register; any other offset is declared
                    // unreachable for a freshly allocated temp.
  28559             .register_offset => |val_reg_off| switch (val_reg_off.off) {
  28560                 0 => try src.readReg(opts.disp, val_ty, registerAlias(
  28561                     val_reg_off.reg,
  28562                     @intCast(val_ty.abiSize(cg.pt.zcu)),
  28563                 ), cg),
  28564                 else => unreachable,
  28565             },
                    // Value lives in memory: memcpy `abiSize` bytes from `src`'s address plus
                    // `opts.disp` to the value's address, then release the helper temps.
  28566             .memory, .indirect, .load_frame, .load_symbol => {
  28567                 var val_ptr = try cg.tempInit(.usize, val_mcv.address());
  28568                 var src_ptr =
  28569                     try cg.tempInit(.usize, src.tracking(cg).short.address().offset(opts.disp));
  28570                 var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
  28571                 try val_ptr.memcpy(&src_ptr, &len, cg);
  28572                 try val_ptr.die(cg);
  28573                 try src_ptr.die(cg);
  28574                 try len.die(cg);
  28575             },
  28576         }
  28577         return val;
  28578     }
  28579 
  28580     fn write(dst: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void {
                // Writes `val` into `dst`'s own memory location at displacement `opts.disp`.
                // Unlike `store`, `dst` is not dereferenced: the memory operand is built from
                // `dst.tracking(cg).short.mem(...)`. Panics on an unhandled value location.
  28581         const val_ty = val.typeOf(cg);
  28582         while (try dst.toBase(cg)) {}
                // The loop body normally runs once and exits through the trailing `break`.
                // `continue :val_to_gpr` runs the continue expression (repeat while `dst.toBase`
                // or moving `val` to a general purpose register reports a change) and retries.
  28583         val_to_gpr: while (true) : (while (try dst.toBase(cg) or
  28584             try val.toRegClass(false, .general_purpose, cg))
  28585         {}) {
  28586             const val_mcv = val.tracking(cg).short;
  28587             switch (val_mcv) {
  28588                 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
                        // Immediate: use it directly when it fits in u31, or in i32 after
                        // reinterpreting the bits as i64; otherwise retry via a register.
  28589                 .immediate => |val_imm| {
  28590                     const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31|
  28591                         .u(val_uimm31)
  28592                     else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32|
  28593                         .s(val_simm32)
  28594                     else
  28595                         continue :val_to_gpr;
  28596                     try cg.asmMemoryImmediate(
  28597                         .{ ._, .mov },
  28598                         try dst.tracking(cg).short.mem(cg, .{
  28599                             .size = cg.memSize(val_ty),
  28600                             .disp = opts.disp,
  28601                         }),
  28602                         val_op,
  28603                     );
  28604                 },
  28605                 .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias(
  28606                     val_reg,
  28607                     @intCast(val_ty.abiSize(cg.pt.zcu)),
  28608                 ), cg),
                        // Several registers: write each at a running displacement that grows by
                        // the register's size in bytes.
  28609                 inline .register_pair, .register_triple, .register_quadruple => |val_regs| {
  28610                     var disp = opts.disp;
  28611                     for (val_regs) |val_reg| {
  28612                         try dst.writeReg(disp, val_ty, val_reg, cg);
  28613                         disp += @divExact(val_reg.bitSize(), 8);
  28614                     }
  28615                 },
                        // Zero offset behaves like a plain register; a non-zero offset is
                        // first resolved by retrying through the continue expression.
  28616                 .register_offset => |val_reg_off| switch (val_reg_off.off) {
  28617                     0 => try dst.writeReg(opts.disp, val_ty, registerAlias(
  28618                         val_reg_off.reg,
  28619                         @intCast(val_ty.abiSize(cg.pt.zcu)),
  28620                     ), cg),
  28621                     else => continue :val_to_gpr,
  28622                 },
                        // Address values are not written directly; retry after the continue
                        // expression has moved `val` into a general purpose register.
  28623                 .lea_frame, .lea_symbol => continue :val_to_gpr,
                        // Value lives in memory: memcpy `abiSize` bytes from the value's address
                        // to `dst`'s address plus `opts.disp`, then release the helper temps.
  28624                 .memory, .indirect, .load_frame, .load_symbol => {
  28625                     var dst_ptr =
  28626                         try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp));
  28627                     var val_ptr = try cg.tempInit(.usize, val_mcv.address());
  28628                     var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
  28629                     try dst_ptr.memcpy(&val_ptr, &len, cg);
  28630                     try dst_ptr.die(cg);
  28631                     try val_ptr.die(cg);
  28632                     try len.die(cg);
  28633                 },
  28634             }
  28635             break;
  28636         }
  28637     }
  28638 
  28639     fn loadReg(ptr: *Temp, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
                // Reads the memory `ptr` points at into `dst_reg`, using the move strategy that
                // `moveStrategy` selects for `dst_ty` and the register's class.
  28640         const dst_rc = dst_reg.class();
  28641         const strat = try cg.moveStrategy(dst_ty, dst_rc, false);
  28642         // hack around linker relocation bugs
                // A symbol address is first moved into a general purpose register unless the
                // destination is general purpose and the symbol offset is zero.
  28643         switch (ptr.tracking(cg).short) {
  28644             else => {},
  28645             .lea_symbol => |sym_off| if (dst_rc != .general_purpose or sym_off.off != 0)
  28646                 while (try ptr.toRegClass(false, .general_purpose, cg)) {},
  28647         }
                // Access width in bits: the type's ABI size, capped at the register's width.
  28648         try strat.read(cg, dst_reg, try ptr.tracking(cg).short.deref().mem(cg, .{
  28649             .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())),
  28650         }));
  28651     }
  28652 
  28653     fn storeReg(ptr: *Temp, src_ty: Type, src_reg: Register, cg: *CodeGen) !void {
                // Writes `src_reg` to the memory `ptr` points at, using the move strategy that
                // `moveStrategy` selects for `src_ty` and the register's class.
  28654         const src_rc = src_reg.class();
  28655         const src_abi_size = src_ty.abiSize(cg.pt.zcu);
  28656         const strat = try cg.moveStrategy(src_ty, src_rc, false);
  28657         // hack around linker relocation bugs
                // A symbol address is first moved into a general purpose register unless the
                // source is general purpose and the symbol offset is zero.
  28658         switch (ptr.tracking(cg).short) {
  28659             else => {},
  28660             .lea_symbol => |sym_off| if (src_rc != .general_purpose or sym_off.off != 0)
  28661                 while (try ptr.toRegClass(false, .general_purpose, cg)) {},
  28662         }
  28663         if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
                    // x87 register or power-of-two ABI size: write straight to the destination,
                    // with the width capped at the register's width.
  28664             try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{
  28665                 .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
  28666             }), src_reg);
  28667         } else {
                    // Otherwise write the register to a newly allocated spill frame slot and
                    // memcpy exactly `src_abi_size` bytes from that slot to the destination.
  28668             const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu);
  28669             const frame_index = try cg.allocFrameIndex(frame_alloc);
  28670             const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size);
  28671             try strat.write(cg, .{
  28672                 .base = .{ .frame = frame_index },
  28673                 .mod = .{ .rm = .{ .size = frame_size } },
  28674             }, src_reg);
  28675             var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
  28676             var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
  28677             try ptr.memcpy(&src_ptr, &len, cg);
  28678             try src_ptr.die(cg);
  28679             try len.die(cg);
  28680         }
  28681     }
  28682 
  28683     fn readReg(src: Temp, disp: i32, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
                // Reads `src`'s own memory location at displacement `disp` into `dst_reg`.
                // The access width in bits is the type's ABI size, capped at the register's width.
  28684         const strat = try cg.moveStrategy(dst_ty, dst_reg.class(), false);
  28685         try strat.read(cg, dst_reg, try src.tracking(cg).short.mem(cg, .{
  28686             .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())),
  28687             .disp = disp,
  28688         }));
  28689     }
  28690 
  28691     fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void {
                // Writes `src_reg` into `dst`'s own memory location at displacement `disp`, using
                // the move strategy that `moveStrategy` selects for `src_ty` and the register's
                // class. Both paths below target the same address, `dst` plus `disp`.
  28692         const src_rc = src_reg.class();
  28693         const src_abi_size = src_ty.abiSize(cg.pt.zcu);
  28694         const strat = try cg.moveStrategy(src_ty, src_rc, false);
  28695         if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
                    // x87 register or power-of-two ABI size: write straight to the destination,
                    // with the width capped at the register's width.
  28696             try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
  28697                 .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
  28698                 .disp = disp,
  28699             }), src_reg);
  28700         } else {
                    // Otherwise write the register to a newly allocated spill frame slot and
                    // memcpy exactly `src_abi_size` bytes from that slot to the destination.
  28701             const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu);
  28702             const frame_index = try cg.allocFrameIndex(frame_alloc);
  28703             const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size);
  28704             try strat.write(cg, .{
  28705                 .base = .{ .frame = frame_index },
  28706                 .mod = .{ .rm = .{ .size = frame_size } },
  28707             }, src_reg);
                    // The copy destination must include `disp`, matching the direct path above
                    // and the memcpy path in `write`; without it the bytes land at offset 0.
  28708             var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(disp));
  28709             var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
  28710             var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
  28711             try dst_ptr.memcpy(&src_ptr, &len, cg);
  28712             try dst_ptr.die(cg);
  28713             try src_ptr.die(cg);
  28714             try len.die(cg);
  28715         }
  28716     }
  28717 
  28718     fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) !void {
                // Places `dst` in rdi, `src` in rsi and `len` in rcx, then emits the
                // `.{ .@"rep _sb", .mov }` instruction (presumably `rep movsb`).
                // Whenever `toReg` returns true the inner `break` restarts the whole pass; the
                // `for`'s `else` branch ends the outer loop once a full pass returned false for
                // all three temps.
  28719         while (true) for ([_]*Temp{ dst, src, len }, [_]Register{ .rdi, .rsi, .rcx }) |temp, reg| {
  28720             if (try temp.toReg(reg, cg)) break;
  28721         } else break;
  28722         try cg.asmOpOnly(.{ .@"rep _sb", .mov });
  28723     }
  28724 
  28725     fn memset(dst: *Temp, val: *Temp, len: *Temp, cg: *CodeGen) !void {
                // Places `dst` in rdi, `val` in rax and `len` in rcx, then emits the
                // `.{ .@"rep _sb", .sto }` instruction (presumably `rep stosb`).
                // Whenever `toReg` returns true the inner `break` restarts the whole pass; the
                // `for`'s `else` branch ends the outer loop once a full pass returned false for
                // all three temps.
  28726         while (true) for ([_]*Temp{ dst, val, len }, [_]Register{ .rdi, .rax, .rcx }) |temp, reg| {
  28727             if (try temp.toReg(reg, cg)) break;
  28728         } else break;
  28729         try cg.asmOpOnly(.{ .@"rep _sb", .sto });
  28730     }
  28731 
  28732     fn moveTo(temp: Temp, inst: Air.Inst.Index, cg: *CodeGen) !void {
                // Makes `temp` the result of AIR instruction `inst`. If liveness reports the
                // instruction as unused, the temp is simply killed instead.
  28733         if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) {
                    // `temp` refers to another value: allocate a location for `inst`, copy the
                    // value into it and register that location as the instruction's tracking.
  28734             .ref => {
  28735                 const result = try cg.allocRegOrMem(inst, true);
  28736                 try cg.genCopy(cg.typeOfIndex(inst), result, temp.tracking(cg).short, .{});
  28737                 tracking_log.debug("{} => {} (birth)", .{ inst, result });
  28738                 cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result));
  28739             },
                    // `temp` is an owned temporary: hand its tracking over to `inst` without
                    // copying. `reuseTemp` is asserted to succeed here.
  28740             .temp => |temp_index| {
  28741                 const temp_tracking = temp_index.tracking(cg);
  28742                 tracking_log.debug("{} => {} (birth)", .{ inst, temp_tracking.short });
  28743                 cg.inst_tracking.putAssumeCapacityNoClobber(inst, temp_tracking.*);
  28744                 assert(cg.reuseTemp(inst, temp_index.toIndex(), temp_tracking));
  28745             },
  28746         }
  28747     }
  28748 
  28749     fn die(temp: Temp, cg: *CodeGen) !void {
                // Ends the lifetime of `temp`. A `.ref` temp is left untouched; an owned temp
                // has `die` called on its tracking entry.
  28750         switch (temp.unwrap(cg)) {
  28751             .ref => {},
  28752             .temp => |temp_index| try temp_index.tracking(cg).die(cg, temp_index.toIndex()),
  28753         }
  28754     }
  28755 
  28756     const Index = enum(u4) {
                // Identifies a temporary by a 4-bit number. The enum is non-exhaustive (`_`), so
                // every u4 value is a representable index.
  28757         _,
  28758 
                // Converts to an `Air.Inst.Index` through `fromTargetIndex`.
  28759         fn toIndex(index: Index) Air.Inst.Index {
  28760             return .fromTargetIndex(@intFromEnum(index));
  28761         }
  28762 
                // Inverse of `toIndex`: recovers the temp index from `toTargetIndex`.
  28763         fn fromIndex(index: Air.Inst.Index) Index {
  28764             return @enumFromInt(index.toTargetIndex());
  28765         }
  28766 
                // Returns the tracking entry for this temp: the element of
                // `cg.inst_tracking.values()` at the position given by the index value.
  28767         fn tracking(index: Index, cg: *CodeGen) *InstTracking {
  28768             return &cg.inst_tracking.values()[@intFromEnum(index)];
  28769         }
  28770 
                // A temp counts as valid while its tracked location is not `.dead`.
  28771         fn isValid(index: Index, cg: *CodeGen) bool {
  28772             return index.tracking(cg).short != .dead;
  28773         }
  28774 
                // Returns the type recorded in `cg.temp_type`; asserts the temp is valid.
  28775         fn typeOf(index: Index, cg: *CodeGen) Type {
  28776             assert(index.isValid(cg));
  28777             return cg.temp_type[@intFromEnum(index)];
  28778         }
  28779 
                // Largest value of the tag type (15 for u4); also the bit count of `Set`.
  28780         const max = std.math.maxInt(@typeInfo(Index).@"enum".tag_type);
  28781         const Set = std.StaticBitSet(max);
                // With runtime safety this is a real `Set`. Without it, a fieldless stand-in
                // that stores nothing: `isSet` and `eql` always return true, `set` does nothing,
                // and only the `index < max` assertions remain.
  28782         const SafetySet = if (std.debug.runtime_safety) Set else struct {
  28783             inline fn initEmpty() @This() {
  28784                 return .{};
  28785             }
  28786 
  28787             inline fn isSet(_: @This(), index: usize) bool {
  28788                 assert(index < max);
  28789                 return true;
  28790             }
  28791 
  28792             inline fn set(_: @This(), index: usize) void {
  28793                 assert(index < max);
  28794             }
  28795 
  28796             inline fn eql(_: @This(), _: @This()) bool {
  28797                 return true;
  28798             }
  28799         };
  28800     };
  28801 };
  28802 
  28803 fn resetTemps(cg: *CodeGen) void {
            // Releases every temp slot handed out so far: asserts that each temp below
            // `next_temp_index` is already dead, marks its recorded type as undefined and
            // restarts temp numbering at 0.
  28804     for (0..@intFromEnum(cg.next_temp_index)) |temp_index| {
  28805         const temp: Temp.Index = @enumFromInt(temp_index);
  28806         assert(!temp.isValid(cg));
  28807         cg.temp_type[temp_index] = undefined;
  28808     }
  28809     cg.next_temp_index = @enumFromInt(0);
  28810 }
  28811 
  28812 fn reuseTemp(
  28813     cg: *CodeGen,
  28814     new_inst: Air.Inst.Index,
  28815     old_inst: Air.Inst.Index,
  28816     tracking: *InstTracking,
  28817 ) bool {
            // Transfers ownership of the location described by `tracking` from `old_inst` to
            // `new_inst`. Returns false, without calling `tracking.reuse`, when the location is
            // a `.load_frame` whose frame index is named; returns true otherwise.
  28818     switch (tracking.short) {
                // Register-backed locations: every register that the register manager tracks is
                // re-assigned to `new_inst`.
  28819         .register,
  28820         .register_pair,
  28821         .register_offset,
  28822         .register_overflow,
  28823         .register_mask,
  28824         .indirect,
  28825         => for (tracking.short.getRegs()) |tracked_reg| {
  28826             if (RegisterManager.indexOfRegIntoTracked(tracked_reg)) |tracked_index| {
  28827                 cg.register_manager.registers[tracked_index] = new_inst;
  28828             }
  28829         },
  28830         .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
  28831         else => {},
  28832     }
            // Locations that involve the flags also move the flags owner to `new_inst`.
  28833     switch (tracking.short) {
  28834         .eflags, .register_overflow => cg.eflags_inst = new_inst,
  28835         else => {},
  28836     }
  28837     tracking.reuse(cg, new_inst, old_inst);
  28838     return true;
  28839 }
  28840 
  28841 fn tempAlloc(cg: *CodeGen, ty: Type) !Temp {
            // Creates the next temp with type `ty`, placed wherever `allocRegOrMemAdvanced`
            // decides when called with `true` as its last argument (compare `tempAllocMem`,
            // which passes `false`). Records the type and advances `next_temp_index`.
  28842     const temp_index = cg.next_temp_index;
  28843     temp_index.tracking(cg).* = .init(
  28844         try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true),
  28845     );
  28846     cg.temp_type[@intFromEnum(temp_index)] = ty;
  28847     cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
  28848     return .{ .index = temp_index.toIndex() };
  28849 }
  28850 
  28851 fn tempAllocReg(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet) !Temp {
            // Creates the next temp with type `ty`, tracked as a single register that the
            // register manager allocates from the set `rs`. Records the type and advances
            // `next_temp_index`.
  28852     const temp_index = cg.next_temp_index;
  28853     temp_index.tracking(cg).* = .init(
  28854         .{ .register = try cg.register_manager.allocReg(temp_index.toIndex(), rs) },
  28855     );
  28856     cg.temp_type[@intFromEnum(temp_index)] = ty;
  28857     cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
  28858     return .{ .index = temp_index.toIndex() };
  28859 }
  28860 
  28861 fn tempAllocRegPair(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet) !Temp {
            // Creates the next temp with type `ty`, tracked as a pair of registers that the
            // register manager allocates from the set `rs`. Records the type and advances
            // `next_temp_index`.
  28862     const temp_index = cg.next_temp_index;
  28863     temp_index.tracking(cg).* = .init(
  28864         .{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rs) },
  28865     );
  28866     cg.temp_type[@intFromEnum(temp_index)] = ty;
  28867     cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
  28868     return .{ .index = temp_index.toIndex() };
  28869 }
  28870 
  28871 fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp {
            // Same as `tempAlloc` except that `allocRegOrMemAdvanced` is called with `false` as
            // its last argument (presumably disallowing a register placement; confirm against
            // that function). Records the type and advances `next_temp_index`.
  28872     const temp_index = cg.next_temp_index;
  28873     temp_index.tracking(cg).* = .init(
  28874         try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), false),
  28875     );
  28876     cg.temp_type[@intFromEnum(temp_index)] = ty;
  28877     cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
  28878     return .{ .index = temp_index.toIndex() };
  28879 }
  28880 
  28881 fn tempInit(cg: *CodeGen, ty: Type, value: MCValue) !Temp {
            // Creates the next temp with type `ty` whose tracked location is the already
            // existing `value`, and passes that value to `cg.getValue` on the temp's behalf.
            // `next_temp_index` is only advanced after `getValue` succeeded.
  28882     const temp_index = cg.next_temp_index;
  28883     temp_index.tracking(cg).* = .init(value);
  28884     cg.temp_type[@intFromEnum(temp_index)] = ty;
  28885     try cg.getValue(value, temp_index.toIndex());
  28886     cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
  28887     return .{ .index = temp_index.toIndex() };
  28888 }
  28889 
  28890 fn tempFromValue(cg: *CodeGen, value: Value) !Temp {
            // Creates a temp for the constant `value`: lowers it with `genTypedValue` and wraps
            // the result with `tempInit`, using the value's own type.
  28891     return cg.tempInit(value.typeOf(cg.pt.zcu), try cg.genTypedValue(value));
  28892 }
  28893 
  28894 fn tempFromOperand(
  28895     cg: *CodeGen,
  28896     inst: Air.Inst.Index,
  28897     op_index: Liveness.OperandInt,
  28898     op_ref: Air.Inst.Ref,
  28899     ignore_death: bool,
  28900 ) !Temp {
            // Produces a `Temp` for operand number `op_index` (`op_ref`) of instruction `inst`.
            // If the operand stays alive, or `ignore_death` is set, the result refers to the
            // operand (or to a cached constant). If the operand dies here, its location is
            // taken over by a new temp when `reuseTemp` allows it.
  28901     const zcu = cg.pt.zcu;
  28902     const ip = &zcu.intern_pool;
  28903 
  28904     if (ignore_death or !cg.liveness.operandDies(inst, op_index)) {
                // An instruction operand is returned as a temp that refers to that instruction.
  28905         if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst };
                // Otherwise the operand is an interned constant; it is lowered once and cached
                // in `const_tracking`.
  28906         const val = op_ref.toInterned().?;
  28907         const gop = try cg.const_tracking.getOrPut(cg.gpa, val);
  28908         if (!gop.found_existing) gop.value_ptr.* = .init(init: {
  28909             const const_mcv = try cg.genTypedValue(.fromInterned(val));
  28910             switch (const_mcv) {
                        // `.lea_tlv` on ELF/Mach-O: spill rdi and rax (only rax when not PIC),
                        // write the symbol's address into a new 8-byte frame slot and track the
                        // constant as a load from that slot. NOTE(review): presumably these are
                        // the registers the TLS address sequence clobbers; confirm.
  28911                 .lea_tlv => |tlv_sym| switch (cg.bin_file.tag) {
  28912                     .elf, .macho => {
  28913                         if (cg.mod.pic) {
  28914                             try cg.spillRegisters(&.{ .rdi, .rax });
  28915                         } else {
  28916                             try cg.spillRegisters(&.{.rax});
  28917                         }
  28918                         const frame_index = try cg.allocFrameIndex(.init(.{
  28919                             .size = 8,
  28920                             .alignment = .@"8",
  28921                         }));
  28922                         try cg.genSetMem(
  28923                             .{ .frame = frame_index },
  28924                             0,
  28925                             .usize,
  28926                             .{ .lea_symbol = .{ .sym_index = tlv_sym } },
  28927                             .{},
  28928                         );
  28929                         break :init .{ .load_frame = .{ .index = frame_index } };
  28930                     },
  28931                     else => break :init const_mcv,
  28932                 },
  28933                 else => break :init const_mcv,
  28934             }
  28935         });
  28936         return cg.tempInit(.fromInterned(ip.typeOf(val)), gop.value_ptr.short);
  28937     }
  28938 
            // The operand dies at this instruction. `toIndex().?` requires it to be an
            // instruction, not a constant. Its tracking is copied into the next temp slot and
            // `reuseTemp` transfers ownership; if that is refused, a temp that refers to the
            // operand instruction is returned and `next_temp_index` is left unchanged.
  28939     const temp_index = cg.next_temp_index;
  28940     const temp: Temp = .{ .index = temp_index.toIndex() };
  28941     const op_inst = op_ref.toIndex().?;
  28942     const tracking = cg.getResolvedInstValue(op_inst);
  28943     temp_index.tracking(cg).* = tracking.*;
  28944     if (!cg.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? };
  28945     cg.temp_type[@intFromEnum(temp_index)] = cg.typeOf(op_ref);
  28946     cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
  28947     return temp;
  28948 }
  28949 
  28950 inline fn tempsFromOperands(cg: *CodeGen, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp {
            // Builds one temp per operand of `inst`, in order, via `tempFromOperand`.
            // `ignore_death` is true for an operand whose ref equals that of an earlier operand
            // in `op_refs`, and false otherwise.
  28951     var temps: [op_refs.len]Temp = undefined;
  28952     inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| {
  28953         temp.* = try cg.tempFromOperand(inst, op_index, op_ref, inline for (0..op_index) |prev_op_index| {
  28954             if (op_ref == op_refs[prev_op_index]) break true;
  28955         } else false);
  28956     }
  28957     return temps;
  28958 }
  28959 
  28960 const Operand = union(enum) {
            // One lowered instruction operand: absent, a register, a memory reference, an
            // immediate, or the index of a MIR instruction.
  28961     none,
  28962     reg: Register,
  28963     mem: Memory,
  28964     imm: Immediate,
  28965     inst: Mir.Inst.Index,
  28966 };
  28967 
  28968 const Select = struct {
  28969     cg: *CodeGen,
  28970     temps: [@intFromEnum(Select.Operand.Ref.none)]Temp,
  28971     labels: [@intFromEnum(Label._)]struct {
  28972         backward: ?Mir.Inst.Index,
  28973         forward: [1]?Mir.Inst.Index,
  28974     },
  28975 
  28976     fn emitLabel(s: *Select, label_index: Label) void {
  28977         if (label_index == ._) return;
  28978         const label = &s.labels[@intFromEnum(label_index)];
  28979         for (&label.forward) |*reloc| {
  28980             if (reloc.*) |r| s.cg.performReloc(r);
  28981             reloc.* = null;
  28982         }
  28983         label.backward = @intCast(s.cg.mir_instructions.len);
  28984     }
  28985 
  28986     fn emit(s: *Select, inst: Instruction) !void {
  28987         s.emitLabel(inst[0]);
  28988         const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] };
  28989         var mir_ops: [4]CodeGen.Operand = undefined;
  28990         inline for (&mir_ops, 3..) |*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s);
  28991         s.cg.asmOps(mir_tag, mir_ops) catch |err| switch (err) {
  28992             error.InvalidInstruction => {
  28993                 const fixes = @tagName(mir_tag[0]);
  28994                 const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?;
  28995                 return s.cg.fail(
  28996                     "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'",
  28997                     .{
  28998                         fixes[0..fixes_blank],
  28999                         @tagName(mir_tag[1]),
  29000                         fixes[fixes_blank + 1 ..],
  29001                         @tagName(mir_ops[0]),
  29002                         @tagName(mir_ops[1]),
  29003                         @tagName(mir_ops[2]),
  29004                         @tagName(mir_ops[3]),
  29005                     },
  29006                 );
  29007             },
  29008             else => |e| return e,
  29009         };
  29010     }
  29011 
  29012     const Case = struct {
  29013         required_features: [4]?std.Target.x86.Feature = @splat(null),
  29014         dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any),
  29015         src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any),
  29016         patterns: []const Select.Pattern,
  29017         extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused),
  29018         dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused),
  29019         clobbers: struct { eflags: bool = false } = .{},
  29020         each: union(enum) {
  29021             once: []const Instruction,
  29022         },
  29023     };
  29024 
  29025     const Constraint = union(enum) {
                // A predicate over a `Type`; `accepts` below evaluates it.
                // Variants carrying a `Memory.Size` are compared through
                // `size.bitSize(cg.target)`. Variants carrying an `of`/`is` pair
                // test the width of the last `of`-sized chunk of the type against
                // `is` (see the remainder note inside `accepts`). `exact_int`,
                // `exact_signed_int` and `exact_unsigned_int` carry a bit count
                // directly, and `elem_size_is` carries a value compared against
                // an `abiSize`.
  29026         any,
  29027         any_bool_vec,
  29028         any_int,
  29029         any_signed_int,
  29030         any_float,
  29031         po2_any,
  29032         bool_vec: Memory.Size,
  29033         vec: Memory.Size,
  29034         signed_int_vec: Memory.Size,
  29035         signed_int_or_full_vec: Memory.Size,
  29036         unsigned_int_vec: Memory.Size,
  29037         int_or_vec: Memory.Size,
  29038         exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size },
  29039         int: Memory.Size,
  29040         scalar_int: Memory.Size,
  29041         scalar_signed_int: Memory.Size,
  29042         scalar_unsigned_int: Memory.Size,
  29043         scalar_remainder_int: struct { of: Memory.Size, is: Memory.Size },
  29044         exact_int: u16,
  29045         exact_signed_int: u16,
  29046         exact_unsigned_int: u16,
  29047         signed_or_exact_int: Memory.Size,
  29048         unsigned_or_exact_int: Memory.Size,
  29049         po2_int: Memory.Size,
  29050         signed_po2_int: Memory.Size,
  29051         unsigned_po2_or_exact_int: Memory.Size,
  29052         remainder_int: struct { of: Memory.Size, is: Memory.Size },
  29053         exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
  29054         signed_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
  29055         unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
  29056         signed_int: Memory.Size,
  29057         unsigned_int: Memory.Size,
  29058         elem_size_is: u8,
  29059         po2_elem_size,
  29060         elem_int: Memory.Size,
  29061
  29062         fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool {
                    // Returns whether `ty` satisfies `constraint`.
                    //
                    // Conventions visible in the arms below:
                    //  * A `Memory.Size` payload is an upper bound (`>=`) on the
                    //    type's width unless the arm uses `==` (the "exact" cases).
                    //  * Runtime pointers are measured with `cg.target.ptrBitWidth()`.
                    //  * `bool` is special-cased by many arms via `.bool_type`.
                    //  * Remainder arms compute `(bits - 1) % of + 1`, which is the
                    //    width, in the range 1..=of, of the final chunk when `bits`
                    //    is split into `of`-bit chunks, and compare it against `is`.
  29063             const zcu = cg.pt.zcu;
  29064             switch (constraint) {
  29065                 .any => return true,
  29066                 .any_bool_vec => return ty.isVector(zcu) and ty.childType(zcu).toIntern() == .bool_type,
  29067                 .any_int => return ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu),
  29068                 .any_signed_int => return ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed,
  29069                 .any_float => return ty.isRuntimeFloat(),
  29070                 .po2_any => return std.math.isPowerOfTwo(ty.abiSize(zcu)),
                        // For bool vectors the bound is on the element count, not on a byte size.
  29071                 .bool_vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
  29072                     size.bitSize(cg.target) >= ty.vectorLen(zcu),
                        // NOTE(review): this compares a bit count against `ty.abiSize(zcu)`
                        // directly, whereas `.signed_int_vec`, `.signed_int_or_full_vec` and
                        // `.int_or_vec` compare against `8 * ty.abiSize(zcu)` -- confirm that
                        // the missing factor of 8 is intended.
  29073                 .vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and
  29074                     size.bitSize(cg.target) >= ty.abiSize(zcu),
  29075                 .signed_int_vec => |size| {
  29076                     if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false;
  29077                     const scalar_ty = ty.scalarType(zcu);
  29078                     return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .signed;
  29079                 },
  29080                 .signed_int_or_full_vec => |size| {
  29081                     if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false;
  29082                     const scalar_ty = ty.scalarType(zcu);
  29083                     if (scalar_ty.isPtrAtRuntime(zcu)) return true;
  29084                     if (!scalar_ty.isAbiInt(zcu)) return false;
  29085                     const scalar_int_info = scalar_ty.intInfo(zcu);
                            // Unsigned elements only qualify when their bit count is a power of two >= 8.
  29086                     return switch (scalar_int_info.signedness) {
  29087                         .signed => true,
  29088                         .unsigned => scalar_int_info.bits >= 8 and std.math.isPowerOfTwo(scalar_int_info.bits),
  29089                     };
  29090                 },
  29091                 .unsigned_int_vec => |size| {
                            // Unlike the signed vector arms, the bound here is `ty.bitSize(zcu)`.
  29092                     if (!ty.isVector(zcu) or size.bitSize(cg.target) < ty.bitSize(zcu)) return false;
  29093                     const scalar_ty = ty.scalarType(zcu);
  29094                     if (scalar_ty.isPtrAtRuntime(zcu)) return true;
  29095                     return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .unsigned;
  29096                 },
  29097                 .int_or_vec => |size| {
  29098                     if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and
  29099                         size.bitSize(cg.target) >= 8 * ty.abiSize(zcu);
  29100                     if (ty.toIntern() == .bool_type) return true;
  29101                     if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29102                     return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits;
  29103                 },
  29104                 .exact_remainder_int_or_vec => |of_is| {
                            // Vectors use `8 * abiSize` as the bit count for the remainder test.
  29105                     if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and
  29106                         of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1;
  29107                     if (ty.isPtrAtRuntime(zcu))
  29108                         return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
  29109                     if (!ty.isAbiInt(zcu)) return false;
  29110                     return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
  29111                 },
  29112                 .int => |size| {
  29113                     if (ty.toIntern() == .bool_type) return true;
  29114                     if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29115                     return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits;
  29116                 },
                        // The `scalar_*` arms test `ty.scalarType(zcu)` rather than `ty` itself.
  29117                 .scalar_int => |size| {
  29118                     const scalar_ty = ty.scalarType(zcu);
  29119                     if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29120                     return scalar_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= scalar_ty.intInfo(zcu).bits;
  29121                 },
  29122                 .scalar_signed_int => |size| {
  29123                     const scalar_ty = ty.scalarType(zcu);
  29124                     if (!scalar_ty.isAbiInt(zcu)) return false;
  29125                     const scalar_int_info = scalar_ty.intInfo(zcu);
  29126                     return scalar_int_info.signedness == .signed and size.bitSize(cg.target) >= scalar_int_info.bits;
  29127                 },
  29128                 .scalar_unsigned_int => |size| {
  29129                     const scalar_ty = ty.scalarType(zcu);
  29130                     if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29131                     if (!scalar_ty.isAbiInt(zcu)) return false;
  29132                     const scalar_int_info = scalar_ty.intInfo(zcu);
  29133                     return scalar_int_info.signedness == .unsigned and size.bitSize(cg.target) >= scalar_int_info.bits;
  29134                 },
  29135                 .scalar_remainder_int => |of_is| {
  29136                     const scalar_ty = ty.scalarType(zcu);
  29137                     if (scalar_ty.isPtrAtRuntime(zcu))
  29138                         return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
  29139                     if (!scalar_ty.isAbiInt(zcu)) return false;
  29140                     return of_is.is.bitSize(cg.target) >= (scalar_ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
  29141                 },
                        // The `exact_*` arms take a raw bit count; `bool` counts as 1 bit.
  29142                 .exact_int => |bit_size| {
  29143                     if (ty.toIntern() == .bool_type) return bit_size == 1;
  29144                     if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth();
  29145                     return ty.isAbiInt(zcu) and bit_size == ty.intInfo(zcu).bits;
  29146                 },
  29147                 .exact_signed_int => |bit_size| {
  29148                     if (!ty.isAbiInt(zcu)) return false;
  29149                     const int_info = ty.intInfo(zcu);
  29150                     return int_info.signedness == .signed and bit_size == int_info.bits;
  29151                 },
  29152                 .exact_unsigned_int => |bit_size| {
  29153                     if (ty.toIntern() == .bool_type) return bit_size == 1;
  29154                     if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth();
  29155                     if (!ty.isAbiInt(zcu)) return false;
  29156                     const int_info = ty.intInfo(zcu);
  29157                     return int_info.signedness == .unsigned and bit_size == int_info.bits;
  29158                 },
                        // Signed ints may be narrower than `size`; pointers and unsigned ints must match exactly.
  29159                 .signed_or_exact_int => |size| {
  29160                     if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) == cg.target.ptrBitWidth();
  29161                     if (!ty.isAbiInt(zcu)) return false;
  29162                     const int_info = ty.intInfo(zcu);
  29163                     return switch (int_info.signedness) {
  29164                         .signed => size.bitSize(cg.target) >= int_info.bits,
  29165                         .unsigned => size.bitSize(cg.target) == int_info.bits,
  29166                     };
  29167                 },
                        // Unsigned ints may be narrower than `size`; signed ints must match exactly.
  29168                 .unsigned_or_exact_int => |size| {
                            // NOTE(review): `bool` and runtime pointers are accepted here without
                            // consulting `size`, while `.unsigned_int` and
                            // `.unsigned_po2_or_exact_int` require `size` to cover the pointer
                            // width -- confirm this is intended.
  29169                     if (ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu)) return true;
  29170                     if (!ty.isAbiInt(zcu)) return false;
  29171                     const int_info = ty.intInfo(zcu);
  29172                     return switch (int_info.signedness) {
  29173                         .signed => size.bitSize(cg.target) == int_info.bits,
  29174                         .unsigned => size.bitSize(cg.target) >= int_info.bits,
  29175                     };
  29176                 },
                        // The `po2` arms additionally require the integer's bit count to be a power of two.
  29177                 .po2_int => |size| {
  29178                     if (ty.toIntern() == .bool_type) return true;
  29179                     if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29180                     if (!ty.isAbiInt(zcu)) return false;
  29181                     const bit_size = ty.intInfo(zcu).bits;
  29182                     return std.math.isPowerOfTwo(bit_size) and size.bitSize(cg.target) >= bit_size;
  29183                 },
  29184                 .signed_po2_int => |size| {
  29185                     if (!ty.isAbiInt(zcu)) return false;
  29186                     const int_info = ty.intInfo(zcu);
  29187                     return int_info.signedness == .signed and std.math.isPowerOfTwo(int_info.bits) and
  29188                         size.bitSize(cg.target) >= int_info.bits;
  29189                 },
  29190                 .unsigned_po2_or_exact_int => |size| {
  29191                     if (ty.toIntern() == .bool_type) return true;
  29192                     if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29193                     if (!ty.isAbiInt(zcu)) return false;
  29194                     const int_info = ty.intInfo(zcu);
  29195                     return switch (int_info.signedness) {
  29196                         .signed => size.bitSize(cg.target) == int_info.bits,
  29197                         .unsigned => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits,
  29198                     };
  29199                 },
                        // Remainder arms on `ty` itself: `>=` bounds the last chunk, `==` requires an exact match.
  29200                 .remainder_int => |of_is| {
  29201                     if (ty.toIntern() == .bool_type) return true;
  29202                     if (ty.isPtrAtRuntime(zcu))
  29203                         return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
  29204                     if (!ty.isAbiInt(zcu)) return false;
  29205                     return of_is.is.bitSize(cg.target) >= (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
  29206                 },
  29207                 .exact_remainder_int => |of_is| {
  29208                     if (ty.isPtrAtRuntime(zcu))
  29209                         return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
  29210                     if (!ty.isAbiInt(zcu)) return false;
  29211                     return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
  29212                 },
  29213                 .signed_or_exact_remainder_int => |of_is| {
  29214                     if (ty.isPtrAtRuntime(zcu))
  29215                         return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
  29216                     if (!ty.isAbiInt(zcu)) return false;
  29217                     const int_info = ty.intInfo(zcu);
  29218                     return switch (int_info.signedness) {
  29219                         .signed => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
  29220                         .unsigned => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
  29221                     };
  29222                 },
  29223                 .unsigned_or_exact_remainder_int => |of_is| {
  29224                     if (ty.toIntern() == .bool_type) return true;
  29225                     if (ty.isPtrAtRuntime(zcu))
  29226                         return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
  29227                     if (!ty.isAbiInt(zcu)) return false;
  29228                     const int_info = ty.intInfo(zcu);
  29229                     return switch (int_info.signedness) {
  29230                         .signed => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
  29231                         .unsigned => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1,
  29232                     };
  29233                 },
  29234                 .signed_int => |size| {
  29235                     if (!ty.isAbiInt(zcu)) return false;
  29236                     const int_info = ty.intInfo(zcu);
  29237                     return int_info.signedness == .signed and size.bitSize(cg.target) >= int_info.bits;
  29238                 },
  29239                 .unsigned_int => |size| {
  29240                     if (ty.toIntern() == .bool_type) return true;
  29241                     if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29242                     if (!ty.isAbiInt(zcu)) return false;
  29243                     const int_info = ty.intInfo(zcu);
  29244                     return int_info.signedness == .unsigned and size.bitSize(cg.target) >= int_info.bits;
  29245                 },
                        // The `elem_*` arms test `ty.elemType2(zcu)`.
  29246                 .elem_size_is => |size| return size == ty.elemType2(zcu).abiSize(zcu),
  29247                 .po2_elem_size => return std.math.isPowerOfTwo(ty.elemType2(zcu).abiSize(zcu)),
  29248                 .elem_int => |size| {
  29249                     const elem_ty = ty.elemType2(zcu);
  29250                     if (elem_ty.toIntern() == .bool_type) return true;
  29251                     if (elem_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth();
  29252                     return elem_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= elem_ty.intInfo(zcu).bits;
  29253                 },
  29254             }
  29255         }
  29256     };
  29257 
  29258     const Pattern = struct {
                // Describes where each of two source operands must live.
                // `src` holds one `Src` requirement per operand. `commute` is a
                // pair of `u8` values defaulting to `.{ 0, 0 }`; its use is not
                // visible in this block -- presumably it names two operand
                // indices that may be swapped; confirm against the matcher.
  29259         src: [2]Src,
  29260         commute: struct { u8, u8 } = .{ 0, 0 },
  29261
  29262         const Src = enum {
                    // Naming scheme, as implemented by `matches` and `convert` below:
                    //  * plain names (`mem`, `gpr`, `mm`, `xmm`, `ymm`) match only when the
                    //    temp is already tracked in that kind of location;
                    //  * `mut_` names additionally require `temp.isMut(cg)`;
                    //  * `to_` names match on ABI size alone (`to_mem`/`to_mut_mem` always
                    //    match), leaving the relocation to `convert`;
                    //  * `imm8`/`imm16`/`imm32`/`simm32` match immediates that fit the width.
  29263             none,
  29264             any,
  29265             imm8,
  29266             imm16,
  29267             imm32,
  29268             simm32,
  29269             mem,
  29270             to_mem,
  29271             mut_mem,
  29272             to_mut_mem,
  29273             gpr,
  29274             to_gpr,
  29275             mut_gpr,
  29276             to_mut_gpr,
  29277             mm,
  29278             to_mm,
  29279             mut_mm,
  29280             to_mut_mm,
  29281             xmm,
  29282             to_xmm,
  29283             mut_xmm,
  29284             to_mut_xmm,
  29285             ymm,
  29286             to_ymm,
  29287             mut_ymm,
  29288             to_mut_ymm,
  29289
  29290             fn matches(src: Src, temp: Temp, cg: *CodeGen) bool {
                        // Reports whether `temp` satisfies `src`. Only `temp.tracking(cg).short`,
                        // `temp.isMut(cg)` and the temp's ABI size are consulted.
                        // `.none` must never be passed in (it is `unreachable`).
  29291                 return switch (src) {
  29292                     .none => unreachable,
  29293                     .any => true,
                            // Unsigned immediates: the tracked value must fit in u8/u16/u32.
  29294                     .imm8 => switch (temp.tracking(cg).short) {
  29295                         .immediate => |imm| std.math.cast(u8, imm) != null,
  29296                         else => false,
  29297                     },
  29298                     .imm16 => switch (temp.tracking(cg).short) {
  29299                         .immediate => |imm| std.math.cast(u16, imm) != null,
  29300                         else => false,
  29301                     },
  29302                     .imm32 => switch (temp.tracking(cg).short) {
  29303                         .immediate => |imm| std.math.cast(u32, imm) != null,
  29304                         else => false,
  29305                     },
                            // Signed immediate: the value is reinterpreted as i64 and must fit in i32.
  29306                     .simm32 => switch (temp.tracking(cg).short) {
  29307                         .immediate => |imm| std.math.cast(i32, @as(i64, @bitCast(imm))) != null,
  29308                         else => false,
  29309                     },
  29310                     .mem => temp.tracking(cg).short.isMemory(),
  29311                     .to_mem, .to_mut_mem => true,
  29312                     .mut_mem => temp.isMut(cg) and temp.tracking(cg).short.isMemory(),
                            // General purpose registers: ABI size at most 8. A `register_offset`
                            // only counts when its offset is zero.
  29313                     .gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) {
  29314                         .register => |reg| reg.class() == .general_purpose,
  29315                         .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0,
  29316                         else => false,
  29317                     },
  29318                     .mut_gpr => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) {
  29319                         .register => |reg| reg.class() == .general_purpose,
  29320                         .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0,
  29321                         else => false,
  29322                     },
  29323                     .to_gpr, .to_mut_gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8,
                            // MMX registers: ABI size exactly 8.
  29324                     .mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) {
  29325                         .register => |reg| reg.class() == .mmx,
  29326                         .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
  29327                         else => false,
  29328                     },
  29329                     .mut_mm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) {
  29330                         .register => |reg| reg.class() == .mmx,
  29331                         .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
  29332                         else => false,
  29333                     },
  29334                     .to_mm, .to_mut_mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8,
                            // xmm and ymm both use the `.sse` register class; they differ only in
                            // the required ABI size (16 versus 32).
  29335                     .xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) {
  29336                         .register => |reg| reg.class() == .sse,
  29337                         .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
  29338                         else => false,
  29339                     },
  29340                     .mut_xmm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) {
  29341                         .register => |reg| reg.class() == .sse,
  29342                         .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
  29343                         else => false,
  29344                     },
  29345                     .to_xmm, .to_mut_xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16,
  29346                     .ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) {
  29347                         .register => |reg| reg.class() == .sse,
  29348                         .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
  29349                         else => false,
  29350                     },
  29351                     .mut_ymm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) {
  29352                         .register => |reg| reg.class() == .sse,
  29353                         .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0,
  29354                         else => false,
  29355                     },
  29356                     .to_ymm, .to_mut_ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32,
  29357                 };
  29358             }
  29359
  29360             fn convert(src: Src, temp: *Temp, cg: *CodeGen) !bool {
                        // Brings `temp` into the location kind that `src` asks for and forwards
                        // the `bool` returned by `toBase`/`toRegClass` (presumably "the temp
                        // was changed" -- confirm against those helpers). `.any` and the
                        // immediate kinds do nothing and yield `false`. The first argument of
                        // `toRegClass` is `true` only for the `mut_` variants; all four memory
                        // kinds go through `toBase` without such a distinction.
  29361                 return switch (src) {
  29362                     .none => unreachable,
  29363                     .any, .imm8, .imm16, .imm32, .simm32 => false,
  29364                     .mem, .to_mem, .mut_mem, .to_mut_mem => try temp.toBase(cg),
  29365                     .gpr, .to_gpr => try temp.toRegClass(false, .general_purpose, cg),
  29366                     .mut_gpr, .to_mut_gpr => try temp.toRegClass(true, .general_purpose, cg),
  29367                     .mm, .to_mm => try temp.toRegClass(false, .mmx, cg),
  29368                     .mut_mm, .to_mut_mm => try temp.toRegClass(true, .mmx, cg),
  29369                     .xmm, .to_xmm, .ymm, .to_ymm => try temp.toRegClass(false, .sse, cg),
  29370                     .mut_xmm, .to_mut_xmm, .mut_ymm, .to_mut_ymm => try temp.toRegClass(true, .sse, cg),
  29371                 };
  29372             }
  29373         };
  29374     };
  29375 
  29376     const TempSpec = struct {
                // Recipe for obtaining one temporary: `kind` selects how it is
                // obtained (see `create`) and `type` is the type handed to the
                // allocation helpers. `type` defaults to `.noreturn` and is not read
                // for the `*_mem` limit kinds or the `ref` kinds.
  29377         type: Type = .noreturn,
  29378         kind: Kind,
  29379
  29380         const unused: TempSpec = .{ .kind = .unused };
  29381
  29382         const Kind = union(enum) {
                    // How the temporary is produced; each variant is handled by one arm
                    // of `create` below.
  29383             unused,
  29384             any,
  29385             cc: Condition,
  29386             reg: Register,
  29387             rc: Register.Class,
  29388             rc_mask: struct { rc: Register.Class, info: MaskInfo },
  29389             mem,
  29390             smin_mem: Select.Operand.Ref,
  29391             smax_mem: Select.Operand.Ref,
  29392             umin_mem: Select.Operand.Ref,
  29393             umax_mem: Select.Operand.Ref,
  29394             ref: Select.Operand.Ref,
  29395             ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo },
  29396
  29397             fn finish(kind: Kind, temp: Temp, s: *const Select) void {
                        // For the two mask kinds, applies `asMask` with the stored `MaskInfo`
                        // to `temp`; every other kind is a no-op.
  29398                 switch (kind) {
  29399                     else => {},
  29400                     inline .rc_mask, .ref_mask => |mask| temp.asMask(mask.info, s.cg),
  29401                 }
  29402             }
  29403         };
  29404
  29405         fn create(spec: TempSpec, s: *Select) !?Temp {
                    // Produces the temporary described by `spec`, or `null` for `.unused`.
                    // Errors from the allocation and interning helpers are propagated.
  29406             const cg = s.cg;
  29407             return switch (spec.kind) {
  29408                 .unused => null,
  29409                 .any => try cg.tempAlloc(spec.type),
  29410                 .cc => |cc| try cg.tempInit(spec.type, .{ .eflags = cc }),
  29411                 .reg => |reg| try cg.tempInit(spec.type, .{ .register = reg }),
  29412                 .rc => |rc| try cg.tempAllocReg(spec.type, regSetForRegClass(rc)),
                        // The `MaskInfo` half of `rc_mask` is not used here, only in `Kind.finish`.
  29413                 .rc_mask => |rc_mask| try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)),
  29414                 .mem => try cg.tempAllocMem(spec.type),
                        // Limit constants: build the signed/unsigned minimum or maximum for
                        // the type of the referenced operand and turn it into a temp with
                        // `tempFromValue`. For a vector type, the scalar limit is repeated
                        // across every element.
  29415                 .smin_mem, .smax_mem, .umin_mem, .umax_mem => |ty_ref| {
  29416                     const pt = cg.pt;
  29417                     const zcu = pt.zcu;
  29418                     const ip = &zcu.intern_pool;
  29419                     const ty = ty_ref.deref(s).typeOf(s.cg);
                            // `vector_len` stays null for non-vector types.
  29420                     const vector_len, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) {
  29421                         else => .{ null, ty },
  29422                         .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) },
  29423                     };
  29424                     const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) {
                                // For bool, `smin` and `umax` map to true while `smax` and `umin` map to false.
  29425                         .bool_type => .{
  29426                             scalar_ty,
  29427                             .fromInterned(switch (spec.kind) {
  29428                                 else => unreachable,
  29429                                 .smin_mem, .umax_mem => .bool_true,
  29430                                 .smax_mem, .umin_mem => .bool_false,
  29431                             }),
  29432                         },
  29433                         else => {
                                    // Scalars that are not ABI ints are treated as unsigned
                                    // integers of `bitSize` bits.
  29434                             const scalar_info: InternPool.Key.IntType = if (scalar_ty.isAbiInt(zcu))
  29435                                 scalar_ty.intInfo(zcu)
  29436                             else
  29437                                 .{ .signedness = .unsigned, .bits = @intCast(scalar_ty.bitSize(zcu)) };
  29438                             const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits);
  29439                             if (scalar_info.bits <= 64) {
                                        // Start from the 64-bit limit and shift it right by
                                        // `64 - bits` to narrow it to `bits` bits. The shift is
                                        // done on `i64` when the result type is signed and on
                                        // `u64` when it is unsigned.
  29440                                 const int_val: i64 = switch (spec.kind) {
  29441                                     else => unreachable,
  29442                                     .smin_mem => std.math.minInt(i64),
  29443                                     .smax_mem => std.math.maxInt(i64),
  29444                                     .umin_mem => 0,
  29445                                     .umax_mem => -1,
  29446                                 };
                                        // NOTE(review): a 0-bit scalar would make this 64, which does
                                        // not fit in `u6` -- presumably unreachable here; confirm.
  29447                                 const shift: u6 = @intCast(64 - scalar_info.bits);
  29448                                 break :res_scalar .{ scalar_int_ty, switch (scalar_info.signedness) {
  29449                                     .signed => try pt.intValue_i64(scalar_int_ty, int_val >> shift),
  29450                                     .unsigned => try pt.intValue_u64(scalar_int_ty, @as(u64, @bitCast(int_val)) >> shift),
  29451                                 } };
  29452                             }
                                    // Wider than 64 bits: compute the limit with a big integer
                                    // using the signedness implied by the kind, then truncate it
                                    // to the scalar's own signedness and width.
  29453                             var big_int: std.math.big.int.Managed = try .init(cg.gpa);
  29454                             defer big_int.deinit();
  29455                             try big_int.setTwosCompIntLimit(switch (spec.kind) {
  29456                                 else => unreachable,
  29457                                 .smin_mem, .umin_mem => .min,
  29458                                 .smax_mem, .umax_mem => .max,
  29459                             }, switch (spec.kind) {
  29460                                 else => unreachable,
  29461                                 .smin_mem, .smax_mem => .signed,
  29462                                 .umin_mem, .umax_mem => .unsigned,
  29463                             }, scalar_info.bits);
  29464                             try big_int.truncate(&big_int, scalar_info.signedness, scalar_info.bits);
  29465                             break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) };
  29466                         },
  29467                     };
                            // Wrap the scalar in a vector aggregate with `repeated_elem` storage
                            // when the operand type was a vector.
  29468                     const res_val: Value = if (vector_len) |len| .fromInterned(try pt.intern(.{ .aggregate = .{
  29469                         .ty = (try pt.vectorType(.{
  29470                             .len = len,
  29471                             .child = res_scalar_ty.toIntern(),
  29472                         })).toIntern(),
  29473                         .storage = .{ .repeated_elem = res_scalar_val.toIntern() },
  29474                     } })) else res_scalar_val;
  29475                     return try cg.tempFromValue(res_val);
  29476                 },
                        // The `ref` kinds reuse the temp that the referenced operand resolves to.
  29477                 .ref => |ref| ref.deref(s),
  29478                 .ref_mask => |ref_mask| ref_mask.ref.deref(s),
  29479             };
  29480         }
  29481     };
  29482 
  29483     const Instruction = struct {
                // One instruction template, stored as a tuple: a `Label`, the MIR
                // `Fixes` and `Tag`, then four operand slots. Presumably unused
                // operand slots carry the `.none` operand tag -- confirm against
                // the code that consumes these tuples.
  29484         Label,
  29485         Mir.Inst.Fixes,
  29486         Mir.Inst.Tag,
  29487         Select.Operand,
  29488         Select.Operand,
  29489         Select.Operand,
  29490         Select.Operand,
  29491     };
  29492     const Label = enum { @"0:", @"1:", @"_" }; // First slot of an `Instruction`; presumably `_` means "no label" and the others define local labels 0 and 1 -- confirm.
  29493     const Operand = struct {
  29494         tag: Tag,
  29495         base: Ref.Sized = .none,
  29496         index: packed struct(u6) {
  29497             ref: Ref,
  29498             scale: Memory.Scale,
  29499         } = .{ .ref = .none, .scale = .@"1" },
  29500         adjust: Adjust = .none,
  29501         imm: i32 = 0,
  29502 
  29503         const Tag = enum {
  29504             none,
  29505             backward_label,
  29506             forward_label,
  29507             ref,
  29508             simm,
  29509             uimm,
  29510             lea,
  29511             mem,
  29512         };
  29513         const Adjust = packed struct(u8) {
  29514             factor: i2,
  29515             scale: Memory.Scale,
  29516             amount: enum(u4) {
  29517                 none,
  29518                 ptr_size,
  29519                 ptr_bit_size,
  29520                 size,
  29521                 src0_size,
  29522                 bit_size,
  29523                 src0_bit_size,
  29524                 len,
  29525                 elem_limbs,
  29526                 src0_elem_size,
  29527                 src0_elem_size_times_src1,
  29528                 log2_src0_elem_size,
  29529                 smin,
  29530                 smax,
  29531                 umax,
  29532             },
  29533 
  29534             const none: Adjust = .{ .factor = 0, .scale = .@"1", .amount = .none };
  29535             const sub_ptr_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .ptr_size };
  29536             const add_ptr_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .ptr_bit_size };
  29537             const add_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .size };
  29538             const sub_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .size };
  29539             const add_src0_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_size };
  29540             const sub_src0_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_size };
  29541             const add_2_bit_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .bit_size };
  29542             const add_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .bit_size };
  29543             const sub_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .bit_size };
  29544             const add_src0_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_bit_size };
  29545             const sub_src0_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_bit_size };
  29546             const add_8_len: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .len };
  29547             const add_4_len: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .len };
  29548             const add_3_len: Adjust = .{ .factor = 1, .scale = .@"3", .amount = .len };
  29549             const add_2_len: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .len };
  29550             const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len };
  29551             const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len };
  29552             const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size };
  29553             const add_2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .src0_elem_size };
  29554             const add_4_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .src0_elem_size };
  29555             const add_8_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .src0_elem_size };
  29556             const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size };
  29557             const add_src0_elem_size_times_src1: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size_times_src1 };
  29558             const sub_src0_elem_size_times_src1: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size_times_src1 };
  29559             const add_log2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .log2_src0_elem_size };
  29560             const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs };
  29561             const add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax };
  29562         };
  29563         const Ref = enum(u4) {
                    // Names one slot of `Select.temps`: six scratch temporaries, one
                    // destination, two sources, and a `none` marker. `deref` uses the tag
                    // value directly as the slot index, so the declaration order below is
                    // significant.
  29564             tmp0,
  29565             tmp1,
  29566             tmp2,
  29567             tmp3,
  29568             tmp4,
  29569             tmp5,
  29570             dst0,
  29571             src0,
  29572             src1,
  29573             none,
  29574
                    // A `Ref` paired with a `Memory.Size`, packed into a single byte.
                    // Naming scheme of the constants below: the bare name uses size `.none`;
                    // the suffixes b/w/d/p/q/x/y select `.byte`, `.word`, `.dword`, `.ptr`,
                    // `.qword`, `.xword` and `.yword` respectively.
  29575             const Sized = packed struct(u8) {
  29576                 ref: Ref,
  29577                 size: Memory.Size,
  29578
  29579                 const none: Sized = .{ .ref = .none, .size = .none };
  29580
                        // Scratch temporaries tmp0..tmp5.
  29581                 const tmp0: Sized = .{ .ref = .tmp0, .size = .none };
  29582                 const tmp0b: Sized = .{ .ref = .tmp0, .size = .byte };
  29583                 const tmp0w: Sized = .{ .ref = .tmp0, .size = .word };
  29584                 const tmp0d: Sized = .{ .ref = .tmp0, .size = .dword };
  29585                 const tmp0p: Sized = .{ .ref = .tmp0, .size = .ptr };
  29586                 const tmp0q: Sized = .{ .ref = .tmp0, .size = .qword };
  29587                 const tmp0x: Sized = .{ .ref = .tmp0, .size = .xword };
  29588                 const tmp0y: Sized = .{ .ref = .tmp0, .size = .yword };
  29589
  29590                 const tmp1: Sized = .{ .ref = .tmp1, .size = .none };
  29591                 const tmp1b: Sized = .{ .ref = .tmp1, .size = .byte };
  29592                 const tmp1w: Sized = .{ .ref = .tmp1, .size = .word };
  29593                 const tmp1d: Sized = .{ .ref = .tmp1, .size = .dword };
  29594                 const tmp1p: Sized = .{ .ref = .tmp1, .size = .ptr };
  29595                 const tmp1q: Sized = .{ .ref = .tmp1, .size = .qword };
  29596                 const tmp1x: Sized = .{ .ref = .tmp1, .size = .xword };
  29597                 const tmp1y: Sized = .{ .ref = .tmp1, .size = .yword };
  29598
  29599                 const tmp2: Sized = .{ .ref = .tmp2, .size = .none };
  29600                 const tmp2b: Sized = .{ .ref = .tmp2, .size = .byte };
  29601                 const tmp2w: Sized = .{ .ref = .tmp2, .size = .word };
  29602                 const tmp2d: Sized = .{ .ref = .tmp2, .size = .dword };
  29603                 const tmp2p: Sized = .{ .ref = .tmp2, .size = .ptr };
  29604                 const tmp2q: Sized = .{ .ref = .tmp2, .size = .qword };
  29605                 const tmp2x: Sized = .{ .ref = .tmp2, .size = .xword };
  29606                 const tmp2y: Sized = .{ .ref = .tmp2, .size = .yword };
  29607
  29608                 const tmp3: Sized = .{ .ref = .tmp3, .size = .none };
  29609                 const tmp3b: Sized = .{ .ref = .tmp3, .size = .byte };
  29610                 const tmp3w: Sized = .{ .ref = .tmp3, .size = .word };
  29611                 const tmp3d: Sized = .{ .ref = .tmp3, .size = .dword };
  29612                 const tmp3p: Sized = .{ .ref = .tmp3, .size = .ptr };
  29613                 const tmp3q: Sized = .{ .ref = .tmp3, .size = .qword };
  29614                 const tmp3x: Sized = .{ .ref = .tmp3, .size = .xword };
  29615                 const tmp3y: Sized = .{ .ref = .tmp3, .size = .yword };
  29616
  29617                 const tmp4: Sized = .{ .ref = .tmp4, .size = .none };
  29618                 const tmp4b: Sized = .{ .ref = .tmp4, .size = .byte };
  29619                 const tmp4w: Sized = .{ .ref = .tmp4, .size = .word };
  29620                 const tmp4d: Sized = .{ .ref = .tmp4, .size = .dword };
  29621                 const tmp4p: Sized = .{ .ref = .tmp4, .size = .ptr };
  29622                 const tmp4q: Sized = .{ .ref = .tmp4, .size = .qword };
  29623                 const tmp4x: Sized = .{ .ref = .tmp4, .size = .xword };
  29624                 const tmp4y: Sized = .{ .ref = .tmp4, .size = .yword };
  29625
  29626                 const tmp5: Sized = .{ .ref = .tmp5, .size = .none };
  29627                 const tmp5b: Sized = .{ .ref = .tmp5, .size = .byte };
  29628                 const tmp5w: Sized = .{ .ref = .tmp5, .size = .word };
  29629                 const tmp5d: Sized = .{ .ref = .tmp5, .size = .dword };
  29630                 const tmp5p: Sized = .{ .ref = .tmp5, .size = .ptr };
  29631                 const tmp5q: Sized = .{ .ref = .tmp5, .size = .qword };
  29632                 const tmp5x: Sized = .{ .ref = .tmp5, .size = .xword };
  29633                 const tmp5y: Sized = .{ .ref = .tmp5, .size = .yword };
  29634
                        // Destination dst0.
  29635                 const dst0: Sized = .{ .ref = .dst0, .size = .none };
  29636                 const dst0b: Sized = .{ .ref = .dst0, .size = .byte };
  29637                 const dst0w: Sized = .{ .ref = .dst0, .size = .word };
  29638                 const dst0d: Sized = .{ .ref = .dst0, .size = .dword };
  29639                 const dst0p: Sized = .{ .ref = .dst0, .size = .ptr };
  29640                 const dst0q: Sized = .{ .ref = .dst0, .size = .qword };
  29641                 const dst0x: Sized = .{ .ref = .dst0, .size = .xword };
  29642                 const dst0y: Sized = .{ .ref = .dst0, .size = .yword };
  29643
                        // Sources src0 and src1.
  29644                 const src0: Sized = .{ .ref = .src0, .size = .none };
  29645                 const src0b: Sized = .{ .ref = .src0, .size = .byte };
  29646                 const src0w: Sized = .{ .ref = .src0, .size = .word };
  29647                 const src0d: Sized = .{ .ref = .src0, .size = .dword };
  29648                 const src0p: Sized = .{ .ref = .src0, .size = .ptr };
  29649                 const src0q: Sized = .{ .ref = .src0, .size = .qword };
  29650                 const src0x: Sized = .{ .ref = .src0, .size = .xword };
  29651                 const src0y: Sized = .{ .ref = .src0, .size = .yword };
  29652
  29653                 const src1: Sized = .{ .ref = .src1, .size = .none };
  29654                 const src1b: Sized = .{ .ref = .src1, .size = .byte };
  29655                 const src1w: Sized = .{ .ref = .src1, .size = .word };
  29656                 const src1d: Sized = .{ .ref = .src1, .size = .dword };
  29657                 const src1p: Sized = .{ .ref = .src1, .size = .ptr };
  29658                 const src1q: Sized = .{ .ref = .src1, .size = .qword };
  29659                 const src1x: Sized = .{ .ref = .src1, .size = .xword };
  29660                 const src1y: Sized = .{ .ref = .src1, .size = .yword };
  29661             };
  29662
  29663             fn deref(ref: Ref, s: *const Select) Temp {
                        // Returns the `Temp` stored in `s.temps` at this ref's tag value.
                        // `.none` gets no special handling here; it is indexed like any other tag.
  29664                 return s.temps[@intFromEnum(ref)];
  29665             }
  29666         };
  29667 
  29668         const @"_": Select.Operand = .{ .tag = .none }; // operand with tag `.none`; `lower` turns it into `.none`
  29669
                // Label operands. The `base.ref` slot only serves as the label number:
                // `lower` indexes `s.labels` with `@intFromEnum(op.base.ref)`, so tmp0 is
                // label 0 and tmp1 is label 1. The `b` suffix is a backward reference, `f` a forward one.
  29670         const @"0b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp0, .size = .none } };
  29671         const @"0f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp0, .size = .none } };
  29672         const @"1b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp1, .size = .none } };
  29673         const @"1f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp1, .size = .none } };
  29674
                // Direct `.ref` operands. Each wraps the `Ref.Sized` constant of the same
                // name, so the suffix (b/w/d/p/q/x/y) is the access size of the operand.
  29675         const tmp0b: Select.Operand = .{ .tag = .ref, .base = .tmp0b };
  29676         const tmp0w: Select.Operand = .{ .tag = .ref, .base = .tmp0w };
  29677         const tmp0d: Select.Operand = .{ .tag = .ref, .base = .tmp0d };
  29678         const tmp0p: Select.Operand = .{ .tag = .ref, .base = .tmp0p };
  29679         const tmp0q: Select.Operand = .{ .tag = .ref, .base = .tmp0q };
  29680         const tmp0x: Select.Operand = .{ .tag = .ref, .base = .tmp0x };
  29681         const tmp0y: Select.Operand = .{ .tag = .ref, .base = .tmp0y };
  29682
  29683         const tmp1b: Select.Operand = .{ .tag = .ref, .base = .tmp1b };
  29684         const tmp1w: Select.Operand = .{ .tag = .ref, .base = .tmp1w };
  29685         const tmp1d: Select.Operand = .{ .tag = .ref, .base = .tmp1d };
  29686         const tmp1p: Select.Operand = .{ .tag = .ref, .base = .tmp1p };
  29687         const tmp1q: Select.Operand = .{ .tag = .ref, .base = .tmp1q };
  29688         const tmp1x: Select.Operand = .{ .tag = .ref, .base = .tmp1x };
  29689         const tmp1y: Select.Operand = .{ .tag = .ref, .base = .tmp1y };
  29690
  29691         const tmp2b: Select.Operand = .{ .tag = .ref, .base = .tmp2b };
  29692         const tmp2w: Select.Operand = .{ .tag = .ref, .base = .tmp2w };
  29693         const tmp2d: Select.Operand = .{ .tag = .ref, .base = .tmp2d };
  29694         const tmp2p: Select.Operand = .{ .tag = .ref, .base = .tmp2p };
  29695         const tmp2q: Select.Operand = .{ .tag = .ref, .base = .tmp2q };
  29696         const tmp2x: Select.Operand = .{ .tag = .ref, .base = .tmp2x };
  29697         const tmp2y: Select.Operand = .{ .tag = .ref, .base = .tmp2y };
  29698
  29699         const tmp3b: Select.Operand = .{ .tag = .ref, .base = .tmp3b };
  29700         const tmp3w: Select.Operand = .{ .tag = .ref, .base = .tmp3w };
  29701         const tmp3d: Select.Operand = .{ .tag = .ref, .base = .tmp3d };
  29702         const tmp3p: Select.Operand = .{ .tag = .ref, .base = .tmp3p };
  29703         const tmp3q: Select.Operand = .{ .tag = .ref, .base = .tmp3q };
  29704         const tmp3x: Select.Operand = .{ .tag = .ref, .base = .tmp3x };
  29705         const tmp3y: Select.Operand = .{ .tag = .ref, .base = .tmp3y };
  29706
  29707         const tmp4b: Select.Operand = .{ .tag = .ref, .base = .tmp4b };
  29708         const tmp4w: Select.Operand = .{ .tag = .ref, .base = .tmp4w };
  29709         const tmp4d: Select.Operand = .{ .tag = .ref, .base = .tmp4d };
  29710         const tmp4p: Select.Operand = .{ .tag = .ref, .base = .tmp4p };
  29711         const tmp4q: Select.Operand = .{ .tag = .ref, .base = .tmp4q };
  29712         const tmp4x: Select.Operand = .{ .tag = .ref, .base = .tmp4x };
  29713         const tmp4y: Select.Operand = .{ .tag = .ref, .base = .tmp4y };
  29714
  29715         const tmp5b: Select.Operand = .{ .tag = .ref, .base = .tmp5b };
  29716         const tmp5w: Select.Operand = .{ .tag = .ref, .base = .tmp5w };
  29717         const tmp5d: Select.Operand = .{ .tag = .ref, .base = .tmp5d };
  29718         const tmp5p: Select.Operand = .{ .tag = .ref, .base = .tmp5p };
  29719         const tmp5q: Select.Operand = .{ .tag = .ref, .base = .tmp5q };
  29720         const tmp5x: Select.Operand = .{ .tag = .ref, .base = .tmp5x };
  29721         const tmp5y: Select.Operand = .{ .tag = .ref, .base = .tmp5y };
  29722
  29723         const dst0b: Select.Operand = .{ .tag = .ref, .base = .dst0b };
  29724         const dst0w: Select.Operand = .{ .tag = .ref, .base = .dst0w };
  29725         const dst0d: Select.Operand = .{ .tag = .ref, .base = .dst0d };
  29726         const dst0p: Select.Operand = .{ .tag = .ref, .base = .dst0p };
  29727         const dst0q: Select.Operand = .{ .tag = .ref, .base = .dst0q };
  29728         const dst0x: Select.Operand = .{ .tag = .ref, .base = .dst0x };
  29729         const dst0y: Select.Operand = .{ .tag = .ref, .base = .dst0y };
  29730
  29731         const src0b: Select.Operand = .{ .tag = .ref, .base = .src0b };
  29732         const src0w: Select.Operand = .{ .tag = .ref, .base = .src0w };
  29733         const src0d: Select.Operand = .{ .tag = .ref, .base = .src0d };
  29734         const src0p: Select.Operand = .{ .tag = .ref, .base = .src0p };
  29735         const src0q: Select.Operand = .{ .tag = .ref, .base = .src0q };
  29736         const src0x: Select.Operand = .{ .tag = .ref, .base = .src0x };
  29737         const src0y: Select.Operand = .{ .tag = .ref, .base = .src0y };
  29738
  29739         const src1b: Select.Operand = .{ .tag = .ref, .base = .src1b };
  29740         const src1w: Select.Operand = .{ .tag = .ref, .base = .src1w };
  29741         const src1d: Select.Operand = .{ .tag = .ref, .base = .src1d };
  29742         const src1p: Select.Operand = .{ .tag = .ref, .base = .src1p };
  29743         const src1q: Select.Operand = .{ .tag = .ref, .base = .src1q };
  29744         const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x };
  29745         const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y };
  29746 
  29747         fn si(imm: i32) Select.Operand {
                    // Signed-immediate operand (`.simm`) holding the literal `imm`;
                    // `base` and `adjust` are left at their field defaults.
                    return .{
                        .imm = imm,
                        .tag = .simm,
                    };
  29749         }
  29750         fn sa(base: Ref.Sized, adjust: Adjust) Select.Operand {
                    // Signed-immediate operand (`.simm`) whose value comes from applying
                    // `adjust` to `base` (see `adjustedImm`); `imm` keeps its field default.
                    return .{
                        .adjust = adjust,
                        .base = base,
                        .tag = .simm,
                    };
  29752         }
  29753         fn sia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
                    // Signed-immediate operand (`.simm`) combining the literal `imm` with
                    // the `adjust` term taken from `base` (see `adjustedImm`).
                    return .{
                        .imm = imm,
                        .adjust = adjust,
                        .base = base,
                        .tag = .simm,
                    };
  29755         }
  29756         fn ui(imm: i32) Select.Operand {
                    // Unsigned-immediate operand (`.uimm`) holding the literal `imm`;
                    // `base` and `adjust` are left at their field defaults.
                    return .{
                        .imm = imm,
                        .tag = .uimm,
                    };
  29758         }
  29759         fn ua(base: Ref.Sized, adjust: Adjust) Select.Operand {
                    // Unsigned-immediate operand (`.uimm`) whose value comes from applying
                    // `adjust` to `base` (see `adjustedImm`); `imm` keeps its field default.
                    return .{
                        .adjust = adjust,
                        .base = base,
                        .tag = .uimm,
                    };
  29761         }
  29762         fn uia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
                    // Unsigned-immediate operand (`.uimm`) combining the literal `imm` with
                    // the `adjust` term taken from `base` (see `adjustedImm`).
                    return .{
                        .imm = imm,
                        .adjust = adjust,
                        .base = base,
                        .tag = .uimm,
                    };
  29764         }
  29765 
  29766         fn lea(size: Memory.Size, base: Ref) Select.Operand {
                    // `.lea` operand: when lowered, the register tracked for `base` is used
                    // as the base register of a memory operand accessed with `size`.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{ .base = sized_base, .tag = .lea };
  29771         }
  29772         fn leaa(size: Memory.Size, base: Ref, adjust: Adjust) Select.Operand {
                    // `.lea` operand like `lea`, with `adjust` contributing to the
                    // displacement (see `adjustedImm`).
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .adjust = adjust,
                        .base = sized_base,
                        .tag = .lea,
                    };
  29778         }
  29779         fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand {
                    // `.lea` operand like `lea`, with the constant displacement `disp`
                    // stored in the `imm` field.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .imm = disp,
                        .base = sized_base,
                        .tag = .lea,
                    };
  29785         }
  29786         fn leai(size: Memory.Size, base: Ref, index: Ref) Select.Operand {
                    // `.lea` operand with an index ref at scale 1.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = sized_base,
                        .tag = .lea,
                    };
  29792         }
  29793         fn leaia(size: Memory.Size, base: Ref, index: Ref, adjust: Adjust) Select.Operand {
                    // `.lea` operand with an index ref at scale 1 and an `adjust` term
                    // contributing to the displacement (see `adjustedImm`).
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .adjust = adjust,
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = sized_base,
                        .tag = .lea,
                    };
  29800         }
  29801         fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand {
                    // `.lea` operand with an index ref at scale 1 and the constant
                    // displacement `disp` stored in the `imm` field.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .imm = disp,
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = sized_base,
                        .tag = .lea,
                    };
  29808         }
  29809         fn leasi(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref) Select.Operand {
                    // `.lea` operand with an index ref multiplied by the caller-chosen `scale`.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .index = .{ .scale = scale, .ref = index },
                        .base = sized_base,
                        .tag = .lea,
                    };
  29815         }
  29816         fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
                    // `.lea` operand with a scaled index ref and the constant displacement
                    // `disp` stored in the `imm` field.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .imm = disp,
                        .index = .{ .scale = scale, .ref = index },
                        .base = sized_base,
                        .tag = .lea,
                    };
  29823         }
  29824         fn leasiad(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
                    // Most general `.lea` form: scaled index ref, an `adjust` term and the
                    // constant `disp` (stored in `imm`), both feeding the displacement.
                    const sized_base: Ref.Sized = .{ .size = size, .ref = base };
                    return .{
                        .imm = disp,
                        .adjust = adjust,
                        .index = .{ .scale = scale, .ref = index },
                        .base = sized_base,
                        .tag = .lea,
                    };
  29832         }
  29833 
  29834         fn mem(base: Ref.Sized) Select.Operand {
                    // `.mem` operand: when lowered, a memory operand is built from the
                    // tracked value of the temp named by `base`, at `base`'s size.
                    const operand: Select.Operand = .{ .base = base, .tag = .mem };
                    return operand;
  29839         }
  29840         fn memd(base: Ref.Sized, disp: i32) Select.Operand {
                    // `.mem` operand with the constant displacement `disp` stored in `imm`.
                    const operand: Select.Operand = .{
                        .imm = disp,
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29846         }
  29847         fn mema(base: Ref.Sized, adjust: Adjust) Select.Operand {
                    // `.mem` operand with an `adjust` term contributing to the displacement
                    // (see `adjustedImm`).
                    const operand: Select.Operand = .{
                        .adjust = adjust,
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29853         }
  29854         fn memad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand {
                    // `.mem` operand whose displacement combines an `adjust` term with the
                    // constant `disp` (stored in `imm`).
                    const operand: Select.Operand = .{
                        .imm = disp,
                        .adjust = adjust,
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29861         }
  29862         fn memi(base: Ref.Sized, index: Ref) Select.Operand {
                    // `.mem` operand with an index ref at scale 1.
                    const operand: Select.Operand = .{
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29868         }
  29869         fn memia(base: Ref.Sized, index: Ref, adjust: Adjust) Select.Operand {
                    // `.mem` operand with an index ref at scale 1 and an `adjust` term
                    // contributing to the displacement (see `adjustedImm`).
                    const operand: Select.Operand = .{
                        .adjust = adjust,
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29876         }
  29877         fn memiad(base: Ref.Sized, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
                    // `.mem` operand with an index ref at scale 1; the displacement combines
                    // an `adjust` term with the constant `disp` (stored in `imm`).
                    const operand: Select.Operand = .{
                        .imm = disp,
                        .adjust = adjust,
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29885         }
  29886         fn memid(base: Ref.Sized, index: Ref, disp: i32) Select.Operand {
                    // `.mem` operand with an index ref at scale 1 and the constant
                    // displacement `disp` stored in `imm`.
                    const operand: Select.Operand = .{
                        .imm = disp,
                        .index = .{ .scale = .@"1", .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29893         }
  29894         fn memsi(base: Ref.Sized, scale: Memory.Scale, index: Ref) Select.Operand {
                    // `.mem` operand with an index ref multiplied by the caller-chosen `scale`.
                    const operand: Select.Operand = .{
                        .index = .{ .scale = scale, .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29900         }
  29901         fn memsia(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust) Select.Operand {
                    // `.mem` operand with a scaled index ref and an `adjust` term
                    // contributing to the displacement (see `adjustedImm`).
                    const operand: Select.Operand = .{
                        .adjust = adjust,
                        .index = .{ .scale = scale, .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29908         }
  29909         fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
                    // `.mem` operand with a scaled index ref and the constant displacement
                    // `disp` stored in `imm`.
                    const operand: Select.Operand = .{
                        .imm = disp,
                        .index = .{ .scale = scale, .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29916         }
  29917         fn memsiad(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
                    // Most general `.mem` form: scaled index ref, an `adjust` term and the
                    // constant `disp` (stored in `imm`), both feeding the displacement.
                    const operand: Select.Operand = .{
                        .imm = disp,
                        .adjust = adjust,
                        .index = .{ .scale = scale, .ref = index },
                        .base = base,
                        .tag = .mem,
                    };
                    return operand;
  29925         }
  29926 
  29927         fn adjustedImm(op: Select.Operand, comptime SignedImm: type, s: *const Select) SignedImm {
                    // Evaluates this operand's immediate/displacement value:
                    //     adjust.factor * adjust.scale.toFactor() * amount + imm
                    // and returns it as `SignedImm`. `amount` is selected by `adjust.amount`
                    // and is mostly derived from the type of the temp that `op.base.ref`
                    // refers to (or of `src0` for the `src0_*` variants). The `@intCast`s
                    // assert that each amount fits in `SignedImm`.
                    //
                    // `UnsignedImm` is the unsigned integer type of the same width as
                    // `SignedImm`; it is only needed for `.umax`.
  29928             const UnsignedImm = @Type(.{
  29929                 .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits },
  29930             });
  29931             return @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) {
  29932                 .none => 0,
                        // Target pointer width, in bytes and in bits respectively.
  29933                 .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
  29934                 .ptr_bit_size => s.cg.target.ptrBitWidth(),
                        // ABI size / scalar bit size of the base temp's type, or of src0's type.
  29935                 .size => @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)),
  29936                 .src0_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)),
  29937                 .bit_size => @intCast(op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
  29938                 .src0_bit_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
  29939                 .len => @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu)),
                        // Scalar ABI size divided by the operand's access size in bytes;
                        // both divisions must be exact.
  29940                 .elem_limbs => @intCast(@divExact(
  29941                     op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu),
  29942                     @divExact(op.base.size.bitSize(s.cg.target), 8),
  29943                 )),
  29944                 .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
                        // Reads src1's tracked value as `.immediate`, so src1 must currently
                        // be tracked as an immediate when this amount is used.
  29945                 .src0_elem_size_times_src1 => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
  29946                     Select.Operand.Ref.src1.deref(s).tracking(s.cg).short.immediate),
  29947                 .log2_src0_elem_size => @intCast(std.math.log2(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))),
                        // smin/smax/umax: the extreme value of the immediate type is shifted
                        // right by the wrapped negation of the scalar bit size, truncated to
                        // the shift-amount type (i.e. width - bit_size for bit sizes below the
                        // width, 0 at the width). The result is the extreme value of an
                        // integer with that bit size; e.g. for a 32-bit immediate and an
                        // 8-bit scalar: -128, 127 and 255.
  29948                 .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate(
  29949                     -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
  29950                 ),
  29951                 .smax => @as(SignedImm, std.math.maxInt(SignedImm)) >> @truncate(
  29952                     -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
  29953                 ),
  29954                 .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate(
  29955                     -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
  29956                 )),
  29957             }) + op.imm;
  29958         }
  29959 
  29960         fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand {
                    // Turns this symbolic operand into a concrete `CodeGen.Operand`, using
                    // the temps and labels recorded in `s`. Errors come only from the
                    // `mem(...)` calls on tracked values.
  29961             return switch (op.tag) {
  29962                 .none => .none,
                        // The label number is the tag value of `op.base.ref`. A backward
                        // label must already have been recorded (`.?` unwraps it).
  29963                 .backward_label => .{ .inst = s.labels[@intFromEnum(op.base.ref)].backward.? },
                        // Claims the first free forward slot of the label and stores the
                        // current MIR instruction count in it. The returned target is
                        // `undefined` here; presumably it is patched once the label is
                        // emitted (TODO confirm in `emitLabel`). Running out of free
                        // slots is treated as unreachable.
  29964                 .forward_label => for (&s.labels[@intFromEnum(op.base.ref)].forward) |*label| {
  29965                     if (label.*) |_| continue;
  29966                     label.* = @intCast(s.cg.mir_instructions.len);
  29967                     break .{ .inst = undefined };
  29968                 } else unreachable,
                        // Direct reference: the result depends on how the temp is tracked.
  29969                 .ref => switch (op.base.ref.deref(s).tracking(s.cg).short) {
                            // Immediate: use the signed encoding when the value, seen as an
                            // i64, fits the signed type for the operand size (i32 for qword),
                            // otherwise the unsigned one. Other sizes are unreachable.
  29970                     .immediate => |imm| .{ .imm = switch (op.base.size) {
  29971                         .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, @intCast(imm))),
  29972                         .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))),
  29973                         .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))),
  29974                         .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm),
  29975                         else => unreachable,
  29976                     } },
                            // Anything that is neither an immediate nor a register becomes a
                            // memory operand of the requested size.
  29977                     else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) },
                            // Register: pick the alias whose byte width matches the operand size.
  29978                     .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) },
  29979                 },
                        // Immediates computed from `adjust` and `imm`; the unsigned form is
                        // evaluated as i64 and bit-cast.
  29980                 .simm => .{ .imm = .s(op.adjustedImm(i32, s)) },
  29981                 .uimm => .{ .imm = .u(@bitCast(op.adjustedImm(i64, s))) },
                        // `.lea`: the base temp must be tracked in a register; that register
                        // (aliased to pointer width) is the base of the memory operand. The
                        // optional index temp must likewise be in a register.
  29982                 .lea => .{ .mem = .{
  29983                     .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) },
  29984                     .mod = .{ .rm = .{
  29985                         .size = op.base.size,
  29986                         .index = switch (op.index.ref) {
  29987                             else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)),
  29988                             .none => .none,
  29989                         },
  29990                         .scale = op.index.scale,
  29991                         .disp = op.adjustedImm(i32, s),
  29992                     } },
  29993                 } },
                        // `.mem`: the memory operand is derived from the base temp's tracked
                        // value itself, with the same size/index/scale/displacement options.
  29994                 .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{
  29995                     .size = op.base.size,
  29996                     .index = switch (op.index.ref) {
  29997                         else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)),
  29998                         .none => .none,
  29999                     },
  30000                     .scale = op.index.scale,
  30001                     .disp = op.adjustedImm(i32, s),
  30002                 }) },
  30003             };
  30004         }
  30005     };
  30006 };
  30007 fn select(
  30008     cg: *CodeGen,
  30009     dst_temps: []Temp,
  30010     dst_tys: []const Type,
  30011     src_temps: []Temp,
  30012     cases: []const Select.Case,
  30013 ) !void {
            // Tries `cases` in order and emits the body of the first case/pattern pair
            // that applies to the given sources and destination types. On success the
            // produced destinations are written to `dst_temps` and the function returns.
            // If no case and pattern match, `error.SelectFailed` is returned.
  30014     cases: for (cases) |case| {
                // A case applies only if every required feature is available and every
                // destination type and source type passes its constraint.
  30015         for (case.required_features) |required_feature| if (required_feature) |feature| if (!cg.hasFeature(feature)) continue :cases;
  30016         for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases;
  30017         for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases;
                // Safety builds only: constraint slots beyond the operands in use must be `.any`.
  30018         if (std.debug.runtime_safety) {
  30019             for (case.dst_constraints[dst_temps.len..]) |dst_constraint| assert(dst_constraint == .any);
  30020             for (case.src_constraints[src_temps.len..]) |src_constraint| assert(src_constraint == .any);
  30021         }
  30022         patterns: for (case.patterns) |pattern| {
                    // A pattern applies only if every source temp matches its source pattern.
  30023             for (pattern.src[0..src_temps.len], src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns;
  30024             if (std.debug.runtime_safety) for (pattern.src[src_temps.len..]) |src_pattern| assert(src_pattern == .none);
  30025
  30026             var s: Select = .{
  30027                 .cg = cg,
  30028                 .temps = undefined,
  30029                 .labels = @splat(.{ .forward = @splat(null), .backward = null }),
  30030             };
                    // `s.temps` is laid out in `Ref` order: scratch temps, then
                    // destinations, then sources.
  30031             const tmp_slots = s.temps[@intFromEnum(Select.Operand.Ref.tmp0)..@intFromEnum(Select.Operand.Ref.dst0)];
  30032             const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)];
  30033             const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)];
  30034
                    // Publish the sources (swapped as `pattern.commute` says) before the
                    // scratch temps are created, since their creation receives `&s`. A
                    // spec whose `create` returns null leaves its slot undefined.
  30035             @memcpy(src_slots[0..src_temps.len], src_temps);
  30036             std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]);
  30037             for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue;
  30038
                    // Keep converting sources until a full pass reports no conversion:
                    // a `true` from `convert` breaks the `for` and restarts the pass, and
                    // only a pass that completes reaches the `else` and leaves the `while`.
  30039             while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| {
  30040                 if (try src_pattern.convert(src_temp, cg)) break;
  30041             } else break;
                    // `convert` may have replaced entries of `src_temps`, so publish them again.
  30042             @memcpy(src_slots[0..src_temps.len], src_temps);
  30043             std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]);
  30044
  30045             if (case.clobbers.eflags or case.each != .once) try cg.spillEflagsIfOccupied();
  30046
                    // Create one destination temp per requested type and publish them.
  30047             for (dst_temps, dst_tys, case.dst_temps[0..dst_temps.len]) |*dst_temp, dst_ty, dst_kind|
  30048                 dst_temp.* = (try Select.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, &s)).?;
  30049             @memcpy(dst_slots[0..dst_temps.len], dst_temps);
  30050
  30051             switch (case.each) {
                        // Emit the instruction list once, then place label `0:` after it.
  30052                 .once => |body| {
  30053                     for (body) |inst| try s.emit(inst);
  30054                     s.emitLabel(.@"0:");
  30055                 },
  30056             }
  30057
                    // Finalize the destinations and release every scratch temp whose spec
                    // is not `.unused`.
  30058             for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, &s);
  30059             for (case.extra_temps, tmp_slots) |spec, temp| if (spec.kind != .unused) try temp.die(cg);
  30060             return;
  30061         }
  30062     }
  30063     return error.SelectFailed;
  30064 }