src/arch/x86_64/CodeGen.zig (1455266B) - Raw
const std = @import("std");
const assert = std.debug.assert;
const codegen = @import("../../codegen.zig");
const link = @import("../../link.zig");
const log = std.log.scoped(.codegen);
const tracking_log = std.log.scoped(.tracking);
const verbose_tracking_log = std.log.scoped(.verbose_tracking);
const wip_mir_log = std.log.scoped(.wip_mir);

const Air = @import("../../Air.zig");
const Allocator = std.mem.Allocator;
const Emit = @import("Emit.zig");
const Liveness = @import("../../Liveness.zig");
const Lower = @import("Lower.zig");
const Mir = @import("Mir.zig");
const Zcu = @import("../../Zcu.zig");
const Module = @import("../../Package/Module.zig");
const InternPool = @import("../../InternPool.zig");
const Type = @import("../../Type.zig");
const Value = @import("../../Value.zig");

const abi = @import("abi.zig");
const bits = @import("bits.zig");
const encoder = @import("encoder.zig");

const Condition = bits.Condition;
const Immediate = bits.Immediate;
const Memory = bits.Memory;
const Register = bits.Register;
const RegisterManager = abi.RegisterManager;
const RegisterLock = RegisterManager.RegisterLock;
const FrameIndex = bits.FrameIndex;

const InnerError = codegen.CodeGenError || error{OutOfRegisters};

gpa: Allocator,
pt: Zcu.PerThread,
air: Air,
liveness: Liveness,
bin_file: *link.File,
debug_output: link.File.DebugInfoOutput,
target: *const std.Target,
owner: Owner,
inline_func: InternPool.Index,
mod: *Module,
arg_index: u32,
args: []MCValue,
/// Variadic-argument bookkeeping; which member is populated is decided from
/// the function's calling convention in `generate`.
va_info: union {
    sysv: struct {
        gp_count: u32,
        fp_count: u32,
        overflow_arg_area: bits.FrameAddr,
        reg_save_area: bits.FrameAddr,
    },
    win64: struct {},
},
ret_mcv: InstTracking,
fn_type: Type,
src_loc: Zcu.LazySrcLoc,

eflags_inst: ?Air.Inst.Index = null,

/// MIR Instructions
mir_instructions: std.MultiArrayList(Mir.Inst) = .empty,
/// MIR extra data
mir_extra: std.ArrayListUnmanaged(u32) = .empty,
mir_table: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,

/// Line of the function's closing curly brace (`generate` initializes it from
/// `func.rbrace_line`).
end_di_line: u32,
/// Column of the function's closing curly brace (`generate` initializes it
/// from `func.rbrace_column`).
end_di_column: u32,

/// MIR instruction indices recorded as epilogue relocations.
/// NOTE(review): the code that records and patches these is outside this
/// view; presumably they are jumps that must be retargeted at the epilogue.
epilogue_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,

reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined,
const_tracking: ConstTrackingMap = .empty,
inst_tracking: InstTrackingMap = .empty,

// Key is the block instruction
blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .empty,

register_manager: RegisterManager = .{},

/// Generation of the current scope, increments by 1 for every entered scope.
scope_generation: u32 = 0,

frame_allocs: std.MultiArrayList(FrameAlloc) = .empty,
free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .empty,
frame_locs: std.MultiArrayList(Mir.FrameLoc) = .empty,

loops: std.AutoHashMapUnmanaged(Air.Inst.Index, struct {
    /// The state to restore before branching.
    state: State,
    /// The branch target.
    target: Mir.Inst.Index,
}) = .empty,
loop_switches: std.AutoHashMapUnmanaged(Air.Inst.Index, struct {
    start: u31,
    len: u11,
    min: Value,
    else_relocs: union(enum) {
        @"unreachable",
        forward: std.ArrayListUnmanaged(Mir.Inst.Index),
        backward: Mir.Inst.Index,
    },
}) = .empty,

next_temp_index: Temp.Index = @enumFromInt(0),
temp_type: [Temp.Index.max]Type = undefined,

/// Identifies what machine code is being generated for: a declared `Nav`
/// or a lazily created symbol.
const Owner = union(enum) {
    nav_index: InternPool.Nav.Index,
    lazy_sym: link.File.LazySymbol,

    /// Returns the linker symbol index for `owner`, creating the linker-side
    /// metadata/atom if necessary. Only ELF, Mach-O, COFF and Plan 9 outputs
    /// are handled; any other output format is `unreachable`.
    fn getSymbolIndex(owner: Owner, ctx: *CodeGen) !u32 {
        const pt = ctx.pt;
        const bin_file = ctx.bin_file;
        switch (owner) {
            .nav_index => |nav| {
                if (bin_file.cast(.elf)) |elf_file|
                    return elf_file.zigObjectPtr().?.getOrCreateMetadataForNav(pt.zcu, nav);
                if (bin_file.cast(.macho)) |macho_file|
                    return macho_file.getZigObject().?.getOrCreateMetadataForNav(macho_file, nav);
                if (bin_file.cast(.coff)) |coff_file| {
                    const atom = try coff_file.getOrCreateAtomForNav(nav);
                    return coff_file.getAtom(atom).getSymbolIndex().?;
                }
                if (bin_file.cast(.plan9)) |p9_file|
                    return p9_file.seeNav(pt, nav);
                unreachable;
            },
            .lazy_sym => |lazy_sym| {
                // Linker failures are reported through `ctx.fail` with the
                // name of the underlying error.
                if (bin_file.cast(.elf)) |elf_file|
                    return elf_file.zigObjectPtr().?.getOrCreateMetadataForLazySymbol(elf_file, pt, lazy_sym) catch |err|
                        ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                if (bin_file.cast(.macho)) |macho_file|
                    return macho_file.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_file, pt, lazy_sym) catch |err|
                        ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                if (bin_file.cast(.coff)) |coff_file| {
                    const atom = coff_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
                        return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                    return coff_file.getAtom(atom).getSymbolIndex().?;
                }
                if (bin_file.cast(.plan9)) |p9_file|
                    return p9_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
                        return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                unreachable;
            },
        }
    }
};

const MaskKind = enum(u1) { sign, all };
const MaskInfo = packed struct { kind: MaskKind, inverted: bool, scalar: Memory.Size };

/// Describes where a value lives (or how it can be computed) while machine
/// code is being generated.
pub const MCValue = union(enum) {
    /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
    /// TODO Look into deleting this tag and using `dead` instead, since every use
    /// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
    none,
    /// Control flow will not allow this value to be observed.
    unreach,
    /// No more references to this value remain.
    /// The payload is the value of scope_generation at the point where the death occurred
    dead: u32,
    /// The value is undefined.
    undef,
    /// A pointer-sized integer that fits in a register.
    /// If the type is a pointer, this is the pointer address in virtual address space.
    immediate: u64,
    /// The value resides in the EFLAGS register.
    eflags: Condition,
    /// The value is in a register.
    register: Register,
    /// The value is split across two registers.
    register_pair: [2]Register,
    /// The value is split across three registers.
    register_triple: [3]Register,
    /// The value is split across four registers.
    register_quadruple: [4]Register,
    /// The value is a constant offset from the value in a register.
    register_offset: bits.RegisterOffset,
    /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register.
    register_overflow: struct { reg: Register, eflags: Condition },
    /// The value is a bool vector stored in a vector register with a different scalar type.
    register_mask: struct { reg: Register, info: MaskInfo },
    /// The value is in memory at a hard-coded address.
    /// If the type is a pointer, it means the pointer address is stored at this memory location.
    memory: u64,
    /// The value is in memory at an address not-yet-allocated by the linker.
    /// This traditionally corresponds to a relocation emitted in a relocatable object file.
    load_symbol: bits.SymbolOffset,
    /// The address of the memory location not-yet-allocated by the linker.
    lea_symbol: bits.SymbolOffset,
    /// The value is in memory at a constant offset from the address in a register.
    indirect: bits.RegisterOffset,
    /// The value is in memory.
    /// Payload is a symbol index.
    load_direct: u32,
    /// The value is a pointer to a value in memory.
    /// Payload is a symbol index.
    lea_direct: u32,
    /// The value is in memory referenced indirectly via GOT.
    /// Payload is a symbol index.
    load_got: u32,
    /// The value is a pointer to a value referenced indirectly via GOT.
    /// Payload is a symbol index.
    lea_got: u32,
    /// The value is a threadlocal variable.
    /// Payload is a symbol index.
    load_tlv: u32,
    /// The value is a pointer to a threadlocal variable.
    /// Payload is a symbol index.
    lea_tlv: u32,
    /// The value stored at an offset from a frame index
    /// Payload is a frame address.
    load_frame: bits.FrameAddr,
    /// The address of an offset from a frame index
    /// Payload is a frame address.
    lea_frame: bits.FrameAddr,
    /// Supports integer_per_element abi
    elementwise_regs_then_frame: packed struct { regs: u3, frame_off: i29, frame_index: FrameIndex },
    /// This indicates that we have already allocated a frame index for this instruction,
    /// but it has not been spilled there yet in the current control flow.
    /// Payload is a frame index.
    reserved_frame: FrameIndex,
    air_ref: Air.Inst.Ref,

    /// Reports whether the location is classified as modifiable: registers
    /// and most memory locations are; frame loads only when the frame index
    /// is not a named one.
    fn isModifiable(mcv: MCValue) bool {
        return switch (mcv) {
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .memory,
            .load_symbol,
            .load_got,
            .load_direct,
            .load_tlv,
            .indirect,
            => true,
            .load_frame => |frame_addr| !frame_addr.index.isNamed(),
            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .register_offset,
            .register_mask,
            .eflags,
            .register_overflow,
            .lea_symbol,
            .lea_direct,
            .lea_got,
            .lea_tlv,
            .lea_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => false,
        };
    }

    // hack around linker relocation bugs
    /// True for `.memory`, `.indirect` and `.load_frame` only.
    fn isBase(mcv: MCValue) bool {
        return switch (mcv) {
            .memory,
            .indirect,
            .load_frame,
            => true,
            else => false,
        };
    }

    /// True for `.memory`, `.indirect`, `.load_frame` and `.load_symbol`.
    fn isMemory(mcv: MCValue) bool {
        return switch (mcv) {
            .memory,
            .indirect,
            .load_frame,
            .load_symbol,
            => true,
            else => false,
        };
    }

    /// True only for `.immediate`.
    fn isImmediate(mcv: MCValue) bool {
        return mcv == .immediate;
    }

    /// True for `.register`, and for `.register_offset` with a zero offset.
    fn isRegister(mcv: MCValue) bool {
        return switch (mcv) {
            .register => true,
            .register_offset => |reg_off| reg_off.off == 0,
            else => false,
        };
    }

    /// True for `.register` and for `.register_offset` with any offset.
    fn isRegisterOffset(mcv: MCValue) bool {
        return switch (mcv) {
            .register,
            .register_offset,
            => true,
            else => false,
        };
    }

    /// Returns the single register referenced by the value, or `null` when
    /// the active tag carries no single register (this includes the
    /// multi-register tags).
    fn getReg(mcv: MCValue) ?Register {
        return switch (mcv) {
            .register => |reg| reg,
            .register_offset, .indirect => |reg_off| reg_off.reg,
            .register_overflow => |reg_ov| reg_ov.reg,
            .register_mask => |reg_mask| reg_mask.reg,
            else => null,
        };
    }

    /// Returns every register referenced by the value as a slice that points
    /// into `mcv` itself; the slice is empty for tags without registers.
    fn getRegs(mcv: *const MCValue) []const Register {
        return switch (mcv.*) {
            .register => |*reg| reg[0..1],
            inline .register_pair,
            .register_triple,
            .register_quadruple,
            => |*regs| regs,
            inline .register_offset,
            .indirect,
            .register_overflow,
            .register_mask,
            => |*pl| (&pl.reg)[0..1],
            else => &.{},
        };
    }

    /// Returns the condition code carried by `.eflags` or
    /// `.register_overflow`, otherwise `null`.
    fn getCondition(mcv: MCValue) ?Condition {
        return switch (mcv) {
            .eflags => |cc| cc,
            .register_overflow => |reg_ov| reg_ov.eflags,
            else => null,
        };
    }

    /// True for the tags `.immediate`, `.register`, `.register_offset` and
    /// `.lea_frame`.
    fn isAddress(mcv: MCValue) bool {
        return switch (mcv) {
            .immediate,
            .register,
            .register_offset,
            .lea_frame,
            => true,
            else => false,
        };
    }

    /// Converts an in-memory location into the value that is its address.
    /// Asserts (via `unreachable`) that `mcv` is an in-memory location.
    fn address(mcv: MCValue) MCValue {
        return switch (mcv) {
            .memory => |addr| .{ .immediate = addr },
            .indirect => |reg_off| switch (reg_off.off) {
                0 => .{ .register = reg_off.reg },
                else => .{ .register_offset = reg_off },
            },
            .load_direct => |sym_index| .{ .lea_direct = sym_index },
            .load_got => |sym_index| .{ .lea_got = sym_index },
            .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
            .load_frame => |frame_addr| .{ .lea_frame = frame_addr },
            .load_symbol => |sym_off| .{ .lea_symbol = sym_off },
            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .lea_symbol,
            .lea_direct,
            .lea_got,
            .lea_tlv,
            .lea_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable, // not in memory
        };
    }

    /// Converts an address value into the memory location it points at; the
    /// inverse mapping of `address`. Asserts (via `unreachable`) that `mcv`
    /// is dereferenceable.
    fn deref(mcv: MCValue) MCValue {
        return switch (mcv) {
            .immediate => |addr| .{ .memory = addr },
            .register => |reg| .{ .indirect = .{ .reg = reg } },
            .register_offset => |reg_off| .{ .indirect = reg_off },
            .lea_direct => |sym_index| .{ .load_direct = sym_index },
            .lea_got => |sym_index| .{ .load_got = sym_index },
            .lea_tlv => |sym_index| .{ .load_tlv = sym_index },
            .lea_frame => |frame_addr| .{ .load_frame = frame_addr },
            .lea_symbol => |sym_index| .{ .load_symbol = sym_index },
            .none,
            .unreach,
            .dead,
            .undef,
            .eflags,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_overflow,
            .register_mask,
            .memory,
            .indirect,
            .load_direct,
            .load_got,
            .load_tlv,
            .load_frame,
            .load_symbol,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable, // not dereferenceable
        };
    }

    /// Adds the constant `off` to the value. Only `.immediate`, `.register`,
    /// `.register_offset` and `.lea_frame` accept a nonzero offset; the
    /// remaining valid tags are returned unchanged for `off == 0` and are
    /// `unreachable` otherwise.
    fn offset(mcv: MCValue, off: i32) MCValue {
        return switch (mcv) {
            // Immediates wrap around on overflow.
            .immediate => |imm| .{ .immediate = @bitCast(@as(i64, @bitCast(imm)) +% off) },
            .register => |reg| .{ .register_offset = .{ .reg = reg, .off = off } },
            .register_offset => |reg_off| .{
                .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off },
            },
            .lea_frame => |frame_addr| .{
                .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
            },
            .eflags,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_overflow,
            .register_mask,
            .memory,
            .indirect,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .load_symbol,
            .lea_symbol,
            => switch (off) {
                0 => mcv,
                else => unreachable, // not offsettable
            },
            .none,
            .unreach,
            .dead,
            .undef,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable, // not valid
        };
    }

    /// Builds the `Memory` operand that accesses this value, combining the
    /// location with the size/index/scale/displacement given in `mod_rm`.
    /// Only `.memory`, `.indirect`, `.load_frame`, `.load_symbol` and
    /// `.air_ref` (resolved through `function.resolveInst`) are supported;
    /// every other tag is `unreachable`.
    fn mem(mcv: MCValue, function: *CodeGen, mod_rm: Memory.Mod.Rm) !Memory {
        return switch (mcv) {
            // Addresses that fit a signed 32-bit displacement use the ModRM
            // form; anything larger falls back to an absolute offset, which
            // ignores `mod_rm`.
            .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| .{
                .base = .{ .reg = .ds },
                .mod = .{ .rm = .{
                    .size = mod_rm.size,
                    .index = mod_rm.index,
                    .scale = mod_rm.scale,
                    .disp = small_addr + mod_rm.disp,
                } },
            } else .{ .base = .{ .reg = .ds }, .mod = .{ .off = addr } },
            .indirect => |reg_off| .{
                // The base register is aliased to the target's pointer width.
                .base = .{ .reg = registerAlias(reg_off.reg, @divExact(function.target.ptrBitWidth(), 8)) },
                .mod = .{ .rm = .{
                    .size = mod_rm.size,
                    .index = mod_rm.index,
                    .scale = mod_rm.scale,
                    .disp = reg_off.off + mod_rm.disp,
                } },
            },
            .load_frame => |frame_addr| .{
                .base = .{ .frame = frame_addr.index },
                .mod = .{ .rm = .{
                    .size = mod_rm.size,
                    .index = mod_rm.index,
                    .scale = mod_rm.scale,
                    .disp = frame_addr.off + mod_rm.disp,
                } },
            },
            .load_symbol => |sym_off| {
                // Symbol-relative operands must not carry their own offset.
                assert(sym_off.off == 0);
                return .{
                    .base = .{ .reloc = sym_off.sym_index },
                    .mod = .{ .rm = .{
                        .size = mod_rm.size,
                        .index = mod_rm.index,
                        .scale = mod_rm.scale,
                        .disp = sym_off.off + mod_rm.disp,
                    } },
                };
            },
            .air_ref => |ref| (try function.resolveInst(ref)).mem(function, mod_rm),
            .none,
            .unreach,
            .dead,
            .undef,
            .immediate,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .lea_frame,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .lea_symbol,
            => unreachable,
        };
    }

    /// `std.fmt` hook used by the tracking/debug logs. Multi-register values
    /// are printed highest element first.
    pub fn format(
        mcv: MCValue,
        comptime _: []const u8,
        _: std.fmt.FormatOptions,
        writer: anytype,
    ) @TypeOf(writer).Error!void {
        switch (mcv) {
            .none, .unreach, .dead, .undef => try writer.print("({s})", .{@tagName(mcv)}),
            .immediate => |pl| try writer.print("0x{x}", .{pl}),
            .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}),
            inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}),
            .register_pair => |pl| try writer.print("{s}:{s}", .{ @tagName(pl[1]), @tagName(pl[0]) }),
            .register_triple => |pl| try writer.print("{s}:{s}:{s}", .{
                @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]),
            }),
            .register_quadruple => |pl| try writer.print("{s}:{s}:{s}:{s}", .{
                @tagName(pl[3]), @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]),
            }),
            .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }),
            .register_overflow => |pl| try writer.print("{s}:{s}", .{
                @tagName(pl.eflags),
                @tagName(pl.reg),
            }),
            .register_mask => |pl| try writer.print("mask({s},{}):{c}{s}", .{
                @tagName(pl.info.kind),
                pl.info.scalar,
                @as(u8, if (pl.info.inverted) '!' else ' '),
                @tagName(pl.reg),
            }),
            .load_symbol => |pl| try writer.print("[sym:{} + 0x{x}]", .{ pl.sym_index, pl.off }),
            .lea_symbol => |pl| try writer.print("sym:{} + 0x{x}", .{ pl.sym_index, pl.off }),
            .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }),
            .load_direct => |pl| try writer.print("[direct:{d}]", .{pl}),
            .lea_direct => |pl| try writer.print("direct:{d}", .{pl}),
            .load_got => |pl| try writer.print("[got:{d}]", .{pl}),
            .lea_got => |pl| try writer.print("got:{d}", .{pl}),
            .load_tlv => |pl| try writer.print("[tlv:{d}]", .{pl}),
            .lea_tlv => |pl| try writer.print("tlv:{d}", .{pl}),
            .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }),
            .elementwise_regs_then_frame => |pl| try writer.print("elementwise:{d}:[{} + 0x{x}]", .{
                pl.regs, pl.frame_index, pl.frame_off,
            }),
            .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }),
            .reserved_frame => |pl| try writer.print("(dead:{})", .{pl}),
            .air_ref => |pl| try writer.print("(air:0x{x})", .{@intFromEnum(pl)}),
        }
    }
};

const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking);
const ConstTrackingMap = std.AutoArrayHashMapUnmanaged(InternPool.Index, InstTracking);

/// Tracks the location of one value as a pair: `short` is where the value
/// currently is, `long` is the location that `reuseFrame` and `spill` fall
/// back to (`.none` while no such location has been assigned).
const InstTracking = struct {
    long: MCValue,
    short: MCValue,

    /// Starts tracking `result`. Immediate-like and memory-like results also
    /// serve as the long location; register/flag results start with
    /// `long == .none`.
    fn init(result: MCValue) InstTracking {
        const long: MCValue = switch (result) {
            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .lea_frame,
            .load_symbol,
            .lea_symbol,
            => result,
            .dead,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .indirect,
            => .none,
        };
        return .{ .long = long, .short = result };
    }

    /// Forwards to `MCValue.getReg` on the short location.
    fn getReg(self: InstTracking) ?Register {
        return self.short.getReg();
    }

    /// Forwards to `MCValue.getRegs` on the short location.
    fn getRegs(self: *const InstTracking) []const Register {
        return self.short.getRegs();
    }

    /// Forwards to `MCValue.getCondition` on the short location.
    fn getCondition(self: InstTracking) ?Condition {
        return self.short.getCondition();
    }

    /// Copies the value from its short location to its long location,
    /// allocating a location or activating a reserved frame index first when
    /// needed. Does nothing when both locations are already equal.
    fn spill(self: *InstTracking, cg: *CodeGen, inst: Air.Inst.Index) !void {
        if (std.meta.eql(self.long, self.short)) return; // Already spilled
        // Allocate or reuse frame index
        switch (self.long) {
            .none => self.long = try cg.allocRegOrMem(inst, false),
            .load_frame => {},
            .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
            else => unreachable,
        }
        tracking_log.debug("spill {} from {} to {}", .{ inst, self.short, self.long });
        try cg.genCopy(cg.typeOfIndex(inst), self.long, self.short, .{});
    }

    /// Makes the long location the current one, first turning a reserved
    /// frame index into an actual frame load. Asserts that the long location
    /// is one of the kinds `init` would keep as `long`.
    fn reuseFrame(self: *InstTracking) void {
        switch (self.long) {
            .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
            else => {},
        }
        self.short = switch (self.long) {
            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .load_frame,
            .lea_frame,
            .load_symbol,
            .lea_symbol,
            => self.long,
            .dead,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .indirect,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable,
        };
    }

    /// Records that the value now lives in its long location: frees the
    /// short location and switches tracking over. Emits no copy itself.
    fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
        try function.freeValue(self.short);
        self.reuseFrame();
        tracking_log.debug("{} => {} (spilled)", .{ inst, self.* });
    }

    /// Asserts that the long location of `self` is compatible with that of
    /// `target`: non-frame locations must match exactly, frame locations may
    /// pair with `.none`, `.load_frame` or `.reserved_frame`.
    fn verifyMaterialize(self: InstTracking, target: InstTracking) void {
        switch (self.long) {
            .none,
            .unreach,
            .undef,
            .immediate,
            .memory,
            .load_direct,
            .lea_direct,
            .load_got,
            .lea_got,
            .load_tlv,
            .lea_tlv,
            .lea_frame,
            .load_symbol,
            .lea_symbol,
            => assert(std.meta.eql(self.long, target.long)),
            .load_frame,
            .reserved_frame,
            => switch (target.long) {
                .none,
                .load_frame,
                .reserved_frame,
                => {},
                else => unreachable,
            },
            .dead,
            .eflags,
            .register,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_offset,
            .register_overflow,
            .register_mask,
            .indirect,
            .elementwise_regs_then_frame,
            .air_ref,
            => unreachable,
        }
    }

    /// Checked variant of `materializeUnsafe`.
    fn materialize(
        self: *InstTracking,
        function: *CodeGen,
        inst: Air.Inst.Index,
        target: InstTracking,
    ) !void {
        self.verifyMaterialize(target);
        try self.materializeUnsafe(function, inst, target);
    }

    /// Emits the copies that move the value from `self.short` into the
    /// locations described by `target`, without verifying compatibility.
    fn materializeUnsafe(
        self: InstTracking,
        function: *CodeGen,
        inst: Air.Inst.Index,
        target: InstTracking,
    ) !void {
        const ty = function.typeOfIndex(inst);
        // The target has a frame copy that this tracking has not written yet.
        const needs_frame_copy = target.long == .load_frame and
            (self.long == .none or self.long == .reserved_frame);
        if (needs_frame_copy) try function.genCopy(ty, target.long, self.short, .{});
        try function.genCopy(ty, target.short, self.short, .{});
    }

    /// Updates the tracking to `target` without emitting any code.
    fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void {
        self.verifyMaterialize(target);
        // Don't clobber reserved frame indices
        const new_long: MCValue = if (target.long != .none) target.long else switch (self.long) {
            .load_frame => |addr| .{ .reserved_frame = addr.index },
            .reserved_frame => self.long,
            else => target.long,
        };
        self.long = new_long;
        self.short = target.short;
        tracking_log.debug("{} => {} (materialize)", .{ inst, self.* });
    }

    /// Brings a dead value back to its long location when it died in
    /// generation `scope_generation` or a later one.
    fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
        const die_generation = switch (self.short) {
            .dead => |generation| generation,
            else => return,
        };
        if (die_generation < scope_generation) return;
        self.reuseFrame();
        tracking_log.debug("{} => {} (resurrect)", .{ inst, self.* });
    }

    /// Marks the value as dead in the current scope generation and frees its
    /// short location. Does nothing when it is already dead.
    fn die(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
        if (self.short == .dead) return;
        try function.freeValue(self.short);
        self.short = .{ .dead = function.scope_generation };
        tracking_log.debug("{} => {} (death)", .{ inst, self.* });
    }

    /// Marks the value as dead without freeing its short location.
    /// NOTE(review): presumably `new_inst` takes over that location; the
    /// hand-over itself happens outside this function.
    fn reuse(
        self: *InstTracking,
        function: *CodeGen,
        new_inst: ?Air.Inst.Index,
        old_inst: Air.Inst.Index,
    ) void {
        self.short = .{ .dead = function.scope_generation };
        tracking_log.debug("{?} => {} (reuse {})", .{ new_inst, self.*, old_inst });
    }

    /// For every register of this value that is still occupied in the
    /// register manager, removes that register from the value currently
    /// tracked there and frees the register.
    fn liveOut(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) void {
        for (self.getRegs()) |reg| {
            if (function.register_manager.isRegFree(reg)) {
                tracking_log.debug("{} => {} (live-out)", .{ inst, self.* });
                continue;
            }

            const index = RegisterManager.indexOfRegIntoTracked(reg).?;
            const tracked_inst = function.register_manager.registers[index];
            const tracking = function.getResolvedInstValue(tracked_inst);

            // Disable death.
            // The occupant must hold `reg` exactly once and at most one
            // other register.
            var matched = false;
            var leftover: Register = .none;
            for (tracking.getRegs()) |tracked_reg| {
                if (tracked_reg.id() == reg.id()) {
                    assert(!matched);
                    matched = true;
                } else {
                    assert(leftover == .none);
                    leftover = tracked_reg;
                }
            }
            assert(matched);
            tracking.short = if (leftover == .none)
                .{ .dead = function.scope_generation }
            else
                .{ .register = leftover };

            // Perform side-effects of freeValue manually.
            function.register_manager.freeReg(reg);

            tracking_log.debug("{} => {} (live-out {})", .{ inst, self.*, tracked_inst });
        }
    }

    /// `std.fmt` hook: prints the short location, prefixed by `|long| ` when
    /// the long location differs from it.
    pub fn format(
        tracking: InstTracking,
        comptime _: []const u8,
        _: std.fmt.FormatOptions,
        writer: anytype,
    ) @TypeOf(writer).Error!void {
        const same_location = std.meta.eql(tracking.long, tracking.short);
        if (!same_location) try writer.print("|{}| ", .{tracking.long});
        try writer.print("{}", .{tracking.short});
    }
};

/// Size, padding, alignment and reference count of one stack frame allocation.
const FrameAlloc = struct {
    abi_size: u31,
    spill_pad: u3,
    abi_align: InternPool.Alignment,
    ref_count: u16,

    /// Creates an allocation with a zero reference count.
    /// Asserts that `alloc_abi.size` fits in `u31`.
    fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: InternPool.Alignment }) FrameAlloc {
        return .{
            .abi_size = @intCast(alloc_abi.size),
            .spill_pad = alloc_abi.pad,
            .abi_align = alloc_abi.alignment,
            .ref_count = 0,
        };
    }

    /// Creates an allocation with exactly the ABI size and alignment of `ty`.
    fn initType(ty: Type, zcu: *Zcu) FrameAlloc {
        return init(.{
            .size = ty.abiSize(zcu),
            .alignment = ty.abiAlignment(zcu),
        });
    }

    /// Creates a spill slot for `ty`. Sizes below 8 bytes are rounded up to a
    /// power of two, larger sizes to a multiple of 8; the added padding is
    /// recorded in `spill_pad`, and the alignment is raised to at least
    /// `min(spill_size, 8)` bytes.
    fn initSpill(ty: Type, zcu: *Zcu) FrameAlloc {
        const abi_size = ty.abiSize(zcu);
        const spill_size = if (abi_size >= 8)
            std.mem.alignForward(u64, abi_size, 8)
        else
            std.math.ceilPowerOfTwoAssert(u64, abi_size);
        return init(.{
            .size = spill_size,
            .pad = @intCast(spill_size - abi_size),
            .alignment = ty.abiAlignment(zcu).maxStrict(
                .fromNonzeroByteUnits(@min(spill_size, 8)),
            ),
        });
    }
};

const StackAllocation = struct {
    inst: ?Air.Inst.Index,
    /// TODO do we need size? should be determined by inst.ty.abiSize(zcu)
    size: u32,
};

/// Per-block bookkeeping: recorded relocation instructions plus a saved `State`.
const BlockData = struct {
    relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
    state: State,

    /// Frees the relocation list and invalidates `self`.
    fn deinit(self: *BlockData, gpa: Allocator) void {
        self.relocs.deinit(gpa);
        self.* = undefined;
    }
};

const CodeGen = @This();

/// Generates machine code for the function `func_index` and appends it to
/// `code`: sets up the code generator state and the named frame allocations,
/// runs `gen` to produce MIR, then lowers and emits that MIR through `Emit`.
/// Failures are reported through `fail`/`failMsg` or returned unchanged.
pub fn generate(
    bin_file: *link.File,
    pt: Zcu.PerThread,
    src_loc: Zcu.LazySrcLoc,
    func_index: InternPool.Index,
    air: Air,
    liveness: Liveness,
    code: *std.ArrayListUnmanaged(u8),
    debug_output: link.File.DebugInfoOutput,
) codegen.CodeGenError!void {
    const zcu = pt.zcu;
    const comp = zcu.comp;
    const gpa = zcu.gpa;
    const ip = &zcu.intern_pool;
    const func = zcu.funcInfo(func_index);
    const fn_type: Type = .fromInterned(func.ty);
    const mod = zcu.navFileScope(func.owner_nav).mod;

    var function: CodeGen = .{
        .gpa = gpa,
        .pt = pt,
        .air = air,
        .liveness = liveness,
        .target = &mod.resolved_target.result,
        .mod = mod,
        .bin_file = bin_file,
        .debug_output = debug_output,
        .owner = .{ .nav_index = func.owner_nav },
        .inline_func = func_index,
        .arg_index = undefined,
        .args = undefined, // populated after `resolveCallingConventionValues`
        .va_info = undefined, // populated after `resolveCallingConventionValues`
        .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
        .fn_type = fn_type,
        .src_loc = src_loc,
        .end_di_line = func.rbrace_line,
        .end_di_column = func.rbrace_column,
    };
    defer {
        function.frame_allocs.deinit(gpa);
        function.free_frame_indices.deinit(gpa);
        function.frame_locs.deinit(gpa);
        function.loops.deinit(gpa);
        function.loop_switches.deinit(gpa);
        var block_it = function.blocks.valueIterator();
        while (block_it.next()) |block| block.deinit(gpa);
        function.blocks.deinit(gpa);
        function.inst_tracking.deinit(gpa);
        function.const_tracking.deinit(gpa);
        function.epilogue_relocs.deinit(gpa);
        function.mir_instructions.deinit(gpa);
        function.mir_extra.deinit(gpa);
        function.mir_table.deinit(gpa);
    }

    // Every temporary index gets a tracking entry up front.
    try function.inst_tracking.ensureTotalCapacity(gpa, Temp.Index.max);
    for (0..Temp.Index.max) |temp_index| {
        const temp: Temp.Index = @enumFromInt(temp_index);
        function.inst_tracking.putAssumeCapacityNoClobber(temp.toIndex(), .init(.none));
    }

    wip_mir_log.debug("{}:", .{fmtNav(func.owner_nav, ip)});

    // Named frame indices occupy the first `FrameIndex.named_count` entries.
    try function.frame_allocs.resize(gpa, FrameIndex.named_count);
    function.frame_allocs.set(
        @intFromEnum(FrameIndex.stack_frame),
        .init(.{ .size = 0, .alignment = .@"1" }),
    );
    function.frame_allocs.set(
        @intFromEnum(FrameIndex.call_frame),
        .init(.{ .size = 0, .alignment = .@"1" }),
    );

    const fn_info = zcu.typeToFunc(fn_type).?;
    var call_info = try function.resolveCallingConventionValues(fn_info, &.{}, .args_frame);
    defer call_info.deinit(&function);

    function.args = call_info.args;
    function.ret_mcv = call_info.return_value;
    function.frame_allocs.set(@intFromEnum(FrameIndex.ret_addr), .init(.{
        .size = Type.usize.abiSize(zcu),
        .alignment = Type.usize.abiAlignment(zcu).min(call_info.stack_align),
    }));
    function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), .init(.{
        .size = Type.usize.abiSize(zcu),
        .alignment = call_info.stack_align.min(
            .fromNonzeroByteUnits(function.target.stackAlignment()),
        ),
    }));
    function.frame_allocs.set(
        @intFromEnum(FrameIndex.args_frame),
        .init(.{
            .size = call_info.stack_byte_count,
            .alignment = call_info.stack_align,
        }),
    );
    function.va_info = switch (fn_info.cc) {
        else => undefined,
        .x86_64_sysv => .{ .sysv = .{
            .gp_count = call_info.gp_count,
            .fp_count = call_info.fp_count,
            .overflow_arg_area = .{ .index = .args_frame, .off = call_info.stack_byte_count },
            .reg_save_area = undefined,
        } },
        .x86_64_win => .{ .win64 = .{} },
    };

    function.gen() catch |err| switch (err) {
        error.CodegenFail => return error.CodegenFail,
        error.OutOfRegisters => return function.fail("ran out of registers (Zig compiler bug)", .{}),
        else => |e| return e,
    };

    var mir: Mir = .{
        .instructions = function.mir_instructions.toOwnedSlice(),
        .extra = try function.mir_extra.toOwnedSlice(gpa),
        .table = try function.mir_table.toOwnedSlice(gpa),
        .frame_locs = function.frame_locs.toOwnedSlice(),
    };
    defer mir.deinit(gpa);

    var emit: Emit = .{
        .air = function.air,
        .lower = .{
            .bin_file = bin_file,
            .target = function.target,
            .allocator = gpa,
            .mir = mir,
            .cc = fn_info.cc,
            .src_loc = src_loc,
            .output_mode = comp.config.output_mode,
            .link_mode = comp.config.link_mode,
            .pic = mod.pic,
        },
        .atom_index = try function.owner.getSymbolIndex(&function),
        .debug_output = debug_output,
        .code = code,
        .prev_di_loc = .{
            .line = func.lbrace_line,
            .column = func.lbrace_column,
            .is_stmt = switch (debug_output) {
                .dwarf => |dwarf| dwarf.dwarf.debug_line.header.default_is_stmt,
                .plan9, .none => undefined,
            },
        },
        .prev_di_pc = 0,
    };
    emit.emitMir() catch |err| switch (err) {
        error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),

        error.InvalidInstruction, error.CannotEncode => |e| return function.fail("emit MIR failed: {s} (Zig compiler bug)", .{@errorName(e)}),
        else => |e| return function.fail("emit MIR failed: {s}", .{@errorName(e)}),
    };
}

pub
/// Generates code for the lazy symbol `lazy_sym` and emits it into `code`.
///
/// A `CodeGen` is built with every per-function field (AIR, liveness,
/// arguments, return tracking, debug line state) left `undefined`; only
/// `genLazy` is run on it. The resulting MIR is then handed to `Emit`.
/// Errors from either stage are reported through `fail`/`failMsg`, except
/// for errors already in `codegen.CodeGenError`, which are propagated as-is.
pub fn generateLazy(
    bin_file: *link.File,
    pt: Zcu.PerThread,
    src_loc: Zcu.LazySrcLoc,
    lazy_sym: link.File.LazySymbol,
    code: *std.ArrayListUnmanaged(u8),
    debug_output: link.File.DebugInfoOutput,
) codegen.CodeGenError!void {
    const comp = bin_file.comp;
    const gpa = comp.gpa;
    // This function is for generating global code, so we use the root module.
    const root_mod = comp.root_mod;

    var function: CodeGen = .{
        .gpa = gpa,
        .pt = pt,
        .air = undefined,
        .liveness = undefined,
        .target = &root_mod.resolved_target.result,
        .mod = root_mod,
        .bin_file = bin_file,
        .debug_output = debug_output,
        .owner = .{ .lazy_sym = lazy_sym },
        .inline_func = undefined,
        .arg_index = undefined,
        .args = undefined,
        .va_info = undefined,
        .ret_mcv = undefined,
        .fn_type = undefined,
        .src_loc = src_loc,
        .end_di_line = undefined, // no debug info yet
        .end_di_column = undefined, // no debug info yet
    };
    defer {
        function.mir_instructions.deinit(gpa);
        function.mir_extra.deinit(gpa);
        function.mir_table.deinit(gpa);
    }

    // Only running out of registers needs translating into a reported failure;
    // everything else (including `error.CodegenFail`) is returned unchanged.
    function.genLazy(lazy_sym) catch |err| switch (err) {
        error.OutOfRegisters => return function.fail("ran out of registers (Zig compiler bug)", .{}),
        else => |other| return other,
    };

    // Ownership of the MIR buffers moves from `function` into `mir`.
    var mir: Mir = .{
        .instructions = function.mir_instructions.toOwnedSlice(),
        .extra = try function.mir_extra.toOwnedSlice(gpa),
        .table = try function.mir_table.toOwnedSlice(gpa),
        .frame_locs = function.frame_locs.toOwnedSlice(),
    };
    defer mir.deinit(gpa);

    const atom_index = try function.owner.getSymbolIndex(&function);
    var emit: Emit = .{
        .air = function.air,
        .lower = .{
            .bin_file = bin_file,
            .target = function.target,
            .allocator = gpa,
            .mir = mir,
            .cc = .auto,
            .src_loc = src_loc,
            .output_mode = comp.config.output_mode,
            .link_mode = comp.config.link_mode,
            .pic = root_mod.pic,
        },
        .atom_index = atom_index,
        .debug_output = debug_output,
        .code = code,
        .prev_di_loc = undefined, // no debug info yet
        .prev_di_pc = undefined, // no debug info yet
    };
    emit.emitMir() catch |err| switch (err) {
        error.InvalidInstruction => return function.fail("failed to find a viable x86 instruction (Zig compiler bug)", .{}),
        error.CannotEncode => return function.fail("failed to encode x86 instruction (Zig compiler bug)", .{}),
        // The lowering/emit stage has already recorded a message for these.
        error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),
        else => |other| return function.fail("failed to emit MIR: {s}", .{@errorName(other)}),
    };
}

/// Payload for `formatNav`: the nav to print and the pool that owns it.
const FormatNavData = struct {
    ip: *const InternPool,
    nav_index: InternPool.Nav.Index,
};

/// `std.fmt` callback that prints the `fqn` of the nav in `data`.
fn formatNav(
    data: FormatNavData,
    comptime _: []const u8,
    _: std.fmt.FormatOptions,
    writer: anytype,
) @TypeOf(writer).Error!void {
    const nav = data.ip.getNav(data.nav_index);
    try writer.print("{}", .{nav.fqn.fmt(data.ip)});
}

/// Wraps `nav_index` so it can be passed to a `{}` format placeholder.
fn fmtNav(nav_index: InternPool.Nav.Index, ip: *const InternPool) std.fmt.Formatter(formatNav) {
    const payload: FormatNavData = .{ .ip = ip, .nav_index = nav_index };
    return .{ .data = payload };
}

/// Payload for `formatAir`: the AIR instruction to dump and its code generator.
const FormatAirData = struct {
    self: *CodeGen,
    inst: Air.Inst.Index,
};

/// `std.fmt` callback that dumps one AIR instruction via `print_air.dumpInst`.
/// NOTE(review): `writer` is only used for the return type here; the output
/// goes wherever `dumpInst` sends it — confirm against `print_air.zig`.
fn formatAir(
    data: FormatAirData,
    comptime _: []const u8,
    _: std.fmt.FormatOptions,
    writer: anytype,
) @TypeOf(writer).Error!void {
    const cg = data.self;
    @import("../../print_air.zig").dumpInst(data.inst, cg.pt, cg.air, cg.liveness);
}

/// Wraps an AIR instruction so it can be passed to a `{}` format placeholder.
fn fmtAir(self: *CodeGen, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) {
    const payload: FormatAirData = .{ .self = self, .inst = inst };
    return .{ .data = payload };
}

/// Payload for `formatWipMir`: the MIR instruction to print and its code generator.
const FormatWipMirData = struct {
    self: *CodeGen,
    inst: Mir.Inst.Index,
};
/// `std.fmt` callback that prints one work-in-progress MIR instruction.
///
/// The instruction is lowered with a temporary `Lower` that views the
/// in-progress MIR buffers. Each lowered instruction is printed on its own
/// log line. If lowering fails, the failure text is printed instead. If
/// lowering produces no instructions, the pseudo instruction is printed by
/// its `ops` tag name followed by its operands.
fn formatWipMir(
    data: FormatWipMirData,
    comptime _: []const u8,
    _: std.fmt.FormatOptions,
    writer: anytype,
) @TypeOf(writer).Error!void {
    const cg = data.self;
    const comp = cg.bin_file.comp;
    var lower: Lower = .{
        .bin_file = cg.bin_file,
        .target = cg.target,
        .allocator = cg.gpa,
        .mir = .{
            .instructions = cg.mir_instructions.slice(),
            .extra = cg.mir_extra.items,
            .table = cg.mir_table.items,
            .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(),
        },
        .cc = .auto,
        .src_loc = cg.src_loc,
        .output_mode = comp.config.output_mode,
        .link_mode = comp.config.link_mode,
        .pic = comp.root_mod.pic,
    };

    const lowered = lower.lowerMir(data.inst) catch |err| switch (err) {
        error.LowerFail => {
            // Print the recorded message, then release it so it is not leaked.
            defer {
                lower.err_msg.?.deinit(cg.gpa);
                lower.err_msg = null;
            }
            try writer.writeAll(lower.err_msg.?.msg);
            return;
        },
        error.OutOfMemory => return writer.writeAll("Out of memory"),
        error.InvalidInstruction => return writer.writeAll("CodeGen failed to find a viable instruction."),
        error.CannotEncode => return writer.writeAll("CodeGen failed to encode the instruction."),
        else => |other| return other,
    };

    for (lowered.insts, 0..) |lowered_inst, position| {
        // Every instruction after the first starts a fresh, prefixed log line.
        if (position != 0) try writer.writeAll("\ndebug(wip_mir): ");
        try writer.print(" | {}", .{lowered_inst});
    }
    if (lowered.insts.len != 0) return;

    // Nothing was lowered: describe the pseudo instruction itself.
    const ip = &cg.pt.zcu.intern_pool;
    const mir_inst = lower.mir.instructions.get(data.inst);
    try writer.print(" | .{s}", .{@tagName(mir_inst.ops)});
    switch (mir_inst.ops) {
        .pseudo_dbg_prologue_end_none,
        .pseudo_dbg_epilogue_begin_none,
        .pseudo_dbg_enter_block_none,
        .pseudo_dbg_leave_block_none,
        .pseudo_dbg_var_args_none,
        .pseudo_dead_none,
        => {},
        .pseudo_dbg_line_stmt_line_column, .pseudo_dbg_line_line_column => try writer.print(
            " {[line]d}, {[column]d}",
            mir_inst.data.line_column,
        ),
        .pseudo_dbg_enter_inline_func, .pseudo_dbg_leave_inline_func => {
            const func = ip.indexToKey(mir_inst.data.func).func;
            try writer.print(" {}", .{ip.getNav(func.owner_nav).name.fmt(ip)});
        },
        .pseudo_dbg_local_a => try writer.print(" {}", .{mir_inst.data.a.air_inst}),
        .pseudo_dbg_local_ai_s => {
            const signed_imm: i32 = @bitCast(mir_inst.data.ai.i);
            try writer.print(" {}, {d}", .{ mir_inst.data.ai.air_inst, signed_imm });
        },
        .pseudo_dbg_local_ai_u => try writer.print(" {}, {d}", .{
            mir_inst.data.ai.air_inst,
            mir_inst.data.ai.i,
        }),
        .pseudo_dbg_local_ai_64 => try writer.print(" {}, {d}", .{
            mir_inst.data.ai.air_inst,
            lower.mir.extraData(Mir.Imm64, mir_inst.data.ai.i).data.decode(),
        }),
        .pseudo_dbg_local_as => {
            const operand: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
                .base = .{ .reloc = mir_inst.data.as.sym_index },
            }) };
            try writer.print(" {}, {}", .{ mir_inst.data.as.air_inst, operand.fmt(.m) });
        },
        .pseudo_dbg_local_aso => {
            const sym_off = lower.mir.extraData(bits.SymbolOffset, mir_inst.data.ax.payload).data;
            const operand: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
                .base = .{ .reloc = sym_off.sym_index },
                .disp = sym_off.off,
            }) };
            try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, operand.fmt(.m) });
        },
        .pseudo_dbg_local_aro => {
            const air_off = lower.mir.extraData(Mir.AirOffset, mir_inst.data.rx.payload).data;
            const operand: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
                .base = .{ .reg = mir_inst.data.rx.r1 },
                .disp = air_off.off,
            }) };
            try writer.print(" {}, {}", .{ air_off.air_inst, operand.fmt(.m) });
        },
        .pseudo_dbg_local_af => {
            const frame_addr = lower.mir.extraData(bits.FrameAddr, mir_inst.data.ax.payload).data;
            const operand: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{
                .base = .{ .frame = frame_addr.index },
                .disp = frame_addr.off,
            }) };
            try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, operand.fmt(.m) });
        },
        .pseudo_dbg_local_am => {
            const operand: encoder.Instruction.Operand = .{
                .mem = lower.mir.extraData(Mir.Memory, mir_inst.data.ax.payload).data.decode(),
            };
            try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, operand.fmt(.m) });
        },
        // Any other instruction is expected to have lowered to at least one instruction.
        else => unreachable,
    }
}

/// Wraps a MIR instruction index so it can be passed to a `{}` format placeholder.
fn fmtWipMir(self: *CodeGen, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) {
    const payload: FormatWipMirData = .{ .self = self, .inst = inst };
    return .{ .data = payload };
}

/// Payload for `formatTracking`.
const FormatTrackingData = struct {
    self: *CodeGen,
};

/// `std.fmt` callback that prints every `inst_tracking` entry as `key = value`,
/// each on its own line.
fn formatTracking(
    data: FormatTrackingData,
    comptime _: []const u8,
    _: std.fmt.FormatOptions,
    writer: anytype,
) @TypeOf(writer).Error!void {
    var entries = data.self.inst_tracking.iterator();
    while (entries.next()) |entry| {
        try writer.print("\n{} = {}", .{ entry.key_ptr.*, entry.value_ptr.* });
    }
}

/// Wraps the tracking table so it can be passed to a `{}` format placeholder.
fn fmtTracking(self: *CodeGen) std.fmt.Formatter(formatTracking) {
    const payload: FormatTrackingData = .{ .self = self };
    return .{ .data = payload };
}

/// Appends `inst` to the MIR instruction list and returns its index.
/// Everything except the `.pseudo_dead_none` placeholder is also written to
/// the `wip_mir` debug log.
fn addInst(self: *CodeGen, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
    try self.mir_instructions.ensureUnusedCapacity(self.gpa, 1);
    const new_index: Mir.Inst.Index = @intCast(self.mir_instructions.len);
    self.mir_instructions.appendAssumeCapacity(inst);
    switch (inst.ops) {
        .pseudo_dead_none => {},
        else => wip_mir_log.debug("{}", .{self.fmtWipMir(new_index)}),
    }
    return new_index;
}

/// Appends the fields of `extra` to `mir_extra`, one entry per field, growing
/// the list first. Returns the index of the first appended entry.
fn addExtra(self: *CodeGen, extra: anytype) Allocator.Error!u32 {
    const field_count = std.meta.fields(@TypeOf(extra)).len;
    try self.mir_extra.ensureUnusedCapacity(self.gpa, field_count);
    return self.addExtraAssumeCapacity(extra);
}
/// Appends the fields of `extra` to `mir_extra` without growing the list; the
/// caller must already have reserved one entry per field.
/// Returns the index of the first appended entry.
///
/// Supported field types are `u32`, `i32`, `Mir.Memory.Info` and
/// `bits.FrameIndex`; any other field type is a compile error.
fn addExtraAssumeCapacity(self: *CodeGen, extra: anytype) u32 {
    const first_index: u32 = @intCast(self.mir_extra.items.len);
    inline for (std.meta.fields(@TypeOf(extra))) |field| {
        const value = @field(extra, field.name);
        self.mir_extra.appendAssumeCapacity(switch (field.type) {
            u32 => value,
            i32, Mir.Memory.Info => @bitCast(value),
            bits.FrameIndex => @intFromEnum(value),
            else => @compileError("bad field type: " ++ field.name ++ ": " ++ @typeName(field.type)),
        });
    }
    return first_index;
}

/// Emits `tag` with up to four operands by dispatching on the operand kinds
/// to the matching `asm*` helper.
///
/// Operands are read left to right and the first `.none` ends the list;
/// entries after it are not inspected. A combination with no helper yields
/// `error.InvalidInstruction`. An `.inst` operand is only accepted as the
/// sole operand.
fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void {
    return switch (ops[0]) {
        .none => self.asmOpOnly(tag),
        .inst => |target| if (ops[1] == .none)
            self.asmReloc(tag, target)
        else
            error.InvalidInstruction,
        .imm => |i0| if (ops[1] == .none)
            self.asmImmediate(tag, i0)
        else
            error.InvalidInstruction,
        .mem => |m0| switch (ops[1]) {
            .none => self.asmMemory(tag, m0),
            .imm => |i1| if (ops[2] == .none)
                self.asmMemoryImmediate(tag, m0, i1)
            else
                error.InvalidInstruction,
            .reg => |r1| switch (ops[2]) {
                .none => self.asmMemoryRegister(tag, m0, r1),
                .reg => |r2| if (ops[3] == .none)
                    self.asmMemoryRegisterRegister(tag, m0, r1, r2)
                else
                    error.InvalidInstruction,
                .imm => |i2| if (ops[3] == .none)
                    self.asmMemoryRegisterImmediate(tag, m0, r1, i2)
                else
                    error.InvalidInstruction,
                else => error.InvalidInstruction,
            },
            else => error.InvalidInstruction,
        },
        .reg => |r0| switch (ops[1]) {
            .none => self.asmRegister(tag, r0),
            .imm => |i1| if (ops[2] == .none)
                self.asmRegisterImmediate(tag, r0, i1)
            else
                error.InvalidInstruction,
            .mem => |m1| switch (ops[2]) {
                .none => self.asmRegisterMemory(tag, r0, m1),
                .reg => |r2| if (ops[3] == .none)
                    self.asmRegisterMemoryRegister(tag, r0, m1, r2)
                else
                    error.InvalidInstruction,
                .imm => |i2| if (ops[3] == .none)
                    self.asmRegisterMemoryImmediate(tag, r0, m1, i2)
                else
                    error.InvalidInstruction,
                else => error.InvalidInstruction,
            },
            .reg => |r1| switch (ops[2]) {
                .none => self.asmRegisterRegister(tag, r0, r1),
                .imm => |i2| if (ops[3] == .none)
                    self.asmRegisterRegisterImmediate(tag, r0, r1, i2)
                else
                    error.InvalidInstruction,
                .mem => |m2| switch (ops[3]) {
                    .none => self.asmRegisterRegisterMemory(tag, r0, r1, m2),
                    .reg => |r3| self.asmRegisterRegisterMemoryRegister(tag, r0, r1, m2, r3),
                    .imm => |i3| self.asmRegisterRegisterMemoryImmediate(tag, r0, r1, m2, i3),
                    else => error.InvalidInstruction,
                },
                .reg => |r2| switch (ops[3]) {
                    .none => self.asmRegisterRegisterRegister(tag, r0, r1, r2),
                    .reg => |r3| self.asmRegisterRegisterRegisterRegister(tag, r0, r1, r2, r3),
                    .imm => |i3| self.asmRegisterRegisterRegisterImmediate(tag, r0, r1, r2, i3),
                    else => error.InvalidInstruction,
                },
                else => error.InvalidInstruction,
            },
            else => error.InvalidInstruction,
        },
    };
}
/// A `cc` of `.z_and_np` clobbers `reg2`!
///
/// Emits a conditional move of `reg2` into `reg1` under condition `cc`.
/// The compound conditions `.z_and_np` and `.nz_or_p` are emitted as pseudo
/// instructions. Without the `cmov` feature, a jump on the negated condition
/// over a plain `mov` is emitted instead.
fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2: Register) !void {
    if (!self.hasFeature(.cmov)) {
        const skip_mov = try self.asmJccReloc(cc.negate(), undefined);
        try self.asmRegisterRegister(.{ ._, .mov }, reg1, reg2);
        self.performReloc(skip_mov);
        return;
    }
    _ = try self.addInst(.{
        .tag = switch (cc) {
            .z_and_np, .nz_or_p => .pseudo,
            else => .cmov,
        },
        .ops = switch (cc) {
            .z_and_np => .pseudo_cmov_z_and_np_rr,
            .nz_or_p => .pseudo_cmov_nz_or_p_rr,
            else => .rr,
        },
        .data = .{ .rr = .{
            .fixes = switch (cc) {
                .z_and_np, .nz_or_p => ._,
                else => .fromCondition(cc),
            },
            .r1 = reg1,
            .r2 = reg2,
        } },
    });
}

/// A `cc` of `.z_and_np` is not supported by this encoding!
///
/// Emits a conditional move from memory `m` into `reg` under condition `cc`.
/// `.nz_or_p` is emitted as a pseudo instruction. Without the `cmov`
/// feature, a jump on the negated condition over a plain `mov` is emitted.
fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memory) !void {
    if (!self.hasFeature(.cmov)) {
        const skip_mov = try self.asmJccReloc(cc.negate(), undefined);
        try self.asmRegisterMemory(.{ ._, .mov }, reg, m);
        self.performReloc(skip_mov);
        return;
    }
    _ = try self.addInst(.{
        .tag = switch (cc) {
            .z_and_np => unreachable,
            .nz_or_p => .pseudo,
            else => .cmov,
        },
        .ops = switch (cc) {
            .z_and_np => unreachable,
            .nz_or_p => .pseudo_cmov_nz_or_p_rm,
            else => .rm,
        },
        .data = .{ .rx = .{
            .fixes = switch (cc) {
                .z_and_np => unreachable,
                .nz_or_p => ._,
                else => .fromCondition(cc),
            },
            .r1 = reg,
            .payload = try self.addExtra(Mir.Memory.encode(m)),
        } },
    });
}

/// Emits a `set<cc>` into `reg`.
/// The compound conditions `.z_and_np` and `.nz_or_p` are emitted as pseudo
/// instructions that carry a second, freshly allocated general-purpose
/// register in its 8-bit form.
fn asmSetccRegister(self: *CodeGen, cc: Condition, reg: Register) !void {
    const inst: Mir.Inst = switch (cc) {
        .z_and_np, .nz_or_p => .{
            .tag = .pseudo,
            .ops = if (cc == .z_and_np) .pseudo_set_z_and_np_r else .pseudo_set_nz_or_p_r,
            .data = .{ .rr = .{
                .r1 = reg,
                .r2 = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to8(),
            } },
        },
        else => .{
            .tag = .set,
            .ops = .r,
            .data = .{ .r = .{
                .fixes = .fromCondition(cc),
                .r1 = reg,
            } },
        },
    };
    _ = try self.addInst(inst);
}
/// Emits a `set<cc>` into memory operand `m`.
/// The compound conditions `.z_and_np` and `.nz_or_p` are emitted as pseudo
/// instructions that carry a freshly allocated general-purpose register in
/// its 8-bit form.
fn asmSetccMemory(self: *CodeGen, cc: Condition, m: Memory) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    const inst: Mir.Inst = switch (cc) {
        .z_and_np, .nz_or_p => .{
            .tag = .pseudo,
            .ops = if (cc == .z_and_np) .pseudo_set_z_and_np_m else .pseudo_set_nz_or_p_m,
            .data = .{ .rx = .{
                .r1 = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to8(),
                .payload = mem_payload,
            } },
        },
        else => .{
            .tag = .set,
            .ops = .m,
            .data = .{ .x = .{
                .fixes = .fromCondition(cc),
                .payload = mem_payload,
            } },
        },
    };
    _ = try self.addInst(inst);
}

/// Emits an unconditional `jmp` to MIR instruction `target` and returns the
/// index of the jump.
fn asmJmpReloc(self: *CodeGen, target: Mir.Inst.Index) !Mir.Inst.Index {
    const jump: Mir.Inst = .{
        .tag = .jmp,
        .ops = .inst,
        .data = .{ .inst = .{ .inst = target } },
    };
    return self.addInst(jump);
}

/// Emits a conditional jump on `cc` to MIR instruction `target` and returns
/// the index of the jump. The compound conditions `.z_and_np` and `.nz_or_p`
/// are emitted as pseudo instructions.
fn asmJccReloc(self: *CodeGen, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Index {
    const jump: Mir.Inst = switch (cc) {
        .z_and_np, .nz_or_p => .{
            .tag = .pseudo,
            .ops = if (cc == .z_and_np) .pseudo_j_z_and_np_inst else .pseudo_j_nz_or_p_inst,
            .data = .{ .inst = .{
                .fixes = ._,
                .inst = target,
            } },
        },
        else => .{
            .tag = .j,
            .ops = .inst,
            .data = .{ .inst = .{
                .fixes = .fromCondition(cc),
                .inst = target,
            } },
        },
    };
    return self.addInst(jump);
}

/// Emits instruction `tag` with a single operand referring to MIR instruction `target`.
fn asmReloc(self: *CodeGen, tag: Mir.Inst.FixedTag, target: Mir.Inst.Index) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .inst,
        .data = .{ .inst = .{
            .fixes = fixes,
            .inst = target,
        } },
    });
}

/// Emits a `.pseudo_dead_none` placeholder and returns its index.
/// The instruction data is left `undefined`.
fn asmPlaceholder(self: *CodeGen) !Mir.Inst.Index {
    const placeholder: Mir.Inst = .{
        .tag = .pseudo,
        .ops = .pseudo_dead_none,
        .data = undefined,
    };
    return self.addInst(placeholder);
}
/// Kinds of pseudo instruction that reference an AIR instruction.
const MirTagAir = enum { dbg_local };

/// Emits the AIR-referencing pseudo instruction `tag` for `inst` with no
/// further operand.
fn asmAir(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index) !void {
    const ops: Mir.Inst.Ops = switch (tag) {
        .dbg_local => .pseudo_dbg_local_a,
    };
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = ops,
        .data = .{ .a = .{ .air_inst = inst } },
    });
}

/// Emits the AIR-referencing pseudo instruction `tag` for `inst` with an
/// immediate operand.
///
/// Signed immediates are stored inline. Unsigned immediates are stored
/// inline when they fit in 32 bits and as an `Mir.Imm64` extra payload
/// otherwise. Symbol relocations are stored inline when their offset is
/// zero and as an extra payload otherwise.
fn asmAirImmediate(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, imm: Immediate) !void {
    switch (imm) {
        .signed => |s| {
            _ = try self.addInst(.{
                .tag = .pseudo,
                .ops = switch (tag) {
                    .dbg_local => .pseudo_dbg_local_ai_s,
                },
                .data = .{ .ai = .{
                    .air_inst = inst,
                    .i = @bitCast(s),
                } },
            });
        },
        .unsigned => |u| {
            if (std.math.cast(u32, u)) |small| {
                _ = try self.addInst(.{
                    .tag = .pseudo,
                    .ops = switch (tag) {
                        .dbg_local => .pseudo_dbg_local_ai_u,
                    },
                    .data = .{ .ai = .{
                        .air_inst = inst,
                        .i = small,
                    } },
                });
            } else {
                const imm64_payload = try self.addExtra(Mir.Imm64.encode(u));
                _ = try self.addInst(.{
                    .tag = .pseudo,
                    .ops = switch (tag) {
                        .dbg_local => .pseudo_dbg_local_ai_64,
                    },
                    .data = .{ .ai = .{
                        .air_inst = inst,
                        .i = imm64_payload,
                    } },
                });
            }
        },
        .reloc => |sym_off| {
            if (sym_off.off == 0) {
                _ = try self.addInst(.{
                    .tag = .pseudo,
                    .ops = switch (tag) {
                        .dbg_local => .pseudo_dbg_local_as,
                    },
                    .data = .{ .as = .{
                        .air_inst = inst,
                        .sym_index = sym_off.sym_index,
                    } },
                });
            } else {
                const sym_off_payload = try self.addExtra(sym_off);
                _ = try self.addInst(.{
                    .tag = .pseudo,
                    .ops = switch (tag) {
                        .dbg_local => .pseudo_dbg_local_aso,
                    },
                    .data = .{ .ax = .{
                        .air_inst = inst,
                        .payload = sym_off_payload,
                    } },
                });
            }
        },
    }
}

/// Emits the AIR-referencing pseudo instruction `tag` for `inst` with a
/// register and an offset. `imm` must be a `.signed` immediate; its value is
/// stored as the offset in an `Mir.AirOffset` payload.
fn asmAirRegisterImmediate(
    self: *CodeGen,
    tag: MirTagAir,
    inst: Air.Inst.Index,
    reg: Register,
    imm: Immediate,
) !void {
    const air_off_payload = try self.addExtra(Mir.AirOffset{
        .air_inst = inst,
        .off = imm.signed,
    });
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = switch (tag) {
            .dbg_local => .pseudo_dbg_local_aro,
        },
        .data = .{ .rx = .{
            .r1 = reg,
            .payload = air_off_payload,
        } },
    });
}
/// Emits the AIR-referencing pseudo instruction `tag` for `inst` with a
/// frame address, stored as an extra payload.
fn asmAirFrameAddress(
    self: *CodeGen,
    tag: MirTagAir,
    inst: Air.Inst.Index,
    frame_addr: bits.FrameAddr,
) !void {
    const frame_payload = try self.addExtra(frame_addr);
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = switch (tag) {
            .dbg_local => .pseudo_dbg_local_af,
        },
        .data = .{ .ax = .{
            .air_inst = inst,
            .payload = frame_payload,
        } },
    });
}

/// Emits the AIR-referencing pseudo instruction `tag` for `inst` with a
/// memory operand, stored encoded as an extra payload.
fn asmAirMemory(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, m: Memory) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = switch (tag) {
            .dbg_local => .pseudo_dbg_local_am,
        },
        .data = .{ .ax = .{
            .air_inst = inst,
            .payload = mem_payload,
        } },
    });
}

/// Emits instruction `tag` with no operands.
fn asmOpOnly(self: *CodeGen, tag: Mir.Inst.FixedTag) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .none,
        .data = .{ .none = .{ .fixes = fixes } },
    });
}

/// Emits an operand-less pseudo instruction.
/// `ops` must be named `pseudo_*_none`; the instruction data is left `undefined`.
fn asmPseudo(self: *CodeGen, ops: Mir.Inst.Ops) !void {
    const ops_name = @tagName(ops);
    assert(std.mem.startsWith(u8, ops_name, "pseudo_"));
    assert(std.mem.endsWith(u8, ops_name, "_none"));
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = ops,
        .data = undefined,
    });
}

/// Emits a pseudo instruction with one register operand.
/// `ops` must be named `pseudo_*_r`.
fn asmPseudoRegister(self: *CodeGen, ops: Mir.Inst.Ops, reg: Register) !void {
    const ops_name = @tagName(ops);
    assert(std.mem.startsWith(u8, ops_name, "pseudo_"));
    assert(std.mem.endsWith(u8, ops_name, "_r"));
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = ops,
        .data = .{ .r = .{ .r1 = reg } },
    });
}

/// Emits a pseudo instruction with one signed immediate operand.
/// `ops` must be named `pseudo_*_i_s` and `imm` must be `.signed`.
fn asmPseudoImmediate(self: *CodeGen, ops: Mir.Inst.Ops, imm: Immediate) !void {
    const ops_name = @tagName(ops);
    assert(std.mem.startsWith(u8, ops_name, "pseudo_"));
    assert(std.mem.endsWith(u8, ops_name, "_i_s"));
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = ops,
        .data = .{ .i = .{ .i = @bitCast(imm.signed) } },
    });
}
/// Emits a pseudo instruction with two register operands.
/// `ops` must be named `pseudo_*_rr`.
fn asmPseudoRegisterRegister(self: *CodeGen, ops: Mir.Inst.Ops, reg1: Register, reg2: Register) !void {
    const ops_name = @tagName(ops);
    assert(std.mem.startsWith(u8, ops_name, "pseudo_"));
    assert(std.mem.endsWith(u8, ops_name, "_rr"));
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = ops,
        .data = .{ .rr = .{ .r1 = reg1, .r2 = reg2 } },
    });
}

/// Emits a pseudo instruction with a register and a signed immediate operand.
/// `ops` must be named `pseudo_*_ri_s` and `imm` must be `.signed`.
fn asmPseudoRegisterImmediate(self: *CodeGen, ops: Mir.Inst.Ops, reg: Register, imm: Immediate) !void {
    const ops_name = @tagName(ops);
    assert(std.mem.startsWith(u8, ops_name, "pseudo_"));
    assert(std.mem.endsWith(u8, ops_name, "_ri_s"));
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = ops,
        .data = .{ .ri = .{ .r1 = reg, .i = @bitCast(imm.signed) } },
    });
}

/// Emits instruction `tag` with one register operand.
fn asmRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .r,
        .data = .{ .r = .{
            .fixes = fixes,
            .r1 = reg,
        } },
    });
}

/// Emits instruction `tag` with one immediate operand.
/// A `.reloc` immediate requires `tag[0] == ._` (asserted). An `.unsigned`
/// immediate is narrowed with `@intCast`, so it must fit the `i` field.
fn asmImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate) !void {
    const inst: Mir.Inst = switch (imm) {
        .signed => |s| .{
            .tag = tag[1],
            .ops = .i_s,
            .data = .{ .i = .{
                .fixes = tag[0],
                .i = @bitCast(s),
            } },
        },
        .unsigned => |u| .{
            .tag = tag[1],
            .ops = .i_u,
            .data = .{ .i = .{
                .fixes = tag[0],
                .i = @intCast(u),
            } },
        },
        .reloc => |sym_off| reloc: {
            assert(tag[0] == ._);
            break :reloc .{
                .tag = tag[1],
                .ops = .rel,
                .data = .{ .reloc = sym_off },
            };
        },
    };
    _ = try self.addInst(inst);
}

/// Emits instruction `tag` with two register operands.
fn asmRegisterRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .rr,
        .data = .{ .rr = .{
            .fixes = fixes,
            .r1 = reg1,
            .r2 = reg2,
        } },
    });
}
/// Emits instruction `tag` with a register and an immediate operand.
///
/// Signed immediates use `.ri_s`. Unsigned immediates use `.ri_u` when they
/// fit in 32 bits and `.ri_64` with an `Mir.Imm64` extra payload otherwise.
/// `.reloc` immediates are not supported.
fn asmRegisterImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void {
    const Encoded = struct { ops: Mir.Inst.Ops, word: u32 };
    const encoded: Encoded = switch (imm) {
        .signed => |s| .{ .ops = .ri_s, .word = @bitCast(s) },
        .unsigned => |u| if (std.math.cast(u32, u)) |small|
            .{ .ops = .ri_u, .word = small }
        else
            .{ .ops = .ri_64, .word = try self.addExtra(Mir.Imm64.encode(u)) },
        .reloc => unreachable,
    };
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = encoded.ops,
        .data = .{ .ri = .{
            .fixes = tag[0],
            .r1 = reg,
            .i = encoded.word,
        } },
    });
}

/// Emits instruction `tag` with three register operands.
fn asmRegisterRegisterRegister(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    reg3: Register,
) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .rrr,
        .data = .{ .rrr = .{
            .fixes = fixes,
            .r1 = reg1,
            .r2 = reg2,
            .r3 = reg3,
        } },
    });
}

/// Emits instruction `tag` with four register operands.
fn asmRegisterRegisterRegisterRegister(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    reg3: Register,
    reg4: Register,
) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .rrrr,
        .data = .{ .rrrr = .{
            .fixes = fixes,
            .r1 = reg1,
            .r2 = reg2,
            .r3 = reg3,
            .r4 = reg4,
        } },
    });
}

/// Emits instruction `tag` with three registers and an immediate operand.
/// A signed immediate must fit in `i8` and is stored as its bit pattern; an
/// unsigned immediate is narrowed with `@intCast`. `.reloc` is not supported.
fn asmRegisterRegisterRegisterImmediate(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    reg3: Register,
    imm: Immediate,
) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = .rrri,
        .data = .{ .rrri = .{
            .fixes = fixes,
            .r1 = reg1,
            .r2 = reg2,
            .r3 = reg3,
            .i = switch (imm) {
                .reloc => unreachable,
                .unsigned => |u| @intCast(u),
                .signed => |s| @bitCast(@as(i8, @intCast(s))),
            },
        } },
    });
}
/// Emits instruction `tag` with two registers and an immediate operand.
/// Signed immediates use `.rri_s` and unsigned immediates use `.rri_u`;
/// `.reloc` immediates are not supported.
fn asmRegisterRegisterImmediate(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    imm: Immediate,
) !void {
    const fixes, const mnemonic = tag;
    _ = try self.addInst(.{
        .tag = mnemonic,
        .ops = switch (imm) {
            .reloc => unreachable,
            .unsigned => .rri_u,
            .signed => .rri_s,
        },
        .data = .{ .rri = .{
            .fixes = fixes,
            .r1 = reg1,
            .r2 = reg2,
            .i = switch (imm) {
                .reloc => unreachable,
                .unsigned => |u| @intCast(u),
                .signed => |s| @bitCast(s),
            },
        } },
    });
}

/// Emits instruction `tag` with two registers followed by a memory operand.
fn asmRegisterRegisterMemory(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    m: Memory,
) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .rrm,
        .data = .{ .rrx = .{
            .fixes = tag[0],
            .r1 = reg1,
            .r2 = reg2,
            .payload = mem_payload,
        } },
    });
}

/// Emits instruction `tag` with operands register, register, memory, register.
fn asmRegisterRegisterMemoryRegister(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    m: Memory,
    reg3: Register,
) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .rrmr,
        .data = .{ .rrrx = .{
            .fixes = tag[0],
            .r1 = reg1,
            .r2 = reg2,
            .r3 = reg3,
            .payload = mem_payload,
        } },
    });
}

/// Emits instruction `tag` with a single memory operand.
fn asmMemory(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .m,
        .data = .{ .x = .{
            .fixes = tag[0],
            .payload = mem_payload,
        } },
    });
}

/// Emits instruction `tag` with a register followed by a memory operand.
fn asmRegisterMemory(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .rm,
        .data = .{ .rx = .{
            .fixes = tag[0],
            .r1 = reg,
            .payload = mem_payload,
        } },
    });
}
/// Emits instruction `tag` with operands register, memory, register.
fn asmRegisterMemoryRegister(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    m: Memory,
    reg2: Register,
) !void {
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .rmr,
        .data = .{ .rrx = .{
            .fixes = tag[0],
            .r1 = reg1,
            .r2 = reg2,
            .payload = try self.addExtra(Mir.Memory.encode(m)),
        } },
    });
}

/// Emits instruction `tag` with operands register, memory, immediate.
///
/// An immediate whose value fits in 16 bits (as `i16` when signed, as `u16`
/// when unsigned) is stored inline and emitted as `.rmi`. Any other
/// immediate is stored as an `Mir.Imm32` extra payload immediately followed
/// by the encoded memory operand, and emitted as `.rmi_s` or `.rmi_u`.
/// An unsigned immediate must fit in 32 bits. `.reloc` immediates are not
/// supported.
fn asmRegisterMemoryImmediate(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg: Register,
    m: Memory,
    imm: Immediate,
) !void {
    if (switch (imm) {
        .signed => |s| if (std.math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null,
        .unsigned => |u| std.math.cast(u16, u),
        .reloc => unreachable,
    }) |small_imm| {
        _ = try self.addInst(.{
            .tag = tag[1],
            .ops = .rmi,
            .data = .{ .rix = .{
                .fixes = tag[0],
                .r1 = reg,
                .i = small_imm,
                .payload = try self.addExtra(Mir.Memory.encode(m)),
            } },
        });
    } else {
        // Wide unsigned immediates are encoded here rather than treated as
        // unreachable: the `.rmi_u` ops below is selected for exactly this
        // case, and `asmMemoryImmediate` encodes them the same way.
        const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
            .signed => |s| @bitCast(s),
            .unsigned => |u| @intCast(u),
            .reloc => unreachable,
        } });
        // The memory operand must directly follow the immediate in `mir_extra`.
        // The append is kept out of `assert` so the side effect is explicit.
        const mem_payload = try self.addExtra(Mir.Memory.encode(m));
        assert(payload + 1 == mem_payload);
        _ = try self.addInst(.{
            .tag = tag[1],
            .ops = switch (imm) {
                .signed => .rmi_s,
                .unsigned => .rmi_u,
                .reloc => unreachable,
            },
            .data = .{ .rx = .{
                .fixes = tag[0],
                .r1 = reg,
                .payload = payload,
            } },
        });
    }
}

/// Emits instruction `tag` with operands register, register, memory, immediate.
/// `imm` must be `.unsigned` and is narrowed with `@intCast`.
fn asmRegisterRegisterMemoryImmediate(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    m: Memory,
    imm: Immediate,
) !void {
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .rrmi,
        .data = .{ .rrix = .{
            .fixes = tag[0],
            .r1 = reg1,
            .r2 = reg2,
            .i = @intCast(imm.unsigned),
            .payload = try self.addExtra(Mir.Memory.encode(m)),
        } },
    });
}
/// Emits instruction `tag` with a memory operand followed by a register.
fn asmMemoryRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .mr,
        .data = .{ .rx = .{
            .fixes = tag[0],
            .r1 = reg,
            .payload = mem_payload,
        } },
    });
}

/// Emits instruction `tag` with a memory operand followed by an immediate.
///
/// The immediate is stored as an `Mir.Imm32` extra payload immediately
/// followed by the encoded memory operand. Signed immediates use `.mi_s`;
/// unsigned immediates use `.mi_u` and are narrowed with `@intCast`.
/// `.reloc` immediates are not supported.
fn asmMemoryImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void {
    const imm_payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
        .reloc => unreachable,
        .unsigned => |u| @intCast(u),
        .signed => |s| @bitCast(s),
    } });
    // The memory operand must directly follow the immediate in `mir_extra`.
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    assert(imm_payload + 1 == mem_payload);
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = switch (imm) {
            .reloc => unreachable,
            .unsigned => .mi_u,
            .signed => .mi_s,
        },
        .data = .{ .x = .{
            .fixes = tag[0],
            .payload = imm_payload,
        } },
    });
}

/// Emits instruction `tag` with a memory operand followed by two registers.
fn asmMemoryRegisterRegister(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    m: Memory,
    reg1: Register,
    reg2: Register,
) !void {
    const mem_payload = try self.addExtra(Mir.Memory.encode(m));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .mrr,
        .data = .{ .rrx = .{
            .fixes = tag[0],
            .r1 = reg1,
            .r2 = reg2,
            .payload = mem_payload,
        } },
    });
}

/// Emits instruction `tag` with operands memory, register, immediate.
/// `imm` must be `.unsigned` and is narrowed with `@intCast`.
fn asmMemoryRegisterImmediate(
    self: *CodeGen,
    tag: Mir.Inst.FixedTag,
    m: Memory,
    reg: Register,
    imm: Immediate,
) !void {
    const narrowed_imm = imm.unsigned;
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .mri,
        .data = .{ .rix = .{
            .fixes = tag[0],
            .r1 = reg,
            .i = @intCast(narrowed_imm),
            .payload = try self.addExtra(Mir.Memory.encode(m)),
        } },
    });
}
self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, .s(8)); 2132 try self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, .rbp, .s(0)); 2133 try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); 2134 try self.asmPseudoRegister(.pseudo_cfi_def_cfa_register_r, .rbp); 2135 const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); 2136 const backpatch_frame_align = try self.asmPlaceholder(); 2137 const backpatch_frame_align_extra = try self.asmPlaceholder(); 2138 const backpatch_stack_alloc = try self.asmPlaceholder(); 2139 const backpatch_stack_alloc_extra = try self.asmPlaceholder(); 2140 2141 switch (self.ret_mcv.long) { 2142 .none, .unreach => {}, 2143 .indirect => { 2144 // The address where to store the return value for the caller is in a 2145 // register which the callee is free to clobber. Therefore, we purposely 2146 // spill it to stack immediately. 2147 const frame_index = try self.allocFrameIndex(.initSpill(.usize, zcu)); 2148 try self.genSetMem( 2149 .{ .frame = frame_index }, 2150 0, 2151 .usize, 2152 self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off), 2153 .{}, 2154 ); 2155 self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } }; 2156 tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index }); 2157 }, 2158 else => unreachable, 2159 } 2160 2161 if (fn_info.is_var_args) switch (fn_info.cc) { 2162 .x86_64_sysv => { 2163 const info = &self.va_info.sysv; 2164 const reg_save_area_fi = try self.allocFrameIndex(.init(.{ 2165 .size = abi.SysV.c_abi_int_param_regs.len * 8 + 2166 abi.SysV.c_abi_sse_param_regs.len * 16, 2167 .alignment = .@"16", 2168 })); 2169 info.reg_save_area = .{ .index = reg_save_area_fi }; 2170 2171 for (abi.SysV.c_abi_int_param_regs[info.gp_count..], info.gp_count..) 
|reg, reg_i| 2172 try self.genSetMem(.{ .frame = reg_save_area_fi }, @intCast(reg_i * 8), .usize, .{ .register = reg }, .{}); 2173 2174 try self.asmRegisterImmediate(.{ ._, .cmp }, .al, .u(info.fp_count)); 2175 const skip_sse_reloc = try self.asmJccReloc(.na, undefined); 2176 2177 const vec_2_f64 = try pt.vectorType(.{ .len = 2, .child = .f64_type }); 2178 for (abi.SysV.c_abi_sse_param_regs[info.fp_count..], info.fp_count..) |reg, reg_i| 2179 try self.genSetMem( 2180 .{ .frame = reg_save_area_fi }, 2181 @intCast(abi.SysV.c_abi_int_param_regs.len * 8 + reg_i * 16), 2182 vec_2_f64, 2183 .{ .register = reg }, 2184 .{}, 2185 ); 2186 2187 self.performReloc(skip_sse_reloc); 2188 }, 2189 .x86_64_win => return self.fail("TODO implement gen var arg function for Win64", .{}), 2190 else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}), 2191 }; 2192 2193 try self.asmPseudo(.pseudo_dbg_prologue_end_none); 2194 2195 try self.genBody(self.air.getMainBody()); 2196 2197 const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: { 2198 const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1; 2199 for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: { 2200 _ = self.mir_instructions.pop(); 2201 break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index]; 2202 } else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc); 2203 2204 try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); 2205 const backpatch_stack_dealloc = try self.asmPlaceholder(); 2206 const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); 2207 try self.asmRegister(.{ ._, .pop }, .rbp); 2208 try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8)); 2209 try self.asmOpOnly(.{ ._, .ret }); 2210 break :epilogue .{ 2211 .backpatch_stack_dealloc = backpatch_stack_dealloc, 2212 .backpatch_pop_callee_preserved_regs = 
backpatch_pop_callee_preserved_regs, 2213 }; 2214 } else null; 2215 2216 const frame_layout = try self.computeFrameLayout(fn_info.cc); 2217 const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32); 2218 const need_stack_adjust = frame_layout.stack_adjust > 0; 2219 const need_save_reg = frame_layout.save_reg_list.count() > 0; 2220 if (need_frame_align) { 2221 const page_align = @as(u32, std.math.maxInt(u32)) << 12; 2222 self.mir_instructions.set(backpatch_frame_align, .{ 2223 .tag = .@"and", 2224 .ops = .ri_s, 2225 .data = .{ .ri = .{ 2226 .r1 = .rsp, 2227 .i = @max(frame_layout.stack_mask, page_align), 2228 } }, 2229 }); 2230 if (frame_layout.stack_mask < page_align) { 2231 self.mir_instructions.set(backpatch_frame_align_extra, .{ 2232 .tag = .pseudo, 2233 .ops = .pseudo_probe_align_ri_s, 2234 .data = .{ .ri = .{ 2235 .r1 = .rsp, 2236 .i = ~frame_layout.stack_mask & page_align, 2237 } }, 2238 }); 2239 } 2240 } 2241 if (need_stack_adjust) { 2242 const page_size: u32 = 1 << 12; 2243 if (frame_layout.stack_adjust <= page_size) { 2244 self.mir_instructions.set(backpatch_stack_alloc, .{ 2245 .tag = .sub, 2246 .ops = .ri_s, 2247 .data = .{ .ri = .{ 2248 .r1 = .rsp, 2249 .i = frame_layout.stack_adjust, 2250 } }, 2251 }); 2252 } else if (frame_layout.stack_adjust < 2253 page_size * Lower.pseudo_probe_adjust_unrolled_max_insts) 2254 { 2255 self.mir_instructions.set(backpatch_stack_alloc, .{ 2256 .tag = .pseudo, 2257 .ops = .pseudo_probe_adjust_unrolled_ri_s, 2258 .data = .{ .ri = .{ 2259 .r1 = .rsp, 2260 .i = frame_layout.stack_adjust, 2261 } }, 2262 }); 2263 } else { 2264 const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc); 2265 self.mir_instructions.set(backpatch_stack_alloc, .{ 2266 .tag = .pseudo, 2267 .ops = .pseudo_probe_adjust_setup_rri_s, 2268 .data = .{ .rri = .{ 2269 .r1 = .rsp, 2270 .r2 = scratch_reg, 2271 .i = frame_layout.stack_adjust, 2272 } }, 2273 }); 2274 self.mir_instructions.set(backpatch_stack_alloc_extra, .{ 2275 .tag = .pseudo, 
2276 .ops = .pseudo_probe_adjust_loop_rr, 2277 .data = .{ .rr = .{ 2278 .r1 = .rsp, 2279 .r2 = scratch_reg, 2280 } }, 2281 }); 2282 } 2283 } 2284 if (epilogue) |e| if (need_frame_align or need_stack_adjust) { 2285 self.mir_instructions.set(e.backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) { 2286 0 => .{ 2287 .tag = .mov, 2288 .ops = .rr, 2289 .data = .{ .rr = .{ 2290 .r1 = .rsp, 2291 .r2 = .rbp, 2292 } }, 2293 }, 2294 else => |disp| .{ 2295 .tag = .lea, 2296 .ops = .rm, 2297 .data = .{ .rx = .{ 2298 .r1 = .rsp, 2299 .payload = try self.addExtra(Mir.Memory.encode(.{ 2300 .base = .{ .reg = .rbp }, 2301 .mod = .{ .rm = .{ 2302 .size = .qword, 2303 .disp = disp, 2304 } }, 2305 })), 2306 } }, 2307 }, 2308 }); 2309 }; 2310 if (need_save_reg) { 2311 self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{ 2312 .tag = .pseudo, 2313 .ops = .pseudo_push_reg_list, 2314 .data = .{ .reg_list = frame_layout.save_reg_list }, 2315 }); 2316 if (epilogue) |e| self.mir_instructions.set(e.backpatch_pop_callee_preserved_regs, .{ 2317 .tag = .pseudo, 2318 .ops = .pseudo_pop_reg_list, 2319 .data = .{ .reg_list = frame_layout.save_reg_list }, 2320 }); 2321 } 2322 } else { 2323 try self.asmPseudo(.pseudo_dbg_prologue_end_none); 2324 try self.genBody(self.air.getMainBody()); 2325 try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); 2326 } 2327 2328 // Drop them off at the rbrace. 
2329 _ = try self.addInst(.{ 2330 .tag = .pseudo, 2331 .ops = .pseudo_dbg_line_stmt_line_column, 2332 .data = .{ .line_column = .{ 2333 .line = self.end_di_line, 2334 .column = self.end_di_column, 2335 } }, 2336 }); 2337 } 2338 2339 fn checkInvariantsAfterAirInst(self: *CodeGen) void { 2340 assert(!self.register_manager.lockedRegsExist()); 2341 2342 if (std.debug.runtime_safety) { 2343 // check consistency of tracked registers 2344 var it = self.register_manager.free_registers.iterator(.{ .kind = .unset }); 2345 while (it.next()) |index| { 2346 const tracked_inst = self.register_manager.registers[index]; 2347 const tracking = self.getResolvedInstValue(tracked_inst); 2348 for (tracking.getRegs()) |reg| { 2349 if (RegisterManager.indexOfRegIntoTracked(reg).? == index) break; 2350 } else unreachable; // tracked register not in use 2351 } 2352 } 2353 } 2354 2355 fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { 2356 try self.asmPseudo(.pseudo_dbg_enter_block_none); 2357 try self.genBody(body); 2358 try self.asmPseudo(.pseudo_dbg_leave_block_none); 2359 } 2360 2361 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { 2362 @setEvalBranchQuota(1_600); 2363 const pt = cg.pt; 2364 const zcu = pt.zcu; 2365 const ip = &zcu.intern_pool; 2366 const air_tags = cg.air.instructions.items(.tag); 2367 const air_datas = cg.air.instructions.items(.data); 2368 const use_old = cg.target.ofmt == .coff; 2369 2370 cg.arg_index = 0; 2371 for (body) |inst| switch (air_tags[@intFromEnum(inst)]) { 2372 .arg => { 2373 wip_mir_log.debug("{}", .{cg.fmtAir(inst)}); 2374 verbose_tracking_log.debug("{}", .{cg.fmtTracking()}); 2375 2376 cg.reused_operands = .initEmpty(); 2377 try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1); 2378 2379 try cg.airArg(inst); 2380 2381 cg.resetTemps(); 2382 cg.checkInvariantsAfterAirInst(); 2383 }, 2384 else => break, 2385 }; 2386 2387 if (cg.arg_index == 0) try cg.airDbgVarArgs(); 2388 cg.arg_index = 0; 2389 for 
(body) |inst| { 2390 if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) continue; 2391 wip_mir_log.debug("{}", .{cg.fmtAir(inst)}); 2392 verbose_tracking_log.debug("{}", .{cg.fmtTracking()}); 2393 2394 cg.reused_operands = .initEmpty(); 2395 try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1); 2396 switch (air_tags[@intFromEnum(inst)]) { 2397 // zig fmt: off 2398 .add, 2399 .add_wrap, 2400 .sub, 2401 .sub_wrap, 2402 .min, 2403 .max, 2404 => |air_tag| try cg.airBinOp(inst, air_tag), 2405 2406 .shr, .shr_exact => try cg.airShlShrBinOp(inst), 2407 .shl, .shl_exact => try cg.airShlShrBinOp(inst), 2408 2409 .mul => try cg.airMulDivBinOp(inst), 2410 .mul_wrap => try cg.airMulDivBinOp(inst), 2411 .rem => try cg.airMulDivBinOp(inst), 2412 .mod => try cg.airMulDivBinOp(inst), 2413 2414 .add_sat => try cg.airAddSat(inst), 2415 .sub_sat => try cg.airSubSat(inst), 2416 .mul_sat => try cg.airMulSat(inst), 2417 .shl_sat => try cg.airShlSat(inst), 2418 2419 .sin, 2420 .cos, 2421 .tan, 2422 .exp, 2423 .exp2, 2424 .log, 2425 .log2, 2426 .log10, 2427 .round, 2428 => |air_tag| try cg.airUnaryMath(inst, air_tag), 2429 2430 .floor => try cg.airRound(inst, .{ .mode = .down, .precision = .inexact }), 2431 .ceil => try cg.airRound(inst, .{ .mode = .up, .precision = .inexact }), 2432 .trunc_float => try cg.airRound(inst, .{ .mode = .zero, .precision = .inexact }), 2433 .sqrt => try cg.airSqrt(inst), 2434 .neg => try cg.airFloatSign(inst), 2435 2436 .abs => try cg.airAbs(inst), 2437 2438 .add_with_overflow => try cg.airAddSubWithOverflow(inst), 2439 .sub_with_overflow => try cg.airAddSubWithOverflow(inst), 2440 .mul_with_overflow => try cg.airMulWithOverflow(inst), 2441 .shl_with_overflow => try cg.airShlWithOverflow(inst), 2442 2443 .div_float, .div_trunc, .div_floor, .div_exact => try cg.airMulDivBinOp(inst), 2444 2445 .cmp_lt_errors_len => try cg.airCmpLtErrorsLen(inst), 2446 2447 .bitcast => try cg.airBitCast(inst), 2448 .fptrunc => try cg.airFptrunc(inst), 2449 .fpext => 
try cg.airFpext(inst), 2450 .intcast => try cg.airIntCast(inst), 2451 .trunc => try cg.airTrunc(inst), 2452 .is_non_null => try cg.airIsNonNull(inst), 2453 .is_null => try cg.airIsNull(inst), 2454 .is_non_err => try cg.airIsNonErr(inst), 2455 .is_err => try cg.airIsErr(inst), 2456 .float_from_int => try cg.airFloatFromInt(inst), 2457 .int_from_float => try cg.airIntFromFloat(inst), 2458 .cmpxchg_strong => try cg.airCmpxchg(inst), 2459 .cmpxchg_weak => try cg.airCmpxchg(inst), 2460 .atomic_rmw => try cg.airAtomicRmw(inst), 2461 .atomic_load => try cg.airAtomicLoad(inst), 2462 .memcpy => try cg.airMemcpy(inst), 2463 .memset => try cg.airMemset(inst, false), 2464 .memset_safe => try cg.airMemset(inst, true), 2465 .ctz => try cg.airCtz(inst), 2466 .popcount => try cg.airPopCount(inst), 2467 .byte_swap => try cg.airByteSwap(inst), 2468 .bit_reverse => try cg.airBitReverse(inst), 2469 .tag_name => try cg.airTagName(inst), 2470 .error_name => try cg.airErrorName(inst), 2471 .splat => try cg.airSplat(inst), 2472 .select => try cg.airSelect(inst), 2473 .shuffle => try cg.airShuffle(inst), 2474 .reduce => try cg.airReduce(inst), 2475 .aggregate_init => try cg.airAggregateInit(inst), 2476 .prefetch => try cg.airPrefetch(inst), 2477 .mul_add => try cg.airMulAdd(inst), 2478 2479 .atomic_store_unordered => try cg.airAtomicStore(inst, .unordered), 2480 .atomic_store_monotonic => try cg.airAtomicStore(inst, .monotonic), 2481 .atomic_store_release => try cg.airAtomicStore(inst, .release), 2482 .atomic_store_seq_cst => try cg.airAtomicStore(inst, .seq_cst), 2483 2484 .array_elem_val => try cg.airArrayElemVal(inst), 2485 2486 .optional_payload => try cg.airOptionalPayload(inst), 2487 .unwrap_errunion_err => try cg.airUnwrapErrUnionErr(inst), 2488 .unwrap_errunion_payload => try cg.airUnwrapErrUnionPayload(inst), 2489 .err_return_trace => try cg.airErrReturnTrace(inst), 2490 .set_err_return_trace => try cg.airSetErrReturnTrace(inst), 2491 .save_err_return_trace_index=> try 
cg.airSaveErrReturnTraceIndex(inst), 2492 2493 .wrap_optional => try cg.airWrapOptional(inst), 2494 .wrap_errunion_payload => try cg.airWrapErrUnionPayload(inst), 2495 .wrap_errunion_err => try cg.airWrapErrUnionErr(inst), 2496 // zig fmt: on 2497 2498 .add_safe, 2499 .sub_safe, 2500 .mul_safe, 2501 => return cg.fail("TODO implement safety_checked_instructions", .{}), 2502 .add_optimized, 2503 .sub_optimized, 2504 .mul_optimized, 2505 .div_float_optimized, 2506 .div_trunc_optimized, 2507 .div_floor_optimized, 2508 .div_exact_optimized, 2509 .rem_optimized, 2510 .mod_optimized, 2511 .neg_optimized, 2512 .reduce_optimized, 2513 .int_from_float_optimized, 2514 => return cg.fail("TODO implement optimized float mode", .{}), 2515 2516 .arg => try cg.airDbgArg(inst), 2517 .ptr_add => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else { 2518 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 2519 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; 2520 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 2521 try ops[0].toSlicePtr(cg); 2522 var res: [1]Temp = undefined; 2523 cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ 2524 .patterns = &.{ 2525 .{ .src = .{ .to_gpr, .simm32 } }, 2526 }, 2527 .dst_temps = .{.{ .rc = .general_purpose }}, 2528 .each = .{ .once = &.{ 2529 .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .add_src0_elem_size_times_src1), ._, ._ }, 2530 } }, 2531 }, .{ 2532 .dst_constraints = .{.{ .elem_size_is = 1 }}, 2533 .patterns = &.{ 2534 .{ .src = .{ .to_gpr, .to_gpr } }, 2535 }, 2536 .dst_temps = .{.{ .rc = .general_purpose }}, 2537 .each = .{ .once = &.{ 2538 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, 2539 } }, 2540 }, .{ 2541 .dst_constraints = .{.{ .elem_size_is = 2 }}, 2542 .patterns = &.{ 2543 .{ .src = .{ .to_gpr, .to_gpr } }, 2544 }, 2545 .dst_temps = .{.{ .rc = .general_purpose }}, 2546 .each = .{ .once = &.{ 2547 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", 
.src1), ._, ._ }, 2548 } }, 2549 }, .{ 2550 .dst_constraints = .{.{ .elem_size_is = 2 + 1 }}, 2551 .patterns = &.{ 2552 .{ .src = .{ .to_gpr, .to_gpr } }, 2553 }, 2554 .dst_temps = .{.{ .rc = .general_purpose }}, 2555 .each = .{ .once = &.{ 2556 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ }, 2557 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2558 } }, 2559 }, .{ 2560 .dst_constraints = .{.{ .elem_size_is = 4 }}, 2561 .patterns = &.{ 2562 .{ .src = .{ .to_gpr, .to_gpr } }, 2563 }, 2564 .dst_temps = .{.{ .rc = .general_purpose }}, 2565 .each = .{ .once = &.{ 2566 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ }, 2567 } }, 2568 }, .{ 2569 .dst_constraints = .{.{ .elem_size_is = 4 + 1 }}, 2570 .patterns = &.{ 2571 .{ .src = .{ .to_gpr, .to_gpr } }, 2572 }, 2573 .dst_temps = .{.{ .ref = .src1 }}, 2574 .each = .{ .once = &.{ 2575 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ }, 2576 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2577 } }, 2578 }, .{ 2579 .required_features = .{ .@"64bit", null, null, null }, 2580 .dst_constraints = .{.{ .elem_size_is = 8 }}, 2581 .patterns = &.{ 2582 .{ .src = .{ .to_gpr, .to_gpr } }, 2583 }, 2584 .dst_temps = .{.{ .rc = .general_purpose }}, 2585 .each = .{ .once = &.{ 2586 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ }, 2587 } }, 2588 }, .{ 2589 .required_features = .{ .@"64bit", null, null, null }, 2590 .dst_constraints = .{.{ .elem_size_is = 8 + 1 }}, 2591 .patterns = &.{ 2592 .{ .src = .{ .to_gpr, .to_gpr } }, 2593 }, 2594 .dst_temps = .{.{ .ref = .src1 }}, 2595 .each = .{ .once = &.{ 2596 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ }, 2597 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2598 } }, 2599 }, .{ 2600 .dst_constraints = .{.po2_elem_size}, 2601 .patterns = &.{ 2602 .{ .src = .{ .to_gpr, .to_mut_gpr } }, 2603 }, 2604 .dst_temps = .{.{ .ref = .src1 }}, 2605 
.clobbers = .{ .eflags = true }, 2606 .each = .{ .once = &.{ 2607 .{ ._, ._l, .sh, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ }, 2608 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, 2609 } }, 2610 }, .{ 2611 .patterns = &.{ 2612 .{ .src = .{ .to_gpr, .to_gpr } }, 2613 }, 2614 .dst_temps = .{.{ .rc = .general_purpose }}, 2615 .clobbers = .{ .eflags = true }, 2616 .each = .{ .once = &.{ 2617 .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .add_src0_elem_size), ._ }, 2618 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2619 } }, 2620 } }) catch |err| switch (err) { 2621 error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ 2622 @tagName(air_tag), 2623 cg.typeOf(bin_op.lhs).fmt(pt), 2624 ops[0].tracking(cg), 2625 ops[1].tracking(cg), 2626 }), 2627 else => |e| return e, 2628 }; 2629 for (ops) |op| for (res) |r| { 2630 if (op.index == r.index) break; 2631 } else try op.die(cg); 2632 try res[0].moveTo(inst, cg); 2633 }, 2634 .ptr_sub => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else { 2635 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 2636 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; 2637 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 2638 try ops[0].toSlicePtr(cg); 2639 var res: [1]Temp = undefined; 2640 cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ 2641 .patterns = &.{ 2642 .{ .src = .{ .to_gpr, .simm32 } }, 2643 }, 2644 .dst_temps = .{.{ .rc = .general_purpose }}, 2645 .each = .{ .once = &.{ 2646 .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .sub_src0_elem_size_times_src1), ._, ._ }, 2647 } }, 2648 }, .{ 2649 .dst_constraints = .{.{ .elem_size_is = 1 }}, 2650 .patterns = &.{ 2651 .{ .src = .{ .to_gpr, .to_mut_gpr } }, 2652 }, 2653 .dst_temps = .{.{ .ref = .src1 }}, 2654 .clobbers = .{ .eflags = true }, 2655 .each = .{ .once = &.{ 2656 .{ ._, ._, .neg, .src1p, ._, ._, ._ }, 2657 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, 
._ }, 2658 } }, 2659 }, .{ 2660 .dst_constraints = .{.{ .elem_size_is = 2 }}, 2661 .patterns = &.{ 2662 .{ .src = .{ .to_gpr, .to_mut_gpr } }, 2663 }, 2664 .dst_temps = .{.{ .ref = .src1 }}, 2665 .clobbers = .{ .eflags = true }, 2666 .each = .{ .once = &.{ 2667 .{ ._, ._, .neg, .src1p, ._, ._, ._ }, 2668 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ }, 2669 } }, 2670 }, .{ 2671 .dst_constraints = .{.{ .elem_size_is = 2 + 1 }}, 2672 .patterns = &.{ 2673 .{ .src = .{ .to_gpr, .to_gpr } }, 2674 }, 2675 .dst_temps = .{.{ .rc = .general_purpose }}, 2676 .clobbers = .{ .eflags = true }, 2677 .each = .{ .once = &.{ 2678 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ }, 2679 .{ ._, ._, .neg, .dst0p, ._, ._, ._ }, 2680 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2681 } }, 2682 }, .{ 2683 .dst_constraints = .{.{ .elem_size_is = 4 }}, 2684 .patterns = &.{ 2685 .{ .src = .{ .to_gpr, .to_mut_gpr } }, 2686 }, 2687 .dst_temps = .{.{ .ref = .src1 }}, 2688 .clobbers = .{ .eflags = true }, 2689 .each = .{ .once = &.{ 2690 .{ ._, ._, .neg, .src1p, ._, ._, ._ }, 2691 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ }, 2692 } }, 2693 }, .{ 2694 .dst_constraints = .{.{ .elem_size_is = 4 + 1 }}, 2695 .patterns = &.{ 2696 .{ .src = .{ .to_gpr, .to_gpr } }, 2697 }, 2698 .dst_temps = .{.{ .rc = .general_purpose }}, 2699 .clobbers = .{ .eflags = true }, 2700 .each = .{ .once = &.{ 2701 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ }, 2702 .{ ._, ._, .neg, .dst0p, ._, ._, ._ }, 2703 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2704 } }, 2705 }, .{ 2706 .required_features = .{ .@"64bit", null, null, null }, 2707 .dst_constraints = .{.{ .elem_size_is = 8 }}, 2708 .patterns = &.{ 2709 .{ .src = .{ .to_gpr, .to_mut_gpr } }, 2710 }, 2711 .dst_temps = .{.{ .ref = .src1 }}, 2712 .clobbers = .{ .eflags = true }, 2713 .each = .{ .once = &.{ 2714 .{ ._, ._, .neg, .src1p, ._, ._, ._ 
}, 2715 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ }, 2716 } }, 2717 }, .{ 2718 .required_features = .{ .@"64bit", null, null, null }, 2719 .dst_constraints = .{.{ .elem_size_is = 8 + 1 }}, 2720 .patterns = &.{ 2721 .{ .src = .{ .to_gpr, .to_gpr } }, 2722 }, 2723 .dst_temps = .{.{ .rc = .general_purpose }}, 2724 .clobbers = .{ .eflags = true }, 2725 .each = .{ .once = &.{ 2726 .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ }, 2727 .{ ._, ._, .neg, .dst0p, ._, ._, ._ }, 2728 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2729 } }, 2730 }, .{ 2731 .dst_constraints = .{.po2_elem_size}, 2732 .patterns = &.{ 2733 .{ .src = .{ .to_gpr, .to_mut_gpr } }, 2734 }, 2735 .dst_temps = .{.{ .ref = .src1 }}, 2736 .clobbers = .{ .eflags = true }, 2737 .each = .{ .once = &.{ 2738 .{ ._, ._l, .sa, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ }, 2739 .{ ._, ._, .neg, .src1p, ._, ._, ._ }, 2740 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, 2741 } }, 2742 }, .{ 2743 .patterns = &.{ 2744 .{ .src = .{ .to_gpr, .to_gpr } }, 2745 }, 2746 .dst_temps = .{.{ .rc = .general_purpose }}, 2747 .clobbers = .{ .eflags = true }, 2748 .each = .{ .once = &.{ 2749 .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .sub_src0_elem_size), ._ }, 2750 .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, 2751 } }, 2752 } }) catch |err| switch (err) { 2753 error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ 2754 @tagName(air_tag), 2755 cg.typeOf(bin_op.lhs).fmt(pt), 2756 ops[0].tracking(cg), 2757 ops[1].tracking(cg), 2758 }), 2759 else => |e| return e, 2760 }; 2761 for (ops) |op| for (res) |r| { 2762 if (op.index == r.index) break; 2763 } else try op.die(cg); 2764 try res[0].moveTo(inst, cg); 2765 }, 2766 .alloc => if (use_old) try cg.airAlloc(inst) else { 2767 const ty = air_datas[@intFromEnum(inst)].ty; 2768 var slot = try cg.tempInit(ty, .{ .lea_frame = .{ 2769 .index = try cg.allocMemPtr(inst), 
2770 } }); 2771 try slot.moveTo(inst, cg); 2772 }, 2773 .inferred_alloc, .inferred_alloc_comptime => unreachable, 2774 .ret_ptr => if (use_old) try cg.airRetPtr(inst) else { 2775 const ty = air_datas[@intFromEnum(inst)].ty; 2776 var slot = switch (cg.ret_mcv.long) { 2777 else => unreachable, 2778 .none => try cg.tempInit(ty, .{ .lea_frame = .{ 2779 .index = try cg.allocMemPtr(inst), 2780 } }), 2781 .load_frame => slot: { 2782 var slot = try cg.tempInit(ty, cg.ret_mcv.long); 2783 try slot.toOffset(cg.ret_mcv.short.indirect.off, cg); 2784 break :slot slot; 2785 }, 2786 }; 2787 try slot.moveTo(inst, cg); 2788 }, 2789 .assembly => try cg.airAsm(inst), 2790 .bit_and, .bit_or, .xor, .bool_and, .bool_or => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else { 2791 const bin_op = air_datas[@intFromEnum(inst)].bin_op; 2792 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 2793 var res: [1]Temp = undefined; 2794 cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { 2795 else => unreachable, 2796 .bit_and, .bool_and => .@"and", 2797 .bit_or, .bool_or => .@"or", 2798 .xor => .xor, 2799 })) { 2800 else => unreachable, 2801 inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{ 2802 .required_features = .{ .avx2, null, null, null }, 2803 .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, 2804 .patterns = &.{ 2805 .{ .src = .{ .to_ymm, .mem } }, 2806 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 2807 .{ .src = .{ .to_ymm, .to_ymm } }, 2808 }, 2809 .dst_temps = .{.{ .rc = .sse }}, 2810 .each = .{ .once = &.{ 2811 .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, 2812 } }, 2813 }, .{ 2814 .required_features = .{ .avx, null, null, null }, 2815 .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, 2816 .patterns = &.{ 2817 .{ .src = .{ .to_ymm, .mem } }, 2818 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 2819 .{ .src = .{ .to_ymm, .to_ymm } }, 
2820 }, 2821 .dst_temps = .{.{ .rc = .sse }}, 2822 .each = .{ .once = &.{ 2823 .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, 2824 } }, 2825 }, .{ 2826 .required_features = .{ .avx, null, null, null }, 2827 .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, 2828 .patterns = &.{ 2829 .{ .src = .{ .to_xmm, .mem } }, 2830 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, 2831 .{ .src = .{ .to_xmm, .to_xmm } }, 2832 }, 2833 .dst_temps = .{.{ .rc = .sse }}, 2834 .each = .{ .once = &.{ 2835 .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, 2836 } }, 2837 }, .{ 2838 .required_features = .{ .sse2, null, null, null }, 2839 .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, 2840 .patterns = &.{ 2841 .{ .src = .{ .to_mut_xmm, .mem } }, 2842 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 2843 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 2844 }, 2845 .dst_temps = .{.{ .ref = .src0 }}, 2846 .each = .{ .once = &.{ 2847 .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, 2848 } }, 2849 }, .{ 2850 .required_features = .{ .sse, null, null, null }, 2851 .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, 2852 .patterns = &.{ 2853 .{ .src = .{ .to_mut_xmm, .mem } }, 2854 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 2855 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 2856 }, 2857 .dst_temps = .{.{ .ref = .src0 }}, 2858 .each = .{ .once = &.{ 2859 .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, 2860 } }, 2861 }, .{ 2862 .required_features = .{ .mmx, null, null, null }, 2863 .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, 2864 .patterns = &.{ 2865 .{ .src = .{ .to_mut_mm, .mem } }, 2866 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, 2867 .{ .src = .{ .to_mut_mm, .to_mm } }, 2868 }, 2869 .dst_temps = .{.{ .ref = .src0 }}, 2870 .each = .{ .once = &.{ 2871 .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, 2872 } }, 2873 }, .{ 2874 .src_constraints = .{ .{ 
.int_or_vec = .byte }, .{ .int_or_vec = .byte } }, 2875 .patterns = &.{ 2876 .{ .src = .{ .mut_mem, .imm8 } }, 2877 .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, 2878 .{ .src = .{ .to_mut_gpr, .imm8 } }, 2879 .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2880 .{ .src = .{ .mut_mem, .to_gpr } }, 2881 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 2882 .{ .src = .{ .to_mut_gpr, .mem } }, 2883 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2884 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 2885 }, 2886 .dst_temps = .{.{ .ref = .src0 }}, 2887 .clobbers = .{ .eflags = true }, 2888 .each = .{ .once = &.{ 2889 .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ }, 2890 } }, 2891 }, .{ 2892 .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } }, 2893 .patterns = &.{ 2894 .{ .src = .{ .mut_mem, .imm16 } }, 2895 .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, 2896 .{ .src = .{ .to_mut_gpr, .imm16 } }, 2897 .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2898 .{ .src = .{ .mut_mem, .to_gpr } }, 2899 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 2900 .{ .src = .{ .to_mut_gpr, .mem } }, 2901 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2902 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 2903 }, 2904 .dst_temps = .{.{ .ref = .src0 }}, 2905 .clobbers = .{ .eflags = true }, 2906 .each = .{ .once = &.{ 2907 .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ }, 2908 } }, 2909 }, .{ 2910 .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } }, 2911 .patterns = &.{ 2912 .{ .src = .{ .mut_mem, .imm32 } }, 2913 .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, 2914 .{ .src = .{ .to_mut_gpr, .imm32 } }, 2915 .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2916 .{ .src = .{ .mut_mem, .to_gpr } }, 2917 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 2918 .{ .src = .{ .to_mut_gpr, .mem } }, 2919 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2920 .{ 
.src = .{ .to_mut_gpr, .to_gpr } }, 2921 }, 2922 .dst_temps = .{.{ .ref = .src0 }}, 2923 .clobbers = .{ .eflags = true }, 2924 .each = .{ .once = &.{ 2925 .{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ }, 2926 } }, 2927 }, .{ 2928 .required_features = .{ .@"64bit", null, null, null }, 2929 .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, 2930 .patterns = &.{ 2931 .{ .src = .{ .mut_mem, .simm32 } }, 2932 .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, 2933 .{ .src = .{ .to_mut_gpr, .simm32 } }, 2934 .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2935 .{ .src = .{ .mut_mem, .to_gpr } }, 2936 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 2937 .{ .src = .{ .to_mut_gpr, .mem } }, 2938 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 2939 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 2940 }, 2941 .dst_temps = .{.{ .ref = .src0 }}, 2942 .clobbers = .{ .eflags = true }, 2943 .each = .{ .once = &.{ 2944 .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ }, 2945 } }, 2946 }, .{ 2947 .required_features = .{ .avx2, null, null, null }, 2948 .src_constraints = .{ 2949 .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, 2950 .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, 2951 }, 2952 .patterns = &.{ 2953 .{ .src = .{ .to_mem, .to_mem } }, 2954 }, 2955 .extra_temps = .{ 2956 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 2957 .{ .kind = .{ .rc = .sse } }, 2958 .unused, 2959 .unused, 2960 .unused, 2961 .unused, 2962 }, 2963 .dst_temps = .{.mem}, 2964 .clobbers = .{ .eflags = true }, 2965 .each = .{ .once = &.{ 2966 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 2967 .{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 2968 .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, 2969 .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, 2970 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 2971 .{ ._, ._nc, .j, 
.@"0b", ._, ._, ._ }, 2972 } }, 2973 }, .{ 2974 .required_features = .{ .avx, null, null, null }, 2975 .src_constraints = .{ 2976 .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, 2977 .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, 2978 }, 2979 .patterns = &.{ 2980 .{ .src = .{ .to_mem, .to_mem } }, 2981 }, 2982 .extra_temps = .{ 2983 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 2984 .{ .kind = .{ .rc = .sse } }, 2985 .unused, 2986 .unused, 2987 .unused, 2988 .unused, 2989 }, 2990 .dst_temps = .{.mem}, 2991 .clobbers = .{ .eflags = true }, 2992 .each = .{ .once = &.{ 2993 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 2994 .{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 2995 .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, 2996 .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, 2997 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 2998 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 2999 } }, 3000 }, .{ 3001 .required_features = .{ .avx, null, null, null }, 3002 .src_constraints = .{ 3003 .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, 3004 .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, 3005 }, 3006 .patterns = &.{ 3007 .{ .src = .{ .to_mem, .to_mem } }, 3008 }, 3009 .extra_temps = .{ 3010 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3011 .{ .kind = .{ .rc = .sse } }, 3012 .unused, 3013 .unused, 3014 .unused, 3015 .unused, 3016 }, 3017 .dst_temps = .{.mem}, 3018 .clobbers = .{ .eflags = true }, 3019 .each = .{ .once = &.{ 3020 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3021 .{ .@"0:", .v_dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 3022 .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ }, 3023 .{ ._, .v_dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, 3024 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3025 .{ ._, 
._nc, .j, .@"0b", ._, ._, ._ }, 3026 } }, 3027 }, .{ 3028 .required_features = .{ .sse2, null, null, null }, 3029 .src_constraints = .{ 3030 .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, 3031 .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, 3032 }, 3033 .patterns = &.{ 3034 .{ .src = .{ .to_mem, .to_mem } }, 3035 }, 3036 .extra_temps = .{ 3037 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3038 .{ .kind = .{ .rc = .sse } }, 3039 .unused, 3040 .unused, 3041 .unused, 3042 .unused, 3043 }, 3044 .dst_temps = .{.mem}, 3045 .clobbers = .{ .eflags = true }, 3046 .each = .{ .once = &.{ 3047 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3048 .{ .@"0:", ._dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 3049 .{ ._, .p_, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 3050 .{ ._, ._dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, 3051 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3052 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3053 } }, 3054 }, .{ 3055 .required_features = .{ .sse, null, null, null }, 3056 .src_constraints = .{ 3057 .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, 3058 .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, 3059 }, 3060 .patterns = &.{ 3061 .{ .src = .{ .to_mem, .to_mem } }, 3062 }, 3063 .extra_temps = .{ 3064 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3065 .{ .kind = .{ .rc = .sse } }, 3066 .unused, 3067 .unused, 3068 .unused, 3069 .unused, 3070 }, 3071 .dst_temps = .{.mem}, 3072 .clobbers = .{ .eflags = true }, 3073 .each = .{ .once = &.{ 3074 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3075 .{ .@"0:", ._ps, .movu, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 3076 .{ ._, ._ps, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 3077 .{ ._, ._ps, .movu, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, 3078 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3079 .{ ._, ._nc, 
.j, .@"0b", ._, ._, ._ }, 3080 } }, 3081 }, .{ 3082 .required_features = .{ .mmx, null, null, null }, 3083 .src_constraints = .{ 3084 .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, 3085 .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, 3086 }, 3087 .patterns = &.{ 3088 .{ .src = .{ .to_mem, .to_mem } }, 3089 }, 3090 .extra_temps = .{ 3091 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3092 .{ .kind = .{ .rc = .mmx } }, 3093 .unused, 3094 .unused, 3095 .unused, 3096 .unused, 3097 }, 3098 .dst_temps = .{.mem}, 3099 .clobbers = .{ .eflags = true }, 3100 .each = .{ .once = &.{ 3101 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3102 .{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 3103 .{ ._, .p_, mir_tag, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 3104 .{ ._, ._q, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, 3105 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3106 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3107 } }, 3108 }, .{ 3109 .src_constraints = .{ 3110 .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, 3111 .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, 3112 }, 3113 .patterns = &.{ 3114 .{ .src = .{ .to_mem, .to_mem } }, 3115 }, 3116 .extra_temps = .{ 3117 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3118 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3119 .unused, 3120 .unused, 3121 .unused, 3122 .unused, 3123 }, 3124 .dst_temps = .{.mem}, 3125 .clobbers = .{ .eflags = true }, 3126 .each = .{ .once = &.{ 3127 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3128 .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, 3129 .{ ._, ._, mir_tag, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, 3130 .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, 3131 .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, 3132 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3133 } 
}, 3134 } }, 3135 }) catch |err| switch (err) { 3136 error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ 3137 @tagName(air_tag), 3138 cg.typeOf(bin_op.lhs).fmt(pt), 3139 ops[0].tracking(cg), 3140 ops[1].tracking(cg), 3141 }), 3142 else => |e| return e, 3143 }; 3144 for (ops) |op| for (res) |r| { 3145 if (op.index == r.index) break; 3146 } else try op.die(cg); 3147 try res[0].moveTo(inst, cg); 3148 }, 3149 .not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else { 3150 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 3151 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 3152 var res: [1]Temp = undefined; 3153 cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ 3154 .src_constraints = .{ .{ .signed_or_exact_int = .byte }, .any }, 3155 .patterns = &.{ 3156 .{ .src = .{ .mut_mem, .none } }, 3157 .{ .src = .{ .to_mut_gpr, .none } }, 3158 }, 3159 .dst_temps = .{.{ .ref = .src0 }}, 3160 .each = .{ .once = &.{ 3161 .{ ._, ._, .not, .dst0b, ._, ._, ._ }, 3162 } }, 3163 }, .{ 3164 .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, 3165 .patterns = &.{ 3166 .{ .src = .{ .mut_mem, .none } }, 3167 .{ .src = .{ .to_mut_gpr, .none } }, 3168 }, 3169 .dst_temps = .{.{ .ref = .src0 }}, 3170 .clobbers = .{ .eflags = true }, 3171 .each = .{ .once = &.{ 3172 .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_umax), ._, ._ }, 3173 } }, 3174 }, .{ 3175 .src_constraints = .{ .{ .signed_or_exact_int = .word }, .any }, 3176 .patterns = &.{ 3177 .{ .src = .{ .mut_mem, .none } }, 3178 .{ .src = .{ .to_mut_gpr, .none } }, 3179 }, 3180 .dst_temps = .{.{ .ref = .src0 }}, 3181 .each = .{ .once = &.{ 3182 .{ ._, ._, .not, .dst0w, ._, ._, ._ }, 3183 } }, 3184 }, .{ 3185 .src_constraints = .{ .{ .unsigned_int = .word }, .any }, 3186 .patterns = &.{ 3187 .{ .src = .{ .mut_mem, .none } }, 3188 .{ .src = .{ .to_mut_gpr, .none } }, 3189 }, 3190 .dst_temps = .{.{ .ref = .src0 }}, 3191 .clobbers = .{ .eflags = true }, 3192 .each = .{ .once = &.{ 3193 .{ ._, 
._, .xor, .dst0w, .sa(.src0, .add_umax), ._, ._ }, 3194 } }, 3195 }, .{ 3196 .src_constraints = .{ .{ .signed_or_exact_int = .dword }, .any }, 3197 .patterns = &.{ 3198 .{ .src = .{ .mut_mem, .none } }, 3199 .{ .src = .{ .to_mut_gpr, .none } }, 3200 }, 3201 .dst_temps = .{.{ .ref = .src0 }}, 3202 .each = .{ .once = &.{ 3203 .{ ._, ._, .not, .dst0d, ._, ._, ._ }, 3204 } }, 3205 }, .{ 3206 .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, 3207 .patterns = &.{ 3208 .{ .src = .{ .mut_mem, .none } }, 3209 .{ .src = .{ .to_mut_gpr, .none } }, 3210 }, 3211 .dst_temps = .{.{ .ref = .src0 }}, 3212 .clobbers = .{ .eflags = true }, 3213 .each = .{ .once = &.{ 3214 .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_umax), ._, ._ }, 3215 } }, 3216 }, .{ 3217 .required_features = .{ .@"64bit", null, null, null }, 3218 .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any }, 3219 .patterns = &.{ 3220 .{ .src = .{ .mut_mem, .none } }, 3221 .{ .src = .{ .to_mut_gpr, .none } }, 3222 }, 3223 .dst_temps = .{.{ .ref = .src0 }}, 3224 .each = .{ .once = &.{ 3225 .{ ._, ._, .not, .dst0q, ._, ._, ._ }, 3226 } }, 3227 }, .{ 3228 .required_features = .{ .@"64bit", null, null, null }, 3229 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 3230 .patterns = &.{ 3231 .{ .src = .{ .mem, .none } }, 3232 .{ .src = .{ .to_gpr, .none } }, 3233 }, 3234 .dst_temps = .{.{ .rc = .general_purpose }}, 3235 .each = .{ .once = &.{ 3236 .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, 3237 .{ ._, ._, .xor, .dst0q, .src0q, ._, ._ }, 3238 } }, 3239 }, .{ 3240 .required_features = .{ .mmx, null, null, null }, 3241 .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any }, 3242 .patterns = &.{ 3243 .{ .src = .{ .mem, .none } }, 3244 .{ .src = .{ .to_mm, .none } }, 3245 }, 3246 .dst_temps = .{.{ .rc = .mmx }}, 3247 .each = .{ .once = &.{ 3248 .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ }, 3249 .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ }, 3250 } }, 3251 }, .{ 3252 .required_features 
= .{ .mmx, null, null, null }, 3253 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 3254 .patterns = &.{ 3255 .{ .src = .{ .to_mut_mm, .none } }, 3256 }, 3257 .extra_temps = .{ 3258 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3259 .{ .kind = .{ .umax_mem = .src0 } }, 3260 .unused, 3261 .unused, 3262 .unused, 3263 .unused, 3264 }, 3265 .dst_temps = .{.{ .ref = .src0 }}, 3266 .each = .{ .once = &.{ 3267 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3268 .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ }, 3269 } }, 3270 }, .{ 3271 .required_features = .{ .avx, null, null, null }, 3272 .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any }, 3273 .patterns = &.{ 3274 .{ .src = .{ .mem, .none } }, 3275 .{ .src = .{ .to_xmm, .none } }, 3276 }, 3277 .dst_temps = .{.{ .rc = .sse }}, 3278 .each = .{ .once = &.{ 3279 .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ }, 3280 .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ }, 3281 } }, 3282 }, .{ 3283 .required_features = .{ .avx, null, null, null }, 3284 .src_constraints = .{ .{ .unsigned_int = .xword }, .any }, 3285 .patterns = &.{ 3286 .{ .src = .{ .to_xmm, .none } }, 3287 }, 3288 .extra_temps = .{ 3289 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3290 .{ .kind = .{ .umax_mem = .src0 } }, 3291 .unused, 3292 .unused, 3293 .unused, 3294 .unused, 3295 }, 3296 .dst_temps = .{.{ .rc = .sse }}, 3297 .each = .{ .once = &.{ 3298 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3299 .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ }, 3300 } }, 3301 }, .{ 3302 .required_features = .{ .sse2, null, null, null }, 3303 .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any }, 3304 .patterns = &.{ 3305 .{ .src = .{ .mem, .none } }, 3306 .{ .src = .{ .to_xmm, .none } }, 3307 }, 3308 .dst_temps = .{.{ .rc = .sse }}, 3309 .each = .{ .once = &.{ 3310 .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ }, 3311 .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ }, 3312 } }, 3313 }, .{ 3314 
.required_features = .{ .sse2, null, null, null }, 3315 .src_constraints = .{ .{ .unsigned_int = .xword }, .any }, 3316 .patterns = &.{ 3317 .{ .src = .{ .to_mut_xmm, .none } }, 3318 }, 3319 .extra_temps = .{ 3320 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3321 .{ .kind = .{ .umax_mem = .src0 } }, 3322 .unused, 3323 .unused, 3324 .unused, 3325 .unused, 3326 }, 3327 .dst_temps = .{.{ .ref = .src0 }}, 3328 .each = .{ .once = &.{ 3329 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3330 .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, 3331 } }, 3332 }, .{ 3333 .required_features = .{ .sse, null, null, null }, 3334 .src_constraints = .{ .{ .int = .xword }, .any }, 3335 .patterns = &.{ 3336 .{ .src = .{ .to_mut_xmm, .none } }, 3337 }, 3338 .extra_temps = .{ 3339 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3340 .{ .kind = .{ .umax_mem = .src0 } }, 3341 .unused, 3342 .unused, 3343 .unused, 3344 .unused, 3345 }, 3346 .dst_temps = .{.{ .ref = .src0 }}, 3347 .each = .{ .once = &.{ 3348 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3349 .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, 3350 } }, 3351 }, .{ 3352 .required_features = .{ .avx2, null, null, null }, 3353 .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any }, 3354 .patterns = &.{ 3355 .{ .src = .{ .mem, .none } }, 3356 .{ .src = .{ .to_ymm, .none } }, 3357 }, 3358 .dst_temps = .{.{ .rc = .sse }}, 3359 .each = .{ .once = &.{ 3360 .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ }, 3361 .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ }, 3362 } }, 3363 }, .{ 3364 .required_features = .{ .avx2, null, null, null }, 3365 .src_constraints = .{ .{ .unsigned_int = .yword }, .any }, 3366 .patterns = &.{ 3367 .{ .src = .{ .to_ymm, .none } }, 3368 }, 3369 .extra_temps = .{ 3370 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3371 .{ .kind = .{ .umax_mem = .src0 } }, 3372 .unused, 3373 .unused, 3374 .unused, 3375 .unused, 3376 }, 3377 .dst_temps = .{.{ .rc = .sse 
}}, 3378 .each = .{ .once = &.{ 3379 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3380 .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, 3381 } }, 3382 }, .{ 3383 .required_features = .{ .avx, null, null, null }, 3384 .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any }, 3385 .patterns = &.{ 3386 .{ .src = .{ .mem, .none } }, 3387 .{ .src = .{ .to_ymm, .none } }, 3388 }, 3389 .dst_temps = .{.{ .rc = .sse }}, 3390 .each = .{ .once = &.{ 3391 .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) }, 3392 .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ }, 3393 } }, 3394 }, .{ 3395 .required_features = .{ .avx, null, null, null }, 3396 .src_constraints = .{ .{ .unsigned_int = .yword }, .any }, 3397 .patterns = &.{ 3398 .{ .src = .{ .to_ymm, .none } }, 3399 }, 3400 .extra_temps = .{ 3401 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 3402 .{ .kind = .{ .umax_mem = .src0 } }, 3403 .unused, 3404 .unused, 3405 .unused, 3406 .unused, 3407 }, 3408 .dst_temps = .{.{ .rc = .sse }}, 3409 .each = .{ .once = &.{ 3410 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3411 .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, 3412 } }, 3413 }, .{ 3414 .required_features = .{ .avx2, null, null, null }, 3415 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any }, 3416 .patterns = &.{ 3417 .{ .src = .{ .to_mem, .none } }, 3418 }, 3419 .extra_temps = .{ 3420 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3421 .{ .kind = .{ .rc = .sse } }, 3422 .{ .kind = .{ .rc = .sse } }, 3423 .unused, 3424 .unused, 3425 .unused, 3426 }, 3427 .dst_temps = .{.mem}, 3428 .clobbers = .{ .eflags = true }, 3429 .each = .{ .once = &.{ 3430 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3431 .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ }, 3432 .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ }, 3433 .{ ._, .v_dqu, .mov, .memiad(.dst0y, .tmp0, .add_size, 
-16), .tmp2y, ._, ._ }, 3434 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 3435 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3436 .{ .@"0:", .vp_, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ }, 3437 .{ ._, .v_dqa, .mov, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ }, 3438 } }, 3439 }, .{ 3440 .required_features = .{ .avx2, null, null, null }, 3441 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any }, 3442 .patterns = &.{ 3443 .{ .src = .{ .to_mem, .none } }, 3444 }, 3445 .extra_temps = .{ 3446 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3447 .{ .kind = .{ .rc = .sse } }, 3448 .{ .kind = .{ .rc = .sse } }, 3449 .unused, 3450 .unused, 3451 .unused, 3452 }, 3453 .dst_temps = .{.mem}, 3454 .clobbers = .{ .eflags = true }, 3455 .each = .{ .once = &.{ 3456 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3457 .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ }, 3458 .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ }, 3459 .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ }, 3460 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 3461 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3462 } }, 3463 }, .{ 3464 .required_features = .{ .avx, null, null, null }, 3465 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any }, 3466 .patterns = &.{ 3467 .{ .src = .{ .to_mem, .none } }, 3468 }, 3469 .extra_temps = .{ 3470 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3471 .{ .kind = .{ .rc = .sse } }, 3472 .{ .kind = .{ .rc = .sse } }, 3473 .unused, 3474 .unused, 3475 .unused, 3476 }, 3477 .dst_temps = .{.mem}, 3478 .clobbers = .{ .eflags = true }, 3479 .each = .{ .once = &.{ 3480 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3481 .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) }, 3482 .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ }, 3483 .{ ._, .v_pd, .movu, 
.memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ }, 3484 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 3485 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3486 .{ .@"0:", .v_pd, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ }, 3487 .{ ._, .v_pd, .mova, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ }, 3488 } }, 3489 }, .{ 3490 .required_features = .{ .avx, null, null, null }, 3491 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any }, 3492 .patterns = &.{ 3493 .{ .src = .{ .to_mem, .none } }, 3494 }, 3495 .extra_temps = .{ 3496 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3497 .{ .kind = .{ .rc = .sse } }, 3498 .{ .kind = .{ .rc = .sse } }, 3499 .unused, 3500 .unused, 3501 .unused, 3502 }, 3503 .dst_temps = .{.mem}, 3504 .clobbers = .{ .eflags = true }, 3505 .each = .{ .once = &.{ 3506 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3507 .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) }, 3508 .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ }, 3509 .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ }, 3510 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 3511 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3512 } }, 3513 }, .{ 3514 .required_features = .{ .avx, null, null, null }, 3515 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 3516 .patterns = &.{ 3517 .{ .src = .{ .to_mem, .none } }, 3518 }, 3519 .extra_temps = .{ 3520 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3521 .{ .kind = .{ .rc = .sse } }, 3522 .{ .kind = .{ .rc = .sse } }, 3523 .unused, 3524 .unused, 3525 .unused, 3526 }, 3527 .dst_temps = .{.mem}, 3528 .clobbers = .{ .eflags = true }, 3529 .each = .{ .once = &.{ 3530 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3531 .{ ._, .vp_q, .cmpeq, .tmp1x, .tmp1x, .tmp1x, ._ }, 3532 .{ .@"0:", .v_, .xor, .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._ }, 3533 
.{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ }, 3534 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3535 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3536 } }, 3537 }, .{ 3538 .required_features = .{ .sse2, null, null, null }, 3539 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 3540 .patterns = &.{ 3541 .{ .src = .{ .to_mem, .none } }, 3542 }, 3543 .extra_temps = .{ 3544 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3545 .{ .kind = .{ .rc = .sse } }, 3546 .{ .kind = .{ .rc = .sse } }, 3547 .unused, 3548 .unused, 3549 .unused, 3550 }, 3551 .dst_temps = .{.mem}, 3552 .clobbers = .{ .eflags = true }, 3553 .each = .{ .once = &.{ 3554 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3555 .{ ._, .p_d, .cmpeq, .tmp1x, .tmp1x, ._, ._ }, 3556 .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 3557 .{ ._, .p_, .xor, .tmp2x, .tmp1x, ._, ._ }, 3558 .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ }, 3559 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3560 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3561 } }, 3562 }, .{ 3563 .required_features = .{ .@"64bit", null, null, null }, 3564 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 3565 .patterns = &.{ 3566 .{ .src = .{ .mut_mem, .none } }, 3567 }, 3568 .extra_temps = .{ 3569 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3570 .unused, 3571 .unused, 3572 .unused, 3573 .unused, 3574 .unused, 3575 }, 3576 .dst_temps = .{.{ .ref = .src0 }}, 3577 .clobbers = .{ .eflags = true }, 3578 .each = .{ .once = &.{ 3579 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3580 .{ .@"0:", ._, .not, .memia(.dst0q, .tmp0, .add_size), ._, ._, ._ }, 3581 .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, 8), ._, ._, ._ }, 3582 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3583 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3584 } }, 3585 }, .{ 3586 
.required_features = .{ .@"64bit", null, null, null }, 3587 .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 3588 .patterns = &.{ 3589 .{ .src = .{ .to_mem, .none } }, 3590 }, 3591 .extra_temps = .{ 3592 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3593 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3594 .unused, 3595 .unused, 3596 .unused, 3597 .unused, 3598 }, 3599 .dst_temps = .{.mem}, 3600 .clobbers = .{ .eflags = true }, 3601 .each = .{ .once = &.{ 3602 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 3603 .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 3604 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3605 .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, 3606 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3607 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3608 } }, 3609 }, .{ 3610 .required_features = .{ .@"64bit", null, null, null }, 3611 .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any }, 3612 .patterns = &.{ 3613 .{ .src = .{ .mut_mem, .none } }, 3614 }, 3615 .extra_temps = .{ 3616 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3617 .unused, 3618 .unused, 3619 .unused, 3620 .unused, 3621 .unused, 3622 }, 3623 .dst_temps = .{.{ .ref = .src0 }}, 3624 .clobbers = .{ .eflags = true }, 3625 .each = .{ .once = &.{ 3626 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3627 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, 3628 .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, 3629 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3630 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3631 .{ ._, ._, .not, .memad(.dst0d, .add_size, -16), ._, ._, ._ }, 3632 } }, 3633 }, .{ 3634 .required_features = .{ .@"64bit", null, null, null }, 3635 .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any }, 3636 .patterns = &.{ 3637 .{ .src = 
.{ .to_mem, .none } }, 3638 }, 3639 .extra_temps = .{ 3640 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3641 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3642 .unused, 3643 .unused, 3644 .unused, 3645 .unused, 3646 }, 3647 .dst_temps = .{.mem}, 3648 .clobbers = .{ .eflags = true }, 3649 .each = .{ .once = &.{ 3650 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3651 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, 3652 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3653 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, 3654 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3655 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3656 .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ }, 3657 .{ ._, ._, .not, .tmp0d, ._, ._, ._ }, 3658 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ }, 3659 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ }, 3660 .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ }, 3661 } }, 3662 }, .{ 3663 .required_features = .{ .@"64bit", null, null, null }, 3664 .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any }, 3665 .patterns = &.{ 3666 .{ .src = .{ .mut_mem, .none } }, 3667 }, 3668 .extra_temps = .{ 3669 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3670 .unused, 3671 .unused, 3672 .unused, 3673 .unused, 3674 .unused, 3675 }, 3676 .dst_temps = .{.{ .ref = .src0 }}, 3677 .clobbers = .{ .eflags = true }, 3678 .each = .{ .once = &.{ 3679 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3680 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, 3681 .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, 3682 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3683 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3684 .{ ._, ._, .not, .memad(.dst0q, .add_size, -16), ._, ._, ._ }, 3685 } }, 3686 }, .{ 3687 .required_features = .{ 
.@"64bit", null, null, null }, 3688 .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any }, 3689 .patterns = &.{ 3690 .{ .src = .{ .to_mem, .none } }, 3691 }, 3692 .extra_temps = .{ 3693 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3694 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3695 .unused, 3696 .unused, 3697 .unused, 3698 .unused, 3699 }, 3700 .dst_temps = .{.mem}, 3701 .clobbers = .{ .eflags = true }, 3702 .each = .{ .once = &.{ 3703 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3704 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, 3705 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3706 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, 3707 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3708 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3709 .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, 3710 } }, 3711 }, .{ 3712 .required_features = .{ .@"64bit", null, null, null }, 3713 .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any }, 3714 .patterns = &.{ 3715 .{ .src = .{ .mut_mem, .none } }, 3716 }, 3717 .extra_temps = .{ 3718 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3719 .unused, 3720 .unused, 3721 .unused, 3722 .unused, 3723 .unused, 3724 }, 3725 .dst_temps = .{.{ .ref = .src0 }}, 3726 .clobbers = .{ .eflags = true }, 3727 .each = .{ .once = &.{ 3728 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3729 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, 3730 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3731 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3732 .{ ._, ._, .not, .memad(.dst0d, .add_size, -8), ._, ._, ._ }, 3733 } }, 3734 }, .{ 3735 .required_features = .{ .@"64bit", null, null, null }, 3736 .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any }, 3737 .patterns = &.{ 3738 .{ .src = .{ .to_mem, .none } }, 3739 }, 
3740 .extra_temps = .{ 3741 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3742 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3743 .unused, 3744 .unused, 3745 .unused, 3746 .unused, 3747 }, 3748 .dst_temps = .{.mem}, 3749 .clobbers = .{ .eflags = true }, 3750 .each = .{ .once = &.{ 3751 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3752 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, 3753 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3754 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, 3755 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3756 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3757 .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ }, 3758 .{ ._, ._, .not, .tmp0d, ._, ._, ._ }, 3759 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ }, 3760 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ }, 3761 } }, 3762 }, .{ 3763 .required_features = .{ .@"64bit", null, null, null }, 3764 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any }, 3765 .patterns = &.{ 3766 .{ .src = .{ .mut_mem, .none } }, 3767 }, 3768 .extra_temps = .{ 3769 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3770 .unused, 3771 .unused, 3772 .unused, 3773 .unused, 3774 .unused, 3775 }, 3776 .dst_temps = .{.{ .ref = .src0 }}, 3777 .clobbers = .{ .eflags = true }, 3778 .each = .{ .once = &.{ 3779 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3780 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, 3781 .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, 3782 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3783 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3784 .{ ._, ._, .xor, .memad(.dst0d, .add_size, -16), .sa(.src0, .add_umax), ._, ._ }, 3785 } }, 3786 }, .{ 3787 .required_features = .{ .@"64bit", null, null, null }, 3788 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = 
.dword } }, .any }, 3789 .patterns = &.{ 3790 .{ .src = .{ .to_mem, .none } }, 3791 }, 3792 .extra_temps = .{ 3793 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3794 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3795 .unused, 3796 .unused, 3797 .unused, 3798 .unused, 3799 }, 3800 .dst_temps = .{.mem}, 3801 .clobbers = .{ .eflags = true }, 3802 .each = .{ .once = &.{ 3803 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3804 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, 3805 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3806 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, 3807 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3808 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3809 .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ }, 3810 .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ }, 3811 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ }, 3812 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ }, 3813 .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ }, 3814 } }, 3815 }, .{ 3816 .required_features = .{ .@"64bit", null, null, null }, 3817 .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any }, 3818 .patterns = &.{ 3819 .{ .src = .{ .mut_mem, .none } }, 3820 }, 3821 .extra_temps = .{ 3822 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3823 .unused, 3824 .unused, 3825 .unused, 3826 .unused, 3827 .unused, 3828 }, 3829 .dst_temps = .{.{ .ref = .src0 }}, 3830 .clobbers = .{ .eflags = true }, 3831 .each = .{ .once = &.{ 3832 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3833 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, 3834 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3835 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3836 .{ ._, ._, .xor, .memad(.dst0d, .add_size, -8), .sa(.src0, .add_umax), ._, ._ }, 3837 } }, 3838 }, .{ 3839 .required_features = 
.{ .@"64bit", null, null, null }, 3840 .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any }, 3841 .patterns = &.{ 3842 .{ .src = .{ .to_mem, .none } }, 3843 }, 3844 .extra_temps = .{ 3845 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3846 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3847 .unused, 3848 .unused, 3849 .unused, 3850 .unused, 3851 }, 3852 .dst_temps = .{.mem}, 3853 .clobbers = .{ .eflags = true }, 3854 .each = .{ .once = &.{ 3855 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3856 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, 3857 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3858 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, 3859 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3860 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3861 .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ }, 3862 .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ }, 3863 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ }, 3864 .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ }, 3865 } }, 3866 }, .{ 3867 .required_features = .{ .@"64bit", null, null, null }, 3868 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 3869 .patterns = &.{ 3870 .{ .src = .{ .mut_mem, .none } }, 3871 }, 3872 .extra_temps = .{ 3873 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3874 .unused, 3875 .unused, 3876 .unused, 3877 .unused, 3878 .unused, 3879 }, 3880 .dst_temps = .{.{ .ref = .src0 }}, 3881 .clobbers = .{ .eflags = true }, 3882 .each = .{ .once = &.{ 3883 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3884 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, 3885 .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, 3886 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 3887 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3888 .{ ._, ._, .mov, .tmp0q, 
.ua(.src0, .add_umax), ._, ._ }, 3889 .{ ._, ._, .xor, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, 3890 } }, 3891 }, .{ 3892 .required_features = .{ .@"64bit", null, null, null }, 3893 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 3894 .patterns = &.{ 3895 .{ .src = .{ .to_mem, .none } }, 3896 }, 3897 .extra_temps = .{ 3898 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3899 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3900 .unused, 3901 .unused, 3902 .unused, 3903 .unused, 3904 }, 3905 .dst_temps = .{.mem}, 3906 .clobbers = .{ .eflags = true }, 3907 .each = .{ .once = &.{ 3908 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 3909 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, 3910 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3911 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, 3912 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3913 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3914 .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, 3915 .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ }, 3916 .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, 3917 .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, 3918 } }, 3919 }, .{ 3920 .required_features = .{ .@"64bit", null, null, null }, 3921 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 3922 .patterns = &.{ 3923 .{ .src = .{ .mut_mem, .none } }, 3924 }, 3925 .extra_temps = .{ 3926 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3927 .unused, 3928 .unused, 3929 .unused, 3930 .unused, 3931 .unused, 3932 }, 3933 .dst_temps = .{.{ .ref = .src0 }}, 3934 .clobbers = .{ .eflags = true }, 3935 .each = .{ .once = &.{ 3936 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3937 .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, 3938 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 
3939 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3940 .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, 3941 .{ ._, ._, .xor, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, 3942 } }, 3943 }, .{ 3944 .required_features = .{ .@"64bit", null, null, null }, 3945 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 3946 .patterns = &.{ 3947 .{ .src = .{ .to_mem, .none } }, 3948 }, 3949 .extra_temps = .{ 3950 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 3951 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 3952 .unused, 3953 .unused, 3954 .unused, 3955 .unused, 3956 }, 3957 .dst_temps = .{.mem}, 3958 .clobbers = .{ .eflags = true }, 3959 .each = .{ .once = &.{ 3960 .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, 3961 .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, 3962 .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, 3963 .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, 3964 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 3965 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 3966 .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, 3967 .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ }, 3968 .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, 3969 } }, 3970 }, .{ 3971 .required_features = .{ .mmx, null, null, null }, 3972 .src_constraints = .{ .{ .signed_int_or_full_vec = .qword }, .any }, 3973 .patterns = &.{ 3974 .{ .src = .{ .mem, .none } }, 3975 .{ .src = .{ .to_mm, .none } }, 3976 }, 3977 .dst_temps = .{.{ .rc = .mmx }}, 3978 .each = .{ .once = &.{ 3979 .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ }, 3980 .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ }, 3981 } }, 3982 }, .{ 3983 .required_features = .{ .mmx, null, null, null }, 3984 .src_constraints = .{ .{ .unsigned_int_vec = .qword }, .any }, 3985 .patterns = &.{ 3986 .{ .src = .{ .to_mut_mm, .none } }, 3987 }, 3988 .extra_temps = .{ 3989 .{ .type = .usize, .kind = .{ .rc = 
.general_purpose } }, 3990 .{ .kind = .{ .umax_mem = .src0 } }, 3991 .unused, 3992 .unused, 3993 .unused, 3994 .unused, 3995 }, 3996 .dst_temps = .{.{ .ref = .src0 }}, 3997 .each = .{ .once = &.{ 3998 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 3999 .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ }, 4000 } }, 4001 }, .{ 4002 .required_features = .{ .avx, null, null, null }, 4003 .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any }, 4004 .patterns = &.{ 4005 .{ .src = .{ .mem, .none } }, 4006 .{ .src = .{ .to_xmm, .none } }, 4007 }, 4008 .dst_temps = .{.{ .rc = .sse }}, 4009 .each = .{ .once = &.{ 4010 .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ }, 4011 .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ }, 4012 } }, 4013 }, .{ 4014 .required_features = .{ .avx, null, null, null }, 4015 .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any }, 4016 .patterns = &.{ 4017 .{ .src = .{ .to_xmm, .none } }, 4018 }, 4019 .extra_temps = .{ 4020 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 4021 .{ .kind = .{ .umax_mem = .src0 } }, 4022 .unused, 4023 .unused, 4024 .unused, 4025 .unused, 4026 }, 4027 .dst_temps = .{.{ .rc = .sse }}, 4028 .each = .{ .once = &.{ 4029 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 4030 .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ }, 4031 } }, 4032 }, .{ 4033 .required_features = .{ .sse2, null, null, null }, 4034 .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any }, 4035 .patterns = &.{ 4036 .{ .src = .{ .mem, .none } }, 4037 .{ .src = .{ .to_xmm, .none } }, 4038 }, 4039 .dst_temps = .{.{ .rc = .sse }}, 4040 .each = .{ .once = &.{ 4041 .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ }, 4042 .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ }, 4043 } }, 4044 }, .{ 4045 .required_features = .{ .sse2, null, null, null }, 4046 .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any }, 4047 .patterns = &.{ 4048 .{ .src = .{ .to_mut_xmm, .none } }, 4049 }, 4050 .extra_temps = .{ 4051 .{ 
.type = .usize, .kind = .{ .rc = .general_purpose } }, 4052 .{ .kind = .{ .umax_mem = .src0 } }, 4053 .unused, 4054 .unused, 4055 .unused, 4056 .unused, 4057 }, 4058 .dst_temps = .{.{ .ref = .src0 }}, 4059 .each = .{ .once = &.{ 4060 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 4061 .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, 4062 } }, 4063 }, .{ 4064 .required_features = .{ .sse, null, null, null }, 4065 .src_constraints = .{ .{ .vec = .xword }, .any }, 4066 .patterns = &.{ 4067 .{ .src = .{ .to_mut_xmm, .none } }, 4068 }, 4069 .extra_temps = .{ 4070 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 4071 .{ .kind = .{ .umax_mem = .src0 } }, 4072 .unused, 4073 .unused, 4074 .unused, 4075 .unused, 4076 }, 4077 .dst_temps = .{.{ .ref = .src0 }}, 4078 .each = .{ .once = &.{ 4079 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 4080 .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, 4081 } }, 4082 }, .{ 4083 .required_features = .{ .avx2, null, null, null }, 4084 .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any }, 4085 .patterns = &.{ 4086 .{ .src = .{ .mem, .none } }, 4087 .{ .src = .{ .to_ymm, .none } }, 4088 }, 4089 .dst_temps = .{.{ .rc = .sse }}, 4090 .each = .{ .once = &.{ 4091 .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ }, 4092 .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ }, 4093 } }, 4094 }, .{ 4095 .required_features = .{ .avx2, null, null, null }, 4096 .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any }, 4097 .patterns = &.{ 4098 .{ .src = .{ .to_ymm, .none } }, 4099 }, 4100 .extra_temps = .{ 4101 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 4102 .{ .kind = .{ .umax_mem = .src0 } }, 4103 .unused, 4104 .unused, 4105 .unused, 4106 .unused, 4107 }, 4108 .dst_temps = .{.{ .rc = .sse }}, 4109 .each = .{ .once = &.{ 4110 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 4111 .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, 4112 } }, 4113 }, .{ 4114 .required_features = .{ .avx, 
null, null, null }, 4115 .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any }, 4116 .patterns = &.{ 4117 .{ .src = .{ .mem, .none } }, 4118 .{ .src = .{ .to_ymm, .none } }, 4119 }, 4120 .dst_temps = .{.{ .rc = .sse }}, 4121 .each = .{ .once = &.{ 4122 .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) }, 4123 .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ }, 4124 } }, 4125 }, .{ 4126 .required_features = .{ .avx, null, null, null }, 4127 .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any }, 4128 .patterns = &.{ 4129 .{ .src = .{ .to_ymm, .none } }, 4130 }, 4131 .extra_temps = .{ 4132 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 4133 .{ .kind = .{ .umax_mem = .src0 } }, 4134 .unused, 4135 .unused, 4136 .unused, 4137 .unused, 4138 }, 4139 .dst_temps = .{.{ .rc = .sse }}, 4140 .each = .{ .once = &.{ 4141 .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, 4142 .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, 4143 } }, 4144 }, .{ 4145 .required_features = .{ .@"64bit", null, null, null }, 4146 .patterns = &.{ 4147 .{ .src = .{ .to_mem, .none } }, 4148 }, 4149 .extra_temps = .{ 4150 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 4151 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 4152 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 4153 .{ .kind = .{ .umax_mem = .src0 } }, 4154 .unused, 4155 .unused, 4156 }, 4157 .dst_temps = .{.mem}, 4158 .each = .{ .once = &.{ 4159 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ }, 4160 .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ }, 4161 .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_src0_size), ._, ._ }, 4162 .{ ._, ._, .xor, .tmp2q, .leaia(.qword, .tmp1, .tmp0, .add_src0_size), ._, ._ }, 4163 .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_src0_size), .tmp2q, ._, ._ }, 4164 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 4165 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 4166 } }, 4167 }, .{ 4168 .patterns = &.{ 4169 .{ .src = .{ .to_mem, .none 
} }, 4170 }, 4171 .extra_temps = .{ 4172 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 4173 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 4174 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4175 .{ .kind = .{ .umax_mem = .src0 } }, 4176 .unused, 4177 .unused, 4178 }, 4179 .dst_temps = .{.mem}, 4180 .each = .{ .once = &.{ 4181 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ }, 4182 .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ }, 4183 .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_src0_size), ._, ._ }, 4184 .{ ._, ._, .xor, .tmp2d, .leaia(.dword, .tmp1, .tmp0, .add_src0_size), ._, ._ }, 4185 .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_src0_size), .tmp2d, ._, ._ }, 4186 .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, 4187 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 4188 } }, 4189 } }) catch |err| switch (err) { 4190 error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ 4191 @tagName(air_tag), 4192 cg.typeOf(ty_op.operand).fmt(pt), 4193 ops[0].tracking(cg), 4194 }), 4195 else => |e| return e, 4196 }; 4197 for (ops) |op| for (res) |r| { 4198 if (op.index == r.index) break; 4199 } else try op.die(cg); 4200 try res[0].moveTo(inst, cg); 4201 }, 4202 4203 .block => if (use_old) try cg.airBlock(inst) else { 4204 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 4205 const extra = cg.air.extraData(Air.Block, ty_pl.payload); 4206 try cg.asmPseudo(.pseudo_dbg_enter_block_none); 4207 try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); 4208 try cg.asmPseudo(.pseudo_dbg_leave_block_none); 4209 }, 4210 .loop => if (use_old) try cg.airLoop(inst) else { 4211 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 4212 const extra = cg.air.extraData(Air.Block, ty_pl.payload); 4213 cg.scope_generation += 1; 4214 try cg.loops.putNoClobber(cg.gpa, inst, .{ 4215 .state = try cg.saveState(), 4216 .target = @intCast(cg.mir_instructions.len), 4217 }); 4218 defer assert(cg.loops.remove(inst)); 4219 try 
cg.genBodyBlock(@ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); 4220 }, 4221 .repeat => if (use_old) try cg.airRepeat(inst) else { 4222 const repeat = air_datas[@intFromEnum(inst)].repeat; 4223 const loop = cg.loops.get(repeat.loop_inst).?; 4224 try cg.restoreState(loop.state, &.{}, .{ 4225 .emit_instructions = true, 4226 .update_tracking = false, 4227 .resurrect = false, 4228 .close_scope = true, 4229 }); 4230 _ = try cg.asmJmpReloc(loop.target); 4231 }, 4232 .br => try cg.airBr(inst), 4233 .trap => try cg.asmOpOnly(.{ ._, .ud2 }), 4234 .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }), 4235 .ret_addr => if (use_old) try cg.airRetAddr(inst) else { 4236 var slot = try cg.tempInit(.usize, .{ .load_frame = .{ 4237 .index = .ret_addr, 4238 } }); 4239 while (try slot.toRegClass(true, .general_purpose, cg)) {} 4240 try slot.moveTo(inst, cg); 4241 }, 4242 .frame_addr => if (use_old) try cg.airFrameAddress(inst) else { 4243 var slot = try cg.tempInit(.usize, .{ .lea_frame = .{ 4244 .index = .base_ptr, 4245 } }); 4246 try slot.moveTo(inst, cg); 4247 }, 4248 .call => try cg.airCall(inst, .auto, .{ .safety = true }), 4249 .call_always_tail => try cg.airCall(inst, .always_tail, .{ .safety = true }), 4250 .call_never_tail => try cg.airCall(inst, .never_tail, .{ .safety = true }), 4251 .call_never_inline => try cg.airCall(inst, .never_inline, .{ .safety = true }), 4252 4253 .clz => |air_tag| if (use_old) try cg.airClz(inst) else { 4254 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 4255 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 4256 var res: [1]Temp = undefined; 4257 cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ 4258 .required_features = .{ .slow_incdec, null, null, null }, 4259 .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, 4260 .patterns = &.{ 4261 .{ .src = .{ .mut_mem, .none } }, 4262 .{ .src = .{ .to_mut_gpr, .none } }, 4263 }, 4264 .dst_temps = .{.{ .ref = .src0 }}, 4265 .clobbers = .{ .eflags = true }, 4266 
.each = .{ .once = &.{ 4267 .{ ._, ._, .add, .dst0b, .si(1), ._, ._ }, 4268 } }, 4269 }, .{ 4270 .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, 4271 .patterns = &.{ 4272 .{ .src = .{ .mut_mem, .none } }, 4273 .{ .src = .{ .to_mut_gpr, .none } }, 4274 }, 4275 .dst_temps = .{.{ .ref = .src0 }}, 4276 .clobbers = .{ .eflags = true }, 4277 .each = .{ .once = &.{ 4278 .{ ._, ._, .inc, .dst0b, ._, ._, ._ }, 4279 } }, 4280 }, .{ 4281 .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any }, 4282 .patterns = &.{ 4283 .{ .src = .{ .mut_mem, .none } }, 4284 .{ .src = .{ .to_mut_gpr, .none } }, 4285 }, 4286 .dst_temps = .{.{ .ref = .src0 }}, 4287 .clobbers = .{ .eflags = true }, 4288 .each = .{ .once = &.{ 4289 .{ ._, ._, .xor, .dst0b, .si(1), ._, ._ }, 4290 } }, 4291 }, .{ 4292 .required_features = .{ .lzcnt, null, null, null }, 4293 .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any }, 4294 .patterns = &.{ 4295 .{ .src = .{ .mem, .none } }, 4296 .{ .src = .{ .to_gpr, .none } }, 4297 }, 4298 .dst_temps = .{.{ .rc = .general_purpose }}, 4299 .clobbers = .{ .eflags = true }, 4300 .each = .{ .once = &.{ 4301 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4302 .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ }, 4303 .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 4304 } }, 4305 }, .{ 4306 .required_features = .{ .lzcnt, null, null, null }, 4307 .src_constraints = .{ .{ .signed_int = .byte }, .any }, 4308 .patterns = &.{ 4309 .{ .src = .{ .mem, .none } }, 4310 .{ .src = .{ .to_gpr, .none } }, 4311 }, 4312 .dst_temps = .{.{ .rc = .general_purpose }}, 4313 .clobbers = .{ .eflags = true }, 4314 .each = .{ .once = &.{ 4315 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4316 .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, 4317 .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ }, 4318 .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 4319 } }, 4320 }, .{ 4321 .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, 
4322 .src_constraints = .{ .{ .exact_int = 16 }, .any }, 4323 .patterns = &.{ 4324 .{ .src = .{ .to_mut_gpr, .none } }, 4325 }, 4326 .dst_temps = .{.{ .ref = .src0 }}, 4327 .clobbers = .{ .eflags = true }, 4328 .each = .{ .once = &.{ 4329 .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, 4330 } }, 4331 }, .{ 4332 .required_features = .{ .lzcnt, null, null, null }, 4333 .src_constraints = .{ .{ .exact_int = 16 }, .any }, 4334 .patterns = &.{ 4335 .{ .src = .{ .mem, .none } }, 4336 .{ .src = .{ .to_gpr, .none } }, 4337 }, 4338 .dst_temps = .{.{ .rc = .general_purpose }}, 4339 .clobbers = .{ .eflags = true }, 4340 .each = .{ .once = &.{ 4341 .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, 4342 } }, 4343 }, .{ 4344 .required_features = .{ .lzcnt, null, null, null }, 4345 .src_constraints = .{ .{ .signed_int = .word }, .any }, 4346 .patterns = &.{ 4347 .{ .src = .{ .to_mut_gpr, .none } }, 4348 }, 4349 .dst_temps = .{.{ .ref = .src0 }}, 4350 .clobbers = .{ .eflags = true }, 4351 .each = .{ .once = &.{ 4352 .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, 4353 .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, 4354 .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, 4355 } }, 4356 }, .{ 4357 .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, 4358 .src_constraints = .{ .{ .unsigned_int = .word }, .any }, 4359 .patterns = &.{ 4360 .{ .src = .{ .to_mut_gpr, .none } }, 4361 }, 4362 .dst_temps = .{.{ .ref = .src0 }}, 4363 .clobbers = .{ .eflags = true }, 4364 .each = .{ .once = &.{ 4365 .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, 4366 .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, 4367 } }, 4368 }, .{ 4369 .required_features = .{ .lzcnt, null, null, null }, 4370 .src_constraints = .{ .{ .unsigned_int = .word }, .any }, 4371 .patterns = &.{ 4372 .{ .src = .{ .mem, .none } }, 4373 .{ .src = .{ .to_gpr, .none } }, 4374 }, 4375 .dst_temps = .{.{ .rc = .general_purpose }}, 4376 .clobbers = .{ .eflags = true }, 4377 .each = .{ 
.once = &.{ 4378 .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, 4379 .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, 4380 } }, 4381 }, .{ 4382 .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, 4383 .src_constraints = .{ .{ .exact_int = 32 }, .any }, 4384 .patterns = &.{ 4385 .{ .src = .{ .to_mut_gpr, .none } }, 4386 }, 4387 .dst_temps = .{.{ .ref = .src0 }}, 4388 .clobbers = .{ .eflags = true }, 4389 .each = .{ .once = &.{ 4390 .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, 4391 } }, 4392 }, .{ 4393 .required_features = .{ .lzcnt, null, null, null }, 4394 .src_constraints = .{ .{ .exact_int = 32 }, .any }, 4395 .patterns = &.{ 4396 .{ .src = .{ .mem, .none } }, 4397 .{ .src = .{ .to_gpr, .none } }, 4398 }, 4399 .dst_temps = .{.{ .rc = .general_purpose }}, 4400 .clobbers = .{ .eflags = true }, 4401 .each = .{ .once = &.{ 4402 .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, 4403 } }, 4404 }, .{ 4405 .required_features = .{ .lzcnt, null, null, null }, 4406 .src_constraints = .{ .{ .signed_int = .dword }, .any }, 4407 .patterns = &.{ 4408 .{ .src = .{ .to_mut_gpr, .none } }, 4409 }, 4410 .dst_temps = .{.{ .ref = .src0 }}, 4411 .clobbers = .{ .eflags = true }, 4412 .each = .{ .once = &.{ 4413 .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, 4414 .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, 4415 .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 4416 } }, 4417 }, .{ 4418 .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, 4419 .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, 4420 .patterns = &.{ 4421 .{ .src = .{ .to_mut_gpr, .none } }, 4422 }, 4423 .dst_temps = .{.{ .ref = .src0 }}, 4424 .clobbers = .{ .eflags = true }, 4425 .each = .{ .once = &.{ 4426 .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, 4427 .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 4428 } }, 4429 }, .{ 4430 .required_features = .{ .lzcnt, null, null, null }, 4431 .src_constraints = .{ .{ 
.unsigned_int = .dword }, .any }, 4432 .patterns = &.{ 4433 .{ .src = .{ .mem, .none } }, 4434 .{ .src = .{ .to_gpr, .none } }, 4435 }, 4436 .dst_temps = .{.{ .rc = .general_purpose }}, 4437 .clobbers = .{ .eflags = true }, 4438 .each = .{ .once = &.{ 4439 .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, 4440 .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 4441 } }, 4442 }, .{ 4443 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 4444 .src_constraints = .{ .{ .exact_int = 64 }, .any }, 4445 .patterns = &.{ 4446 .{ .src = .{ .to_mut_gpr, .none } }, 4447 }, 4448 .dst_temps = .{.{ .ref = .src0 }}, 4449 .clobbers = .{ .eflags = true }, 4450 .each = .{ .once = &.{ 4451 .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, 4452 } }, 4453 }, .{ 4454 .required_features = .{ .@"64bit", .lzcnt, null, null }, 4455 .src_constraints = .{ .{ .exact_int = 64 }, .any }, 4456 .patterns = &.{ 4457 .{ .src = .{ .mem, .none } }, 4458 .{ .src = .{ .to_gpr, .none } }, 4459 }, 4460 .dst_temps = .{.{ .rc = .general_purpose }}, 4461 .clobbers = .{ .eflags = true }, 4462 .each = .{ .once = &.{ 4463 .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, 4464 } }, 4465 }, .{ 4466 .required_features = .{ .@"64bit", .lzcnt, null, null }, 4467 .src_constraints = .{ .{ .signed_int = .qword }, .any }, 4468 .patterns = &.{ 4469 .{ .src = .{ .mem, .none } }, 4470 .{ .src = .{ .to_gpr, .none } }, 4471 }, 4472 .dst_temps = .{.{ .rc = .general_purpose }}, 4473 .clobbers = .{ .eflags = true }, 4474 .each = .{ .once = &.{ 4475 .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, 4476 .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ }, 4477 .{ ._, ._, .lzcnt, .dst0q, .dst0q, ._, ._ }, 4478 .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, 4479 } }, 4480 }, .{ 4481 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 4482 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 4483 .patterns = &.{ 4484 .{ .src = .{ .to_mut_gpr, .none } }, 
4485 }, 4486 .dst_temps = .{.{ .ref = .src0 }}, 4487 .clobbers = .{ .eflags = true }, 4488 .each = .{ .once = &.{ 4489 .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, 4490 .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, 4491 } }, 4492 }, .{ 4493 .required_features = .{ .@"64bit", .lzcnt, null, null }, 4494 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 4495 .patterns = &.{ 4496 .{ .src = .{ .mem, .none } }, 4497 .{ .src = .{ .to_gpr, .none } }, 4498 }, 4499 .dst_temps = .{.{ .rc = .general_purpose }}, 4500 .clobbers = .{ .eflags = true }, 4501 .each = .{ .once = &.{ 4502 .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, 4503 .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, 4504 } }, 4505 }, .{ 4506 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4507 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, 4508 .patterns = &.{ 4509 .{ .src = .{ .mem, .none } }, 4510 .{ .src = .{ .to_gpr, .none } }, 4511 }, 4512 .extra_temps = .{ 4513 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4514 .unused, 4515 .unused, 4516 .unused, 4517 .unused, 4518 .unused, 4519 }, 4520 .dst_temps = .{.{ .rc = .general_purpose }}, 4521 .clobbers = .{ .eflags = true }, 4522 .each = .{ .once = &.{ 4523 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4524 .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, 4525 .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4526 .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, 4527 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4528 } }, 4529 }, .{ 4530 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4531 .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, 4532 .patterns = &.{ 4533 .{ .src = .{ .mem, .none } }, 4534 .{ .src = .{ .to_gpr, .none } }, 4535 }, 4536 .extra_temps = .{ 4537 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4538 .unused, 4539 .unused, 4540 .unused, 4541 .unused, 4542 .unused, 4543 }, 
4544 .dst_temps = .{.{ .rc = .general_purpose }}, 4545 .clobbers = .{ .eflags = true }, 4546 .each = .{ .once = &.{ 4547 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4548 .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, 4549 .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, 4550 .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4551 .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, 4552 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4553 } }, 4554 }, .{ 4555 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4556 .src_constraints = .{ .{ .signed_int = .byte }, .any }, 4557 .patterns = &.{ 4558 .{ .src = .{ .mem, .none } }, 4559 .{ .src = .{ .to_gpr, .none } }, 4560 }, 4561 .extra_temps = .{ 4562 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4563 .unused, 4564 .unused, 4565 .unused, 4566 .unused, 4567 .unused, 4568 }, 4569 .dst_temps = .{.{ .rc = .general_purpose }}, 4570 .clobbers = .{ .eflags = true }, 4571 .each = .{ .once = &.{ 4572 .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, 4573 .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, 4574 .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, 4575 .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, 4576 .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, 4577 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4578 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 4579 } }, 4580 }, .{ 4581 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4582 .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, 4583 .patterns = &.{ 4584 .{ .src = .{ .mem, .none } }, 4585 .{ .src = .{ .to_gpr, .none } }, 4586 }, 4587 .extra_temps = .{ 4588 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4589 .unused, 4590 .unused, 4591 .unused, 4592 .unused, 4593 .unused, 4594 }, 4595 .dst_temps = .{.{ .rc = .general_purpose }}, 4596 .clobbers = .{ .eflags = true }, 4597 .each = .{ .once = &.{ 4598 .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, 4599 
.{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, 4600 .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, 4601 .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, 4602 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4603 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 4604 } }, 4605 }, .{ 4606 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4607 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, 4608 .patterns = &.{ 4609 .{ .src = .{ .mem, .none } }, 4610 .{ .src = .{ .to_gpr, .none } }, 4611 }, 4612 .dst_temps = .{.{ .rc = .general_purpose }}, 4613 .clobbers = .{ .eflags = true }, 4614 .each = .{ .once = &.{ 4615 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4616 .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, 4617 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 4618 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4619 .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4620 } }, 4621 }, .{ 4622 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4623 .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, 4624 .patterns = &.{ 4625 .{ .src = .{ .mem, .none } }, 4626 .{ .src = .{ .to_gpr, .none } }, 4627 }, 4628 .dst_temps = .{.{ .rc = .general_purpose }}, 4629 .clobbers = .{ .eflags = true }, 4630 .each = .{ .once = &.{ 4631 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4632 .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, 4633 .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, 4634 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 4635 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4636 .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4637 } }, 4638 }, .{ 4639 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4640 .src_constraints = .{ .{ .signed_int = .byte }, .any }, 4641 .patterns = &.{ 4642 .{ .src = .{ .mem, .none } }, 4643 .{ .src = .{ .to_gpr, .none } }, 4644 }, 4645 .extra_temps = .{ 4646 .{ .type = .u32, .kind 
= .{ .rc = .general_purpose } }, 4647 .unused, 4648 .unused, 4649 .unused, 4650 .unused, 4651 .unused, 4652 }, 4653 .dst_temps = .{.{ .rc = .general_purpose }}, 4654 .clobbers = .{ .eflags = true }, 4655 .each = .{ .once = &.{ 4656 .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, 4657 .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, 4658 .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, 4659 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 4660 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 4661 .{ ._, ._c, .st, ._, ._, ._, ._ }, 4662 .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, 4663 } }, 4664 }, .{ 4665 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4666 .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, 4667 .patterns = &.{ 4668 .{ .src = .{ .mem, .none } }, 4669 .{ .src = .{ .to_gpr, .none } }, 4670 }, 4671 .extra_temps = .{ 4672 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4673 .unused, 4674 .unused, 4675 .unused, 4676 .unused, 4677 .unused, 4678 }, 4679 .dst_temps = .{.{ .rc = .general_purpose }}, 4680 .clobbers = .{ .eflags = true }, 4681 .each = .{ .once = &.{ 4682 .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, 4683 .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, 4684 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 4685 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 4686 .{ ._, ._c, .st, ._, ._, ._, ._ }, 4687 .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, 4688 } }, 4689 }, .{ 4690 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, 4691 .patterns = &.{ 4692 .{ .src = .{ .mem, .none } }, 4693 .{ .src = .{ .to_gpr, .none } }, 4694 }, 4695 .extra_temps = .{ 4696 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4697 .unused, 4698 .unused, 4699 .unused, 4700 .unused, 4701 .unused, 4702 }, 4703 .dst_temps = .{.{ .rc = .general_purpose }}, 4704 .clobbers = .{ .eflags = true }, 4705 .each = .{ .once = &.{ 4706 .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, 4707 .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, 
.add_2_bit_size), ._, ._ }, 4708 .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ }, 4709 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4710 } }, 4711 }, .{ 4712 .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, 4713 .patterns = &.{ 4714 .{ .src = .{ .mem, .none } }, 4715 .{ .src = .{ .to_gpr, .none } }, 4716 }, 4717 .extra_temps = .{ 4718 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4719 .unused, 4720 .unused, 4721 .unused, 4722 .unused, 4723 .unused, 4724 }, 4725 .dst_temps = .{.{ .rc = .general_purpose }}, 4726 .clobbers = .{ .eflags = true }, 4727 .each = .{ .once = &.{ 4728 .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, 4729 .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, 4730 .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4731 .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ }, 4732 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4733 } }, 4734 }, .{ 4735 .src_constraints = .{ .{ .signed_int = .byte }, .any }, 4736 .patterns = &.{ 4737 .{ .src = .{ .mem, .none } }, 4738 .{ .src = .{ .to_gpr, .none } }, 4739 }, 4740 .extra_temps = .{ 4741 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4742 .unused, 4743 .unused, 4744 .unused, 4745 .unused, 4746 .unused, 4747 }, 4748 .dst_temps = .{.{ .rc = .general_purpose }}, 4749 .clobbers = .{ .eflags = true }, 4750 .each = .{ .once = &.{ 4751 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4752 .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, 4753 .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, 4754 .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ }, 4755 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4756 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 4757 } }, 4758 }, .{ 4759 .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, 4760 .patterns = &.{ 4761 .{ .src = .{ .mem, .none } }, 4762 .{ .src = .{ .to_gpr, .none } }, 4763 }, 4764 .extra_temps = .{ 4765 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 4766 
.unused, 4767 .unused, 4768 .unused, 4769 .unused, 4770 .unused, 4771 }, 4772 .dst_temps = .{.{ .rc = .general_purpose }}, 4773 .clobbers = .{ .eflags = true }, 4774 .each = .{ .once = &.{ 4775 .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, 4776 .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, 4777 .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ }, 4778 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4779 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 4780 } }, 4781 }, .{ 4782 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4783 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, 4784 .patterns = &.{ 4785 .{ .src = .{ .to_mut_gpr, .none } }, 4786 }, 4787 .dst_temps = .{.{ .rc = .general_purpose }}, 4788 .clobbers = .{ .eflags = true }, 4789 .each = .{ .once = &.{ 4790 .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, 4791 .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4792 .{ ._, ._nz, .cmov, .dst0w, .src0w, ._, ._ }, 4793 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4794 } }, 4795 }, .{ 4796 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4797 .src_constraints = .{ .{ .signed_int = .word }, .any }, 4798 .patterns = &.{ 4799 .{ .src = .{ .to_mut_gpr, .none } }, 4800 }, 4801 .dst_temps = .{.{ .rc = .general_purpose }}, 4802 .clobbers = .{ .eflags = true }, 4803 .each = .{ .once = &.{ 4804 .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, 4805 .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, 4806 .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ }, 4807 .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ }, 4808 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4809 .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, 4810 } }, 4811 }, .{ 4812 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4813 .src_constraints = .{ .{ .unsigned_int = .word }, .any }, 4814 .patterns = &.{ 4815 .{ .src = .{ .to_mut_gpr, .none } }, 4816 }, 4817 .dst_temps = 
.{.{ .rc = .general_purpose }}, 4818 .clobbers = .{ .eflags = true }, 4819 .each = .{ .once = &.{ 4820 .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, 4821 .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ }, 4822 .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ }, 4823 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4824 .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, 4825 } }, 4826 }, .{ 4827 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4828 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, 4829 .patterns = &.{ 4830 .{ .src = .{ .to_mut_gpr, .none } }, 4831 }, 4832 .dst_temps = .{.{ .ref = .src0 }}, 4833 .clobbers = .{ .eflags = true }, 4834 .each = .{ .once = &.{ 4835 .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ }, 4836 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 4837 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4838 .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4839 } }, 4840 }, .{ 4841 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4842 .src_constraints = .{ .{ .signed_int = .word }, .any }, 4843 .patterns = &.{ 4844 .{ .src = .{ .to_mut_gpr, .none } }, 4845 }, 4846 .dst_temps = .{.{ .rc = .general_purpose }}, 4847 .clobbers = .{ .eflags = true }, 4848 .each = .{ .once = &.{ 4849 .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, 4850 .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, 4851 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 4852 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 4853 .{ ._, ._c, .st, ._, ._, ._, ._ }, 4854 .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, 4855 } }, 4856 }, .{ 4857 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4858 .src_constraints = .{ .{ .unsigned_int = .word }, .any }, 4859 .patterns = &.{ 4860 .{ .src = .{ .to_mut_gpr, .none } }, 4861 }, 4862 .dst_temps = .{.{ .rc = .general_purpose }}, 4863 .clobbers = .{ .eflags = true }, 4864 .each = .{ .once = &.{ 4865 .{ ._, ._r, .bs, .src0w, 
.src0w, ._, ._ }, 4866 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 4867 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 4868 .{ ._, ._c, .st, ._, ._, ._, ._ }, 4869 .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, 4870 } }, 4871 }, .{ 4872 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, 4873 .patterns = &.{ 4874 .{ .src = .{ .mem, .none } }, 4875 .{ .src = .{ .to_gpr, .none } }, 4876 }, 4877 .dst_temps = .{.{ .rc = .general_purpose }}, 4878 .clobbers = .{ .eflags = true }, 4879 .each = .{ .once = &.{ 4880 .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4881 .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ }, 4882 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4883 } }, 4884 }, .{ 4885 .src_constraints = .{ .{ .signed_int = .word }, .any }, 4886 .patterns = &.{ 4887 .{ .src = .{ .to_mut_gpr, .none } }, 4888 }, 4889 .extra_temps = .{ 4890 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 4891 .unused, 4892 .unused, 4893 .unused, 4894 .unused, 4895 .unused, 4896 }, 4897 .dst_temps = .{.{ .rc = .general_purpose }}, 4898 .clobbers = .{ .eflags = true }, 4899 .each = .{ .once = &.{ 4900 .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, 4901 .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ }, 4902 .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ }, 4903 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4904 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 4905 } }, 4906 }, .{ 4907 .src_constraints = .{ .{ .unsigned_int = .word }, .any }, 4908 .patterns = &.{ 4909 .{ .src = .{ .mem, .none } }, 4910 .{ .src = .{ .to_gpr, .none } }, 4911 }, 4912 .extra_temps = .{ 4913 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 4914 .unused, 4915 .unused, 4916 .unused, 4917 .unused, 4918 .unused, 4919 }, 4920 .dst_temps = .{.{ .rc = .general_purpose }}, 4921 .clobbers = .{ .eflags = true }, 4922 .each = .{ .once = &.{ 4923 .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ }, 4924 .{ ._, ._r, .bs, .tmp0w, .src0w, ._, 
._ }, 4925 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4926 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 4927 } }, 4928 }, .{ 4929 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4930 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, 4931 .patterns = &.{ 4932 .{ .src = .{ .to_mut_gpr, .none } }, 4933 }, 4934 .dst_temps = .{.{ .rc = .general_purpose }}, 4935 .clobbers = .{ .eflags = true }, 4936 .each = .{ .once = &.{ 4937 .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, 4938 .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4939 .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ }, 4940 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4941 } }, 4942 }, .{ 4943 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4944 .src_constraints = .{ .{ .signed_int = .dword }, .any }, 4945 .patterns = &.{ 4946 .{ .src = .{ .to_mut_gpr, .none } }, 4947 }, 4948 .dst_temps = .{.{ .rc = .general_purpose }}, 4949 .clobbers = .{ .eflags = true }, 4950 .each = .{ .once = &.{ 4951 .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, 4952 .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, 4953 .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, 4954 .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, 4955 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4956 .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, 4957 } }, 4958 }, .{ 4959 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 4960 .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, 4961 .patterns = &.{ 4962 .{ .src = .{ .to_mut_gpr, .none } }, 4963 }, 4964 .dst_temps = .{.{ .rc = .general_purpose }}, 4965 .clobbers = .{ .eflags = true }, 4966 .each = .{ .once = &.{ 4967 .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, 4968 .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, 4969 .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, 4970 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4971 .{ ._, 
._, .sub, .dst0b, .src0b, ._, ._ }, 4972 } }, 4973 }, .{ 4974 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4975 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, 4976 .patterns = &.{ 4977 .{ .src = .{ .to_mut_gpr, .none } }, 4978 }, 4979 .dst_temps = .{.{ .ref = .src0 }}, 4980 .clobbers = .{ .eflags = true }, 4981 .each = .{ .once = &.{ 4982 .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ }, 4983 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 4984 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 4985 .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 4986 } }, 4987 }, .{ 4988 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 4989 .src_constraints = .{ .{ .signed_int = .dword }, .any }, 4990 .patterns = &.{ 4991 .{ .src = .{ .to_mut_gpr, .none } }, 4992 }, 4993 .dst_temps = .{.{ .rc = .general_purpose }}, 4994 .clobbers = .{ .eflags = true }, 4995 .each = .{ .once = &.{ 4996 .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, 4997 .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, 4998 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 4999 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 5000 .{ ._, ._c, .st, ._, ._, ._, ._ }, 5001 .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, 5002 } }, 5003 }, .{ 5004 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 5005 .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, 5006 .patterns = &.{ 5007 .{ .src = .{ .to_mut_gpr, .none } }, 5008 }, 5009 .dst_temps = .{.{ .rc = .general_purpose }}, 5010 .clobbers = .{ .eflags = true }, 5011 .each = .{ .once = &.{ 5012 .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, 5013 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 5014 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 5015 .{ ._, ._c, .st, ._, ._, ._, ._ }, 5016 .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, 5017 } }, 5018 }, .{ 5019 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, 5020 .patterns = 
&.{ 5021 .{ .src = .{ .mem, .none } }, 5022 .{ .src = .{ .to_gpr, .none } }, 5023 }, 5024 .dst_temps = .{.{ .rc = .general_purpose }}, 5025 .clobbers = .{ .eflags = true }, 5026 .each = .{ .once = &.{ 5027 .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 5028 .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ }, 5029 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5030 } }, 5031 }, .{ 5032 .src_constraints = .{ .{ .signed_int = .dword }, .any }, 5033 .patterns = &.{ 5034 .{ .src = .{ .to_mut_gpr, .none } }, 5035 }, 5036 .extra_temps = .{ 5037 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5038 .unused, 5039 .unused, 5040 .unused, 5041 .unused, 5042 .unused, 5043 }, 5044 .dst_temps = .{.{ .rc = .general_purpose }}, 5045 .clobbers = .{ .eflags = true }, 5046 .each = .{ .once = &.{ 5047 .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, 5048 .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, 5049 .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ }, 5050 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5051 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 5052 } }, 5053 }, .{ 5054 .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, 5055 .patterns = &.{ 5056 .{ .src = .{ .mem, .none } }, 5057 .{ .src = .{ .to_gpr, .none } }, 5058 }, 5059 .extra_temps = .{ 5060 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5061 .unused, 5062 .unused, 5063 .unused, 5064 .unused, 5065 .unused, 5066 }, 5067 .dst_temps = .{.{ .rc = .general_purpose }}, 5068 .clobbers = .{ .eflags = true }, 5069 .each = .{ .once = &.{ 5070 .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, 5071 .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ }, 5072 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5073 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 5074 } }, 5075 }, .{ 5076 .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, 5077 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, 5078 .patterns = &.{ 
5079 .{ .src = .{ .to_mut_gpr, .none } }, 5080 }, 5081 .dst_temps = .{.{ .rc = .general_purpose }}, 5082 .clobbers = .{ .eflags = true }, 5083 .each = .{ .once = &.{ 5084 .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, 5085 .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 5086 .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ }, 5087 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5088 } }, 5089 }, .{ 5090 .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, 5091 .src_constraints = .{ .{ .signed_int = .qword }, .any }, 5092 .patterns = &.{ 5093 .{ .src = .{ .mem, .none } }, 5094 .{ .src = .{ .to_gpr, .none } }, 5095 }, 5096 .extra_temps = .{ 5097 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5098 .unused, 5099 .unused, 5100 .unused, 5101 .unused, 5102 .unused, 5103 }, 5104 .dst_temps = .{.{ .rc = .general_purpose }}, 5105 .clobbers = .{ .eflags = true }, 5106 .each = .{ .once = &.{ 5107 .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, 5108 .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ }, 5109 .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ }, 5110 .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, 5111 .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, 5112 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5113 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 5114 } }, 5115 }, .{ 5116 .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, 5117 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 5118 .patterns = &.{ 5119 .{ .src = .{ .to_mut_gpr, .none } }, 5120 }, 5121 .dst_temps = .{.{ .rc = .general_purpose }}, 5122 .clobbers = .{ .eflags = true }, 5123 .each = .{ .once = &.{ 5124 .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, 5125 .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, 5126 .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, 5127 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5128 .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, 5129 } }, 5130 }, .{ 5131 
.required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, 5132 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, 5133 .patterns = &.{ 5134 .{ .src = .{ .to_mut_gpr, .none } }, 5135 }, 5136 .dst_temps = .{.{ .ref = .src0 }}, 5137 .clobbers = .{ .eflags = true }, 5138 .each = .{ .once = &.{ 5139 .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ }, 5140 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5141 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 5142 .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5143 } }, 5144 }, .{ 5145 .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, 5146 .src_constraints = .{ .{ .signed_int = .qword }, .any }, 5147 .patterns = &.{ 5148 .{ .src = .{ .mem, .none } }, 5149 .{ .src = .{ .to_gpr, .none } }, 5150 }, 5151 .extra_temps = .{ 5152 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5153 .unused, 5154 .unused, 5155 .unused, 5156 .unused, 5157 .unused, 5158 }, 5159 .dst_temps = .{.{ .rc = .general_purpose }}, 5160 .clobbers = .{ .eflags = true }, 5161 .each = .{ .once = &.{ 5162 .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, 5163 .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ }, 5164 .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ }, 5165 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 5166 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 5167 .{ ._, ._c, .st, ._, ._, ._, ._ }, 5168 .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, 5169 } }, 5170 }, .{ 5171 .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, 5172 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 5173 .patterns = &.{ 5174 .{ .src = .{ .to_mut_gpr, .none } }, 5175 }, 5176 .dst_temps = .{.{ .rc = .general_purpose }}, 5177 .clobbers = .{ .eflags = true }, 5178 .each = .{ .once = &.{ 5179 .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, 5180 .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, 5181 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 5182 .{ 
._, ._c, .st, ._, ._, ._, ._ }, 5183 .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, 5184 } }, 5185 }, .{ 5186 .required_features = .{ .@"64bit", null, null, null }, 5187 .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, 5188 .patterns = &.{ 5189 .{ .src = .{ .mem, .none } }, 5190 .{ .src = .{ .to_gpr, .none } }, 5191 }, 5192 .dst_temps = .{.{ .rc = .general_purpose }}, 5193 .clobbers = .{ .eflags = true }, 5194 .each = .{ .once = &.{ 5195 .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, 5196 .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ }, 5197 .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5198 } }, 5199 }, .{ 5200 .required_features = .{ .@"64bit", null, null, null }, 5201 .src_constraints = .{ .{ .signed_int = .qword }, .any }, 5202 .patterns = &.{ 5203 .{ .src = .{ .mem, .none } }, 5204 .{ .src = .{ .to_gpr, .none } }, 5205 }, 5206 .extra_temps = .{ 5207 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5208 .unused, 5209 .unused, 5210 .unused, 5211 .unused, 5212 .unused, 5213 }, 5214 .dst_temps = .{.{ .rc = .general_purpose }}, 5215 .clobbers = .{ .eflags = true }, 5216 .each = .{ .once = &.{ 5217 .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, 5218 .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ }, 5219 .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, 5220 .{ ._, ._r, .bs, .tmp0q, .dst0q, ._, ._ }, 5221 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5222 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 5223 } }, 5224 }, .{ 5225 .required_features = .{ .@"64bit", null, null, null }, 5226 .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, 5227 .patterns = &.{ 5228 .{ .src = .{ .mem, .none } }, 5229 .{ .src = .{ .to_gpr, .none } }, 5230 }, 5231 .extra_temps = .{ 5232 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5233 .unused, 5234 .unused, 5235 .unused, 5236 .unused, 5237 .unused, 5238 }, 5239 .dst_temps = .{.{ .rc = .general_purpose }}, 5240 .clobbers = .{ .eflags = true 
}, 5241 .each = .{ .once = &.{ 5242 .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, 5243 .{ ._, ._r, .bs, .tmp0q, .src0q, ._, ._ }, 5244 .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5245 .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, 5246 } }, 5247 }, .{ 5248 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 5249 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5250 .patterns = &.{ 5251 .{ .src = .{ .to_mem, .none } }, 5252 }, 5253 .extra_temps = .{ 5254 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5255 .unused, 5256 .unused, 5257 .unused, 5258 .unused, 5259 .unused, 5260 }, 5261 .dst_temps = .{.{ .rc = .general_purpose }}, 5262 .clobbers = .{ .eflags = true }, 5263 .each = .{ .once = &.{ 5264 .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5265 .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, 5266 .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5267 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5268 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5269 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5270 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5271 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5272 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5273 } }, 5274 }, .{ 5275 .required_features = .{ .@"64bit", .lzcnt, null, null }, 5276 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5277 .patterns = &.{ 5278 .{ .src = .{ .to_mem, .none } }, 5279 }, 5280 .extra_temps = .{ 5281 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5282 .unused, 5283 .unused, 5284 .unused, 5285 .unused, 5286 .unused, 5287 }, 5288 .dst_temps = .{.{ .rc = .general_purpose }}, 5289 .clobbers = .{ .eflags = true }, 5290 .each = .{ .once = &.{ 5291 .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5292 .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 
5293 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5294 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5295 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5296 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5297 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5298 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5299 } }, 5300 }, .{ 5301 .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, 5302 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5303 .patterns = &.{ 5304 .{ .src = .{ .to_mem, .none } }, 5305 }, 5306 .extra_temps = .{ 5307 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5308 .unused, 5309 .unused, 5310 .unused, 5311 .unused, 5312 .unused, 5313 }, 5314 .dst_temps = .{.{ .rc = .general_purpose }}, 5315 .clobbers = .{ .eflags = true }, 5316 .each = .{ .once = &.{ 5317 .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5318 .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, 5319 .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5320 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5321 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5322 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5323 .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ }, 5324 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5325 .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, 5326 .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, 5327 } }, 5328 }, .{ 5329 .required_features = .{ .@"64bit", null, null, null }, 5330 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5331 .patterns = &.{ 5332 .{ .src = .{ .to_mem, .none } }, 5333 }, 5334 .extra_temps = .{ 5335 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5336 .unused, 5337 .unused, 5338 .unused, 5339 .unused, 5340 .unused, 5341 }, 5342 .dst_temps = .{.{ .rc = .general_purpose }}, 5343 .clobbers = .{ .eflags = true }, 5344 .each = .{ .once = &.{ 5345 .{ 
._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5346 .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, 5347 .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5348 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5349 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5350 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5351 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5352 .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, 5353 .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, 5354 } }, 5355 }, .{ 5356 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 5357 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5358 .patterns = &.{ 5359 .{ .src = .{ .to_mem, .none } }, 5360 }, 5361 .extra_temps = .{ 5362 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5363 .unused, 5364 .unused, 5365 .unused, 5366 .unused, 5367 .unused, 5368 }, 5369 .dst_temps = .{.{ .rc = .general_purpose }}, 5370 .clobbers = .{ .eflags = true }, 5371 .each = .{ .once = &.{ 5372 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5373 .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, 5374 .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5375 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5376 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5377 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5378 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5379 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5380 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5381 } }, 5382 }, .{ 5383 .required_features = .{ .@"64bit", .lzcnt, null, null }, 5384 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5385 .patterns = &.{ 5386 .{ .src = .{ .to_mem, .none } }, 5387 }, 5388 .extra_temps = .{ 5389 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5390 .unused, 5391 .unused, 5392 .unused, 5393 .unused, 5394 .unused, 5395 
}, 5396 .dst_temps = .{.{ .rc = .general_purpose }}, 5397 .clobbers = .{ .eflags = true }, 5398 .each = .{ .once = &.{ 5399 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5400 .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5401 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5402 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5403 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5404 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5405 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5406 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5407 } }, 5408 }, .{ 5409 .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, 5410 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5411 .patterns = &.{ 5412 .{ .src = .{ .to_mem, .none } }, 5413 }, 5414 .extra_temps = .{ 5415 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5416 .unused, 5417 .unused, 5418 .unused, 5419 .unused, 5420 .unused, 5421 }, 5422 .dst_temps = .{.{ .rc = .general_purpose }}, 5423 .clobbers = .{ .eflags = true }, 5424 .each = .{ .once = &.{ 5425 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5426 .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, 5427 .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5428 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5429 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5430 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5431 .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ }, 5432 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5433 .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, 5434 .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, 5435 } }, 5436 }, .{ 5437 .required_features = .{ .@"64bit", null, null, null }, 5438 .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5439 .patterns = &.{ 5440 .{ .src = .{ .to_mem, .none } }, 5441 }, 5442 .extra_temps = .{ 5443 .{ 
.type = .u32, .kind = .{ .rc = .general_purpose } }, 5444 .unused, 5445 .unused, 5446 .unused, 5447 .unused, 5448 .unused, 5449 }, 5450 .dst_temps = .{.{ .rc = .general_purpose }}, 5451 .clobbers = .{ .eflags = true }, 5452 .each = .{ .once = &.{ 5453 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5454 .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, 5455 .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5456 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5457 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5458 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5459 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5460 .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, 5461 .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, 5462 } }, 5463 }, .{ 5464 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 5465 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5466 .patterns = &.{ 5467 .{ .src = .{ .to_mem, .none } }, 5468 }, 5469 .extra_temps = .{ 5470 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5471 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5472 .unused, 5473 .unused, 5474 .unused, 5475 .unused, 5476 }, 5477 .dst_temps = .{.{ .rc = .general_purpose }}, 5478 .clobbers = .{ .eflags = true }, 5479 .each = .{ .once = &.{ 5480 .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5481 .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 5482 .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, 5483 .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, 5484 .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, 5485 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5486 .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, 5487 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5488 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5489 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5490 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5491 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, 
.add_src0_bit_size, -64), ._, ._ }, 5492 } }, 5493 }, .{ 5494 .required_features = .{ .@"64bit", .lzcnt, null, null }, 5495 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5496 .patterns = &.{ 5497 .{ .src = .{ .to_mem, .none } }, 5498 }, 5499 .extra_temps = .{ 5500 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5501 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5502 .unused, 5503 .unused, 5504 .unused, 5505 .unused, 5506 }, 5507 .dst_temps = .{.{ .rc = .general_purpose }}, 5508 .clobbers = .{ .eflags = true }, 5509 .each = .{ .once = &.{ 5510 .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5511 .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 5512 .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, 5513 .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, 5514 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5515 .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, 5516 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5517 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5518 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5519 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5520 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5521 } }, 5522 }, .{ 5523 .required_features = .{ .@"64bit", null, null, null }, 5524 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 5525 .patterns = &.{ 5526 .{ .src = .{ .to_mem, .none } }, 5527 }, 5528 .extra_temps = .{ 5529 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5530 .unused, 5531 .unused, 5532 .unused, 5533 .unused, 5534 .unused, 5535 }, 5536 .dst_temps = .{.{ .rc = .general_purpose }}, 5537 .clobbers = .{ .eflags = true }, 5538 .each = .{ .once = &.{ 5539 .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, 5540 .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, 5541 .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5542 .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ 
}, 5543 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5544 .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ }, 5545 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5546 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5547 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5548 .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, 5549 .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, 5550 } }, 5551 }, .{ 5552 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 5553 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5554 .patterns = &.{ 5555 .{ .src = .{ .to_mem, .none } }, 5556 }, 5557 .extra_temps = .{ 5558 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5559 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5560 .unused, 5561 .unused, 5562 .unused, 5563 .unused, 5564 }, 5565 .dst_temps = .{.{ .rc = .general_purpose }}, 5566 .clobbers = .{ .eflags = true }, 5567 .each = .{ .once = &.{ 5568 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5569 .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 5570 .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, 5571 .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, 5572 .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, 5573 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5574 .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, 5575 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5576 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5577 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5578 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5579 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5580 } }, 5581 }, .{ 5582 .required_features = .{ .@"64bit", .lzcnt, null, null }, 5583 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5584 .patterns = &.{ 5585 .{ .src = .{ .to_mem, .none } }, 5586 }, 5587 .extra_temps = .{ 5588 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5589 .{ .type = .u64, 
.kind = .{ .rc = .general_purpose } }, 5590 .unused, 5591 .unused, 5592 .unused, 5593 .unused, 5594 }, 5595 .dst_temps = .{.{ .rc = .general_purpose }}, 5596 .clobbers = .{ .eflags = true }, 5597 .each = .{ .once = &.{ 5598 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5599 .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 5600 .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, 5601 .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, 5602 .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, 5603 .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, 5604 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5605 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5606 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5607 .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, 5608 .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, 5609 } }, 5610 }, .{ 5611 .required_features = .{ .@"64bit", null, null, null }, 5612 .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 5613 .patterns = &.{ 5614 .{ .src = .{ .to_mem, .none } }, 5615 }, 5616 .extra_temps = .{ 5617 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5618 .unused, 5619 .unused, 5620 .unused, 5621 .unused, 5622 .unused, 5623 }, 5624 .dst_temps = .{.{ .rc = .general_purpose }}, 5625 .clobbers = .{ .eflags = true }, 5626 .each = .{ .once = &.{ 5627 .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, 5628 .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, 5629 .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ }, 5630 .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ }, 5631 .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, 5632 .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ }, 5633 .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, 5634 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5635 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 5636 .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, 5637 .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, 
5638 } }, 5639 }, .{ 5640 .required_features = .{ .lzcnt, .slow_incdec, null, null }, 5641 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5642 .patterns = &.{ 5643 .{ .src = .{ .to_mem, .none } }, 5644 }, 5645 .extra_temps = .{ 5646 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5647 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5648 .unused, 5649 .unused, 5650 .unused, 5651 .unused, 5652 }, 5653 .dst_temps = .{.mem}, 5654 .clobbers = .{ .eflags = true }, 5655 .each = .{ .once = &.{ 5656 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5657 .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5658 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5659 .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, 5660 .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 5661 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5662 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5663 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5664 } }, 5665 }, .{ 5666 .required_features = .{ .lzcnt, null, null, null }, 5667 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5668 .patterns = &.{ 5669 .{ .src = .{ .to_mem, .none } }, 5670 }, 5671 .extra_temps = .{ 5672 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5673 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5674 .unused, 5675 .unused, 5676 .unused, 5677 .unused, 5678 }, 5679 .dst_temps = .{.mem}, 5680 .clobbers = .{ .eflags = true }, 5681 .each = .{ .once = &.{ 5682 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5683 .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5684 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5685 .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, 5686 .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 5687 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5688 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 5689 .{ ._, ._nz, .j, .@"0b", 
._, ._, ._ }, 5690 } }, 5691 }, .{ 5692 .required_features = .{ .lzcnt, .slow_incdec, null, null }, 5693 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 5694 .patterns = &.{ 5695 .{ .src = .{ .to_mem, .none } }, 5696 }, 5697 .extra_temps = .{ 5698 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5699 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5700 .unused, 5701 .unused, 5702 .unused, 5703 .unused, 5704 }, 5705 .dst_temps = .{.mem}, 5706 .clobbers = .{ .eflags = true }, 5707 .each = .{ .once = &.{ 5708 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5709 .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 5710 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5711 .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, 5712 .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 5713 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5714 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5715 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5716 } }, 5717 }, .{ 5718 .required_features = .{ .lzcnt, null, null, null }, 5719 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 5720 .patterns = &.{ 5721 .{ .src = .{ .to_mem, .none } }, 5722 }, 5723 .extra_temps = .{ 5724 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5725 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5726 .unused, 5727 .unused, 5728 .unused, 5729 .unused, 5730 }, 5731 .dst_temps = .{.mem}, 5732 .clobbers = .{ .eflags = true }, 5733 .each = .{ .once = &.{ 5734 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5735 .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 5736 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5737 .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, 5738 .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 5739 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5740 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ 
}, 5741 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 5742 } }, 5743 }, .{ 5744 .required_features = .{ .lzcnt, .slow_incdec, null, null }, 5745 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 5746 .patterns = &.{ 5747 .{ .src = .{ .to_mem, .none } }, 5748 }, 5749 .extra_temps = .{ 5750 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5751 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5752 .unused, 5753 .unused, 5754 .unused, 5755 .unused, 5756 }, 5757 .dst_temps = .{.mem}, 5758 .clobbers = .{ .eflags = true }, 5759 .each = .{ .once = &.{ 5760 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5761 .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 5762 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5763 .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, 5764 .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 5765 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5766 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5767 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5768 } }, 5769 }, .{ 5770 .required_features = .{ .lzcnt, null, null, null }, 5771 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 5772 .patterns = &.{ 5773 .{ .src = .{ .to_mem, .none } }, 5774 }, 5775 .extra_temps = .{ 5776 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5777 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5778 .unused, 5779 .unused, 5780 .unused, 5781 .unused, 5782 }, 5783 .dst_temps = .{.mem}, 5784 .clobbers = .{ .eflags = true }, 5785 .each = .{ .once = &.{ 5786 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5787 .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 5788 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5789 .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, 5790 .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, 5791 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5792 .{ 
._, ._, .inc, .tmp0p, ._, ._, ._ }, 5793 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 5794 } }, 5795 }, .{ 5796 .required_features = .{ .@"64bit", .lzcnt, .slow_incdec, null }, 5797 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 5798 .patterns = &.{ 5799 .{ .src = .{ .to_mem, .none } }, 5800 }, 5801 .extra_temps = .{ 5802 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5803 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5804 .unused, 5805 .unused, 5806 .unused, 5807 .unused, 5808 }, 5809 .dst_temps = .{.mem}, 5810 .clobbers = .{ .eflags = true }, 5811 .each = .{ .once = &.{ 5812 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5813 .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 5814 .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 5815 .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, 5816 .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, 5817 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5818 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5819 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5820 } }, 5821 }, .{ 5822 .required_features = .{ .@"64bit", .lzcnt, null, null }, 5823 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 5824 .patterns = &.{ 5825 .{ .src = .{ .to_mem, .none } }, 5826 }, 5827 .extra_temps = .{ 5828 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5829 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 5830 .unused, 5831 .unused, 5832 .unused, 5833 .unused, 5834 }, 5835 .dst_temps = .{.mem}, 5836 .clobbers = .{ .eflags = true }, 5837 .each = .{ .once = &.{ 5838 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5839 .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 5840 .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 5841 .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, 5842 .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, 5843 .{ ._, ._, .mov, .memia(.dst0b, 
.tmp0, .add_len), .tmp1b, ._, ._ }, 5844 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 5845 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 5846 } }, 5847 }, .{ 5848 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, 5849 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5850 .patterns = &.{ 5851 .{ .src = .{ .to_mem, .none } }, 5852 }, 5853 .extra_temps = .{ 5854 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5855 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5856 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5857 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 5858 .unused, 5859 .unused, 5860 }, 5861 .dst_temps = .{.mem}, 5862 .clobbers = .{ .eflags = true }, 5863 .each = .{ .once = &.{ 5864 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5865 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 5866 .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5867 .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, 5868 .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 5869 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 5870 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5871 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 5872 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 5873 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5874 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5875 } }, 5876 }, .{ 5877 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 5878 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5879 .patterns = &.{ 5880 .{ .src = .{ .to_mem, .none } }, 5881 }, 5882 .extra_temps = .{ 5883 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5884 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5885 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5886 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 5887 .unused, 5888 .unused, 5889 }, 5890 .dst_temps = .{.mem}, 5891 .clobbers = .{ .eflags = true }, 5892 
.each = .{ .once = &.{ 5893 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5894 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 5895 .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5896 .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, 5897 .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 5898 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 5899 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 5900 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 5901 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 5902 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 5903 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 5904 } }, 5905 }, .{ 5906 .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, 5907 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5908 .patterns = &.{ 5909 .{ .src = .{ .to_mem, .none } }, 5910 }, 5911 .extra_temps = .{ 5912 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5913 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5914 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5915 .unused, 5916 .unused, 5917 .unused, 5918 }, 5919 .dst_temps = .{.mem}, 5920 .clobbers = .{ .eflags = true }, 5921 .each = .{ .once = &.{ 5922 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5923 .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5924 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5925 .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, 5926 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 5927 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 5928 .{ ._, ._c, .st, ._, ._, ._, ._ }, 5929 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 5930 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 5931 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5932 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5933 } }, 5934 }, .{ 5935 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 5936 .src_constraints = .{ .{ 
.scalar_int = .byte }, .any }, 5937 .patterns = &.{ 5938 .{ .src = .{ .to_mem, .none } }, 5939 }, 5940 .extra_temps = .{ 5941 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5942 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5943 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5944 .unused, 5945 .unused, 5946 .unused, 5947 }, 5948 .dst_temps = .{.mem}, 5949 .clobbers = .{ .eflags = true }, 5950 .each = .{ .once = &.{ 5951 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5952 .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5953 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5954 .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, 5955 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 5956 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 5957 .{ ._, ._c, .st, ._, ._, ._, ._ }, 5958 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 5959 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 5960 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 5961 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 5962 } }, 5963 }, .{ 5964 .required_features = .{ .slow_incdec, null, null, null }, 5965 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5966 .patterns = &.{ 5967 .{ .src = .{ .to_mem, .none } }, 5968 }, 5969 .extra_temps = .{ 5970 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5971 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5972 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5973 .unused, 5974 .unused, 5975 .unused, 5976 }, 5977 .dst_temps = .{.mem}, 5978 .clobbers = .{ .eflags = true }, 5979 .each = .{ .once = &.{ 5980 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 5981 .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 5982 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 5983 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 5984 .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, 5985 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), 
._, ._ }, 5986 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 5987 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 5988 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 5989 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 5990 } }, 5991 }, .{ 5992 .src_constraints = .{ .{ .scalar_int = .byte }, .any }, 5993 .patterns = &.{ 5994 .{ .src = .{ .to_mem, .none } }, 5995 }, 5996 .extra_temps = .{ 5997 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 5998 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 5999 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6000 .unused, 6001 .unused, 6002 .unused, 6003 }, 6004 .dst_temps = .{.mem}, 6005 .clobbers = .{ .eflags = true }, 6006 .each = .{ .once = &.{ 6007 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6008 .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, 6009 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6010 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6011 .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, 6012 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6013 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6014 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6015 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6016 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6017 } }, 6018 }, .{ 6019 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, 6020 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 6021 .patterns = &.{ 6022 .{ .src = .{ .to_mem, .none } }, 6023 }, 6024 .extra_temps = .{ 6025 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6026 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6027 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6028 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 6029 .unused, 6030 .unused, 6031 }, 6032 .dst_temps = .{.mem}, 6033 .clobbers = .{ .eflags = true }, 6034 .each = .{ .once = &.{ 6035 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, 
._ }, 6036 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 6037 .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 6038 .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, 6039 .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 6040 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 6041 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6042 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 6043 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6044 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6045 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6046 } }, 6047 }, .{ 6048 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 6049 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 6050 .patterns = &.{ 6051 .{ .src = .{ .to_mem, .none } }, 6052 }, 6053 .extra_temps = .{ 6054 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6055 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6056 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6057 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 6058 .unused, 6059 .unused, 6060 }, 6061 .dst_temps = .{.mem}, 6062 .clobbers = .{ .eflags = true }, 6063 .each = .{ .once = &.{ 6064 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6065 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 6066 .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 6067 .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, 6068 .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 6069 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 6070 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6071 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 6072 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6073 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6074 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6075 } }, 6076 }, .{ 6077 .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, 6078 .src_constraints = .{ 
.{ .scalar_int = .word }, .any }, 6079 .patterns = &.{ 6080 .{ .src = .{ .to_mem, .none } }, 6081 }, 6082 .extra_temps = .{ 6083 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6084 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6085 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6086 .unused, 6087 .unused, 6088 .unused, 6089 }, 6090 .dst_temps = .{.mem}, 6091 .clobbers = .{ .eflags = true }, 6092 .each = .{ .once = &.{ 6093 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6094 .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 6095 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6096 .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, 6097 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 6098 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 6099 .{ ._, ._c, .st, ._, ._, ._, ._ }, 6100 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 6101 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 6102 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6103 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6104 } }, 6105 }, .{ 6106 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 6107 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 6108 .patterns = &.{ 6109 .{ .src = .{ .to_mem, .none } }, 6110 }, 6111 .extra_temps = .{ 6112 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6113 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6114 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6115 .unused, 6116 .unused, 6117 .unused, 6118 }, 6119 .dst_temps = .{.mem}, 6120 .clobbers = .{ .eflags = true }, 6121 .each = .{ .once = &.{ 6122 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6123 .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 6124 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6125 .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, 6126 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 6127 
.{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 6128 .{ ._, ._c, .st, ._, ._, ._, ._ }, 6129 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 6130 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 6131 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6132 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6133 } }, 6134 }, .{ 6135 .required_features = .{ .slow_incdec, null, null, null }, 6136 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 6137 .patterns = &.{ 6138 .{ .src = .{ .to_mem, .none } }, 6139 }, 6140 .extra_temps = .{ 6141 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6142 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6143 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6144 .unused, 6145 .unused, 6146 .unused, 6147 }, 6148 .dst_temps = .{.mem}, 6149 .clobbers = .{ .eflags = true }, 6150 .each = .{ .once = &.{ 6151 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6152 .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 6153 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6154 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6155 .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, 6156 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6157 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6158 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6159 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6160 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6161 } }, 6162 }, .{ 6163 .src_constraints = .{ .{ .scalar_int = .word }, .any }, 6164 .patterns = &.{ 6165 .{ .src = .{ .to_mem, .none } }, 6166 }, 6167 .extra_temps = .{ 6168 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6169 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6170 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6171 .unused, 6172 .unused, 6173 .unused, 6174 }, 6175 .dst_temps = .{.mem}, 6176 .clobbers = .{ .eflags = true }, 6177 .each = .{ .once = &.{ 6178 .{ ._, ._, .mov, .tmp0p, .sa(.src0, 
.sub_len), ._, ._ }, 6179 .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 6180 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6181 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6182 .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, 6183 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6184 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6185 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6186 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6187 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6188 } }, 6189 }, .{ 6190 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, 6191 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 6192 .patterns = &.{ 6193 .{ .src = .{ .to_mem, .none } }, 6194 }, 6195 .extra_temps = .{ 6196 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6197 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6198 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6199 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 6200 .unused, 6201 .unused, 6202 }, 6203 .dst_temps = .{.mem}, 6204 .clobbers = .{ .eflags = true }, 6205 .each = .{ .once = &.{ 6206 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6207 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 6208 .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 6209 .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, 6210 .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 6211 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 6212 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6213 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 6214 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6215 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6216 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6217 } }, 6218 }, .{ 6219 .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, 6220 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 
6221 .patterns = &.{ 6222 .{ .src = .{ .to_mem, .none } }, 6223 }, 6224 .extra_temps = .{ 6225 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6226 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6227 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6228 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 6229 .unused, 6230 .unused, 6231 }, 6232 .dst_temps = .{.mem}, 6233 .clobbers = .{ .eflags = true }, 6234 .each = .{ .once = &.{ 6235 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6236 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 6237 .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 6238 .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, 6239 .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 6240 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 6241 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6242 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 6243 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6244 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6245 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6246 } }, 6247 }, .{ 6248 .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, 6249 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 6250 .patterns = &.{ 6251 .{ .src = .{ .to_mem, .none } }, 6252 }, 6253 .extra_temps = .{ 6254 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6255 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6256 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6257 .unused, 6258 .unused, 6259 .unused, 6260 }, 6261 .dst_temps = .{.mem}, 6262 .clobbers = .{ .eflags = true }, 6263 .each = .{ .once = &.{ 6264 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6265 .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 6266 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6267 .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, 6268 .{ ._, ._, .mov, .tmp2b, .sa(.src0, 
.add_bit_size), ._, ._ }, 6269 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 6270 .{ ._, ._c, .st, ._, ._, ._, ._ }, 6271 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 6272 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 6273 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6274 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6275 } }, 6276 }, .{ 6277 .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 6278 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 6279 .patterns = &.{ 6280 .{ .src = .{ .to_mem, .none } }, 6281 }, 6282 .extra_temps = .{ 6283 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6284 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6285 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6286 .unused, 6287 .unused, 6288 .unused, 6289 }, 6290 .dst_temps = .{.mem}, 6291 .clobbers = .{ .eflags = true }, 6292 .each = .{ .once = &.{ 6293 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6294 .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 6295 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6296 .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, 6297 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 6298 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 6299 .{ ._, ._c, .st, ._, ._, ._, ._ }, 6300 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 6301 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 6302 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6303 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6304 } }, 6305 }, .{ 6306 .required_features = .{ .slow_incdec, null, null, null }, 6307 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 6308 .patterns = &.{ 6309 .{ .src = .{ .to_mem, .none } }, 6310 }, 6311 .extra_temps = .{ 6312 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6313 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6314 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6315 .unused, 6316 .unused, 6317 .unused, 6318 
}, 6319 .dst_temps = .{.mem}, 6320 .clobbers = .{ .eflags = true }, 6321 .each = .{ .once = &.{ 6322 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6323 .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 6324 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6325 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6326 .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, 6327 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6328 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6329 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6330 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6331 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6332 } }, 6333 }, .{ 6334 .src_constraints = .{ .{ .scalar_int = .dword }, .any }, 6335 .patterns = &.{ 6336 .{ .src = .{ .to_mem, .none } }, 6337 }, 6338 .extra_temps = .{ 6339 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6340 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6341 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6342 .unused, 6343 .unused, 6344 .unused, 6345 }, 6346 .dst_temps = .{.mem}, 6347 .clobbers = .{ .eflags = true }, 6348 .each = .{ .once = &.{ 6349 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6350 .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, 6351 .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, 6352 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6353 .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, 6354 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6355 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6356 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6357 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6358 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6359 } }, 6360 }, .{ 6361 .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec }, 6362 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 6363 .patterns = &.{ 6364 .{ .src 
= .{ .to_mem, .none } }, 6365 }, 6366 .extra_temps = .{ 6367 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6368 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6369 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6370 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 6371 .unused, 6372 .unused, 6373 }, 6374 .dst_temps = .{.mem}, 6375 .clobbers = .{ .eflags = true }, 6376 .each = .{ .once = &.{ 6377 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6378 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 6379 .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ }, 6380 .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 6381 .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ }, 6382 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 6383 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6384 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 6385 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6386 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6387 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6388 } }, 6389 }, .{ 6390 .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, 6391 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 6392 .patterns = &.{ 6393 .{ .src = .{ .to_mem, .none } }, 6394 }, 6395 .extra_temps = .{ 6396 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6397 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6398 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6399 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 6400 .unused, 6401 .unused, 6402 }, 6403 .dst_temps = .{.mem}, 6404 .clobbers = .{ .eflags = true }, 6405 .each = .{ .once = &.{ 6406 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6407 .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, 6408 .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ }, 6409 .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 6410 .{ ._, ._r, .bs, 
.tmp2q, .tmp2q, ._, ._ }, 6411 .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, 6412 .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6413 .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, 6414 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6415 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6416 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6417 } }, 6418 }, .{ 6419 .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, .slow_incdec, null }, 6420 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 6421 .patterns = &.{ 6422 .{ .src = .{ .to_mem, .none } }, 6423 }, 6424 .extra_temps = .{ 6425 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6426 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6427 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6428 .unused, 6429 .unused, 6430 .unused, 6431 }, 6432 .dst_temps = .{.mem}, 6433 .clobbers = .{ .eflags = true }, 6434 .each = .{ .once = &.{ 6435 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6436 .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 6437 .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 6438 .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ }, 6439 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 6440 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 6441 .{ ._, ._c, .st, ._, ._, ._, ._ }, 6442 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 6443 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 6444 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6445 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6446 } }, 6447 }, .{ 6448 .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, 6449 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 6450 .patterns = &.{ 6451 .{ .src = .{ .to_mem, .none } }, 6452 }, 6453 .extra_temps = .{ 6454 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6455 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6456 .{ .type = .u64, .kind = .{ .rc = 
.general_purpose } }, 6457 .unused, 6458 .unused, 6459 .unused, 6460 }, 6461 .dst_temps = .{.mem}, 6462 .clobbers = .{ .eflags = true }, 6463 .each = .{ .once = &.{ 6464 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6465 .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 6466 .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 6467 .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ }, 6468 .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, 6469 .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, 6470 .{ ._, ._c, .st, ._, ._, ._, ._ }, 6471 .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, 6472 .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, 6473 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6474 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6475 } }, 6476 }, .{ 6477 .required_features = .{ .@"64bit", .slow_incdec, null, null }, 6478 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 6479 .patterns = &.{ 6480 .{ .src = .{ .to_mem, .none } }, 6481 }, 6482 .extra_temps = .{ 6483 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6484 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6485 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6486 .unused, 6487 .unused, 6488 .unused, 6489 }, 6490 .dst_temps = .{.mem}, 6491 .clobbers = .{ .eflags = true }, 6492 .each = .{ .once = &.{ 6493 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6494 .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 6495 .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 6496 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6497 .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ }, 6498 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6499 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6500 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6501 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6502 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6503 } }, 6504 }, .{ 6505 
.required_features = .{ .@"64bit", null, null, null }, 6506 .src_constraints = .{ .{ .scalar_int = .qword }, .any }, 6507 .patterns = &.{ 6508 .{ .src = .{ .to_mem, .none } }, 6509 }, 6510 .extra_temps = .{ 6511 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6512 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6513 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6514 .unused, 6515 .unused, 6516 .unused, 6517 }, 6518 .dst_temps = .{.mem}, 6519 .clobbers = .{ .eflags = true }, 6520 .each = .{ .once = &.{ 6521 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6522 .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, 6523 .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, 6524 .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, 6525 .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ }, 6526 .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, 6527 .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, 6528 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, 6529 .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, 6530 .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, 6531 } }, 6532 }, .{ 6533 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 6534 .dst_constraints = .{.{ .scalar_int = .byte }}, 6535 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 6536 .patterns = &.{ 6537 .{ .src = .{ .to_mem, .none } }, 6538 }, 6539 .extra_temps = .{ 6540 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6541 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6542 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6543 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6544 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6545 .unused, 6546 }, 6547 .dst_temps = .{.mem}, 6548 .clobbers = .{ .eflags = true }, 6549 .each = .{ .once = &.{ 6550 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6551 .{ ._, ._, .lea, .tmp1q, 
.mem(.src0), ._, ._ }, 6552 .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, 6553 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6554 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6555 .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, 6556 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6557 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6558 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6559 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6560 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6561 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6562 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6563 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6564 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6565 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6566 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6567 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6568 } }, 6569 }, .{ 6570 .required_features = .{ .@"64bit", .lzcnt, null, null }, 6571 .dst_constraints = .{.{ .scalar_int = .byte }}, 6572 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 6573 .patterns = &.{ 6574 .{ .src = .{ .to_mem, .none } }, 6575 }, 6576 .extra_temps = .{ 6577 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6578 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6579 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6580 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6581 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6582 .unused, 6583 }, 6584 .dst_temps = .{.mem}, 6585 .clobbers = .{ .eflags = true }, 6586 .each = .{ .once = &.{ 6587 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6588 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6589 .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, 6590 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6591 .{ .@"1:", 
._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6592 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6593 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6594 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6595 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6596 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6597 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6598 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6599 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6600 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6601 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6602 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6603 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6604 } }, 6605 }, .{ 6606 .required_features = .{ .@"64bit", null, null, null }, 6607 .dst_constraints = .{.{ .scalar_int = .byte }}, 6608 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 6609 .patterns = &.{ 6610 .{ .src = .{ .to_mem, .none } }, 6611 }, 6612 .extra_temps = .{ 6613 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6614 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6615 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6616 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6617 .unused, 6618 .unused, 6619 }, 6620 .dst_temps = .{.mem}, 6621 .clobbers = .{ .eflags = true }, 6622 .each = .{ .once = &.{ 6623 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6624 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6625 .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, 6626 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6627 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6628 .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, 6629 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 6630 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6631 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6632 .{ ._, ._nc, .j, .@"1b", ._, 
._, ._ }, 6633 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6634 .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, 6635 .{ ._, ._, .neg, .tmp3b, ._, ._, ._ }, 6636 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6637 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6638 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6639 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6640 } }, 6641 }, .{ 6642 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 6643 .dst_constraints = .{.{ .scalar_int = .byte }}, 6644 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 6645 .patterns = &.{ 6646 .{ .src = .{ .to_mem, .none } }, 6647 }, 6648 .extra_temps = .{ 6649 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6650 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6651 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6652 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6653 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6654 .unused, 6655 }, 6656 .dst_temps = .{.mem}, 6657 .clobbers = .{ .eflags = true }, 6658 .each = .{ .once = &.{ 6659 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6660 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6661 .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, 6662 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6663 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6664 .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, 6665 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6666 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6667 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6668 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6669 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6670 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6671 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6672 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", 
.tmp2, .add_src0_bit_size, -64), ._, ._ }, 6673 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6674 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6675 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6676 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6677 } }, 6678 }, .{ 6679 .required_features = .{ .@"64bit", .lzcnt, null, null }, 6680 .dst_constraints = .{.{ .scalar_int = .byte }}, 6681 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 6682 .patterns = &.{ 6683 .{ .src = .{ .to_mem, .none } }, 6684 }, 6685 .extra_temps = .{ 6686 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6687 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6688 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6689 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6690 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6691 .unused, 6692 }, 6693 .dst_temps = .{.mem}, 6694 .clobbers = .{ .eflags = true }, 6695 .each = .{ .once = &.{ 6696 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6697 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6698 .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, 6699 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6700 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6701 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6702 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6703 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6704 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6705 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6706 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6707 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6708 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6709 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6710 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6711 .{ ._, ._, .add, .tmp0p, .si(1), 
._, ._ }, 6712 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6713 } }, 6714 }, .{ 6715 .required_features = .{ .@"64bit", null, null, null }, 6716 .dst_constraints = .{.{ .scalar_int = .byte }}, 6717 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 6718 .patterns = &.{ 6719 .{ .src = .{ .to_mem, .none } }, 6720 }, 6721 .extra_temps = .{ 6722 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6723 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6724 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6725 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6726 .unused, 6727 .unused, 6728 }, 6729 .dst_temps = .{.mem}, 6730 .clobbers = .{ .eflags = true }, 6731 .each = .{ .once = &.{ 6732 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6733 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6734 .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, 6735 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6736 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6737 .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, 6738 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 6739 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6740 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6741 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6742 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6743 .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, 6744 .{ ._, ._, .neg, .tmp3b, ._, ._, ._ }, 6745 .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, 6746 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6747 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6748 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6749 } }, 6750 }, .{ 6751 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 6752 .dst_constraints = .{.{ .scalar_int = .word }}, 6753 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = 
.qword } }, .any }, 6754 .patterns = &.{ 6755 .{ .src = .{ .to_mem, .none } }, 6756 }, 6757 .extra_temps = .{ 6758 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6759 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6760 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6761 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6762 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6763 .unused, 6764 }, 6765 .dst_temps = .{.mem}, 6766 .clobbers = .{ .eflags = true }, 6767 .each = .{ .once = &.{ 6768 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6769 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6770 .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, 6771 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6772 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6773 .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, 6774 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6775 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6776 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6777 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6778 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6779 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6780 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6781 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6782 .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, 6783 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6784 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6785 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6786 } }, 6787 }, .{ 6788 .required_features = .{ .@"64bit", .lzcnt, null, null }, 6789 .dst_constraints = .{.{ .scalar_int = .word }}, 6790 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 6791 .patterns = &.{ 6792 .{ .src = .{ .to_mem, .none } }, 6793 }, 6794 .extra_temps = .{ 6795 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 
6796 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6797 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6798 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6799 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6800 .unused, 6801 }, 6802 .dst_temps = .{.mem}, 6803 .clobbers = .{ .eflags = true }, 6804 .each = .{ .once = &.{ 6805 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6806 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6807 .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, 6808 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6809 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6810 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6811 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6812 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6813 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6814 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6815 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6816 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6817 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6818 .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, 6819 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6820 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6821 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6822 } }, 6823 }, .{ 6824 .required_features = .{ .@"64bit", null, null, null }, 6825 .dst_constraints = .{.{ .scalar_int = .word }}, 6826 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, 6827 .patterns = &.{ 6828 .{ .src = .{ .to_mem, .none } }, 6829 }, 6830 .extra_temps = .{ 6831 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6832 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6833 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6834 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6835 .unused, 6836 .unused, 6837 }, 6838 
.dst_temps = .{.mem}, 6839 .clobbers = .{ .eflags = true }, 6840 .each = .{ .once = &.{ 6841 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6842 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6843 .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, 6844 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6845 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6846 .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, 6847 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 6848 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6849 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6850 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6851 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6852 .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, 6853 .{ ._, ._, .neg, .tmp3d, ._, ._, ._ }, 6854 .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, 6855 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6856 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6857 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6858 } }, 6859 }, .{ 6860 .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, 6861 .dst_constraints = .{.{ .scalar_int = .word }}, 6862 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 6863 .patterns = &.{ 6864 .{ .src = .{ .to_mem, .none } }, 6865 }, 6866 .extra_temps = .{ 6867 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6868 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6869 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6870 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6871 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6872 .unused, 6873 }, 6874 .dst_temps = .{.mem}, 6875 .clobbers = .{ .eflags = true }, 6876 .each = .{ .once = &.{ 6877 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6878 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6879 
.{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, 6880 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6881 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6882 .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, 6883 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6884 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6885 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6886 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6887 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6888 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6889 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6890 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6891 .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, 6892 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6893 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6894 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6895 } }, 6896 }, .{ 6897 .required_features = .{ .@"64bit", .lzcnt, null, null }, 6898 .dst_constraints = .{.{ .scalar_int = .word }}, 6899 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 6900 .patterns = &.{ 6901 .{ .src = .{ .to_mem, .none } }, 6902 }, 6903 .extra_temps = .{ 6904 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6905 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6906 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6907 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6908 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6909 .unused, 6910 }, 6911 .dst_temps = .{.mem}, 6912 .clobbers = .{ .eflags = true }, 6913 .each = .{ .once = &.{ 6914 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6915 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6916 .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, 6917 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6918 .{ .@"1:", ._, .@"and", .tmp3q, 
.leai(.qword, .tmp1, .tmp2), ._, ._ }, 6919 .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, 6920 .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, 6921 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6922 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6923 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 6924 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6925 .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, 6926 .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, 6927 .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, 6928 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6929 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6930 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6931 } }, 6932 }, .{ 6933 .required_features = .{ .@"64bit", null, null, null }, 6934 .dst_constraints = .{.{ .scalar_int = .word }}, 6935 .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, 6936 .patterns = &.{ 6937 .{ .src = .{ .to_mem, .none } }, 6938 }, 6939 .extra_temps = .{ 6940 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 6941 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 6942 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 6943 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 6944 .unused, 6945 .unused, 6946 }, 6947 .dst_temps = .{.mem}, 6948 .clobbers = .{ .eflags = true }, 6949 .each = .{ .once = &.{ 6950 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, 6951 .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, 6952 .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, 6953 .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, 6954 .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, 6955 .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, 6956 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 6957 .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, 6958 .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, 6959 .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, 
6960 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 6961 .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, 6962 .{ ._, ._, .neg, .tmp3d, ._, ._, ._ }, 6963 .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, 6964 .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, 6965 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 6966 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 6967 } }, 6968 } }) catch |err| switch (err) { 6969 error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ 6970 @tagName(air_tag), 6971 cg.typeOf(ty_op.operand).fmt(pt), 6972 ops[0].tracking(cg), 6973 }), 6974 else => |e| return e, 6975 }; 6976 for (ops) |op| for (res) |r| { 6977 if (op.index == r.index) break; 6978 } else try op.die(cg); 6979 try res[0].moveTo(inst, cg); 6980 }, 6981 6982 .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: { 6983 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 6984 const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; 6985 switch (extra.compareOperator()) { 6986 .eq, .neq => {}, 6987 else => break :fallback try cg.airCmpVector(inst), 6988 } 6989 var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs }); 6990 var res: [1]Temp = undefined; 6991 switch (extra.compareOperator()) { 6992 .lt => unreachable, 6993 .lte => unreachable, 6994 .eq, .neq => |cmp_op| cg.select(&res, &.{ty_pl.ty.toType()}, &ops, switch (@as(Condition, switch (cmp_op) { 6995 else => unreachable, 6996 .eq => .e, 6997 .neq => .ne, 6998 })) { 6999 else => unreachable, 7000 inline .e, .ne => |cc| comptime &.{ .{ 7001 .required_features = .{ .avx2, null, null, null }, 7002 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7003 .patterns = &.{ 7004 .{ .src = .{ .to_ymm, .mem } }, 7005 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 7006 .{ .src = .{ .to_ymm, .to_ymm } }, 7007 }, 7008 .dst_temps = .{.{ 
.rc_mask = .{ .rc = .sse, .info = .{ 7009 .kind = .all, 7010 .inverted = switch (cc) { 7011 else => unreachable, 7012 .e => false, 7013 .ne => true, 7014 }, 7015 .scalar = .byte, 7016 } } }}, 7017 .each = .{ .once = &.{ 7018 .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ }, 7019 } }, 7020 }, .{ 7021 .required_features = .{ .avx2, null, null, null }, 7022 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7023 .patterns = &.{ 7024 .{ .src = .{ .to_ymm, .mem } }, 7025 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 7026 .{ .src = .{ .to_ymm, .to_ymm } }, 7027 }, 7028 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7029 .kind = .all, 7030 .inverted = switch (cc) { 7031 else => unreachable, 7032 .e => false, 7033 .ne => true, 7034 }, 7035 .scalar = .word, 7036 } } }}, 7037 .each = .{ .once = &.{ 7038 .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ }, 7039 } }, 7040 }, .{ 7041 .required_features = .{ .avx2, null, null, null }, 7042 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7043 .patterns = &.{ 7044 .{ .src = .{ .to_ymm, .mem } }, 7045 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 7046 .{ .src = .{ .to_ymm, .to_ymm } }, 7047 }, 7048 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7049 .kind = .all, 7050 .inverted = switch (cc) { 7051 else => unreachable, 7052 .e => false, 7053 .ne => true, 7054 }, 7055 .scalar = .dword, 7056 } } }}, 7057 .each = .{ .once = &.{ 7058 .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ }, 7059 } }, 7060 }, .{ 7061 .required_features = .{ .avx2, null, null, null }, 7062 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 7063 .patterns = &.{ 7064 .{ .src = .{ .to_ymm, .mem } }, 7065 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 7066 .{ .src = .{ .to_ymm, .to_ymm } }, 7067 }, 7068 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7069 .kind = .all, 7070 .inverted = switch (cc) { 7071 else => unreachable, 7072 .e => 
false, 7073 .ne => true, 7074 }, 7075 .scalar = .qword, 7076 } } }}, 7077 .each = .{ .once = &.{ 7078 .{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ }, 7079 } }, 7080 }, .{ 7081 .required_features = .{ .avx, null, null, null }, 7082 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7083 .patterns = &.{ 7084 .{ .src = .{ .to_xmm, .mem } }, 7085 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, 7086 .{ .src = .{ .to_xmm, .to_xmm } }, 7087 }, 7088 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7089 .kind = .all, 7090 .inverted = switch (cc) { 7091 else => unreachable, 7092 .e => false, 7093 .ne => true, 7094 }, 7095 .scalar = .byte, 7096 } } }}, 7097 .each = .{ .once = &.{ 7098 .{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ }, 7099 } }, 7100 }, .{ 7101 .required_features = .{ .avx, null, null, null }, 7102 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7103 .patterns = &.{ 7104 .{ .src = .{ .to_xmm, .mem } }, 7105 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, 7106 .{ .src = .{ .to_xmm, .to_xmm } }, 7107 }, 7108 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7109 .kind = .all, 7110 .inverted = switch (cc) { 7111 else => unreachable, 7112 .e => false, 7113 .ne => true, 7114 }, 7115 .scalar = .word, 7116 } } }}, 7117 .each = .{ .once = &.{ 7118 .{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ }, 7119 } }, 7120 }, .{ 7121 .required_features = .{ .avx, null, null, null }, 7122 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7123 .patterns = &.{ 7124 .{ .src = .{ .to_xmm, .mem } }, 7125 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, 7126 .{ .src = .{ .to_xmm, .to_xmm } }, 7127 }, 7128 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7129 .kind = .all, 7130 .inverted = switch (cc) { 7131 else => unreachable, 7132 .e => false, 7133 .ne => true, 7134 }, 7135 .scalar = .dword, 7136 } } }}, 7137 .each = .{ .once = &.{ 7138 .{ ._, .vp_d, .cmpeq, 
.dst0x, .src0x, .src1x, ._ }, 7139 } }, 7140 }, .{ 7141 .required_features = .{ .avx, null, null, null }, 7142 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 7143 .patterns = &.{ 7144 .{ .src = .{ .to_xmm, .mem } }, 7145 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, 7146 .{ .src = .{ .to_xmm, .to_xmm } }, 7147 }, 7148 .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ 7149 .kind = .all, 7150 .inverted = switch (cc) { 7151 else => unreachable, 7152 .e => false, 7153 .ne => true, 7154 }, 7155 .scalar = .qword, 7156 } } }}, 7157 .each = .{ .once = &.{ 7158 .{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ }, 7159 } }, 7160 }, .{ 7161 .required_features = .{ .sse2, null, null, null }, 7162 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7163 .patterns = &.{ 7164 .{ .src = .{ .to_mut_xmm, .mem } }, 7165 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 7166 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 7167 }, 7168 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7169 .kind = .all, 7170 .inverted = switch (cc) { 7171 else => unreachable, 7172 .e => false, 7173 .ne => true, 7174 }, 7175 .scalar = .byte, 7176 } } }}, 7177 .each = .{ .once = &.{ 7178 .{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ }, 7179 } }, 7180 }, .{ 7181 .required_features = .{ .sse2, null, null, null }, 7182 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7183 .patterns = &.{ 7184 .{ .src = .{ .to_mut_xmm, .mem } }, 7185 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 7186 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 7187 }, 7188 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7189 .kind = .all, 7190 .inverted = switch (cc) { 7191 else => unreachable, 7192 .e => false, 7193 .ne => true, 7194 }, 7195 .scalar = .word, 7196 } } }}, 7197 .each = .{ .once = &.{ 7198 .{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ }, 7199 } }, 7200 }, .{ 7201 .required_features = .{ .sse2, null, null, null }, 
7202 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7203 .patterns = &.{ 7204 .{ .src = .{ .to_mut_xmm, .mem } }, 7205 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 7206 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 7207 }, 7208 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7209 .kind = .all, 7210 .inverted = switch (cc) { 7211 else => unreachable, 7212 .e => false, 7213 .ne => true, 7214 }, 7215 .scalar = .dword, 7216 } } }}, 7217 .each = .{ .once = &.{ 7218 .{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ }, 7219 } }, 7220 }, .{ 7221 .required_features = .{ .sse4_1, null, null, null }, 7222 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 7223 .patterns = &.{ 7224 .{ .src = .{ .to_mut_xmm, .mem } }, 7225 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 7226 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 7227 }, 7228 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7229 .kind = .all, 7230 .inverted = switch (cc) { 7231 else => unreachable, 7232 .e => false, 7233 .ne => true, 7234 }, 7235 .scalar = .qword, 7236 } } }}, 7237 .each = .{ .once = &.{ 7238 .{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ }, 7239 } }, 7240 }, .{ 7241 .required_features = .{ .mmx, null, null, null }, 7242 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7243 .patterns = &.{ 7244 .{ .src = .{ .to_mut_mm, .mem } }, 7245 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, 7246 .{ .src = .{ .to_mut_mm, .to_mm } }, 7247 }, 7248 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7249 .kind = .all, 7250 .inverted = switch (cc) { 7251 else => unreachable, 7252 .e => false, 7253 .ne => true, 7254 }, 7255 .scalar = .byte, 7256 } } }}, 7257 .each = .{ .once = &.{ 7258 .{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ }, 7259 } }, 7260 }, .{ 7261 .required_features = .{ .mmx, null, null, null }, 7262 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7263 .patterns = 
&.{ 7264 .{ .src = .{ .to_mut_mm, .mem } }, 7265 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, 7266 .{ .src = .{ .to_mut_mm, .to_mm } }, 7267 }, 7268 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7269 .kind = .all, 7270 .inverted = switch (cc) { 7271 else => unreachable, 7272 .e => false, 7273 .ne => true, 7274 }, 7275 .scalar = .word, 7276 } } }}, 7277 .each = .{ .once = &.{ 7278 .{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ }, 7279 } }, 7280 }, .{ 7281 .required_features = .{ .mmx, null, null, null }, 7282 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7283 .patterns = &.{ 7284 .{ .src = .{ .to_mut_mm, .mem } }, 7285 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, 7286 .{ .src = .{ .to_mut_mm, .to_mm } }, 7287 }, 7288 .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ 7289 .kind = .all, 7290 .inverted = switch (cc) { 7291 else => unreachable, 7292 .e => false, 7293 .ne => true, 7294 }, 7295 .scalar = .dword, 7296 } } }}, 7297 .each = .{ .once = &.{ 7298 .{ ._, .p_d, .cmpeq, .dst0q, .src1q, ._, ._ }, 7299 } }, 7300 }, .{ 7301 .src_constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } }, 7302 .patterns = &.{ 7303 .{ .src = .{ .mut_mem, .imm8 } }, 7304 .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, 7305 .{ .src = .{ .to_mut_gpr, .imm8 } }, 7306 .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7307 .{ .src = .{ .mut_mem, .to_gpr } }, 7308 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 7309 .{ .src = .{ .to_mut_gpr, .mem } }, 7310 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7311 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 7312 }, 7313 .dst_temps = .{.{ .ref = .src0 }}, 7314 .clobbers = .{ .eflags = true }, 7315 .each = .{ .once = switch (cc) { 7316 else => unreachable, 7317 .e => &.{ 7318 .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ }, 7319 .{ ._, ._, .not, .dst0b, ._, ._, ._ }, 7320 }, 7321 .ne => &.{ 7322 .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ }, 7323 }, 
7324 } }, 7325 }, .{ 7326 .src_constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } }, 7327 .patterns = &.{ 7328 .{ .src = .{ .mut_mem, .imm16 } }, 7329 .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, 7330 .{ .src = .{ .to_mut_gpr, .imm16 } }, 7331 .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7332 .{ .src = .{ .mut_mem, .to_gpr } }, 7333 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 7334 .{ .src = .{ .to_mut_gpr, .mem } }, 7335 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7336 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 7337 }, 7338 .dst_temps = .{.{ .ref = .src0 }}, 7339 .clobbers = .{ .eflags = true }, 7340 .each = .{ .once = switch (cc) { 7341 else => unreachable, 7342 .e => &.{ 7343 .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ }, 7344 .{ ._, ._, .not, .dst0w, ._, ._, ._ }, 7345 }, 7346 .ne => &.{ 7347 .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ }, 7348 }, 7349 } }, 7350 }, .{ 7351 .src_constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } }, 7352 .patterns = &.{ 7353 .{ .src = .{ .mut_mem, .imm32 } }, 7354 .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, 7355 .{ .src = .{ .to_mut_gpr, .imm32 } }, 7356 .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7357 .{ .src = .{ .mut_mem, .to_gpr } }, 7358 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 7359 .{ .src = .{ .to_mut_gpr, .mem } }, 7360 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7361 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 7362 }, 7363 .dst_temps = .{.{ .ref = .src0 }}, 7364 .clobbers = .{ .eflags = true }, 7365 .each = .{ .once = switch (cc) { 7366 else => unreachable, 7367 .e => &.{ 7368 .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ }, 7369 .{ ._, ._, .not, .dst0d, ._, ._, ._ }, 7370 }, 7371 .ne => &.{ 7372 .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ }, 7373 }, 7374 } }, 7375 }, .{ 7376 .required_features = .{ .@"64bit", null, null, null }, 7377 .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } }, 
7378 .patterns = &.{ 7379 .{ .src = .{ .mut_mem, .simm32 } }, 7380 .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, 7381 .{ .src = .{ .to_mut_gpr, .simm32 } }, 7382 .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7383 .{ .src = .{ .mut_mem, .to_gpr } }, 7384 .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, 7385 .{ .src = .{ .to_mut_gpr, .mem } }, 7386 .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, 7387 .{ .src = .{ .to_mut_gpr, .to_gpr } }, 7388 }, 7389 .dst_temps = .{.{ .ref = .src0 }}, 7390 .clobbers = .{ .eflags = true }, 7391 .each = .{ .once = switch (cc) { 7392 else => unreachable, 7393 .e => &.{ 7394 .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ }, 7395 .{ ._, ._, .not, .dst0q, ._, ._, ._ }, 7396 }, 7397 .ne => &.{ 7398 .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ }, 7399 }, 7400 } }, 7401 }, .{ 7402 .src_constraints = .{ .any_bool_vec, .any_bool_vec }, 7403 .patterns = &.{ 7404 .{ .src = .{ .to_mem, .to_mem } }, 7405 }, 7406 .extra_temps = .{ 7407 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7408 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 7409 .unused, 7410 .unused, 7411 .unused, 7412 .unused, 7413 }, 7414 .dst_temps = .{.mem}, 7415 .clobbers = .{ .eflags = true }, 7416 .each = .{ .once = switch (cc) { 7417 else => unreachable, 7418 .e => &.{ 7419 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7420 .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, 7421 .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, 7422 .{ ._, ._, .not, .tmp1p, ._, ._, ._ }, 7423 .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, 7424 .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, 7425 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7426 }, 7427 .ne => &.{ 7428 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7429 .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, 7430 .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), 
._, ._ }, 7431 .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, 7432 .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, 7433 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7434 }, 7435 } }, 7436 }, .{ 7437 .required_features = .{ .avx2, null, null, null }, 7438 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7439 .patterns = &.{ 7440 .{ .src = .{ .to_mem, .to_mem } }, 7441 }, 7442 .extra_temps = .{ 7443 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7444 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7445 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7446 .{ .kind = .{ .rc = .sse } }, 7447 .unused, 7448 .unused, 7449 }, 7450 .dst_temps = .{.mem}, 7451 .clobbers = .{ .eflags = true }, 7452 .each = .{ .once = switch (cc) { 7453 else => unreachable, 7454 .e => &.{ 7455 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7456 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7457 .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7458 .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, 7459 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, 7460 .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, 7461 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7462 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 7463 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7464 }, 7465 .ne => &.{ 7466 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7467 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7468 .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7469 .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, 7470 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, 7471 .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, 7472 .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, 7473 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7474 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 7475 .{ ._, ._nc, .j, 
.@"0b", ._, ._, ._ }, 7476 }, 7477 } }, 7478 }, .{ 7479 .required_features = .{ .avx2, null, null, null }, 7480 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7481 .patterns = &.{ 7482 .{ .src = .{ .to_mem, .to_mem } }, 7483 }, 7484 .extra_temps = .{ 7485 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7486 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7487 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 7488 .{ .kind = .{ .rc = .sse } }, 7489 .unused, 7490 .unused, 7491 }, 7492 .dst_temps = .{.mem}, 7493 .clobbers = .{ .eflags = true }, 7494 .each = .{ .once = switch (cc) { 7495 else => unreachable, 7496 .e => &.{ 7497 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7498 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7499 .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7500 .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, 7501 .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, 7502 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, 7503 .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, 7504 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7505 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 7506 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7507 }, 7508 .ne => &.{ 7509 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7510 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7511 .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7512 .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, 7513 .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, 7514 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, 7515 .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, 7516 .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, 7517 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7518 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 7519 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7520 }, 7521 } }, 7522 }, .{ 7523 
.required_features = .{ .avx2, null, null, null }, 7524 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7525 .patterns = &.{ 7526 .{ .src = .{ .to_mem, .to_mem } }, 7527 }, 7528 .extra_temps = .{ 7529 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7530 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7531 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7532 .{ .kind = .{ .rc = .sse } }, 7533 .unused, 7534 .unused, 7535 }, 7536 .dst_temps = .{.mem}, 7537 .clobbers = .{ .eflags = true }, 7538 .each = .{ .once = switch (cc) { 7539 else => unreachable, 7540 .e => &.{ 7541 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7542 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7543 .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7544 .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, 7545 .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, 7546 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 7547 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 7548 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 7549 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7550 }, 7551 .ne => &.{ 7552 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7553 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7554 .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7555 .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, 7556 .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, 7557 .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, 7558 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 7559 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 7560 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 7561 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7562 }, 7563 } }, 7564 }, .{ 7565 .required_features = .{ .avx2, null, null, null }, 7566 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 7567 .patterns = &.{ 7568 .{ .src = .{ 
.to_mem, .to_mem } }, 7569 }, 7570 .extra_temps = .{ 7571 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7572 .{ .type = .u32, .kind = .{ .reg = .rcx } }, 7573 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7574 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7575 .{ .kind = .{ .rc = .sse } }, 7576 .unused, 7577 }, 7578 .dst_temps = .{.mem}, 7579 .clobbers = .{ .eflags = true }, 7580 .each = .{ .once = switch (cc) { 7581 else => unreachable, 7582 .e => &.{ 7583 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7584 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7585 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7586 .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7587 .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ }, 7588 .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, 7589 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7590 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7591 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7592 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7593 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7594 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7595 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7596 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7597 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7598 .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, 7599 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7600 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7601 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7602 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7603 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7604 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7605 }, 7606 .ne => &.{ 7607 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7608 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7609 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7610 .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 7611 .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, 
.memia(.src1y, .tmp0, .add_size), ._ }, 7612 .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, 7613 .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, 7614 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7615 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7616 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7617 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7618 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7619 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7620 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7621 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7622 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7623 .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, 7624 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7625 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7626 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7627 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7628 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7629 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7630 }, 7631 } }, 7632 }, .{ 7633 .required_features = .{ .avx, null, null, null }, 7634 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7635 .patterns = &.{ 7636 .{ .src = .{ .to_mem, .to_mem } }, 7637 }, 7638 .extra_temps = .{ 7639 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7640 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7641 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 7642 .{ .kind = .{ .rc = .sse } }, 7643 .unused, 7644 .unused, 7645 }, 7646 .dst_temps = .{.mem}, 7647 .clobbers = .{ .eflags = true }, 7648 .each = .{ .once = switch (cc) { 7649 else => unreachable, 7650 .e => &.{ 7651 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7652 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7653 .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7654 .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, 7655 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7656 .{ ._, ._, .mov, .memi(.dst0w, 
.tmp1), .tmp2w, ._, ._ }, 7657 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7658 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7659 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7660 }, 7661 .ne => &.{ 7662 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7663 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7664 .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7665 .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, 7666 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7667 .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, 7668 .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, 7669 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7670 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7671 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7672 }, 7673 } }, 7674 }, .{ 7675 .required_features = .{ .avx, null, null, null }, 7676 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7677 .patterns = &.{ 7678 .{ .src = .{ .to_mem, .to_mem } }, 7679 }, 7680 .extra_temps = .{ 7681 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7682 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7683 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7684 .{ .kind = .{ .rc = .sse } }, 7685 .unused, 7686 .unused, 7687 }, 7688 .dst_temps = .{.mem}, 7689 .clobbers = .{ .eflags = true }, 7690 .each = .{ .once = switch (cc) { 7691 else => unreachable, 7692 .e => &.{ 7693 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7694 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7695 .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7696 .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, 7697 .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, 7698 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7699 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 7700 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 7701 .{ ._, ._, .add, .tmp0p, 
.si(16), ._, ._ }, 7702 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7703 }, 7704 .ne => &.{ 7705 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7706 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7707 .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7708 .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, 7709 .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, 7710 .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7711 .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, 7712 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 7713 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 7714 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7715 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7716 }, 7717 } }, 7718 }, .{ 7719 .required_features = .{ .avx, null, null, null }, 7720 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7721 .patterns = &.{ 7722 .{ .src = .{ .to_mem, .to_mem } }, 7723 }, 7724 .extra_temps = .{ 7725 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7726 .{ .type = .u32, .kind = .{ .reg = .rcx } }, 7727 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7728 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7729 .{ .kind = .{ .rc = .sse } }, 7730 .unused, 7731 }, 7732 .dst_temps = .{.mem}, 7733 .clobbers = .{ .eflags = true }, 7734 .each = .{ .once = switch (cc) { 7735 else => unreachable, 7736 .e => &.{ 7737 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7738 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7739 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7740 .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7741 .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, 7742 .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 7743 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7744 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7745 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7746 .{ ._, ._, .@"test", 
.tmp1d, .si(0b111), ._, ._ }, 7747 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7748 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7749 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7750 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7751 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7752 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 7753 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7754 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7755 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7756 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7757 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7758 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7759 }, 7760 .ne => &.{ 7761 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7762 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7763 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7764 .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7765 .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, 7766 .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 7767 .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, 7768 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7769 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7770 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7771 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7772 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7773 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7774 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7775 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7776 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7777 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 7778 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7779 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7780 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7781 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7782 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7783 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7784 }, 7785 } }, 7786 }, .{ 7787 .required_features = .{ .avx, null, null, 
null }, 7788 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 7789 .patterns = &.{ 7790 .{ .src = .{ .to_mem, .to_mem } }, 7791 }, 7792 .extra_temps = .{ 7793 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7794 .{ .type = .u32, .kind = .{ .reg = .rcx } }, 7795 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7796 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7797 .{ .kind = .{ .rc = .sse } }, 7798 .unused, 7799 }, 7800 .dst_temps = .{.mem}, 7801 .clobbers = .{ .eflags = true }, 7802 .each = .{ .once = switch (cc) { 7803 else => unreachable, 7804 .e => &.{ 7805 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7806 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7807 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7808 .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7809 .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, 7810 .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 7811 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7812 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7813 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7814 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7815 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7816 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7817 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7818 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7819 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7820 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 7821 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7822 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7823 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7824 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7825 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7826 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7827 }, 7828 .ne => &.{ 7829 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7830 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7831 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, 
._ }, 7832 .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7833 .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, 7834 .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 7835 .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, 7836 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7837 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7838 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7839 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7840 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7841 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7842 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7843 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7844 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7845 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 7846 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7847 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7848 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7849 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7850 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7851 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7852 }, 7853 } }, 7854 }, .{ 7855 .required_features = .{ .sse2, null, null, null }, 7856 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 7857 .patterns = &.{ 7858 .{ .src = .{ .to_mem, .to_mem } }, 7859 }, 7860 .extra_temps = .{ 7861 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7862 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7863 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 7864 .{ .kind = .{ .rc = .sse } }, 7865 .unused, 7866 .unused, 7867 }, 7868 .dst_temps = .{.mem}, 7869 .clobbers = .{ .eflags = true }, 7870 .each = .{ .once = switch (cc) { 7871 else => unreachable, 7872 .e => &.{ 7873 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7874 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7875 .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7876 .{ ._, .p_b, .cmpeq, .tmp3x, 
.memia(.src1x, .tmp0, .add_size), ._, ._ }, 7877 .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7878 .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, 7879 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7880 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7881 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7882 }, 7883 .ne => &.{ 7884 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7885 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7886 .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7887 .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 7888 .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7889 .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, 7890 .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, 7891 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 7892 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7893 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7894 }, 7895 } }, 7896 }, .{ 7897 .required_features = .{ .sse2, null, null, null }, 7898 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 7899 .patterns = &.{ 7900 .{ .src = .{ .to_mem, .to_mem } }, 7901 }, 7902 .extra_temps = .{ 7903 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7904 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 7905 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7906 .{ .kind = .{ .rc = .sse } }, 7907 .unused, 7908 .unused, 7909 }, 7910 .dst_temps = .{.mem}, 7911 .clobbers = .{ .eflags = true }, 7912 .each = .{ .once = switch (cc) { 7913 else => unreachable, 7914 .e => &.{ 7915 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7916 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7917 .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7918 .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 7919 .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, 7920 .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7921 .{ ._, ._, .mov, .memi(.dst0b, 
.tmp1), .tmp2b, ._, ._ }, 7922 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 7923 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7924 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7925 }, 7926 .ne => &.{ 7927 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7928 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7929 .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7930 .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 7931 .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, 7932 .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, 7933 .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, 7934 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 7935 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 7936 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 7937 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7938 }, 7939 } }, 7940 }, .{ 7941 .required_features = .{ .sse2, null, null, null }, 7942 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 7943 .patterns = &.{ 7944 .{ .src = .{ .to_mem, .to_mem } }, 7945 }, 7946 .extra_temps = .{ 7947 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 7948 .{ .type = .u32, .kind = .{ .reg = .rcx } }, 7949 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7950 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 7951 .{ .kind = .{ .rc = .sse } }, 7952 .unused, 7953 }, 7954 .dst_temps = .{.mem}, 7955 .clobbers = .{ .eflags = true }, 7956 .each = .{ .once = switch (cc) { 7957 else => unreachable, 7958 .e => &.{ 7959 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7960 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7961 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7962 .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7963 .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 7964 .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 7965 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7966 .{ ._, ._, .@"or", .tmp2b, .tmp3b, 
._, ._ }, 7967 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7968 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7969 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7970 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7971 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7972 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7973 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7974 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 7975 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 7976 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7977 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 7978 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7979 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7980 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 7981 }, 7982 .ne => &.{ 7983 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 7984 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 7985 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7986 .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 7987 .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 7988 .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 7989 .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, 7990 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 7991 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 7992 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 7993 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 7994 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 7995 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 7996 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 7997 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 7998 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 7999 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 8000 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8001 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8002 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8003 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8004 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 8005 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), 
.tmp2b, ._, ._ }, 8006 }, 8007 } }, 8008 }, .{ 8009 .required_features = .{ .sse4_1, null, null, null }, 8010 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 8011 .patterns = &.{ 8012 .{ .src = .{ .to_mem, .to_mem } }, 8013 }, 8014 .extra_temps = .{ 8015 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8016 .{ .type = .u32, .kind = .{ .reg = .rcx } }, 8017 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8018 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8019 .{ .kind = .{ .rc = .sse } }, 8020 .unused, 8021 }, 8022 .dst_temps = .{.mem}, 8023 .clobbers = .{ .eflags = true }, 8024 .each = .{ .once = switch (cc) { 8025 else => unreachable, 8026 .e => &.{ 8027 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8028 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8029 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8030 .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 8031 .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 8032 .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 8033 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 8034 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 8035 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 8036 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8037 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8038 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8039 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 8040 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 8041 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8042 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 8043 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8044 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8045 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8046 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8047 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 8048 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 8049 }, 8050 .ne => &.{ 8051 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 
8052 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8053 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8054 .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 8055 .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 8056 .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, 8057 .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, 8058 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, 8059 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, 8060 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 8061 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8062 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8063 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8064 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 8065 .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, 8066 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8067 .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, 8068 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8069 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8070 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8071 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8072 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 8073 .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, 8074 }, 8075 } }, 8076 }, .{ 8077 .required_features = .{ .sse, .mmx, null, null }, 8078 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 8079 .patterns = &.{ 8080 .{ .src = .{ .to_mem, .to_mem } }, 8081 }, 8082 .extra_temps = .{ 8083 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8084 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 8085 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8086 .{ .kind = .{ .rc = .mmx } }, 8087 .unused, 8088 .unused, 8089 }, 8090 .dst_temps = .{.mem}, 8091 .clobbers = .{ .eflags = true }, 8092 .each = .{ .once = switch (cc) { 8093 else => unreachable, 8094 .e => &.{ 8095 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8096 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8097 .{ .@"0:", ._q, .mov, .tmp3q, 
.memia(.src0q, .tmp0, .add_size), ._, ._ }, 8098 .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 8099 .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, 8100 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 8101 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 8102 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 8103 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8104 }, 8105 .ne => &.{ 8106 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8107 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8108 .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 8109 .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 8110 .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, 8111 .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, 8112 .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, 8113 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 8114 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 8115 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8116 }, 8117 } }, 8118 }, .{ 8119 .required_features = .{ .sse, .mmx, null, null }, 8120 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 8121 .patterns = &.{ 8122 .{ .src = .{ .to_mem, .to_mem } }, 8123 }, 8124 .extra_temps = .{ 8125 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8126 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 8127 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8128 .{ .kind = .{ .rc = .mmx } }, 8129 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8130 .{ .kind = .{ .rc = .mmx } }, 8131 }, 8132 .dst_temps = .{.mem}, 8133 .clobbers = .{ .eflags = true }, 8134 .each = .{ .once = switch (cc) { 8135 else => unreachable, 8136 .e => &.{ 8137 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8138 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8139 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8140 .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, 8141 .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), 
._, ._ }, 8142 .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 8143 .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, 8144 .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, 8145 .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, 8146 .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, 8147 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 8148 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8149 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8150 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8151 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8152 .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, 8153 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8154 .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, 8155 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8156 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8157 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8158 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8159 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8160 .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, 8161 }, 8162 .ne => &.{ 8163 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8164 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8165 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8166 .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, 8167 .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 8168 .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 8169 .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, 8170 .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, 8171 .{ ._, ._, .xor, .tmp4b, .si(0b1111), ._, ._ }, 8172 .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, 8173 .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, 8174 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, 8175 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8176 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8177 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8178 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8179 .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, 8180 
.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8181 .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, 8182 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8183 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8184 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8185 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8186 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8187 .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, 8188 }, 8189 } }, 8190 }, .{ 8191 .required_features = .{ .sse, .mmx, null, null }, 8192 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 8193 .patterns = &.{ 8194 .{ .src = .{ .to_mem, .to_mem } }, 8195 }, 8196 .extra_temps = .{ 8197 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8198 .{ .type = .u32, .kind = .{ .reg = .rcx } }, 8199 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8200 .{ .kind = .{ .rc = .mmx } }, 8201 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8202 .{ .kind = .{ .rc = .mmx } }, 8203 }, 8204 .dst_temps = .{.mem}, 8205 .clobbers = .{ .eflags = true }, 8206 .each = .{ .once = switch (cc) { 8207 else => unreachable, 8208 .e => &.{ 8209 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8210 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8211 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8212 .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, 8213 .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 8214 .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 8215 .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ }, 8216 .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, 8217 .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, 8218 .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, 8219 .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, 8220 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 8221 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8222 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8223 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8224 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8225 .{ 
._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, 8226 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8227 .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, 8228 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8229 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8230 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8231 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8232 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8233 .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, 8234 }, 8235 .ne => &.{ 8236 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8237 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8238 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8239 .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, 8240 .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 8241 .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 8242 .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ }, 8243 .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, 8244 .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, 8245 .{ ._, ._, .xor, .tmp4b, .si(0b11), ._, ._ }, 8246 .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, 8247 .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, 8248 .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, 8249 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8250 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8251 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8252 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8253 .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, 8254 .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, 8255 .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, 8256 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8257 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, 8258 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8259 .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, 8260 .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, 8261 .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, 8262 }, 8263 } }, 8264 }, .{ 8265 .dst_constraints = .{.{ .bool_vec = .byte }}, 8266 .src_constraints = .{ .{ 
.scalar_int = .byte }, .{ .scalar_int = .byte } }, 8267 .patterns = &.{ 8268 .{ .src = .{ .to_mem, .to_mem } }, 8269 }, 8270 .extra_temps = .{ 8271 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8272 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8273 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8274 .unused, 8275 .unused, 8276 .unused, 8277 }, 8278 .dst_temps = .{.{ .rc = .general_purpose }}, 8279 .clobbers = .{ .eflags = true }, 8280 .each = .{ .once = &.{ 8281 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, 8282 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8283 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8284 .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, 8285 .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, 8286 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8287 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, 8288 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, 8289 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8290 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 8291 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8292 } }, 8293 }, .{ 8294 .dst_constraints = .{.{ .bool_vec = .byte }}, 8295 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 8296 .patterns = &.{ 8297 .{ .src = .{ .to_mem, .to_mem } }, 8298 }, 8299 .extra_temps = .{ 8300 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8301 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8302 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 8303 .unused, 8304 .unused, 8305 .unused, 8306 }, 8307 .dst_temps = .{.{ .rc = .general_purpose }}, 8308 .clobbers = .{ .eflags = true }, 8309 .each = .{ .once = &.{ 8310 .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, 8311 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8312 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8313 .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, 8314 .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, 8315 .{ ._, 
.fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            // Only the low byte of tmp2 holds the setcc result (the rest still
                            // holds the loaded word) and dst0 was cleared with a byte-sized xor,
                            // so shift and accumulate at byte width like the other
                            // `.bool_vec = .byte` patterns instead of at dword width.
                            .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        // Byte-sized bool vector result from dword elements: one compare per
                        // element, each result bit shifted into place by the CL bit index.
                        .dst_constraints = .{.{ .bool_vec = .byte }},
                        .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } },
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                            .unused,
                            .unused,
                            .unused,
                        },
                        .dst_temps = .{.{ .rc = .general_purpose }},
                        .clobbers = .{ .eflags = true },
                        .each = .{ .once = &.{
                            .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
                            .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
                            .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        .required_features = .{ .@"64bit", null, null, null },
                        .dst_constraints = .{.{ .bool_vec = .byte }},
                        .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                            .unused,
                            .unused,
.unused,
                        },
                        .dst_temps = .{.{ .rc = .general_purpose }},
                        .clobbers = .{ .eflags = true },
                        .each = .{ .once = &.{
                            .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
                            .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
                            .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            // Each element is a qword, so the byte cursor must advance by 8
                            // (the byte/word/dword patterns advance by 1/2/4 respectively).
                            .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        // Fallback for a byte-sized bool vector with no element-size constraint:
                        // the inner loop (label 1) xors the operands a pointer-sized limb at a
                        // time and ors the differences into tmp3; the outer loop (label 0)
                        // turns that into one result bit per element.
                        .dst_constraints = .{.{ .bool_vec = .byte }},
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .unused,
                        },
                        .dst_temps = .{.{ .rc = .general_purpose }},
                        .clobbers = .{ .eflags = true },
                        .each = .{ .once = &.{
                            .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ },
                            .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
                            .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
                            .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
                            .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
                            .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
                            .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
                            .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
                            // NOTE(review): `jb` is taken only when the `sub` above borrows,
                            // i.e. when tmp2 was already 0 - confirm that this loop really
                            // visits every limb of a multi-limb element.
                            .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
                            .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
                            .{
._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8410 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, 8411 .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, 8412 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8413 .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, 8414 .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, 8415 } }, 8416 }, .{ 8417 .dst_constraints = .{.{ .bool_vec = .dword }}, 8418 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 8419 .patterns = &.{ 8420 .{ .src = .{ .to_mem, .to_mem } }, 8421 }, 8422 .extra_temps = .{ 8423 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8424 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8425 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 8426 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8427 .unused, 8428 .unused, 8429 }, 8430 .dst_temps = .{.{ .rc = .general_purpose }}, 8431 .clobbers = .{ .eflags = true }, 8432 .each = .{ .once = &.{ 8433 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8434 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8435 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8436 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8437 .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, 8438 .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, 8439 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8440 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, 8441 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, 8442 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8443 .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, 8444 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8445 } }, 8446 }, .{ 8447 .dst_constraints = .{.{ .bool_vec = .dword }}, 8448 .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, 8449 .patterns = &.{ 8450 .{ .src = .{ .to_mem, .to_mem } }, 8451 }, 8452 .extra_temps = .{ 8453 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8454 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8455 .{ .type = .u32, .kind = .{ .rc = 
.general_purpose } }, 8456 .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, 8457 .unused, 8458 .unused, 8459 }, 8460 .dst_temps = .{.{ .rc = .general_purpose }}, 8461 .clobbers = .{ .eflags = true }, 8462 .each = .{ .once = &.{ 8463 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8464 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8465 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8466 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8467 .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, 8468 .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, 8469 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8470 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, 8471 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, 8472 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8473 .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, 8474 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8475 } }, 8476 }, .{ 8477 .dst_constraints = .{.{ .bool_vec = .dword }}, 8478 .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 8479 .patterns = &.{ 8480 .{ .src = .{ .to_mem, .to_mem } }, 8481 }, 8482 .extra_temps = .{ 8483 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8484 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8485 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 8486 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 8487 .unused, 8488 .unused, 8489 }, 8490 .dst_temps = .{.{ .rc = .general_purpose }}, 8491 .clobbers = .{ .eflags = true }, 8492 .each = .{ .once = &.{ 8493 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8494 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8495 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8496 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8497 .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, 8498 .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, 8499 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8500 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, 8501 .{ 
._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        // Dword-sized bool vector result from qword elements: tmp3 holds the
                        // loaded element, tmp2 the zero-extended setcc bit that is shifted
                        // into place by the CL bit index and ored into dst0.
                        .required_features = .{ .@"64bit", null, null, null },
                        .dst_constraints = .{.{ .bool_vec = .dword }},
                        .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } },
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                            .unused,
                            .unused,
                        },
                        .dst_temps = .{.{ .rc = .general_purpose }},
                        .clobbers = .{ .eflags = true },
                        .each = .{ .once = &.{
                            .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
                            .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                            .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
                            .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            // Each element is a qword, so the byte cursor must advance by 8
                            // (the byte/word/dword patterns advance by 1/2/4 respectively).
                            .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        .dst_constraints = .{.{ .bool_vec = .dword }},
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.unused, 8549 }, 8550 .dst_temps = .{.{ .rc = .general_purpose }}, 8551 .clobbers = .{ .eflags = true }, 8552 .each = .{ .once = &.{ 8553 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8554 .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, 8555 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8556 .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, 8557 .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, 8558 .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, 8559 .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, 8560 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, 8561 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, 8562 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ }, 8563 .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, 8564 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8565 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, 8566 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8567 .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, 8568 .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, 8569 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8570 .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, 8571 .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, 8572 } }, 8573 }, .{ 8574 .required_features = .{ .@"64bit", null, null, null }, 8575 .dst_constraints = .{.{ .bool_vec = .qword }}, 8576 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 8577 .patterns = &.{ 8578 .{ .src = .{ .to_mem, .to_mem } }, 8579 }, 8580 .extra_temps = .{ 8581 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8582 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8583 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 8584 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8585 .unused, 8586 .unused, 8587 }, 8588 .dst_temps = .{.{ .rc = .general_purpose }}, 8589 .clobbers = .{ .eflags = true }, 8590 .each = .{ .once = &.{ 8591 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8592 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8593 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8594 .{ 
.@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                            .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ },
                            .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        // Qword-sized bool vector result from word elements: tmp3 holds the
                        // loaded element, tmp2 the zero-extended setcc bit that is shifted
                        // into place by the CL bit index and ored into dst0.
                        .required_features = .{ .@"64bit", null, null, null },
                        .dst_constraints = .{.{ .bool_vec = .qword }},
                        .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } },
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
                            .unused,
                            .unused,
                        },
                        .dst_temps = .{.{ .rc = .general_purpose }},
                        .clobbers = .{ .eflags = true },
                        .each = .{ .once = &.{
                            .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                            // tmp1 (CL) is the bit index used as the shift count below; it has
                            // to start at zero, as in every sibling pattern.
                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
                            .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                            .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ },
                            .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        .required_features = .{ .@"64bit", null, null, null },
                        .dst_constraints = .{.{ .bool_vec = .qword }},
                        .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int =
.dword } }, 8638 .patterns = &.{ 8639 .{ .src = .{ .to_mem, .to_mem } }, 8640 }, 8641 .extra_temps = .{ 8642 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8643 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8644 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 8645 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 8646 .unused, 8647 .unused, 8648 }, 8649 .dst_temps = .{.{ .rc = .general_purpose }}, 8650 .clobbers = .{ .eflags = true }, 8651 .each = .{ .once = &.{ 8652 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8653 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8654 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8655 .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8656 .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, 8657 .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, 8658 .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, 8659 .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, 8660 .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, 8661 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8662 .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, 8663 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8664 } }, 8665 }, .{ 8666 .required_features = .{ .@"64bit", null, null, null }, 8667 .dst_constraints = .{.{ .bool_vec = .qword }}, 8668 .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, 8669 .patterns = &.{ 8670 .{ .src = .{ .to_mem, .to_mem } }, 8671 }, 8672 .extra_temps = .{ 8673 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8674 .{ .type = .u8, .kind = .{ .reg = .cl } }, 8675 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 8676 .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, 8677 .unused, 8678 .unused, 8679 }, 8680 .dst_temps = .{.{ .rc = .general_purpose }}, 8681 .clobbers = .{ .eflags = true }, 8682 .each = .{ .once = &.{ 8683 .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, 8684 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8685 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                            // Load the element into tmp3, not tmp2: tmp2 was just zeroed so that
                            // after setcc it holds exactly one bit. Loading the qword into tmp2
                            // would leave the element's upper 56 bits in the register that is
                            // shifted and ored into dst0 below.
                            .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
                            .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ },
                            .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                        } },
                    }, .{
                        // Fallback for a qword-sized bool vector with no element-size constraint:
                        // the inner loop (label 1) xors the operands a pointer-sized limb at a
                        // time and ors the differences into tmp3; the outer loop (label 0)
                        // turns that into one result bit per element.
                        .required_features = .{ .@"64bit", null, null, null },
                        .dst_constraints = .{.{ .bool_vec = .qword }},
                        .patterns = &.{
                            .{ .src = .{ .to_mem, .to_mem } },
                        },
                        .extra_temps = .{
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .u8, .kind = .{ .reg = .cl } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                            .unused,
                        },
                        .dst_temps = .{.{ .rc = .general_purpose }},
                        .clobbers = .{ .eflags = true },
                        .each = .{ .once = &.{
                            .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
                            .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
                            .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ },
                            .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
                            .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ },
                            .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ },
                            .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
                            .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
                            .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
                            // NOTE(review): `jb` is taken only when the `sub` above borrows,
                            // i.e. when tmp2 was already 0 - confirm that this loop really
                            // visits every limb of a multi-limb element.
                            .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
                            // Re-zero tmp2 so that after setcc it holds exactly one bit.
                            .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                            .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
                            .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ },
                            .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ },
                            .{ ._, ._, .@"or", .dst0q,
.tmp2q, ._, ._ }, 8729 .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, 8730 .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, 8731 .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, 8732 } }, 8733 }, .{ 8734 .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, 8735 .patterns = &.{ 8736 .{ .src = .{ .to_mem, .to_mem } }, 8737 }, 8738 .extra_temps = .{ 8739 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8740 .{ .type = .u32, .kind = .{ .reg = .ecx } }, 8741 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 8742 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 8743 .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, 8744 .unused, 8745 }, 8746 .dst_temps = .{.mem}, 8747 .clobbers = .{ .eflags = true }, 8748 .each = .{ .once = &.{ 8749 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8750 .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, 8751 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8752 .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, 8753 .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, 8754 .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, 8755 .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, 8756 .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, 8757 .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, 8758 .{ ._, ._, .add, .tmp1d, .si(1), ._, ._ }, 8759 .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ }, 8760 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, 8761 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8762 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, 8763 .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, 8764 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, 8765 .{ .@"1:", ._, .add, .tmp0p, .si(1), ._, ._ }, 8766 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8767 .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ }, 8768 .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, 8769 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, 8770 .{ ._, ._r, .sh, .tmp3d, .si(3), ._, 
._ }, 8771 .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ }, 8772 } }, 8773 } }, 8774 }) catch |err| switch (err) { 8775 error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ 8776 @tagName(air_tag), 8777 cg.typeOf(extra.lhs).fmt(pt), 8778 ops[0].tracking(cg), 8779 ops[1].tracking(cg), 8780 }), 8781 else => |e| return e, 8782 }, 8783 .gte => unreachable, 8784 .gt => unreachable, 8785 } 8786 for (ops) |op| for (res) |r| { 8787 if (op.index == r.index) break; 8788 } else try op.die(cg); 8789 try res[0].moveTo(inst, cg); 8790 }, 8791 8792 .cmp_lt, 8793 .cmp_lt_optimized, 8794 .cmp_lte, 8795 .cmp_lte_optimized, 8796 .cmp_gte, 8797 .cmp_gte_optimized, 8798 .cmp_gt, 8799 .cmp_gt_optimized, 8800 => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { 8801 else => unreachable, 8802 .cmp_lt, .cmp_lt_optimized => .lt, 8803 .cmp_lte, .cmp_lte_optimized => .lte, 8804 .cmp_gte, .cmp_gte_optimized => .gte, 8805 .cmp_gt, .cmp_gt_optimized => .gt, 8806 }) else fallback: { 8807 const bin_op = air_datas[@intFromEnum(inst)].bin_op; 8808 const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); 8809 if (scalar_ty.isRuntimeFloat()) break :fallback try cg.airCmp(inst, switch (air_tag) { 8810 else => unreachable, 8811 .cmp_lt, .cmp_lt_optimized => .lt, 8812 .cmp_lte, .cmp_lte_optimized => .lte, 8813 .cmp_gte, .cmp_gte_optimized => .gte, 8814 .cmp_gt, .cmp_gt_optimized => .gt, 8815 }); 8816 const signedness = if (scalar_ty.isAbiInt(zcu)) 8817 scalar_ty.intInfo(zcu).signedness 8818 else 8819 .unsigned; 8820 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 8821 var res: [1]Temp = undefined; 8822 cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (signedness) { 8823 .signed => switch (air_tag) { 8824 else => unreachable, 8825 .cmp_lt, .cmp_lt_optimized => .l, 8826 .cmp_lte, .cmp_lte_optimized => .le, 8827 .cmp_gte, .cmp_gte_optimized => .ge, 8828 .cmp_gt, .cmp_gt_optimized => .g, 8829 }, 8830 .unsigned => switch (air_tag) { 
8831 else => unreachable, 8832 .cmp_lt, .cmp_lt_optimized => .b, 8833 .cmp_lte, .cmp_lte_optimized => .be, 8834 .cmp_gte, .cmp_gte_optimized => .ae, 8835 .cmp_gt, .cmp_gt_optimized => .a, 8836 }, 8837 })) { 8838 else => unreachable, 8839 inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{ 8840 .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, 8841 .patterns = &.{ 8842 .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, 8843 .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } }, 8844 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 8845 }, 8846 .dst_temps = .{.{ .cc = cc.commute() }}, 8847 .clobbers = .{ .eflags = true }, 8848 .each = .{ .once = &.{ 8849 .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, 8850 } }, 8851 }, .{ 8852 .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, 8853 .patterns = &.{ 8854 .{ .src = .{ .mem, .imm8 } }, 8855 .{ .src = .{ .to_gpr, .imm8 } }, 8856 .{ .src = .{ .to_gpr, .mem } }, 8857 .{ .src = .{ .to_gpr, .to_gpr } }, 8858 }, 8859 .dst_temps = .{.{ .cc = cc }}, 8860 .clobbers = .{ .eflags = true }, 8861 .each = .{ .once = &.{ 8862 .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, 8863 } }, 8864 }, .{ 8865 .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, 8866 .patterns = &.{ 8867 .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, 8868 .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } }, 8869 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 8870 }, 8871 .dst_temps = .{.{ .cc = cc.commute() }}, 8872 .clobbers = .{ .eflags = true }, 8873 .each = .{ .once = &.{ 8874 .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, 8875 } }, 8876 }, .{ 8877 .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, 8878 .patterns = &.{ 8879 .{ .src = .{ .mem, .imm16 } }, 8880 .{ .src = .{ .to_gpr, .imm16 } }, 8881 .{ .src = .{ .to_gpr, .mem } }, 8882 .{ .src = .{ .to_gpr, .to_gpr } }, 8883 }, 8884 .dst_temps = .{.{ .cc = cc }}, 8885 .clobbers = .{ .eflags = true }, 8886 .each = .{ .once = &.{ 8887 .{ ._, ._, 
.cmp, .src0w, .src1w, ._, ._ }, 8888 } }, 8889 }, .{ 8890 .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, 8891 .patterns = &.{ 8892 .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, 8893 .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } }, 8894 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 8895 }, 8896 .dst_temps = .{.{ .cc = cc.commute() }}, 8897 .clobbers = .{ .eflags = true }, 8898 .each = .{ .once = &.{ 8899 .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, 8900 } }, 8901 }, .{ 8902 .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, 8903 .patterns = &.{ 8904 .{ .src = .{ .mem, .imm32 } }, 8905 .{ .src = .{ .to_gpr, .imm32 } }, 8906 .{ .src = .{ .to_gpr, .mem } }, 8907 .{ .src = .{ .to_gpr, .to_gpr } }, 8908 }, 8909 .dst_temps = .{.{ .cc = cc }}, 8910 .clobbers = .{ .eflags = true }, 8911 .each = .{ .once = &.{ 8912 .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, 8913 } }, 8914 }, .{ 8915 .required_features = .{ .@"64bit", null, null, null }, 8916 .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, 8917 .patterns = &.{ 8918 .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, 8919 .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } }, 8920 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 8921 }, 8922 .dst_temps = .{.{ .cc = cc.commute() }}, 8923 .clobbers = .{ .eflags = true }, 8924 .each = .{ .once = &.{ 8925 .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, 8926 } }, 8927 }, .{ 8928 .required_features = .{ .@"64bit", null, null, null }, 8929 .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, 8930 .patterns = &.{ 8931 .{ .src = .{ .mem, .simm32 } }, 8932 .{ .src = .{ .to_gpr, .simm32 } }, 8933 .{ .src = .{ .to_gpr, .mem } }, 8934 .{ .src = .{ .to_gpr, .to_gpr } }, 8935 }, 8936 .dst_temps = .{.{ .cc = cc }}, 8937 .clobbers = .{ .eflags = true }, 8938 .each = .{ .once = &.{ 8939 .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, 8940 } }, 8941 }, .{ 8942 .src_constraints = .{ .any_int, .any_int }, 8943 .patterns = 
&.{ 8944 .{ .src = .{ .to_mem, .to_mem }, .commute = switch (cc) { 8945 else => unreachable, 8946 .l, .ge, .b, .ae => .{ 0, 0 }, 8947 .le, .g, .be, .a => .{ 0, 1 }, 8948 } }, 8949 }, 8950 .extra_temps = .{ 8951 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 8952 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 8953 .unused, 8954 .unused, 8955 .unused, 8956 .unused, 8957 }, 8958 .dst_temps = .{.{ .rc = .general_purpose }}, 8959 .clobbers = .{ .eflags = true }, 8960 .each = .{ .once = &.{ 8961 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 8962 .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, 8963 .{ .@"0:", ._r, .sh, .tmp1b, .si(1), ._, ._ }, 8964 .{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, 8965 .{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, 8966 .{ ._, ._c, .set, .tmp1b, ._, ._, ._ }, 8967 .{ ._, .fromCondition(switch (cc) { 8968 else => unreachable, 8969 .l, .ge, .b, .ae => cc, 8970 .le, .g, .be, .a => cc.commute(), 8971 }), .set, .dst0b, ._, ._, ._ }, 8972 .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, 8973 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 8974 } }, 8975 } }, 8976 }) catch |err| switch (err) { 8977 error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ 8978 @tagName(air_tag), 8979 cg.typeOf(bin_op.lhs).fmt(pt), 8980 ops[0].tracking(cg), 8981 ops[1].tracking(cg), 8982 }), 8983 else => |e| return e, 8984 }; 8985 for (ops) |op| for (res) |r| { 8986 if (op.index == r.index) break; 8987 } else try op.die(cg); 8988 try res[0].moveTo(inst, cg); 8989 }, 8990 .cmp_eq, 8991 .cmp_eq_optimized, 8992 .cmp_neq, 8993 .cmp_neq_optimized, 8994 => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { 8995 else => unreachable, 8996 .cmp_eq, .cmp_eq_optimized => .eq, 8997 .cmp_neq, .cmp_neq_optimized => .neq, 8998 }) else fallback: { 8999 const bin_op = air_datas[@intFromEnum(inst)].bin_op; 9000 const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); 9001 if 
(scalar_ty.isRuntimeFloat() or ip.isOptionalType(scalar_ty.toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { 9002 else => unreachable, 9003 .cmp_eq, .cmp_eq_optimized => .eq, 9004 .cmp_neq, .cmp_neq_optimized => .neq, 9005 }); 9006 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 9007 var res: [1]Temp = undefined; 9008 cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (air_tag) { 9009 else => unreachable, 9010 .cmp_eq, .cmp_eq_optimized => .e, 9011 .cmp_neq, .cmp_neq_optimized => .ne, 9012 })) { 9013 else => unreachable, 9014 inline .e, .ne => |cc| comptime &.{ .{ 9015 .required_features = .{ .avx2, null, null, null }, 9016 .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, 9017 .patterns = &.{ 9018 .{ .src = .{ .to_ymm, .mem } }, 9019 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 9020 .{ .src = .{ .to_ymm, .to_ymm } }, 9021 }, 9022 .extra_temps = .{ 9023 .{ .kind = .{ .rc = .sse } }, 9024 .unused, 9025 .unused, 9026 .unused, 9027 .unused, 9028 .unused, 9029 }, 9030 .dst_temps = .{.{ .cc = cc }}, 9031 .clobbers = .{ .eflags = true }, 9032 .each = .{ .once = &.{ 9033 .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ }, 9034 .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, 9035 } }, 9036 }, .{ 9037 .required_features = .{ .avx, null, null, null }, 9038 .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, 9039 .patterns = &.{ 9040 .{ .src = .{ .to_ymm, .mem } }, 9041 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, 9042 .{ .src = .{ .to_ymm, .to_ymm } }, 9043 }, 9044 .extra_temps = .{ 9045 .{ .kind = .{ .rc = .sse } }, 9046 .unused, 9047 .unused, 9048 .unused, 9049 .unused, 9050 .unused, 9051 }, 9052 .dst_temps = .{.{ .cc = cc }}, 9053 .clobbers = .{ .eflags = true }, 9054 .each = .{ .once = &.{ 9055 .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ }, 9056 .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, 9057 } }, 9058 }, .{ 9059 .required_features = .{ .avx, null, null, null }, 9060 
.src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, 9061 .patterns = &.{ 9062 .{ .src = .{ .to_xmm, .mem } }, 9063 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, 9064 .{ .src = .{ .to_xmm, .to_xmm } }, 9065 }, 9066 .extra_temps = .{ 9067 .{ .kind = .{ .rc = .sse } }, 9068 .unused, 9069 .unused, 9070 .unused, 9071 .unused, 9072 .unused, 9073 }, 9074 .dst_temps = .{.{ .cc = cc }}, 9075 .clobbers = .{ .eflags = true }, 9076 .each = .{ .once = &.{ 9077 .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ }, 9078 .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ }, 9079 } }, 9080 }, .{ 9081 .required_features = .{ .sse4_1, null, null, null }, 9082 .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, 9083 .patterns = &.{ 9084 .{ .src = .{ .to_mut_xmm, .mem } }, 9085 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 9086 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 9087 }, 9088 .dst_temps = .{.{ .cc = cc }}, 9089 .clobbers = .{ .eflags = true }, 9090 .each = .{ .once = &.{ 9091 .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, 9092 .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ }, 9093 } }, 9094 }, .{ 9095 .required_features = .{ .sse2, null, null, null }, 9096 .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, 9097 .patterns = &.{ 9098 .{ .src = .{ .to_mut_xmm, .mem } }, 9099 .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, 9100 .{ .src = .{ .to_mut_xmm, .to_xmm } }, 9101 }, 9102 .extra_temps = .{ 9103 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 9104 .{ .kind = .{ .rc = .sse } }, 9105 .unused, 9106 .unused, 9107 .unused, 9108 .unused, 9109 }, 9110 .dst_temps = .{.{ .cc = cc }}, 9111 .clobbers = .{ .eflags = true }, 9112 .each = .{ .once = &.{ 9113 .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, 9114 .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, 9115 .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ }, 9116 .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, 9117 .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ }, 9118 } }, 9119 }, .{ 9120 
.required_features = .{ .sse, .mmx, null, null }, 9121 .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, 9122 .patterns = &.{ 9123 .{ .src = .{ .to_mut_mm, .mem } }, 9124 .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, 9125 .{ .src = .{ .to_mut_mm, .to_mm } }, 9126 }, 9127 .extra_temps = .{ 9128 .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, 9129 .{ .kind = .{ .rc = .mmx } }, 9130 .unused, 9131 .unused, 9132 .unused, 9133 .unused, 9134 }, 9135 .dst_temps = .{.{ .cc = cc }}, 9136 .clobbers = .{ .eflags = true }, 9137 .each = .{ .once = &.{ 9138 .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, 9139 .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ }, 9140 .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ }, 9141 .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, 9142 .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ }, 9143 } }, 9144 }, .{ 9145 .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, 9146 .patterns = &.{ 9147 .{ .src = .{ .mem, .imm8 } }, 9148 .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, 9149 .{ .src = .{ .to_gpr, .imm8 } }, 9150 .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } }, 9151 .{ .src = .{ .to_gpr, .mem } }, 9152 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 9153 .{ .src = .{ .to_gpr, .to_gpr } }, 9154 }, 9155 .dst_temps = .{.{ .cc = cc }}, 9156 .clobbers = .{ .eflags = true }, 9157 .each = .{ .once = &.{ 9158 .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, 9159 } }, 9160 }, .{ 9161 .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, 9162 .patterns = &.{ 9163 .{ .src = .{ .mem, .imm16 } }, 9164 .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, 9165 .{ .src = .{ .to_gpr, .imm16 } }, 9166 .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } }, 9167 .{ .src = .{ .to_gpr, .mem } }, 9168 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 9169 .{ .src = .{ .to_gpr, .to_gpr } }, 9170 }, 9171 .dst_temps = .{.{ .cc = cc }}, 9172 .clobbers = .{ .eflags = true }, 9173 .each = .{ .once = &.{ 9174 .{ ._, ._, .cmp, 
.src0w, .src1w, ._, ._ }, 9175 } }, 9176 }, .{ 9177 .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, 9178 .patterns = &.{ 9179 .{ .src = .{ .mem, .imm32 } }, 9180 .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, 9181 .{ .src = .{ .to_gpr, .imm32 } }, 9182 .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } }, 9183 .{ .src = .{ .to_gpr, .mem } }, 9184 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 9185 .{ .src = .{ .to_gpr, .to_gpr } }, 9186 }, 9187 .dst_temps = .{.{ .cc = cc }}, 9188 .clobbers = .{ .eflags = true }, 9189 .each = .{ .once = &.{ 9190 .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, 9191 } }, 9192 }, .{ 9193 .required_features = .{ .@"64bit", null, null, null }, 9194 .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, 9195 .patterns = &.{ 9196 .{ .src = .{ .mem, .simm32 } }, 9197 .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, 9198 .{ .src = .{ .to_gpr, .simm32 } }, 9199 .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } }, 9200 .{ .src = .{ .to_gpr, .mem } }, 9201 .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, 9202 .{ .src = .{ .to_gpr, .to_gpr } }, 9203 }, 9204 .dst_temps = .{.{ .cc = cc }}, 9205 .clobbers = .{ .eflags = true }, 9206 .each = .{ .once = &.{ 9207 .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, 9208 } }, 9209 }, .{ 9210 .required_features = .{ .avx2, null, null, null }, 9211 .src_constraints = .{ 9212 .{ .remainder_int = .{ .of = .yword, .is = .xword } }, 9213 .{ .remainder_int = .{ .of = .yword, .is = .xword } }, 9214 }, 9215 .patterns = &.{ 9216 .{ .src = .{ .to_mem, .to_mem } }, 9217 }, 9218 .extra_temps = .{ 9219 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9220 .{ .kind = .{ .rc = .sse } }, 9221 .{ .kind = .{ .rc = .sse } }, 9222 .unused, 9223 .unused, 9224 .unused, 9225 }, 9226 .dst_temps = .{.{ .cc = cc }}, 9227 .clobbers = .{ .eflags = true }, 9228 .each = .{ .once = &.{ 9229 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 9230 .{ ._, .vp_, .xor, .tmp1y, 
.tmp1y, .tmp1y, ._ }, 9231 .{ .@"0:", .v_dqu, .mov, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ }, 9232 .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ }, 9233 .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, 9234 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 9235 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9236 .{ ._, .v_dqa, .mov, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ }, 9237 .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ }, 9238 .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, 9239 .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, 9240 } }, 9241 }, .{ 9242 .required_features = .{ .avx2, null, null, null }, 9243 .patterns = &.{ 9244 .{ .src = .{ .to_mem, .to_mem } }, 9245 }, 9246 .extra_temps = .{ 9247 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9248 .{ .kind = .{ .rc = .sse } }, 9249 .{ .kind = .{ .rc = .sse } }, 9250 .unused, 9251 .unused, 9252 .unused, 9253 }, 9254 .dst_temps = .{.{ .cc = cc }}, 9255 .clobbers = .{ .eflags = true }, 9256 .each = .{ .once = &.{ 9257 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9258 .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, 9259 .{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 9260 .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, 9261 .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, 9262 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 9263 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9264 .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, 9265 } }, 9266 }, .{ 9267 .required_features = .{ .avx, null, null, null }, 9268 .src_constraints = .{ 9269 .{ .remainder_int = .{ .of = .yword, .is = .xword } }, 9270 .{ .remainder_int = .{ .of = .yword, .is = .xword } }, 9271 }, 9272 .patterns = &.{ 9273 .{ .src = .{ .to_mem, .to_mem } }, 9274 }, 9275 .extra_temps = .{ 9276 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9277 .{ .kind = .{ .rc = .sse } }, 9278 .{ .kind = .{ .rc = .sse } 
}, 9279 .unused, 9280 .unused, 9281 .unused, 9282 }, 9283 .dst_temps = .{.{ .cc = cc }}, 9284 .clobbers = .{ .eflags = true }, 9285 .each = .{ .once = &.{ 9286 .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, 9287 .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, 9288 .{ .@"0:", .v_pd, .movu, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ }, 9289 .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ }, 9290 .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, 9291 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 9292 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9293 .{ ._, .v_pd, .mova, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ }, 9294 .{ ._, .v_pd, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ }, 9295 .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, 9296 .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, 9297 } }, 9298 }, .{ 9299 .required_features = .{ .avx, null, null, null }, 9300 .patterns = &.{ 9301 .{ .src = .{ .to_mem, .to_mem } }, 9302 }, 9303 .extra_temps = .{ 9304 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9305 .{ .kind = .{ .rc = .sse } }, 9306 .{ .kind = .{ .rc = .sse } }, 9307 .unused, 9308 .unused, 9309 .unused, 9310 }, 9311 .dst_temps = .{.{ .cc = cc }}, 9312 .clobbers = .{ .eflags = true }, 9313 .each = .{ .once = &.{ 9314 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9315 .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, 9316 .{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, 9317 .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, 9318 .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, 9319 .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, 9320 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9321 .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, 9322 } }, 9323 }, .{ 9324 .required_features = .{ .avx, null, null, null }, 9325 .patterns = &.{ 9326 .{ .src = .{ .to_mem, .to_mem } }, 9327 }, 9328 .extra_temps = .{ 9329 .{ .type = .isize, 
.kind = .{ .rc = .general_purpose } }, 9330 .{ .kind = .{ .rc = .sse } }, 9331 .{ .kind = .{ .rc = .sse } }, 9332 .unused, 9333 .unused, 9334 .unused, 9335 }, 9336 .dst_temps = .{.{ .cc = cc }}, 9337 .clobbers = .{ .eflags = true }, 9338 .each = .{ .once = &.{ 9339 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9340 .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ }, 9341 .{ .@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 9342 .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ }, 9343 .{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ }, 9344 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 9345 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9346 .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ }, 9347 } }, 9348 }, .{ 9349 .required_features = .{ .sse4_1, null, null, null }, 9350 .patterns = &.{ 9351 .{ .src = .{ .to_mem, .to_mem } }, 9352 }, 9353 .extra_temps = .{ 9354 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9355 .{ .kind = .{ .rc = .sse } }, 9356 .{ .kind = .{ .rc = .sse } }, 9357 .unused, 9358 .unused, 9359 .unused, 9360 }, 9361 .dst_temps = .{.{ .cc = cc }}, 9362 .clobbers = .{ .eflags = true }, 9363 .each = .{ .once = &.{ 9364 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9365 .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, 9366 .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 9367 .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 9368 .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, 9369 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 9370 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9371 .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ }, 9372 } }, 9373 }, .{ 9374 .required_features = .{ .sse2, null, null, null }, 9375 .patterns = &.{ 9376 .{ .src = .{ .to_mem, .to_mem } }, 9377 }, 9378 .extra_temps = .{ 9379 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9380 .{ .kind = .{ .rc = .sse } }, 9381 .{ .kind = .{ .rc = .sse } }, 9382 .unused, 9383 .unused, 9384 
.unused, 9385 }, 9386 .dst_temps = .{.{ .cc = cc }}, 9387 .clobbers = .{ .eflags = true }, 9388 .each = .{ .once = &.{ 9389 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9390 .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, 9391 .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, 9392 .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, 9393 .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, 9394 .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, 9395 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9396 .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, 9397 .{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ }, 9398 .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, 9399 .{ ._, ._, .cmp, .tmp0d, .si(0xffff), ._, ._ }, 9400 } }, 9401 }, .{ 9402 .required_features = .{ .sse, .mmx, null, null }, 9403 .patterns = &.{ 9404 .{ .src = .{ .to_mem, .to_mem } }, 9405 }, 9406 .extra_temps = .{ 9407 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9408 .{ .kind = .{ .rc = .mmx } }, 9409 .{ .kind = .{ .rc = .mmx } }, 9410 .unused, 9411 .unused, 9412 .unused, 9413 }, 9414 .dst_temps = .{.{ .cc = cc }}, 9415 .clobbers = .{ .eflags = true }, 9416 .each = .{ .once = &.{ 9417 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9418 .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, 9419 .{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, 9420 .{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, 9421 .{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ }, 9422 .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, 9423 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9424 .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ }, 9425 .{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ }, 9426 .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, 9427 .{ ._, ._, .cmp, .tmp0d, .si(0xff), ._, ._ }, 9428 } }, 9429 }, .{ 9430 .patterns = &.{ 9431 .{ .src = .{ .to_mem, .to_mem } }, 9432 }, 9433 .extra_temps = .{ 9434 .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, 9435 .{ .type = 
.usize, .kind = .{ .rc = .general_purpose } }, 9436 .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, 9437 .unused, 9438 .unused, 9439 .unused, 9440 }, 9441 .dst_temps = .{.{ .cc = cc }}, 9442 .clobbers = .{ .eflags = true }, 9443 .each = .{ .once = &.{ 9444 .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 9445 .{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ }, 9446 .{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, 9447 .{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, 9448 .{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ }, 9449 .{ ._, ._, .add, .tmp0p, .sa(.tmp2, .add_size), ._, ._ }, 9450 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, 9451 .{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ }, 9452 } }, 9453 } }, 9454 }) catch |err| switch (err) { 9455 error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ 9456 @tagName(air_tag), 9457 cg.typeOf(bin_op.lhs).fmt(pt), 9458 ops[0].tracking(cg), 9459 ops[1].tracking(cg), 9460 }), 9461 else => |e| return e, 9462 }; 9463 for (ops) |op| for (res) |r| { 9464 if (op.index == r.index) break; 9465 } else try op.die(cg); 9466 try res[0].moveTo(inst, cg); 9467 }, 9468 9469 .cond_br => try cg.airCondBr(inst), 9470 .switch_br => try cg.airSwitchBr(inst), 9471 .loop_switch_br => try cg.airLoopSwitchBr(inst), 9472 .switch_dispatch => try cg.airSwitchDispatch(inst), 9473 .@"try", .try_cold => try cg.airTry(inst), 9474 .try_ptr, .try_ptr_cold => try cg.airTryPtr(inst), 9475 .dbg_stmt => if (use_old) try cg.airDbgStmt(inst) else { 9476 const dbg_stmt = air_datas[@intFromEnum(inst)].dbg_stmt; 9477 _ = try cg.addInst(.{ 9478 .tag = .pseudo, 9479 .ops = .pseudo_dbg_line_line_column, 9480 .data = .{ .line_column = .{ 9481 .line = dbg_stmt.line, 9482 .column = dbg_stmt.column, 9483 } }, 9484 }); 9485 }, 9486 .dbg_empty_stmt => if (use_old) try cg.airDbgEmptyStmt() else { 9487 if (cg.mir_instructions.len > 0) { 9488 const prev_mir_op = &cg.mir_instructions.items(.ops)[cg.mir_instructions.len - 
1]; 9489 if (prev_mir_op.* == .pseudo_dbg_line_stmt_line_column) 9490 prev_mir_op.* = .pseudo_dbg_line_line_column; 9491 } 9492 try cg.asmOpOnly(.{ ._, .nop }); 9493 }, 9494 .dbg_inline_block => if (use_old) try cg.airDbgInlineBlock(inst) else { 9495 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 9496 const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload); 9497 const old_inline_func = cg.inline_func; 9498 defer cg.inline_func = old_inline_func; 9499 cg.inline_func = extra.data.func; 9500 _ = try cg.addInst(.{ 9501 .tag = .pseudo, 9502 .ops = .pseudo_dbg_enter_inline_func, 9503 .data = .{ .func = extra.data.func }, 9504 }); 9505 try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); 9506 _ = try cg.addInst(.{ 9507 .tag = .pseudo, 9508 .ops = .pseudo_dbg_leave_inline_func, 9509 .data = .{ .func = old_inline_func }, 9510 }); 9511 }, 9512 .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try cg.airDbgVar(inst) else { 9513 const pl_op = air_datas[@intFromEnum(inst)].pl_op; 9514 var ops = try cg.tempsFromOperands(inst, .{pl_op.operand}); 9515 try cg.genLocalDebugInfo(inst, ops[0].tracking(cg).short); 9516 try ops[0].die(cg); 9517 }, 9518 .is_null_ptr => if (use_old) try cg.airIsNullPtr(inst) else { 9519 const un_op = air_datas[@intFromEnum(inst)].un_op; 9520 const opt_ty = cg.typeOf(un_op).childType(zcu); 9521 const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu); 9522 const opt_child_ty = opt_ty.optionalChild(zcu); 9523 const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu)); 9524 var ops = try cg.tempsFromOperands(inst, .{un_op}); 9525 if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg); 9526 while (try ops[0].toLea(cg)) {} 9527 try cg.asmMemoryImmediate( 9528 .{ ._, .cmp }, 9529 try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl) 9530 .byte 9531 else if (opt_child_ty.isSlice(zcu)) 9532 .qword 9533 else 9534 .fromSize(opt_child_abi_size) }), 9535 .u(0), 9536 ); 
9537 var is_null = try cg.tempInit(.bool, .{ .eflags = .e }); 9538 try ops[0].die(cg); 9539 try is_null.moveTo(inst, cg); 9540 }, 9541 .is_non_null_ptr => if (use_old) try cg.airIsNonNullPtr(inst) else { 9542 const un_op = air_datas[@intFromEnum(inst)].un_op; 9543 const opt_ty = cg.typeOf(un_op).childType(zcu); 9544 const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu); 9545 const opt_child_ty = opt_ty.optionalChild(zcu); 9546 const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu)); 9547 var ops = try cg.tempsFromOperands(inst, .{un_op}); 9548 if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg); 9549 while (try ops[0].toLea(cg)) {} 9550 try cg.asmMemoryImmediate( 9551 .{ ._, .cmp }, 9552 try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl) 9553 .byte 9554 else if (opt_child_ty.isSlice(zcu)) 9555 .qword 9556 else 9557 .fromSize(opt_child_abi_size) }), 9558 .u(0), 9559 ); 9560 var is_non_null = try cg.tempInit(.bool, .{ .eflags = .ne }); 9561 try ops[0].die(cg); 9562 try is_non_null.moveTo(inst, cg); 9563 }, 9564 .is_err_ptr => if (use_old) try cg.airIsErrPtr(inst) else { 9565 const un_op = air_datas[@intFromEnum(inst)].un_op; 9566 const eu_ty = cg.typeOf(un_op).childType(zcu); 9567 const eu_err_ty = eu_ty.errorUnionSet(zcu); 9568 const eu_pl_ty = eu_ty.errorUnionPayload(zcu); 9569 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); 9570 var ops = try cg.tempsFromOperands(inst, .{un_op}); 9571 try ops[0].toOffset(eu_err_off, cg); 9572 while (try ops[0].toLea(cg)) {} 9573 try cg.asmMemoryImmediate( 9574 .{ ._, .cmp }, 9575 try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }), 9576 .u(0), 9577 ); 9578 var is_err = try cg.tempInit(.bool, .{ .eflags = .ne }); 9579 try ops[0].die(cg); 9580 try is_err.moveTo(inst, cg); 9581 }, 9582 .is_non_err_ptr => if (use_old) try cg.airIsNonErrPtr(inst) else { 9583 const un_op = air_datas[@intFromEnum(inst)].un_op; 9584 const 
eu_ty = cg.typeOf(un_op).childType(zcu); 9585 const eu_err_ty = eu_ty.errorUnionSet(zcu); 9586 const eu_pl_ty = eu_ty.errorUnionPayload(zcu); 9587 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); 9588 var ops = try cg.tempsFromOperands(inst, .{un_op}); 9589 try ops[0].toOffset(eu_err_off, cg); 9590 while (try ops[0].toLea(cg)) {} 9591 try cg.asmMemoryImmediate( 9592 .{ ._, .cmp }, 9593 try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }), 9594 .u(0), 9595 ); 9596 var is_non_err = try cg.tempInit(.bool, .{ .eflags = .e }); 9597 try ops[0].die(cg); 9598 try is_non_err.moveTo(inst, cg); 9599 }, 9600 .load => if (use_old) try cg.airLoad(inst) else fallback: { 9601 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9602 const val_ty = ty_op.ty.toType(); 9603 const ptr_ty = cg.typeOf(ty_op.operand); 9604 const ptr_info = ptr_ty.ptrInfo(zcu); 9605 if (ptr_info.packed_offset.host_size > 0 and 9606 (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type)) 9607 break :fallback try cg.airLoad(inst); 9608 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9609 var res = try ops[0].load(val_ty, .{ 9610 .disp = switch (ptr_info.flags.vector_index) { 9611 .none => 0, 9612 .runtime => unreachable, 9613 else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), 9614 }, 9615 }, cg); 9616 for (ops) |op| if (op.index != res.index) try op.die(cg); 9617 try res.moveTo(inst, cg); 9618 }, 9619 .int_from_ptr => if (use_old) try cg.airIntFromPtr(inst) else { 9620 const un_op = air_datas[@intFromEnum(inst)].un_op; 9621 var ops = try cg.tempsFromOperands(inst, .{un_op}); 9622 try ops[0].toSlicePtr(cg); 9623 try ops[0].moveTo(inst, cg); 9624 }, 9625 .int_from_bool => if (use_old) try cg.airIntFromBool(inst) else { 9626 const un_op = air_datas[@intFromEnum(inst)].un_op; 9627 var ops = try cg.tempsFromOperands(inst, .{un_op}); 9628 try ops[0].moveTo(inst, cg); 9629 }, 9630 .ret => try 
cg.airRet(inst, false), 9631 .ret_safe => try cg.airRet(inst, true), 9632 .ret_load => try cg.airRetLoad(inst), 9633 .store, .store_safe => |air_tag| if (use_old) try cg.airStore(inst, switch (air_tag) { 9634 else => unreachable, 9635 .store => false, 9636 .store_safe => true, 9637 }) else fallback: { 9638 const bin_op = air_datas[@intFromEnum(inst)].bin_op; 9639 const ptr_ty = cg.typeOf(bin_op.lhs); 9640 const ptr_info = ptr_ty.ptrInfo(zcu); 9641 const val_ty = cg.typeOf(bin_op.rhs); 9642 if (ptr_info.packed_offset.host_size > 0 and 9643 (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type)) 9644 break :fallback try cg.airStore(inst, switch (air_tag) { 9645 else => unreachable, 9646 .store => false, 9647 .store_safe => true, 9648 }); 9649 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 9650 try ops[0].store(&ops[1], .{ 9651 .disp = switch (ptr_info.flags.vector_index) { 9652 .none => 0, 9653 .runtime => unreachable, 9654 else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), 9655 }, 9656 .safe = switch (air_tag) { 9657 else => unreachable, 9658 .store => false, 9659 .store_safe => true, 9660 }, 9661 }, cg); 9662 for (ops) |op| try op.die(cg); 9663 }, 9664 .unreach => {}, 9665 .optional_payload_ptr => if (use_old) try cg.airOptionalPayloadPtr(inst) else { 9666 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9667 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9668 try ops[0].moveTo(inst, cg); 9669 }, 9670 .optional_payload_ptr_set => if (use_old) try cg.airOptionalPayloadPtrSet(inst) else { 9671 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9672 const opt_ty = cg.typeOf(ty_op.operand).childType(zcu); 9673 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9674 if (!opt_ty.optionalReprIsPayload(zcu)) { 9675 const opt_child_ty = opt_ty.optionalChild(zcu); 9676 const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu)); 9677 try 
ops[0].toOffset(opt_child_abi_size, cg); 9678 var has_value = try cg.tempInit(.bool, .{ .immediate = 1 }); 9679 try ops[0].store(&has_value, .{}, cg); 9680 try has_value.die(cg); 9681 try ops[0].toOffset(-opt_child_abi_size, cg); 9682 } 9683 try ops[0].moveTo(inst, cg); 9684 }, 9685 .unwrap_errunion_payload_ptr => if (use_old) try cg.airUnwrapErrUnionPayloadPtr(inst) else { 9686 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9687 const eu_ty = cg.typeOf(ty_op.operand).childType(zcu); 9688 const eu_pl_ty = eu_ty.errorUnionPayload(zcu); 9689 const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); 9690 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9691 try ops[0].toOffset(eu_pl_off, cg); 9692 try ops[0].moveTo(inst, cg); 9693 }, 9694 .unwrap_errunion_err_ptr => if (use_old) try cg.airUnwrapErrUnionErrPtr(inst) else { 9695 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9696 const eu_ty = cg.typeOf(ty_op.operand).childType(zcu); 9697 const eu_pl_ty = eu_ty.errorUnionPayload(zcu); 9698 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); 9699 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9700 try ops[0].toOffset(eu_err_off, cg); 9701 var err = try ops[0].load(eu_ty.errorUnionSet(zcu), .{}, cg); 9702 try ops[0].die(cg); 9703 try err.moveTo(inst, cg); 9704 }, 9705 .errunion_payload_ptr_set => if (use_old) try cg.airErrUnionPayloadPtrSet(inst) else { 9706 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9707 const eu_ty = cg.typeOf(ty_op.operand).childType(zcu); 9708 const eu_err_ty = eu_ty.errorUnionSet(zcu); 9709 const eu_pl_ty = eu_ty.errorUnionPayload(zcu); 9710 const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); 9711 const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); 9712 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9713 try ops[0].toOffset(eu_err_off, cg); 9714 var no_err = try cg.tempInit(eu_err_ty, .{ .immediate = 
0 }); 9715 try ops[0].store(&no_err, .{}, cg); 9716 try no_err.die(cg); 9717 try ops[0].toOffset(eu_pl_off - eu_err_off, cg); 9718 try ops[0].moveTo(inst, cg); 9719 }, 9720 .struct_field_ptr => if (use_old) try cg.airStructFieldPtr(inst) else { 9721 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 9722 const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data; 9723 var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); 9724 try ops[0].toOffset(cg.fieldOffset( 9725 cg.typeOf(extra.struct_operand), 9726 ty_pl.ty.toType(), 9727 extra.field_index, 9728 ), cg); 9729 try ops[0].moveTo(inst, cg); 9730 }, 9731 .struct_field_ptr_index_0, 9732 .struct_field_ptr_index_1, 9733 .struct_field_ptr_index_2, 9734 .struct_field_ptr_index_3, 9735 => |air_tag| if (use_old) try cg.airStructFieldPtrIndex(inst, switch (air_tag) { 9736 else => unreachable, 9737 .struct_field_ptr_index_0 => 0, 9738 .struct_field_ptr_index_1 => 1, 9739 .struct_field_ptr_index_2 => 2, 9740 .struct_field_ptr_index_3 => 3, 9741 }) else { 9742 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9743 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9744 try ops[0].toOffset(cg.fieldOffset( 9745 cg.typeOf(ty_op.operand), 9746 ty_op.ty.toType(), 9747 switch (air_tag) { 9748 else => unreachable, 9749 .struct_field_ptr_index_0 => 0, 9750 .struct_field_ptr_index_1 => 1, 9751 .struct_field_ptr_index_2 => 2, 9752 .struct_field_ptr_index_3 => 3, 9753 }, 9754 ), cg); 9755 try ops[0].moveTo(inst, cg); 9756 }, 9757 .struct_field_val => if (use_old) try cg.airStructFieldVal(inst) else fallback: { 9758 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 9759 const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data; 9760 const agg_ty = cg.typeOf(extra.struct_operand); 9761 const field_ty = ty_pl.ty.toType(); 9762 const field_off: u31 = switch (agg_ty.containerLayout(zcu)) { 9763 .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(extra.field_index, zcu)), 9764 .@"packed" => break 
:fallback try cg.airStructFieldVal(inst), 9765 }; 9766 if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) { 9767 var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); 9768 var res = try ops[0].read(field_ty, .{ .disp = field_off }, cg); 9769 for (ops) |op| if (op.index != res.index) try op.die(cg); 9770 try res.moveTo(inst, cg); 9771 } else { 9772 // hack around Sema OPV bugs 9773 const res = try cg.tempInit(field_ty, .none); 9774 try res.moveTo(inst, cg); 9775 } 9776 }, 9777 .set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else { 9778 const bin_op = air_datas[@intFromEnum(inst)].bin_op; 9779 const union_ty = cg.typeOf(bin_op.lhs).childType(zcu); 9780 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 9781 const union_layout = union_ty.unionGetLayout(zcu); 9782 // hack around Sema OPV bugs 9783 if (union_layout.tag_size > 0) try ops[0].store(&ops[1], .{ 9784 .disp = @intCast(union_layout.tagOffset()), 9785 }, cg); 9786 for (ops) |op| try op.die(cg); 9787 }, 9788 .get_union_tag => if (use_old) try cg.airGetUnionTag(inst) else { 9789 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9790 const union_ty = cg.typeOf(ty_op.operand); 9791 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9792 const union_layout = union_ty.unionGetLayout(zcu); 9793 assert(union_layout.tag_size > 0); 9794 var res = try ops[0].read(ty_op.ty.toType(), .{ 9795 .disp = @intCast(union_layout.tagOffset()), 9796 }, cg); 9797 for (ops) |op| if (op.index != res.index) try op.die(cg); 9798 try res.moveTo(inst, cg); 9799 }, 9800 .slice => if (use_old) try cg.airSlice(inst) else { 9801 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 9802 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; 9803 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 9804 try ops[0].toPair(&ops[1], cg); 9805 try ops[0].moveTo(inst, cg); 9806 }, 9807 .slice_len => if (use_old) try cg.airSliceLen(inst) else { 9808 const ty_op = 
air_datas[@intFromEnum(inst)].ty_op; 9809 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9810 try ops[0].toSliceLen(cg); 9811 try ops[0].moveTo(inst, cg); 9812 }, 9813 .slice_ptr => if (use_old) try cg.airSlicePtr(inst) else { 9814 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9815 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9816 try ops[0].toSlicePtr(cg); 9817 try ops[0].moveTo(inst, cg); 9818 }, 9819 .ptr_slice_len_ptr => if (use_old) try cg.airPtrSliceLenPtr(inst) else { 9820 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9821 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9822 try ops[0].toOffset(8, cg); 9823 try ops[0].moveTo(inst, cg); 9824 }, 9825 .ptr_slice_ptr_ptr => if (use_old) try cg.airPtrSlicePtrPtr(inst) else { 9826 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 9827 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 9828 try ops[0].toOffset(0, cg); 9829 try ops[0].moveTo(inst, cg); 9830 }, 9831 .slice_elem_val, .ptr_elem_val => |air_tag| if (use_old) switch (air_tag) { 9832 else => unreachable, 9833 .slice_elem_val => try cg.airSliceElemVal(inst), 9834 .ptr_elem_val => try cg.airPtrElemVal(inst), 9835 } else { 9836 const bin_op = air_datas[@intFromEnum(inst)].bin_op; 9837 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 9838 try ops[0].toSlicePtr(cg); 9839 var res: [1]Temp = undefined; 9840 const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu); 9841 cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ 9842 .dst_constraints = .{.{ .int = .byte }}, 9843 .patterns = &.{ 9844 .{ .src = .{ .to_gpr, .simm32 } }, 9845 }, 9846 .dst_temps = .{.{ .rc = .general_purpose }}, 9847 .each = .{ .once = &.{ 9848 .{ ._, ._, .movzx, .dst0d, .leaa(.byte, .src0, .add_src0_elem_size_times_src1), ._, ._ }, 9849 } }, 9850 }, .{ 9851 .dst_constraints = .{.{ .int = .byte }}, 9852 .patterns = &.{ 9853 .{ .src = .{ .to_gpr, .to_gpr } }, 9854 }, 9855 .dst_temps = .{.{ .rc = .general_purpose }}, 
9856 .each = .{ .once = &.{ 9857 .{ ._, ._, .movzx, .dst0d, .leai(.byte, .src0, .src1), ._, ._ }, 9858 } }, 9859 }, .{ 9860 .dst_constraints = .{.{ .int = .word }}, 9861 .patterns = &.{ 9862 .{ .src = .{ .to_gpr, .simm32 } }, 9863 }, 9864 .dst_temps = .{.{ .rc = .general_purpose }}, 9865 .each = .{ .once = &.{ 9866 .{ ._, ._, .movzx, .dst0d, .leaa(.word, .src0, .add_src0_elem_size_times_src1), ._, ._ }, 9867 } }, 9868 }, .{ 9869 .dst_constraints = .{.{ .int = .word }}, 9870 .patterns = &.{ 9871 .{ .src = .{ .to_gpr, .to_gpr } }, 9872 }, 9873 .dst_temps = .{.{ .rc = .general_purpose }}, 9874 .each = .{ .once = &.{ 9875 .{ ._, ._, .movzx, .dst0d, .leasi(.word, .src0, .@"2", .src1), ._, ._ }, 9876 } }, 9877 }, .{ 9878 .dst_constraints = .{.{ .int = .dword }}, 9879 .patterns = &.{ 9880 .{ .src = .{ .to_gpr, .simm32 } }, 9881 }, 9882 .dst_temps = .{.{ .rc = .general_purpose }}, 9883 .each = .{ .once = &.{ 9884 .{ ._, ._, .mov, .dst0d, .leaa(.dword, .src0, .add_src0_elem_size_times_src1), ._, ._ }, 9885 } }, 9886 }, .{ 9887 .dst_constraints = .{.{ .int = .dword }}, 9888 .patterns = &.{ 9889 .{ .src = .{ .to_gpr, .to_gpr } }, 9890 }, 9891 .dst_temps = .{.{ .rc = .general_purpose }}, 9892 .each = .{ .once = &.{ 9893 .{ ._, ._, .mov, .dst0d, .leasi(.dword, .src0, .@"4", .src1), ._, ._ }, 9894 } }, 9895 }, .{ 9896 .dst_constraints = .{.{ .int = .qword }}, 9897 .patterns = &.{ 9898 .{ .src = .{ .to_gpr, .simm32 } }, 9899 }, 9900 .dst_temps = .{.{ .rc = .general_purpose }}, 9901 .each = .{ .once = &.{ 9902 .{ ._, ._, .mov, .dst0q, .leaa(.qword, .src0, .add_src0_elem_size_times_src1), ._, ._ }, 9903 } }, 9904 }, .{ 9905 .required_features = .{ .@"64bit", null, null, null }, 9906 .dst_constraints = .{.{ .int = .qword }}, 9907 .patterns = &.{ 9908 .{ .src = .{ .to_gpr, .to_gpr } }, 9909 }, 9910 .dst_temps = .{.{ .rc = .general_purpose }}, 9911 .each = .{ .once = &.{ 9912 .{ ._, ._, .mov, .dst0q, .leasi(.qword, .src0, .@"8", .src1), ._, ._ }, 9913 } }, 9914 } }) catch |err| switch 
(err) { 9915 error.SelectFailed => switch (res_ty.abiSize(zcu)) { 9916 // hack around Sema OPV bugs 9917 0 => res[0] = try cg.tempInit(res_ty, .none), 9918 else => |elem_size| { 9919 while (true) for (&ops) |*op| { 9920 if (try op.toRegClass(true, .general_purpose, cg)) break; 9921 } else break; 9922 const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); 9923 const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); 9924 if (!std.math.isPowerOfTwo(elem_size)) { 9925 try cg.spillEflagsIfOccupied(); 9926 try cg.asmRegisterRegisterImmediate( 9927 .{ .i_, .mul }, 9928 rhs_reg, 9929 rhs_reg, 9930 .u(elem_size), 9931 ); 9932 try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ 9933 .base = .{ .reg = lhs_reg }, 9934 .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, 9935 }); 9936 } else if (elem_size > 8) { 9937 try cg.spillEflagsIfOccupied(); 9938 try cg.asmRegisterImmediate( 9939 .{ ._l, .sh }, 9940 rhs_reg, 9941 .u(std.math.log2_int(u64, elem_size)), 9942 ); 9943 try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ 9944 .base = .{ .reg = lhs_reg }, 9945 .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, 9946 }); 9947 } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ 9948 .base = .{ .reg = lhs_reg }, 9949 .mod = .{ .rm = .{ 9950 .size = .qword, 9951 .index = rhs_reg, 9952 .scale = .fromFactor(@intCast(elem_size)), 9953 } }, 9954 }); 9955 res[0] = try ops[0].load(res_ty, .{}, cg); 9956 }, 9957 }, 9958 else => |e| return e, 9959 }; 9960 for (ops) |op| for (res) |r| { 9961 if (op.index == r.index) break; 9962 } else try op.die(cg); 9963 try res[0].moveTo(inst, cg); 9964 }, 9965 .slice_elem_ptr, .ptr_elem_ptr => |air_tag| if (use_old) switch (air_tag) { 9966 else => unreachable, 9967 .slice_elem_ptr => try cg.airSliceElemPtr(inst), 9968 .ptr_elem_ptr => try cg.airPtrElemPtr(inst), 9969 } else { 9970 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 9971 const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; 
9972 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); 9973 try ops[0].toSlicePtr(cg); 9974 const dst_ty = ty_pl.ty.toType(); 9975 if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: { 9976 const elem_size = dst_ty.childType(zcu).abiSize(zcu); 9977 // hack around Sema OPV bugs 9978 if (elem_size == 0) break :zero_offset; 9979 while (true) for (&ops) |*op| { 9980 if (try op.toRegClass(true, .general_purpose, cg)) break; 9981 } else break; 9982 const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); 9983 const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); 9984 if (!std.math.isPowerOfTwo(elem_size)) { 9985 try cg.spillEflagsIfOccupied(); 9986 try cg.asmRegisterRegisterImmediate( 9987 .{ .i_, .mul }, 9988 rhs_reg, 9989 rhs_reg, 9990 .u(elem_size), 9991 ); 9992 try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ 9993 .base = .{ .reg = lhs_reg }, 9994 .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, 9995 }); 9996 } else if (elem_size > 8) { 9997 try cg.spillEflagsIfOccupied(); 9998 try cg.asmRegisterImmediate( 9999 .{ ._l, .sh }, 10000 rhs_reg, 10001 .u(std.math.log2_int(u64, elem_size)), 10002 ); 10003 try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ 10004 .base = .{ .reg = lhs_reg }, 10005 .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, 10006 }); 10007 } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ 10008 .base = .{ .reg = lhs_reg }, 10009 .mod = .{ .rm = .{ 10010 .size = .qword, 10011 .index = rhs_reg, 10012 .scale = .fromFactor(@intCast(elem_size)), 10013 } }, 10014 }); 10015 } 10016 try ops[1].die(cg); 10017 try ops[0].moveTo(inst, cg); 10018 }, 10019 .array_to_slice => if (use_old) try cg.airArrayToSlice(inst) else { 10020 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 10021 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 10022 var len = try cg.tempInit(.usize, .{ 10023 .immediate = cg.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu), 10024 
}); 10025 try ops[0].toPair(&len, cg); 10026 try ops[0].moveTo(inst, cg); 10027 }, 10028 .error_set_has_value => return cg.fail("TODO implement error_set_has_value", .{}), 10029 .union_init => if (use_old) try cg.airUnionInit(inst) else { 10030 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 10031 const extra = cg.air.extraData(Air.UnionInit, ty_pl.payload).data; 10032 const union_ty = ty_pl.ty.toType(); 10033 var ops = try cg.tempsFromOperands(inst, .{extra.init}); 10034 var res = try cg.tempAllocMem(union_ty); 10035 const union_layout = union_ty.unionGetLayout(zcu); 10036 if (union_layout.tag_size > 0) { 10037 var tag_temp = try cg.tempFromValue(try pt.enumValueFieldIndex( 10038 union_ty.unionTagTypeSafety(zcu).?, 10039 extra.field_index, 10040 )); 10041 try res.write(&tag_temp, .{ 10042 .disp = @intCast(union_layout.tagOffset()), 10043 }, cg); 10044 try tag_temp.die(cg); 10045 } 10046 try res.write(&ops[0], .{ 10047 .disp = @intCast(union_layout.payloadOffset()), 10048 }, cg); 10049 try ops[0].die(cg); 10050 try res.moveTo(inst, cg); 10051 }, 10052 .field_parent_ptr => if (use_old) try cg.airFieldParentPtr(inst) else { 10053 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; 10054 const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; 10055 var ops = try cg.tempsFromOperands(inst, .{extra.field_ptr}); 10056 try ops[0].toOffset(-cg.fieldOffset( 10057 ty_pl.ty.toType(), 10058 cg.typeOf(extra.field_ptr), 10059 extra.field_index, 10060 ), cg); 10061 try ops[0].moveTo(inst, cg); 10062 }, 10063 10064 .is_named_enum_value => return cg.fail("TODO implement is_named_enum_value", .{}), 10065 10066 .wasm_memory_size => unreachable, 10067 .wasm_memory_grow => unreachable, 10068 10069 .addrspace_cast => { 10070 const ty_op = air_datas[@intFromEnum(inst)].ty_op; 10071 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); 10072 try ops[0].moveTo(inst, cg); 10073 }, 10074 10075 .vector_store_elem => return cg.fail("TODO implement vector_store_elem", 
.{}),

            .c_va_arg => try cg.airVaArg(inst),
            .c_va_copy => try cg.airVaCopy(inst),
            .c_va_end => try cg.airVaEnd(inst),
            .c_va_start => try cg.airVaStart(inst),

            .work_item_id => unreachable,
            .work_group_size => unreachable,
            .work_group_id => unreachable,
        }
        cg.resetTemps();
        cg.checkInvariantsAfterAirInst();
    }
    verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
}

/// Emits the machine code for the lazily generated symbol `lazy_sym`.
///
/// Only enum types are implemented; any other type tag fails with a TODO error.
/// For an enum, the emitted routine (logged as `<enum>.@tagName`) uses the first two integer
/// parameter registers of the `.auto` calling convention: the first as the base address the
/// result is written through, the second as the enum value to look up.
/// The value is compared against each field's tag in declaration order. On a match, the
/// address of that field's name (an offset into the enum's `const_data` lazy symbol) is
/// written at displacement 0 and the name length at displacement 8, then control jumps to
/// the shared `ret`. If no field matches, execution reaches a `ud2`.
fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ip = &zcu.intern_pool;
    switch (Type.fromInterned(lazy_sym.ty).zigTypeTag(zcu)) {
        .@"enum" => {
            const enum_ty: Type = .fromInterned(lazy_sym.ty);
            wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(pt)});

            // Keep both parameter registers reserved for the whole routine.
            const param_regs = abi.getCAbiIntParamRegs(.auto);
            const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*);
            defer for (param_locks) |lock| self.register_manager.unlockReg(lock);

            const ret_reg = param_regs[0];
            const enum_mcv = MCValue{ .register = param_regs[1] };

            // One forward jump per field, all patched to land on the final `ret`.
            const epilogue_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
            defer self.gpa.free(epilogue_relocs);

            // `data_reg` holds the address of the enum's `const_data` lazy symbol.
            const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
            defer self.register_manager.unlockReg(data_lock);
            try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty.toIntern() });

            var data_off: i32 = 0;
            const tag_names = enum_ty.enumFields(zcu);
            for (epilogue_relocs, 0..) |*epilogue_reloc, tag_index| {
                const tag_name_len = tag_names.get(ip)[tag_index].length(ip);
                const tag_val = try pt.enumValueFieldIndex(enum_ty, @intCast(tag_index));
                const tag_mcv = try self.genTypedValue(tag_val);
                try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
                // The jump target is unknown until the field's code has been emitted.
                const skip_reloc = try self.asmJccReloc(.ne, undefined);

                try self.genSetMem(
                    .{ .reg = ret_reg },
                    0,
                    .usize,
                    .{ .register_offset = .{ .reg = data_reg, .off = data_off } },
                    .{},
                );
                try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{});

                epilogue_reloc.* = try self.asmJmpReloc(undefined);
                self.performReloc(skip_reloc);

                // Each name occupies its length plus one extra byte in the data symbol
                // (presumably a NUL terminator; confirm against the `const_data` emitter).
                data_off += @intCast(tag_name_len + 1);
            }

            try self.asmOpOnly(.{ ._, .ud2 });

            for (epilogue_relocs) |reloc| self.performReloc(reloc);
            try self.asmOpOnly(.{ ._, .ret });
        },
        else => return self.fail(
            "TODO implement {s} for {}",
            .{ @tagName(lazy_sym.kind), Type.fromInterned(lazy_sym.ty).fmt(pt) },
        ),
    }
}

/// Claims every register referenced by `value` for `inst` through the register manager.
/// When `value` is `.eflags` or `.register_overflow`, also records `inst` as the eflags owner.
fn getValue(self: *CodeGen, value: MCValue, inst: ?Air.Inst.Index) !void {
    for (value.getRegs()) |reg| try self.register_manager.getReg(reg, inst);
    switch (value) {
        else => {},
        .eflags, .register_overflow => self.eflags_inst = inst,
    }
}

/// Claims for `inst` only those registers of `value` that are currently free.
/// Unlike `getValue`, this cannot fail and never updates `eflags_inst`.
fn getValueIfFree(self: *CodeGen, value: MCValue, inst: ?Air.Inst.Index) void {
    for (value.getRegs()) |reg| if (self.register_manager.isRegFree(reg))
        self.register_manager.getRegAssumeFree(reg, inst);
}

/// Releases `reg` in the register manager. For a register of class `.x87` an instruction
/// with the tag `.{ .f_, .free }` is emitted on it as well (presumably `ffree`; confirm
/// against the MIR tag table).
fn freeReg(self: *CodeGen, reg: Register) !void {
    self.register_manager.freeReg(reg);
    if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg);
}

/// Frees every register referenced by `value` and clears `eflags_inst` for the
/// flag-carrying variants (`.register_overflow`, `.eflags`).
/// All other variants are left untouched.
fn freeValue(self: *CodeGen, value: MCValue) !void {
    switch (value) {
        .register => |reg| try self.freeReg(reg),
        inline .register_pair,
        .register_triple,
        .register_quadruple,
        => |regs| for (regs) |reg| try self.freeReg(reg),
        .register_offset, .indirect => |reg_off| try self.freeReg(reg_off.reg),
        .register_overflow => |reg_ov| {
            try self.freeReg(reg_ov.reg);
            self.eflags_inst = null;
        },
        .register_mask => |reg_mask| try self.freeReg(reg_mask.reg),
        .eflags => self.eflags_inst = null,
        else => {}, // TODO process stack allocation death
    }
}

/// Advances the big-tomb iterator `bt` by one operand. If it reports a death and `operand`
/// refers to an instruction (not a constant), that instruction's death is processed.
fn feed(self: *CodeGen, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) !void {
    if (bt.feed()) if (operand.toIndex()) |inst| try self.processDeath(inst);
}

/// Asserts there is already capacity to insert into top branch inst_table.
/// NOTE(review): no insertion is visible in this function; the remark above may be stale.
/// What the code does: looks up the tracking entry of `inst` (asserting that it exists) and
/// calls `die` on it.
fn processDeath(self: *CodeGen, inst: Air.Inst.Index) !void {
    try self.inst_tracking.getPtr(inst).?.die(self, inst);
}

/// Records `result` as the tracked value of `inst`.
/// If liveness reports `inst` as unused and it is not an `arg` instruction, nothing is
/// recorded and `result` is asserted to be `.none`, `.dead` or `.unreach`.
/// Asserts that `inst_tracking` has spare capacity and does not already contain `inst`.
fn finishAirResult(self: *CodeGen, inst: Air.Inst.Index, result: MCValue) void {
    if (self.liveness.isUnused(inst) and self.air.instructions.items(.tag)[@intFromEnum(inst)] != .arg) switch (result) {
        .none, .dead, .unreach => {},
        else => unreachable, // Why didn't the result die?
    } else {
        tracking_log.debug("{} => {} (birth)", .{ inst, result });
        self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result));
        // In some cases, an operand may be reused as the result.
        // If that operand died and was a register, it was freed by
        // processDeath, so we have to "re-allocate" the register.
        self.getValueIfFree(result, inst);
    }
}

/// Finishes lowering of `inst`: processes the death of every operand whose tomb bit is set
/// and that was not marked as reused, then records `result` via `finishAirResult`.
/// Operand slots holding `.none` and operands that are not instructions are skipped.
fn finishAir(
    self: *CodeGen,
    inst: Air.Inst.Index,
    result: MCValue,
    operands: [Liveness.bpi - 1]Air.Inst.Ref,
) !void {
    const tomb_bits = self.liveness.getTombBits(inst);
    for (0.., operands) |op_index, op| {
        // Bit `op_index` of the tomb bits is set when that operand dies at `inst`.
        if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue;
        if (self.reused_operands.isSet(op_index)) continue;
        try self.processDeath(op.toIndexAllowNone() orelse continue);
    }
    self.finishAirResult(inst, result);
}

/// Result of `computeFrameLayout`.
const FrameLayout = struct {
    /// All ones, shifted left by `@intFromEnum` of the required alignment when the stack
    /// has to be realigned; all ones otherwise.
    stack_mask: u32,
    /// Final rsp-relative offset minus the displacement assigned to the call frame.
    stack_adjust: u32,
    /// Callee-preserved registers that were allocated and therefore must be saved.
    save_reg_list: Mir.RegisterList,
};

/// Assigns `frame_index` the location `base + offset.*` and advances `offset.*` by the
/// size of that frame allocation. When `aligned` is true, `offset.*` is first rounded up
/// to the allocation's alignment.
fn setFrameLoc(
    self: *CodeGen,
    frame_index: FrameIndex,
    base: Register,
    offset: *i32,
    comptime aligned: bool,
) void {
    const frame_i = @intFromEnum(frame_index);
    if (aligned) {
        const alignment = self.frame_allocs.items(.abi_align)[frame_i];
        offset.* = @intCast(alignment.forward(@intCast(offset.*)));
    }
    self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* });
    offset.* += self.frame_allocs.items(.abi_size)[frame_i];
}

/// Assigns a location to every frame allocation and returns the resulting layout.
///
/// `base_ptr`, `ret_addr` and `args_frame` are placed relative to `rbp` in that order
/// without alignment. `call_frame`, `stack_frame` and then all remaining allocations
/// (sorted by descending alignment) are placed relative to `rsp`, each aligned.
/// The size of the call frame is overwritten so that the total is a multiple of the
/// required alignment. `cc` selects the set of callee-preserved registers to consider
/// for saving.
fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention.Tag) !FrameLayout {
    const frame_allocs_len = self.frame_allocs.len;
    try self.frame_locs.resize(self.gpa, frame_allocs_len);
    const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count);
    defer self.gpa.free(stack_frame_order);

    const frame_size = self.frame_allocs.items(.abi_size);
    const frame_align = self.frame_allocs.items(.abi_align);
    const frame_offset = self.frame_locs.items(.disp);

    // Collect all unnamed frame indices, then order them by descending alignment.
    for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index|
        frame_order.* = @enumFromInt(frame_index);
    {
        const SortContext = struct {
            frame_align: @TypeOf(frame_align),
            pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool {
                return context.frame_align[@intFromEnum(lhs)].compare(.gt, context.frame_align[@intFromEnum(rhs)]);
            }
        };
        const sort_context = SortContext{ .frame_align = frame_align };
        std.mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan);
    }

    const call_frame_align = frame_align[@intFromEnum(FrameIndex.call_frame)];
    const stack_frame_align = frame_align[@intFromEnum(FrameIndex.stack_frame)];
    const args_frame_align = frame_align[@intFromEnum(FrameIndex.args_frame)];
    const needed_align = call_frame_align.max(stack_frame_align);
    // The stack only needs realigning when more alignment is required than the
    // args frame already has.
    const need_align_stack = needed_align.compare(.gt, args_frame_align);

    // Create list of registers to save in the prologue.
    // TODO handle register classes
    var save_reg_list: Mir.RegisterList = .empty;
    const callee_preserved_regs = abi.getCalleePreservedRegs(cc);
    for (callee_preserved_regs) |reg| {
        if (self.register_manager.isRegAllocated(reg)) {
            save_reg_list.push(callee_preserved_regs, reg);
        }
    }

    var rbp_offset: i32 = 0;
    self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
    self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
    self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
    // Without realignment, the saved registers and everything below the args frame
    // count towards the alignment of the rsp-relative area.
    const stack_frame_align_offset = if (need_align_stack)
        0
    else
        save_reg_list.size(self.target) + frame_offset[@intFromEnum(FrameIndex.args_frame)];

    var rsp_offset: i32 = 0;
    self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
    self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true);
    for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true);
    // Round the total up to `needed_align`, measured including `stack_frame_align_offset`.
    rsp_offset += stack_frame_align_offset;
    rsp_offset = @intCast(needed_align.forward(@intCast(rsp_offset)));
    rsp_offset -= stack_frame_align_offset;
    frame_size[@intFromEnum(FrameIndex.call_frame)] =
        @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.stack_frame)]);

    return .{
        .stack_mask = @as(u32, std.math.maxInt(u32)) << @intCast(if (need_align_stack) @intFromEnum(needed_align) else 0),
        .stack_adjust = @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.call_frame)]),
        .save_reg_list = save_reg_list,
    };
}

/// Returns the alignment that can be assumed for `frame_addr`: the smaller of the frame
/// allocation's own alignment and the alignment implied by the trailing zero bits of
/// `frame_addr.off`.
fn getFrameAddrAlignment(self: *CodeGen, frame_addr: bits.FrameAddr) InternPool.Alignment {
    const alloc_align = self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_align;
    return @enumFromInt(@min(@intFromEnum(alloc_align), @ctz(frame_addr.off)));
}

/// Returns the size of the frame allocation minus `frame_addr.off`, i.e. what remains of
/// the allocation starting at that offset. The offset must fit in a `u31` (safety-checked cast).
fn getFrameAddrSize(self: *CodeGen, frame_addr: bits.FrameAddr) u32 {
    return self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_size - @as(u31, @intCast(frame_addr.off));
}

/// Returns a frame index suitable for `alloc`.
/// The alignment of the `stack_frame` entry is raised to at least `alloc.abi_align`.
/// A previously freed index is reused when its size equals `alloc.abi_size` exactly (its
/// alignment is raised if necessary); otherwise a new allocation is appended.
fn allocFrameIndex(self: *CodeGen, alloc: FrameAlloc) !FrameIndex {
    const frame_allocs_slice = self.frame_allocs.slice();
    const frame_size = frame_allocs_slice.items(.abi_size);
    const frame_align = frame_allocs_slice.items(.abi_align);

    const stack_frame_align = &frame_align[@intFromEnum(FrameIndex.stack_frame)];
    stack_frame_align.* = stack_frame_align.max(alloc.abi_align);

    for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| {
        const abi_size = frame_size[@intFromEnum(frame_index)];
        if (abi_size != alloc.abi_size) continue;
        const abi_align = &frame_align[@intFromEnum(frame_index)];
        abi_align.* = abi_align.max(alloc.abi_align);

        _ = self.free_frame_indices.swapRemoveAt(free_i);
        return frame_index;
    }
    const frame_index: FrameIndex = @enumFromInt(self.frame_allocs.len);
    try self.frame_allocs.append(self.gpa, alloc);
    return frame_index;
}

/// Use a pointer instruction as the basis for allocating stack memory.
/// The allocation is sized after the pointee type and aligned to the pointer's alignment
/// (at least 1). Fails if the pointee size does not fit in a `u32`.
fn allocMemPtr(self: *CodeGen, inst: Air.Inst.Index) !FrameIndex {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ptr_ty = self.typeOfIndex(inst);
    const val_ty = ptr_ty.childType(zcu);
    return self.allocFrameIndex(.init(.{
        .size = std.math.cast(u32, val_ty.abiSize(zcu)) orelse {
            return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(pt)});
        },
        .alignment = ptr_ty.ptrAlignment(zcu).max(.@"1"),
    }));
}

/// Allocates a register or stack slot for the result of `inst`, using the result type of
/// `inst`. See `allocRegOrMemAdvanced`.
fn allocRegOrMem(self: *CodeGen, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
    return self.allocRegOrMemAdvanced(self.typeOfIndex(inst), inst, reg_ok);
}

/// Allocates a register or stack slot for a value of type `elem_ty` that is not associated
/// with any instruction. See `allocRegOrMemAdvanced`.
fn allocTempRegOrMem(self: *CodeGen, elem_ty: Type, reg_ok: bool) !MCValue {
    return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok);
}

/// Allocates storage for a value of type `ty`, optionally owned by `inst`.
///
/// A register is tried only when `reg_ok` is set, the ABI size is a power of two, and the
/// size does not exceed the limit for the type: 16 for floats, the current vector size for
/// vectors, and 8 for everything else. 80-bit floats (scalar or as vector elements) always
/// go to memory. If no register is available, or the checks fail, a new frame index is
/// allocated and returned as `.load_frame`.
/// Fails if the ABI size does not fit in a `u32`.
fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
    const pt = self.pt;
    const zcu = pt.zcu;
    const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse {
        return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(pt)});
    };

    if (reg_ok) need_mem: {
        if (std.math.isPowerOfTwo(abi_size) and abi_size <= @as(u32, switch (ty.zigTypeTag(zcu)) {
            .float => switch (ty.floatBits(self.target.*)) {
                16, 32, 64, 128 => 16,
                80 => break :need_mem,
                else => unreachable,
            },
            .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
                .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
                    16, 32, 64, 128 => self.vectorSize(.float),
                    80 => break :need_mem,
                    else => unreachable,
                },
                else => self.vectorSize(.int),
            },
            else => 8,
        })) {
            if (self.register_manager.tryAllocReg(inst, self.regSetForType(ty))) |reg| {
                return MCValue{ .register = registerAlias(reg, abi_size) };
            }
        }
    }

    const frame_index = try self.allocFrameIndex(.initSpill(ty, zcu));
    return .{ .load_frame = .{ .index = frame_index } };
}

/// Returns the register class used for values of type `ty`: `.x87` for 80-bit floats,
/// `.sse` for other floats and for vectors whose element type is not `bool`, and
/// `.general_purpose` for everything else.
fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
    const pt = self.pt;
    const zcu = pt.zcu;
    return switch (ty.zigTypeTag(zcu)) {
        .float => switch (ty.floatBits(self.target.*)) {
            80 => .x87,
            else => .sse,
        },
        .vector => switch (ty.childType(zcu).toIntern()) {
            .bool_type => .general_purpose,
            else => .sse,
        },
        else => .general_purpose,
    };
}

/// Returns the allocatable register set for `rc`.
/// `.segment` and `.ip` must not be passed; `.mmx` is not implemented and panics.
fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet {
    return switch (rc) {
        .general_purpose => abi.RegisterClass.gp,
        .segment, .ip => unreachable,
        .x87 => abi.RegisterClass.x87,
        .mmx => @panic("TODO"),
        .sse => abi.RegisterClass.sse,
    };
}

/// Returns the allocatable register set for values of type `ty`.
fn regSetForType(self: *CodeGen, ty: Type) RegisterManager.RegisterBitSet {
    return regSetForRegClass(self.regClassForType(ty));
}

/// Returns the vector size available for `kind` on the current target: 32 when `avx2`
/// (for `.int`) or `avx` (for `.float`) is available, 16 when only `sse` is, and 8
/// otherwise. The unit is bytes, judging by its comparison against ABI sizes in
/// `allocRegOrMemAdvanced`.
fn vectorSize(cg: *CodeGen, kind: enum { int, float }) u6 {
    return if (cg.hasFeature(switch (kind) {
        .int => .avx2,
        .float => .avx,
    })) 32 else if (cg.hasFeature(.sse)) 16 else 8;
}

// NOTE(review): this function looks unfinished and can only build while nothing references
// it. The `scalar:` block produces no value when `scalar_size > vector_size`, and the result
// of `pt.vectorType` is neither unwrapped nor returned. The intended behaviour for oversized
// scalars is not visible here, so the body is left as found.
fn limbType(cg: *CodeGen, ty: Type) Type {
    const pt = cg.pt;
    const zcu = pt.zcu;
    const vector_size = cg.vectorSize(if (ty.isRuntimeFloat()) .float else .int);
    const scalar_ty, const scalar_size = scalar: {
        const scalar_ty = ty.scalarType(zcu);
        const scalar_size = scalar_ty.abiSize(zcu);
        if (scalar_size <= vector_size) break :scalar .{ scalar_ty, scalar_size };
    };
    pt.vectorType(.{
        .len = @divExact(vector_size, scalar_size),
        .child = scalar_ty.toIntern(),
    });
}

/// Snapshot of register allocation and tracking, taken by `saveState` and applied by
/// `restoreState`.
const State = struct {
    /// Owning instruction per tracked register; only meaningful for allocated registers.
    registers: RegisterManager.TrackedRegisters,
    /// Tracking entry of the owning instruction per register; only meaningful for
    /// allocated registers.
    reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
    /// Set of registers that were free when the snapshot was taken.
    free_registers: RegisterManager.RegisterBitSet,
    /// Number of entries in `inst_tracking` when the snapshot was started.
    inst_tracking_len: u32,
    /// Value of `scope_generation` when the snapshot was started.
    scope_generation: u32,
};

/// Starts a snapshot: records only `inst_tracking_len` and `scope_generation`.
/// The register fields stay undefined until `saveRetroactiveState` fills them in.
fn initRetroactiveState(self: *CodeGen) State {
    var state: State = undefined;
    state.inst_tracking_len = @intCast(self.inst_tracking.count());
    state.scope_generation = self.scope_generation;
    return state;
}

/// Completes a snapshot started by `initRetroactiveState`: spills eflags if occupied, then
/// records the owner and tracking entry of every allocated register plus the free set.
fn saveRetroactiveState(self: *CodeGen, state: *State) !void {
    try self.spillEflagsIfOccupied();
    const free_registers = self.register_manager.free_registers;
    // Iterate over the registers that are NOT free, i.e. currently allocated.
    var it = free_registers.iterator(.{ .kind = .unset });
    while (it.next()) |index| {
        const tracked_inst = self.register_manager.registers[index];
        state.registers[index] = tracked_inst;
        state.reg_tracking[index] = self.inst_tracking.get(tracked_inst).?;
    }
    state.free_registers = free_registers;
}

/// Takes a complete snapshot of the current state in one step.
fn saveState(self: *CodeGen) !State {
    var state = self.initRetroactiveState();
    try self.saveRetroactiveState(&state);
    return state;
}

/// Brings register allocation back to the snapshot `state`.
///
/// `deaths` are processed before registers are reconciled. The comptime `opts` select what
/// is done:
/// - `close_scope`: tracking entries added after the snapshot die and are removed.
/// - `resurrect`: `resurrect` is called on the entries from `Temp.Index.max` up to the
///   snapshot length, with the snapshot's scope generation.
/// - `emit_instructions`: for each register, the current owner is spilled and the
///   snapshot's owner is materialized; an eflags owner is spilled as well.
/// - `update_tracking`: the register manager and tracking entries are updated to match the
///   snapshot, and `eflags_inst` is cleared. When this is false, the snapshot's registers
///   are instead locked until this function returns.
fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, comptime opts: struct {
    emit_instructions: bool,
    update_tracking: bool,
    resurrect: bool,
    close_scope: bool,
}) !void {
    if (opts.close_scope) {
        for (
            self.inst_tracking.keys()[state.inst_tracking_len..],
            self.inst_tracking.values()[state.inst_tracking_len..],
        ) |inst, *tracking| try tracking.die(self, inst);
        self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
    }

    if (opts.resurrect) for (
        self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len],
        self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len],
    ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
    for (deaths) |death| try self.processDeath(death);

    // The lock list is only needed when tracking is not updated; in the other case both
    // `stack` and `reg_locks` are `void` values.
    const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).array.len]RegisterLock;
    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        if (opts.update_tracking)
        {} else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);

    var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity(
        stack.get(),
        @typeInfo(ExpectedContents).array.len,
    );
    defer if (!opts.update_tracking) {
        for (reg_locks.items) |lock| self.register_manager.unlockReg(lock);
        reg_locks.deinit();
    };

    for (
        0..,
        self.register_manager.registers,
        state.registers,
        state.reg_tracking,
    ) |reg_i, current_slot, target_slot, reg_tracking| {
        const reg_index: RegisterManager.TrackedIndex = @intCast(reg_i);
        const current_maybe_inst = if (self.register_manager.isRegIndexFree(reg_index)) null else current_slot;
        const target_maybe_inst = if (state.free_registers.isSet(reg_index)) null else target_slot;
        // Every owner recorded in the snapshot must have been tracked before it was taken.
        if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst|
            assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len);
        if (opts.emit_instructions) {
            if (current_maybe_inst) |current_inst|
                try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst);
            if (target_maybe_inst) |target_inst|
                try self.inst_tracking.getPtr(target_inst).?.materialize(self, target_inst, reg_tracking);
        }
        if (opts.update_tracking) {
            if (current_maybe_inst) |current_inst| {
                try self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst);
                self.register_manager.freeRegIndex(reg_index);
            }
            if (target_maybe_inst) |target_inst| {
                self.register_manager.getRegIndexAssumeFree(reg_index, target_maybe_inst);
                self.inst_tracking.getPtr(target_inst).?.trackMaterialize(target_inst, reg_tracking);
            }
        } else if (target_maybe_inst) |_|
            try reg_locks.append(self.register_manager.lockRegIndexAssumeUnused(reg_index));
    }
    if (opts.emit_instructions) if (self.eflags_inst) |inst|
        try self.inst_tracking.getPtr(inst).?.spill(self, inst);
    if (opts.update_tracking) if (self.eflags_inst) |inst| {
        self.eflags_inst = null;
        try self.inst_tracking.getPtr(inst).?.trackSpill(self, inst);
    };

    if (opts.update_tracking and std.debug.runtime_safety) {
        assert(self.eflags_inst == null);
        assert(self.register_manager.free_registers.eql(state.free_registers));
        var used_reg_it = state.free_registers.iterator(.{ .kind = .unset });
        while (used_reg_it.next()) |index|
            assert(self.register_manager.registers[index] == state.registers[index]);
    }
}

/// Spills `inst`, which must currently be tracked in `reg`, and updates its tracking.
/// Does nothing if `inst` has no tracking entry.
pub fn spillInstruction(self: *CodeGen, reg: Register, inst: Air.Inst.Index) !void {
    const tracking = self.inst_tracking.getPtr(inst) orelse return;
    for (tracking.getRegs()) |tracked_reg| {
        if (tracked_reg.id() == reg.id()) break;
    } else unreachable; // spilled reg not tracked with spilled instruction
    try tracking.spill(self, inst);
    try tracking.trackSpill(self, inst);
}

/// If an instruction currently owns eflags, clears that ownership, spills the instruction
/// and updates its tracking. Asserts that the owner's tracked value has a condition.
pub fn spillEflagsIfOccupied(self: *CodeGen) !void {
    if (self.eflags_inst) |inst| {
        self.eflags_inst = null;
        const tracking = self.inst_tracking.getPtr(inst).?;
        assert(tracking.getCondition() != null);
        try tracking.spill(self, inst);
        try tracking.trackSpill(self, inst);
    }
}

/// Spills every register in the set that `abi.getCallerPreservedRegs` returns for `cc`.
/// Only `.auto`, `.x86_64_sysv` and `.x86_64_win` are supported.
pub fn spillCallerPreservedRegs(self: *CodeGen, cc: std.builtin.CallingConvention.Tag) !void {
    switch (cc) {
        inline .auto, .x86_64_sysv, .x86_64_win => |tag| try self.spillRegisters(abi.getCallerPreservedRegs(tag)),
        else => unreachable,
    }
}

/// Requests each of the comptime-known `registers` from the register manager with no
/// owning instruction.
pub fn spillRegisters(self: *CodeGen, comptime registers: []const Register) !void {
    inline for (registers) |reg| try self.register_manager.getKnownReg(reg, null);
}

/// Copies a value to a register without tracking the register. The register is not considered
/// allocated. A second call to `copyToTmpRegister` may return the same register.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToTmpRegister(self: *CodeGen, ty: Type, mcv: MCValue) !Register {
    const reg = try self.register_manager.allocReg(null, self.regSetForType(ty));
    try self.genSetReg(reg, ty, mcv, .{});
    return reg;
}

/// Allocates a new register and copies `mcv` into it.
/// `reg_owner` is the instruction that gets associated with the register in the register table.
/// This can have a side effect of spilling instructions to the stack to free up a register.
/// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
fn copyToRegisterWithInstTracking(
    self: *CodeGen,
    reg_owner: Air.Inst.Index,
    ty: Type,
    mcv: MCValue,
) !MCValue {
    // The register comes from the set appropriate for `ty` and is owned by `reg_owner`.
    const reg: Register = try self.register_manager.allocReg(reg_owner, self.regSetForType(ty));
    try self.genSetReg(reg, ty, mcv, .{});
    return MCValue{ .register = reg };
}

/// Lowers an AIR `alloc`: reserves memory through `allocMemPtr` and yields its
/// address as a `lea_frame` value.
fn airAlloc(self: *CodeGen, inst: Air.Inst.Index) !void {
    const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } };
    return self.finishAir(inst, result, .{ .none, .none, .none });
}

/// Lowers an AIR `ret_ptr`. When `ret_mcv.long` is `.none`, fresh frame memory is
/// allocated and its address returned. When it is `.load_frame`, that value is loaded
/// into a register owned by `inst` and combined with `ret_mcv.short.indirect.off`.
/// Any other `ret_mcv.long` is unreachable.
fn airRetPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const result: MCValue = switch (self.ret_mcv.long) {
        else => unreachable,
        .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
        .load_frame => .{ .register_offset = .{
            .reg = (try self.copyToRegisterWithInstTracking(
                inst,
                self.typeOfIndex(inst),
                self.ret_mcv.long,
            )).register,
            .off = self.ret_mcv.short.indirect.off,
        } },
    };
    return self.finishAir(inst, result, .{ .none, .none, .none });
}

/// Lowers a float truncation.
/// Conversions that have no inline sequence below are emitted as a call to the
/// `__trunc{src}f{dst}f2` library routine. The inline cases are f32 -> f16 (requires
/// F16C) and f64 -> f32.
fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_ty = self.typeOfIndex(inst);
    const dst_bits = dst_ty.floatBits(self.target.*);
    const src_ty = self.typeOf(ty_op.operand);
    const src_bits = src_ty.floatBits(self.target.*);

    const result = result: {
        // `true` means "no inline lowering available, call the library routine".
        if (switch (dst_bits) {
            16 => switch (src_bits) {
                32 => !self.hasFeature(.f16c),
                64, 80, 128 => true,
                else => unreachable,
            },
            32 => switch (src_bits) {
                64 => false,
                80, 128 => true,
                else => unreachable,
            },
            64 => switch (src_bits) {
                80, 128 => true,
                else => unreachable,
            },
            80 => switch (src_bits) {
                128 => true,
                else => unreachable,
            },
            else => unreachable,
        }) {
            // The `?` placeholders size the buffer for the two one-character type names.
            var callee_buf: ["__trunc?f?f2".len]u8 = undefined;
            break :result try self.genCall(.{ .lib = .{
                .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(),
                .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()},
                .callee = std.fmt.bufPrint(&callee_buf, "__trunc{c}f{c}f2", .{
                    floatCompilerRtAbiName(src_bits),
                    floatCompilerRtAbiName(dst_bits),
                }) catch unreachable,
            } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
        }

        const src_mcv = try self.resolveInst(ty_op.operand);
        // Reuse the operand's register when possible, otherwise copy into a new one.
        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
        const dst_reg = dst_mcv.getReg().?.to128();
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        if (dst_bits == 16) {
            assert(self.hasFeature(.f16c));
            switch (src_bits) {
                32 => {
                    const mat_src_reg = if (src_mcv.isRegister())
                        src_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(src_ty, src_mcv);
                    // The immediate selects the rounding mode; here it defers to MXCSR.
                    try self.asmRegisterRegisterImmediate(
                        .{ .v_, .cvtps2ph },
                        dst_reg,
                        mat_src_reg.to128(),
                        .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
                    );
                },
                else => unreachable,
            }
        } else {
            assert(src_bits == 64 and dst_bits == 32);
            // Four variants: AVX or not, crossed with memory or register source.
            if (self.hasFeature(.avx)) if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
                .{ .v_ss, .cvtsd2 },
                dst_reg,
                dst_reg,
                try src_mcv.mem(self, .{ .size = .qword }),
            ) else try self.asmRegisterRegisterRegister(
                .{ .v_ss, .cvtsd2 },
                dst_reg,
                dst_reg,
                (if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
            ) else if (src_mcv.isBase()) try self.asmRegisterMemory(
                .{ ._ss, .cvtsd2 },
                dst_reg,
                try src_mcv.mem(self, .{ .size = .qword }),
            ) else try self.asmRegisterRegister(
                .{ ._ss, .cvtsd2 },
                dst_reg,
                (if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
            );
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers a float extension, for scalars and some vectors.
/// Scalar conversions without an inline sequence call the `__extend{src}f{dst}f2`
/// library routine. Inline cases are f16 sources (requires F16C) and f32 -> f64.
/// Vector shapes that are not handled fail with a "TODO implement airFpext" error.
fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_ty = self.typeOfIndex(inst);
    const dst_scalar_ty = dst_ty.scalarType(zcu);
    const dst_bits = dst_scalar_ty.floatBits(self.target.*);
    const src_ty = self.typeOf(ty_op.operand);
    const src_scalar_ty = src_ty.scalarType(zcu);
    const src_bits = src_scalar_ty.floatBits(self.target.*);

    const result = result: {
        // `true` means "no inline lowering available, call the library routine".
        if (switch (src_bits) {
            16 => switch (dst_bits) {
                32, 64 => !self.hasFeature(.f16c),
                80, 128 => true,
                else => unreachable,
            },
            32 => switch (dst_bits) {
                64 => false,
                80, 128 => true,
                else => unreachable,
            },
            64 => switch (dst_bits) {
                80, 128 => true,
                else => unreachable,
            },
            80 => switch (dst_bits) {
                128 => true,
                else => unreachable,
            },
            else => unreachable,
        }) {
            // The library routines are scalar only.
            if (dst_ty.isVector(zcu)) break :result null;
            var callee_buf: ["__extend?f?f2".len]u8 = undefined;
            break :result try self.genCall(.{ .lib = .{
                .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(),
                .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()},
                .callee = std.fmt.bufPrint(&callee_buf, "__extend{c}f{c}f2", .{
                    floatCompilerRtAbiName(src_bits),
                    floatCompilerRtAbiName(dst_bits),
                }) catch unreachable,
            } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
        }

        const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
        const src_mcv = try self.resolveInst(ty_op.operand);
        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
        const dst_reg = dst_mcv.getReg().?;
        // The destination alias is at least 16 bytes wide.
        const dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(zcu), 16)));
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        const vec_len = if (dst_ty.isVector(zcu)) dst_ty.vectorLen(zcu) else 1;
        if (src_bits == 16) {
            assert(self.hasFeature(.f16c));
            const mat_src_reg = if (src_mcv.isRegister())
                src_mcv.getReg().?
            else
                try self.copyToTmpRegister(src_ty, src_mcv);
            // First widen f16 -> f32; an f64 destination needs one more step below.
            try self.asmRegisterRegister(
                .{ .v_ps, .cvtph2 },
                dst_alias,
                registerAlias(mat_src_reg, src_abi_size),
            );
            switch (dst_bits) {
                32 => {},
                64 => try self.asmRegisterRegisterRegister(
                    .{ .v_sd, .cvtss2 },
                    dst_alias,
                    dst_alias,
                    dst_alias,
                ),
                else => unreachable,
            }
        } else {
            assert(src_bits == 32 and dst_bits == 64);
            // With AVX, lengths 1 and 2...4 are handled; without it only 1 and 2.
            if (self.hasFeature(.avx)) switch (vec_len) {
                1 => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
                    .{ .v_sd, .cvtss2 },
                    dst_alias,
                    dst_alias,
                    try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
                ) else try self.asmRegisterRegisterRegister(
                    .{ .v_sd, .cvtss2 },
                    dst_alias,
                    dst_alias,
                    registerAlias(if (src_mcv.isRegister())
                        src_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
                ),
                2...4 => if (src_mcv.isBase()) try self.asmRegisterMemory(
                    .{ .v_pd, .cvtps2 },
                    dst_alias,
                    try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
                ) else try self.asmRegisterRegister(
                    .{ .v_pd, .cvtps2 },
                    dst_alias,
                    registerAlias(if (src_mcv.isRegister())
                        src_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
                ),
                else => break :result null,
            } else if (src_mcv.isBase()) try self.asmRegisterMemory(
                switch (vec_len) {
                    1 => .{ ._sd, .cvtss2 },
                    2 => .{ ._pd, .cvtps2 },
                    else => break :result null,
                },
                dst_alias,
                try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
            ) else try self.asmRegisterRegister(
                switch (vec_len) {
                    1 => .{ ._sd, .cvtss2 },
                    2 => .{ ._pd, .cvtps2 },
                    else => break :result null,
                },
                dst_alias,
                registerAlias(if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
            );
        }
        break :result dst_mcv;
    } orelse return self.fail("TODO implement airFpext from {} to {}", .{
        src_ty.fmt(pt), dst_ty.fmt(pt),
    });
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers an integer cast between integer types or integer vectors.
///
/// Vectors are dispatched on how the destination element size compares to the source
/// element size: narrowing uses pack instructions, equal sizes copy or reuse the
/// operand, and widening uses `pmovsx`/`pmovzx`-style tags when SSE4.1 is available or
/// an unpack-with-sign-mask sequence otherwise. Unsupported shapes fail with a
/// "TODO implement airIntCast" error.
///
/// Scalars reuse or copy the operand, then truncate and, for multi-limb destinations in
/// memory, fill the extra limbs with zero or with the sign.
fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const src_ty = self.typeOf(ty_op.operand);
    const dst_ty = self.typeOfIndex(inst);

    const result = @as(?MCValue, result: {
        const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));

        const src_int_info = src_ty.intInfo(zcu);
        const dst_int_info = dst_ty.intInfo(zcu);
        // Sign-extend only when both source and destination are signed.
        const extend = switch (src_int_info.signedness) {
            .signed => dst_int_info,
            .unsigned => src_int_info,
        }.signedness;

        const src_mcv = try self.resolveInst(ty_op.operand);
        if (dst_ty.isVector(zcu)) {
            const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
            const max_abi_size = @max(dst_abi_size, src_abi_size);
            if (max_abi_size > self.vectorSize(.int)) break :result null;
            const has_avx = self.hasFeature(.avx);

            const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu);
            const src_elem_abi_size = src_ty.childType(zcu).abiSize(zcu);
            switch (std.math.order(dst_elem_abi_size, src_elem_abi_size)) {
                // Narrowing: pack with signed or unsigned saturation.
                .lt => {
                    const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
                        else => break :result null,
                        1 => switch (src_elem_abi_size) {
                            else => break :result null,
                            2 => switch (dst_int_info.signedness) {
                                .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw },
                                .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
                            },
                        },
                        2 => switch (src_elem_abi_size) {
                            else => break :result null,
                            4 => switch (dst_int_info.signedness) {
                                .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd },
                                .unsigned => if (has_avx)
                                    .{ .vp_w, .ackusd }
                                else if (self.hasFeature(.sse4_1))
                                    .{ .p_w, .ackusd }
                                else
                                    break :result null,
                            },
                        },
                    };

                    const dst_mcv: MCValue = if (src_mcv.isRegister() and
                        self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                        src_mcv
                    else if (has_avx and src_mcv.isRegister())
                        .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
                    else
                        try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv);
                    const dst_reg = dst_mcv.getReg().?;
                    const dst_alias = registerAlias(dst_reg, dst_abi_size);

                    if (has_avx) try self.asmRegisterRegisterRegister(
                        mir_tag,
                        dst_alias,
                        registerAlias(if (src_mcv.isRegister())
                            src_mcv.getReg().?
                        else
                            dst_reg, src_abi_size),
                        dst_alias,
                    ) else try self.asmRegisterRegister(
                        mir_tag,
                        dst_alias,
                        dst_alias,
                    );
                    break :result dst_mcv;
                },
                // Same element size: reuse the operand or copy it unchanged.
                .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                    break :result src_mcv
                else {
                    const dst_mcv = try self.allocRegOrMem(inst, true);
                    try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
                    break :result dst_mcv;
                },
                // Widening with SSE4.1: one move-with-extension instruction.
                .gt => if (self.hasFeature(.sse4_1)) {
                    const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) {
                        else => break :result null,
                        2 => if (has_avx) .vp_w else .p_w,
                        4 => if (has_avx) .vp_d else .p_d,
                        8 => if (has_avx) .vp_q else .p_q,
                    }, switch (src_elem_abi_size) {
                        else => break :result null,
                        1 => switch (extend) {
                            .signed => .movsxb,
                            .unsigned => .movzxb,
                        },
                        2 => switch (extend) {
                            .signed => .movsxw,
                            .unsigned => .movzxw,
                        },
                        4 => switch (extend) {
                            .signed => .movsxd,
                            .unsigned => .movzxd,
                        },
                    } };

                    const dst_mcv: MCValue = if (src_mcv.isRegister() and
                        self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                        src_mcv
                    else
                        .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) };
                    const dst_reg = dst_mcv.getReg().?;
                    const dst_alias = registerAlias(dst_reg, dst_abi_size);

                    if (src_mcv.isBase()) try self.asmRegisterMemory(
                        mir_tag,
                        dst_alias,
                        try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
                    ) else try self.asmRegisterRegister(
                        mir_tag,
                        dst_alias,
                        registerAlias(if (src_mcv.isRegister())
                            src_mcv.getReg().?
                        else
                            try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
                    );
                    break :result dst_mcv;
                } else {
                    // Widening without SSE4.1: interleave each source element with an
                    // extension element (zero, or a sign mask for signed extension).
                    // Each unpack doubles the element width, so the source element
                    // must be exactly half the destination element.
                    const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
                        else => break :result null,
                        2 => switch (src_elem_abi_size) {
                            else => break :result null,
                            1 => .{ .p_, .unpcklbw },
                        },
                        4 => switch (src_elem_abi_size) {
                            else => break :result null,
                            2 => .{ .p_, .unpcklwd },
                        },
                        8 => switch (src_elem_abi_size) {
                            else => break :result null,
                            // `unpckldq` interleaves 4-byte lanes, so it widens 4-byte
                            // elements (not 2-byte ones) to 8 bytes.
                            4 => .{ .p_, .unpckldq },
                        },
                    };

                    const dst_mcv: MCValue = if (src_mcv.isRegister() and
                        self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                        src_mcv
                    else
                        try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
                    const dst_reg = dst_mcv.getReg().?;

                    const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
                    const ext_alias = registerAlias(ext_reg, src_abi_size);
                    const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg);
                    defer self.register_manager.unlockReg(ext_lock);

                    // Zero the extension register; for signed extension turn it into a
                    // per-element mask of "0 > element".
                    try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias);
                    switch (extend) {
                        .signed => try self.asmRegisterRegister(
                            .{ switch (src_elem_abi_size) {
                                else => unreachable,
                                1 => .p_b,
                                2 => .p_w,
                                4 => .p_d,
                            }, .cmpgt },
                            ext_alias,
                            registerAlias(dst_reg, src_abi_size),
                        ),
                        .unsigned => {},
                    }
                    try self.asmRegisterRegister(
                        mir_tag,
                        registerAlias(dst_reg, dst_abi_size),
                        registerAlias(ext_reg, dst_abi_size),
                    );
                    break :result dst_mcv;
                },
            }
            @compileError("unreachable");
        }

        // Copy using whichever of the two types is narrower.
        const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;

        const src_storage_bits: u16 = switch (src_mcv) {
            .register, .register_offset => 64,
            .register_pair => 128,
            .load_frame => |frame_addr| @intCast(self.getFrameAddrSize(frame_addr) * 8),
            else => src_int_info.bits,
        };

        // Reuse the operand only when its storage is wide enough for the destination
        // and both occupy the same number of 64-bit limbs.
        const dst_mcv = if (dst_int_info.bits <= src_storage_bits and
            std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
            std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
            const dst_mcv = try self.allocRegOrMem(inst, true);
            try self.genCopy(min_ty, dst_mcv, src_mcv, .{});
            break :dst dst_mcv;
        };

        // Not widening: nothing to extend, just re-alias a register result.
        if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
            .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }
        else
            dst_mcv;

        if (dst_mcv.isRegister()) {
            try self.truncateRegister(src_ty, dst_mcv.getReg().?);
            break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) };
        }

        const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
        const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;

        // The highest limb that holds source bits.
        const high_mcv: MCValue = if (dst_mcv.isBase())
            dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
        else
            .{ .register = dst_mcv.register_pair[1] };
        const high_reg = if (high_mcv.isRegister())
            high_mcv.getReg().?
        else
            try self.copyToTmpRegister(switch (src_int_info.signedness) {
                .signed => .isize,
                .unsigned => .usize,
            }, high_mcv);
        const high_lock = self.register_manager.lockRegAssumeUnused(high_reg);
        defer self.register_manager.unlockReg(high_lock);

        // A partially used high limb is truncated to the source type and written back.
        const high_bits = src_int_info.bits % 64;
        if (high_bits > 0) {
            try self.truncateRegister(src_ty, high_reg);
            const high_ty: Type = if (dst_int_info.bits >= 64) .usize else dst_ty;
            try self.genCopy(high_ty, high_mcv, .{ .register = high_reg }, .{});
        }

        // Fill the limbs beyond the source with zero, or with the high limb shifted
        // arithmetically right by 63 for signed extension.
        if (dst_limbs_len > src_limbs_len) try self.genInlineMemset(
            dst_mcv.address().offset(src_limbs_len * 8),
            switch (extend) {
                .signed => extend: {
                    const extend_mcv = MCValue{ .register = high_reg };
                    try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, extend_mcv, .u8, .{ .immediate = 63 });
                    break :extend extend_mcv;
                },
                .unsigned => .{ .immediate = 0 },
            },
            .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 },
            .{},
        );

        break :result dst_mcv;
    }) orelse return self.fail("TODO implement airIntCast from {} to {}", .{
        src_ty.fmt(pt), dst_ty.fmt(pt),
    });
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers an integer truncation.
/// Vectors are masked with a splat of the destination element's all-ones value and
/// then packed; unsupported vector shapes fail with a "TODO implement airTrunc" error.
/// Scalars are copied (or reused) and their excess high bits removed.
fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const dst_ty = self.typeOfIndex(inst);
    const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
    const src_ty = self.typeOf(ty_op.operand);
    const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));

    const result = result: {
        const src_mcv = try self.resolveInst(ty_op.operand);
        const src_lock =
            if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
        defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

        // Destination selection, in order: reuse the operand register if it is of the
        // right class, a single register for <= 8 bytes, a gp register pair for
        // non-vector values <= 16 bytes, otherwise a register or memory sized for the
        // source type.
        const dst_mcv = if (src_mcv.isRegister() and src_mcv.getReg().?.class() == self.regClassForType(dst_ty) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else if (dst_abi_size <= 8)
            try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv)
        else if (dst_abi_size <= 16 and !dst_ty.isVector(zcu)) dst: {
            const dst_regs =
                try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
            const dst_mcv: MCValue = .{ .register_pair = dst_regs };
            const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
            defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);

            try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
            break :dst dst_mcv;
        } else dst: {
            const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true);
            try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
            break :dst dst_mcv;
        };

        if (dst_ty.zigTypeTag(zcu) == .vector) {
            assert(src_ty.zigTypeTag(zcu) == .vector and dst_ty.vectorLen(zcu) == src_ty.vectorLen(zcu));
            const dst_elem_ty = dst_ty.childType(zcu);
            const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(zcu));
            const src_elem_ty = src_ty.childType(zcu);
            const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(zcu));

            // Only 2 -> 1 byte and 4 -> 2 byte element truncations are supported, with
            // the feature requirement depending on the vector length.
            const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) {
                1 => switch (src_elem_abi_size) {
                    2 => switch (dst_ty.vectorLen(zcu)) {
                        1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
                        9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null,
                        else => null,
                    },
                    else => null,
                },
                2 => switch (src_elem_abi_size) {
                    4 => switch (dst_ty.vectorLen(zcu)) {
                        1...4 => if (self.hasFeature(.avx))
                            .{ .vp_w, .ackusd }
                        else if (self.hasFeature(.sse4_1))
                            .{ .p_w, .ackusd }
                        else
                            null,
                        5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null,
                        else => null,
                    },
                    else => null,
                },
                else => null,
            }) orelse return self.fail("TODO implement airTrunc for {}", .{dst_ty.fmt(pt)});

            const dst_info = dst_elem_ty.intInfo(zcu);
            const src_info = src_elem_ty.intInfo(zcu);

            // Mask with the low `dst_info.bits` bits set, typed as a source element.
            const mask_val = try pt.intValue(src_elem_ty, @as(u64, std.math.maxInt(u64)) >> @intCast(64 - dst_info.bits));

            // Splat the mask across a 128-bit vector, or 256-bit for sources over 16 bytes.
            const splat_ty = try pt.vectorType(.{
                .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)),
                .child = src_elem_ty.ip_index,
            });
            const splat_abi_size: u32 = @intCast(splat_ty.abiSize(zcu));

            const splat_val = try pt.intern(.{ .aggregate = .{
                .ty = splat_ty.ip_index,
                .storage = .{ .repeated_elem = mask_val.ip_index },
            } });

            const splat_mcv = try self.genTypedValue(.fromInterned(splat_val));
            const splat_addr_mcv: MCValue = switch (splat_mcv) {
                .memory, .indirect, .load_frame => splat_mcv.address(),
                else => .{ .register = try self.copyToTmpRegister(.usize, splat_mcv.address()) },
            };

            const dst_reg = dst_mcv.getReg().?;
            const dst_alias = registerAlias(dst_reg, src_abi_size);
            if (self.hasFeature(.avx)) {
                try self.asmRegisterRegisterMemory(
                    .{ .vp_, .@"and" },
                    dst_alias,
                    dst_alias,
                    try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }),
                );
                if (src_abi_size > 16) {
                    // Extract lane 1 into a temporary and pack it together with the
                    // low part of the destination.
                    const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
                    const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
                    defer self.register_manager.unlockReg(temp_lock);

                    try self.asmRegisterRegisterImmediate(
                        .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract },
                        registerAlias(temp_reg, dst_abi_size),
                        dst_alias,
                        .u(1),
                    );
                    try self.asmRegisterRegisterRegister(
                        mir_tag,
                        registerAlias(dst_reg, dst_abi_size),
                        registerAlias(dst_reg, dst_abi_size),
                        registerAlias(temp_reg, dst_abi_size),
                    );
                } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias);
            } else {
                try self.asmRegisterMemory(
                    .{ .p_, .@"and" },
                    dst_alias,
                    try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }),
                );
                try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias);
            }
            break :result dst_mcv;
        }

        // when truncating a `u16` to `u5`, for example, those top 3 bits in the result
        // have to be removed. this only happens if the dst is not a power-of-two size.
        if (dst_abi_size <= 8) {
            if (self.regExtraBits(dst_ty) > 0) {
                try self.truncateRegister(dst_ty, dst_mcv.register.to64());
            }
        } else if (dst_abi_size <= 16) {
            // Only the high register of the pair can carry excess bits.
            const dst_info = dst_ty.intInfo(zcu);
            const high_ty = try pt.intType(dst_info.signedness, dst_info.bits - 64);
            if (self.regExtraBits(high_ty) > 0) {
                try self.truncateRegister(high_ty, dst_mcv.register_pair[1].to64());
            }
        }

        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers a bool-to-integer conversion. No conversion instruction is emitted: the
/// operand is reused as the result, or copied into a register owned by `inst`.
fn airIntFromBool(self: *CodeGen, inst: Air.Inst.Index) !void {
    const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const ty = self.typeOfIndex(inst);

    const operand = try self.resolveInst(un_op);
    const dst_mcv = if (self.reuseOperand(inst, un_op, 0, operand))
        operand
    else
        try self.copyToRegisterWithInstTracking(inst, ty, operand);

    return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
}

/// Lowers an AIR `slice`: builds the slice in a newly allocated frame slot and returns
/// that slot as a `load_frame` value. The `lhs` operand is stored at offset 0 and the
/// `rhs` operand right after it, at the ABI size of the `lhs` type.
fn airSlice(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;

    const slice_ty = self.typeOfIndex(inst);
    const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu));

    // First field: the `lhs` operand at offset 0.
    const ptr_ty = self.typeOf(bin_op.lhs);
    try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = bin_op.lhs }, .{});

    // Second field: the `rhs` operand, placed directly after the first.
    const len_ty = self.typeOf(bin_op.rhs);
    try self.genSetMem(
        .{ .frame = frame_index },
        @intCast(ptr_ty.abiSize(zcu)),
        len_ty,
        .{ .air_ref = bin_op.rhs },
        .{},
    );

    const result = MCValue{ .load_frame = .{ .index = frame_index } };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}

/// Lowers a unary AIR instruction by delegating to `genUnOp` with the instruction's
/// `ty_op` operand.
fn airUnOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_mcv = try self.genUnOp(inst, tag, ty_op.operand);
    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}

/// Lowers a binary AIR instruction through `genBinOp`. For ABI-integer results whose
/// bit size is smaller than their ABI size, the result is truncated afterwards:
/// register results in place, other results by round-tripping the limb that holds the
/// top bits through a temporary register.
fn airBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);

    const dst_ty = self.typeOfIndex(inst);
    if (dst_ty.isAbiInt(zcu)) {
        const abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
        const bit_size: u32 = @intCast(dst_ty.bitSize(zcu));
        // Only types with padding bits need the extra truncation.
        if (abi_size * 8 > bit_size) {
            const dst_lock = switch (dst_mcv) {
                .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg),
                else => null,
            };
            defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

            if (dst_mcv.isRegister()) {
                try self.truncateRegister(dst_ty, dst_mcv.getReg().?);
            } else {
                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                defer self.register_manager.unlockReg(tmp_lock);

                // `hi_ty` covers the bits used in the topmost limb (1...64 of them);
                // `hi_mcv` is that limb, at byte offset `bit_size / 64 * 8`.
                const hi_ty = try pt.intType(.unsigned, @intCast((dst_ty.bitSize(zcu) - 1) % 64 + 1));
                const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
                try self.genSetReg(tmp_reg, hi_ty, hi_mcv, .{});
                try self.truncateRegister(dst_ty, tmp_reg);
                try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg }, .{});
            }
        }
    }
    return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
}

/// Lowers pointer arithmetic by delegating to `genBinOp`. The operands are read from
/// the `Air.Bin` payload of the instruction's `ty_pl` data.
fn airPtrArithmetic(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
    const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
    return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
}

/// Returns a bit count for the integer value referred to by `dst_air`:
/// - for the result of an `intcast` instruction, the source type's bit count adjusted
///   for a signedness change, capped at the destination type's bit count;
/// - for an interned value, its two's complement bit count, plus one when the value is
///   positive and the type is signed;
/// - otherwise the full bit count of the type.
fn activeIntBits(self: *CodeGen, dst_air: Air.Inst.Ref) u16 {
    const pt = self.pt;
    const zcu = pt.zcu;
    const air_tag = self.air.instructions.items(.tag);
    const air_data = self.air.instructions.items(.data);

    const dst_ty = self.typeOf(dst_air);
    const dst_info = dst_ty.intInfo(zcu);
    if (dst_air.toIndex()) |inst| {
        switch (air_tag[@intFromEnum(inst)]) {
            .intcast => {
                const src_ty = self.typeOf(air_data[@intFromEnum(inst)].ty_op.operand);
                const src_info = src_ty.intInfo(zcu);
                // NOTE(review): the -1 / +1 adjustments presumably account for the sign
                // bit being dropped (signed -> unsigned) or added (unsigned -> signed).
                return @min(switch (src_info.signedness) {
                    .signed => switch (dst_info.signedness) {
                        .signed => src_info.bits,
                        .unsigned => src_info.bits - 1,
                    },
                    .unsigned => switch (dst_info.signedness) {
                        .signed => src_info.bits + 1,
                        .unsigned => src_info.bits,
                    },
                }, dst_info.bits);
            },
            else => {},
        }
    } else if (dst_air.toInterned()) |ip_index| {
        var space: Value.BigIntSpace = undefined;
        const src_int = Value.fromInterned(ip_index).toBigInt(&space, zcu);
        return @as(u16, @intCast(src_int.bitCountTwosComp())) +
            @intFromBool(src_int.positive and dst_info.signedness == .signed);
    }
    return dst_info.bits;
}

/// Lowers integer multiplication, division, remainder and modulo.
/// Float and vector operands are forwarded to `genBinOp`. When both the destination
/// and the computed source type are 16 bytes, division-like operations are emitted as
/// library calls (`__divti3`, `__udivti3`, `__modti3`, `__umodti3`), with extra fix-up
/// code for signed `div_floor` and signed `mod`. Everything else spills `rax`, `rcx`
/// and `rdx` and goes through `genMulDivBinOp`.
fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const result = result: {
        const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
        const dst_ty = self.typeOfIndex(inst);
        switch (dst_ty.zigTypeTag(zcu)) {
            .float, .vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
            else => {},
        }
        const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));

        // For multiplication the operand type can be narrower than the result: it is
        // sized from the operands' active bits, but never below half the result width.
        const dst_info = dst_ty.intInfo(zcu);
        const src_ty = try pt.intType(dst_info.signedness, switch (tag) {
            else => unreachable,
            .mul, .mul_wrap => @max(
                self.activeIntBits(bin_op.lhs),
                self.activeIntBits(bin_op.rhs),
                dst_info.bits / 2,
            ),
            .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits,
        });
        const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));

        if (dst_abi_size == 16 and src_abi_size == 16) switch (tag) {
            else => unreachable,
            // 16-byte multiplication falls through to `genMulDivBinOp` below.
            .mul, .mul_wrap => {},
            .div_trunc, .div_floor, .div_exact, .rem, .mod => {
                const signed = dst_ty.isSignedInt(zcu);
                // Sized for the longest callee name; `?` stands for the size letter.
                var callee_buf: ["__udiv?i3".len]u8 = undefined;
                // Only initialized for signed `div_floor`; left `undefined` otherwise
                // and only read again in the signed `.div_floor` arm below.
                const signed_div_floor_state: struct {
                    frame_index: FrameIndex,
                    state: State,
                    reloc: Mir.Inst.Index,
                } = if (signed and tag == .div_floor) state: {
                    // A zero-initialized frame slot that later receives the rounding
                    // adjustment subtracted from the quotient.
                    const frame_index = try self.allocFrameIndex(.initType(.usize, zcu));
                    try self.asmMemoryImmediate(
                        .{ ._, .mov },
                        .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
                        .u(0),
                    );

                    const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                    defer self.register_manager.unlockReg(tmp_lock);

                    const lhs_mcv = try self.resolveInst(bin_op.lhs);
                    const mat_lhs_mcv = switch (lhs_mcv) {
                        .load_symbol => mat_lhs_mcv: {
                            // TODO clean this up!
                            const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
                            break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                        },
                        else => lhs_mcv,
                    };
                    const mat_lhs_lock = switch (mat_lhs_mcv) {
                        .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                        else => null,
                    };
                    defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
                    // tmp = high 64 bits of lhs.
                    if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
                        .{ ._, .mov },
                        tmp_reg,
                        try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
                    ) else try self.asmRegisterRegister(
                        .{ ._, .mov },
                        tmp_reg,
                        mat_lhs_mcv.register_pair[1],
                    );

                    const rhs_mcv = try self.resolveInst(bin_op.rhs);
                    const mat_rhs_mcv = switch (rhs_mcv) {
                        .load_symbol => mat_rhs_mcv: {
                            // TODO clean this up!
                            const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
                            break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                        },
                        else => rhs_mcv,
                    };
                    const mat_rhs_lock = switch (mat_rhs_mcv) {
                        .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                        else => null,
                    };
                    defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
                    // tmp ^= high 64 bits of rhs; the sign flag is then set exactly
                    // when the operands have different signs.
                    if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
                        .{ ._, .xor },
                        tmp_reg,
                        try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
                    ) else try self.asmRegisterRegister(
                        .{ ._, .xor },
                        tmp_reg,
                        mat_rhs_mcv.register_pair[1],
                    );
                    // Save the state and emit a jump (taken when the sign flag is clear)
                    // whose target is resolved by `performReloc` after the first call.
                    const state = try self.saveState();
                    const reloc = try self.asmJccReloc(.ns, undefined);

                    break :state .{ .frame_index = frame_index, .state = state, .reloc = reloc };
                } else undefined;
                // Callee name is `__` + ("" | "u") + ("div" | "mod") + size letter + `i3`.
                // Signed `div_floor` calls the "mod" routine here; the "div" call
                // follows in the `.div_floor` arm below.
                const call_mcv = try self.genCall(
                    .{ .lib = .{
                        .return_type = dst_ty.toIntern(),
                        .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
                        .callee = std.fmt.bufPrint(&callee_buf, "__{s}{s}{c}i3", .{
                            if (signed) "" else "u",
                            switch (tag) {
                                .div_trunc, .div_exact => "div",
                                .div_floor => if (signed) "mod" else "div",
                                .rem, .mod => "mod",
                                else => unreachable,
                            },
                            intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))),
                        }) catch unreachable,
                    } },
                    &.{ src_ty, src_ty },
                    &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
                    .{},
                );
                break :result if (signed) switch (tag) {
                    .div_floor => {
                        // OR the two halves of the "mod" result together and record in
                        // the frame slot's low byte whether it was non-zero.
                        try self.asmRegisterRegister(
                            .{ ._, .@"or" },
                            call_mcv.register_pair[0],
                            call_mcv.register_pair[1],
                        );
                        try self.asmSetccMemory(.nz, .{
                            .base = .{ .frame = signed_div_floor_state.frame_index },
                            .mod = .{ .rm = .{ .size = .byte } },
                        });
                        // Rejoin the path that jumped over the "mod" call.
                        try self.restoreState(signed_div_floor_state.state, &.{}, .{
                            .emit_instructions = true,
                            .update_tracking = true,
                            .resurrect = true,
                            .close_scope = true,
                        });
                        self.performReloc(signed_div_floor_state.reloc);
                        const dst_mcv = try self.genCall(
                            .{ .lib = .{
                                .return_type = dst_ty.toIntern(),
                                .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
                                .callee = std.fmt.bufPrint(&callee_buf, "__div{c}i3", .{
                                    intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))),
                                }) catch unreachable,
                            } },
                            &.{ src_ty, src_ty },
                            &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
                            .{},
                        );
                        // 128-bit subtract of the frame slot (0 or 1) from the quotient.
                        try self.asmRegisterMemory(
                            .{ ._, .sub },
                            dst_mcv.register_pair[0],
                            .{
                                .base = .{ .frame = signed_div_floor_state.frame_index },
                                .mod = .{ .rm = .{ .size = .qword } },
                            },
                        );
                        try self.asmRegisterImmediate(.{ ._, .sbb }, dst_mcv.register_pair[1], .u(0));
                        try self.freeValue(
                            .{ .load_frame = .{ .index = signed_div_floor_state.frame_index } },
                        );
                        break :result dst_mcv;
                    },
                    .mod => {
                        const dst_regs = call_mcv.register_pair;
                        const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
                        defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);

                        const tmp_regs =
                            try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
                        const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
                        defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);

                        const rhs_mcv = try self.resolveInst(bin_op.rhs);
                        const mat_rhs_mcv = switch (rhs_mcv) {
                            .load_symbol => mat_rhs_mcv: {
                                // TODO clean this up!
                                const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
                                break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                            },
                            else => rhs_mcv,
                        };
                        const mat_rhs_lock = switch (mat_rhs_mcv) {
                            .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                            else => null,
                        };
                        defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);

                        // tmp = call result + rhs, as a 128-bit add (`add` then `adc`).
                        for (tmp_regs, dst_regs) |tmp_reg, dst_reg|
                            try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg);
                        if (mat_rhs_mcv.isBase()) {
                            try self.asmRegisterMemory(
                                .{ ._, .add },
                                tmp_regs[0],
                                try mat_rhs_mcv.mem(self, .{ .size = .qword }),
                            );
                            try self.asmRegisterMemory(
                                .{ ._, .adc },
                                tmp_regs[1],
                                try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
                            );
                        } else for (
                            [_]Mir.Inst.Tag{ .add, .adc },
                            tmp_regs,
                            mat_rhs_mcv.register_pair,
                        ) |op, tmp_reg, rhs_reg|
                            try self.asmRegisterRegister(.{ ._, op }, tmp_reg, rhs_reg);
                        // If the call result is negative (sign flag set by testing its
                        // high half), replace it with `result + rhs`.
                        try self.asmRegisterRegister(.{ ._, .@"test" }, dst_regs[1], dst_regs[1]);
                        for (dst_regs, tmp_regs) |dst_reg, tmp_reg|
                            try self.asmCmovccRegisterRegister(.s, dst_reg, tmp_reg);
                        break :result call_mcv;
                    },
                    else => call_mcv,
                } else call_mcv;
            },
        };

        // General path: free up and lock `rax`, `rcx` and `rdx` before handing the
        // operation to `genMulDivBinOp`.
        try self.spillEflagsIfOccupied();
        try self.spillRegisters(&.{ .rax, .rcx, .rdx });
        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

        const lhs_mcv = try self.resolveInst(bin_op.lhs);
        const rhs_mcv = try self.resolveInst(bin_op.rhs);
        break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs_mcv, rhs_mcv);
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}

fn airAddSat(self: *CodeGen, inst: 
/// Lowers the AIR saturating-add instruction `inst` for a scalar integer of at most 8 bytes.
///
/// The sum is accumulated in a register (the lhs register when `reuseOperand` permits it,
/// otherwise a tracked copy of lhs). A saturation limit is materialized in a scratch
/// register and a `cmovcc` replaces the sum with that limit when the selected condition
/// holds. Vectors and types wider than 8 bytes fail with a TODO message.
fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const ty = self.typeOf(operands.lhs);
    const unsupported = ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8;
    if (unsupported) return self.fail("TODO implement airAddSat for {}", .{ty.fmt(pt)});

    // Accumulator: either the lhs register itself or a fresh register holding a copy of lhs.
    const lhs_mcv = try self.resolveInst(operands.lhs);
    const can_reuse_lhs = lhs_mcv.isRegister() and
        self.reuseOperand(inst, operands.lhs, 0, lhs_mcv);
    const acc_mcv = if (can_reuse_lhs)
        lhs_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
    const acc_reg = acc_mcv.register;
    const acc_lock = self.register_manager.lockRegAssumeUnused(acc_reg);
    defer self.register_manager.unlockReg(acc_lock);

    const rhs_mcv = try self.resolveInst(operands.rhs);
    const rhs_lock: ?RegisterLock = if (rhs_mcv == .register)
        self.register_manager.lockRegAssumeUnused(rhs_mcv.register)
    else
        null;
    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

    // Scratch register that will hold the value to saturate to.
    const sat_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
    const sat_mcv: MCValue = .{ .register = sat_reg };
    const sat_lock = self.register_manager.lockRegAssumeUnused(sat_reg);
    defer self.register_manager.unlockReg(sat_lock);

    const reg_bits = self.regBitSize(ty);
    const pad_bits = self.regExtraBits(ty);
    const is_signed = ty.isSignedInt(zcu);
    const pad_shift: MCValue = .{ .immediate = pad_bits };

    const cc: Condition = if (is_signed) signed: {
        // With padding bits, both operands are shifted left first and the result is shifted
        // back after the cmov (presumably so the overflow flag reflects the type's width).
        if (pad_bits > 0) try self.genShiftBinOpMir(.{ ._l, .sa }, ty, acc_mcv, .u8, pad_shift);

        // limit = (acc >> (reg_bits - 1), arithmetic) ^ ((1 << (reg_bits - 1)) - 1)
        try self.genSetReg(sat_reg, ty, acc_mcv, .{});
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, sat_mcv, .u8, .{ .immediate = reg_bits - 1 });
        const signed_max: u64 = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1;
        try self.genBinOpMir(.{ ._, .xor }, ty, sat_mcv, .{ .immediate = signed_max });

        if (pad_bits > 0) {
            const rhs_tmp_reg = try self.copyToTmpRegister(ty, rhs_mcv);
            const rhs_tmp_mcv: MCValue = .{ .register = rhs_tmp_reg };
            const rhs_tmp_lock = self.register_manager.lockRegAssumeUnused(rhs_tmp_reg);
            defer self.register_manager.unlockReg(rhs_tmp_lock);

            try self.genShiftBinOpMir(.{ ._l, .sa }, ty, rhs_tmp_mcv, .u8, pad_shift);
            try self.genBinOpMir(.{ ._, .add }, ty, acc_mcv, rhs_tmp_mcv);
        } else {
            try self.genBinOpMir(.{ ._, .add }, ty, acc_mcv, rhs_mcv);
        }
        break :signed .o;
    } else unsigned: {
        // limit = all ones in the low `bitSize` bits.
        const unsigned_max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - ty.bitSize(zcu));
        try self.genSetReg(sat_reg, ty, .{ .immediate = unsigned_max }, .{});

        try self.genBinOpMir(.{ ._, .add }, ty, acc_mcv, rhs_mcv);
        const needs_compare = pad_bits > 0;
        // With padding bits the sum is compared against the limit explicitly instead of
        // using the carry produced by the add.
        if (needs_compare) try self.genBinOpMir(.{ ._, .cmp }, ty, acc_mcv, sat_mcv);
        const unsigned_cc: Condition = if (needs_compare) .a else .c;
        break :unsigned unsigned_cc;
    };

    // cmov is emitted with an operand size of at least 2 bytes.
    const cmov_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
    const cmov_dst = registerAlias(acc_reg, cmov_size);
    const cmov_src = registerAlias(sat_reg, cmov_size);
    try self.asmCmovccRegisterRegister(cc, cmov_dst, cmov_src);

    if (pad_bits > 0 and is_signed)
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, acc_mcv, .u8, pad_shift);

    return self.finishAir(inst, acc_mcv, .{ operands.lhs, operands.rhs, .none });
}
/// Lowers the AIR saturating-subtract instruction `inst` for a scalar integer of at most
/// 8 bytes.
///
/// The difference is accumulated in a register (the lhs register when `reuseOperand`
/// permits it, otherwise a tracked copy of lhs). A saturation limit is materialized in a
/// scratch register and a `cmovcc` replaces the difference with that limit when the
/// selected condition holds. Vectors and types wider than 8 bytes fail with a TODO message.
fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const ty = self.typeOf(operands.lhs);
    const unsupported = ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8;
    if (unsupported) return self.fail("TODO implement airSubSat for {}", .{ty.fmt(pt)});

    // Accumulator: either the lhs register itself or a fresh register holding a copy of lhs.
    const lhs_mcv = try self.resolveInst(operands.lhs);
    const can_reuse_lhs = lhs_mcv.isRegister() and
        self.reuseOperand(inst, operands.lhs, 0, lhs_mcv);
    const acc_mcv = if (can_reuse_lhs)
        lhs_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
    const acc_reg = acc_mcv.register;
    const acc_lock = self.register_manager.lockRegAssumeUnused(acc_reg);
    defer self.register_manager.unlockReg(acc_lock);

    const rhs_mcv = try self.resolveInst(operands.rhs);
    const rhs_lock: ?RegisterLock = if (rhs_mcv == .register)
        self.register_manager.lockRegAssumeUnused(rhs_mcv.register)
    else
        null;
    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

    // Scratch register that will hold the value to saturate to.
    const sat_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
    const sat_mcv: MCValue = .{ .register = sat_reg };
    const sat_lock = self.register_manager.lockRegAssumeUnused(sat_reg);
    defer self.register_manager.unlockReg(sat_lock);

    const reg_bits = self.regBitSize(ty);
    const pad_bits = self.regExtraBits(ty);
    const is_signed = ty.isSignedInt(zcu);
    const pad_shift: MCValue = .{ .immediate = pad_bits };

    const cc: Condition = if (is_signed) signed: {
        // With padding bits, both operands are shifted left first and the result is shifted
        // back after the cmov (presumably so the overflow flag reflects the type's width).
        if (pad_bits > 0) try self.genShiftBinOpMir(.{ ._l, .sa }, ty, acc_mcv, .u8, pad_shift);

        // limit = (acc >> (reg_bits - 1), arithmetic) ^ ((1 << (reg_bits - 1)) - 1)
        try self.genSetReg(sat_reg, ty, acc_mcv, .{});
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, sat_mcv, .u8, .{ .immediate = reg_bits - 1 });
        const signed_max: u64 = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1;
        try self.genBinOpMir(.{ ._, .xor }, ty, sat_mcv, .{ .immediate = signed_max });

        if (pad_bits > 0) {
            const rhs_tmp_reg = try self.copyToTmpRegister(ty, rhs_mcv);
            const rhs_tmp_mcv: MCValue = .{ .register = rhs_tmp_reg };
            const rhs_tmp_lock = self.register_manager.lockRegAssumeUnused(rhs_tmp_reg);
            defer self.register_manager.unlockReg(rhs_tmp_lock);

            try self.genShiftBinOpMir(.{ ._l, .sa }, ty, rhs_tmp_mcv, .u8, pad_shift);
            try self.genBinOpMir(.{ ._, .sub }, ty, acc_mcv, rhs_tmp_mcv);
        } else {
            try self.genBinOpMir(.{ ._, .sub }, ty, acc_mcv, rhs_mcv);
        }
        break :signed .o;
    } else unsigned: {
        // An unsigned difference saturates to zero when the subtraction borrows.
        try self.genSetReg(sat_reg, ty, .{ .immediate = 0 }, .{});
        try self.genBinOpMir(.{ ._, .sub }, ty, acc_mcv, rhs_mcv);
        break :unsigned .c;
    };

    // cmov is emitted with an operand size of at least 2 bytes.
    const cmov_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
    const cmov_dst = registerAlias(acc_reg, cmov_size);
    const cmov_src = registerAlias(sat_reg, cmov_size);
    try self.asmCmovccRegisterRegister(cc, cmov_dst, cmov_src);

    if (pad_bits > 0 and is_signed)
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, acc_mcv, .u8, pad_shift);

    return self.finishAir(inst, acc_mcv, .{ operands.lhs, operands.rhs, .none });
}
/// Lowers the AIR saturating-multiply instruction `inst`.
///
/// Two paths are implemented:
/// * `i128`: calls the `__muloti4` library routine with a pointer to a temporary `c_int`
///   overflow flag, then conditionally replaces both result registers with a saturation
///   value derived from the signs of the operands' high limbs.
/// * scalar integers of at most 8 bytes: computes a limit register up front, multiplies via
///   `genMulDivBinOp`, and `cmovcc`s the limit into the result.
/// Everything else (vectors, other types wider than 8 bytes) fails with a TODO message.
fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const ty = self.typeOf(operands.lhs);

    const result = if (ty.toIntern() == .i128_type) wide: {
        const ptr_c_int = try pt.singleMutPtrType(.c_int);
        const overflow_flag = try self.allocTempRegOrMem(.c_int, false);

        const product_mcv = try self.genCall(.{ .lib = .{
            .return_type = .i128_type,
            .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
            .callee = "__muloti4",
        } }, &.{ .i128, .i128, ptr_c_int }, &.{
            .{ .air_ref = operands.lhs },
            .{ .air_ref = operands.rhs },
            overflow_flag.address(),
        }, .{});
        const product_locks =
            self.register_manager.lockRegsAssumeUnused(2, product_mcv.register_pair);
        defer for (product_locks) |lock| self.register_manager.unlockReg(lock);

        const sat_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
        const sat_lock = self.register_manager.lockRegAssumeUnused(sat_reg);
        defer self.register_manager.unlockReg(sat_lock);

        // sat_reg = high limb of lhs.
        const lhs_mcv = try self.resolveInst(operands.lhs);
        // TODO clean this up!
        const lhs_src = if (lhs_mcv == .load_symbol)
            MCValue{ .indirect = .{
                .reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()),
            } }
        else
            lhs_mcv;
        const lhs_src_lock: ?RegisterLock = if (lhs_src == .indirect)
            self.register_manager.lockReg(lhs_src.indirect.reg)
        else
            null;
        defer if (lhs_src_lock) |lock| self.register_manager.unlockReg(lock);
        if (lhs_src.isBase()) {
            const lhs_hi_mem =
                try lhs_src.address().offset(8).deref().mem(self, .{ .size = .qword });
            try self.asmRegisterMemory(.{ ._, .mov }, sat_reg, lhs_hi_mem);
        } else {
            try self.asmRegisterRegister(.{ ._, .mov }, sat_reg, lhs_src.register_pair[1]);
        }

        // sat_reg ^= high limb of rhs, so bit 63 is set iff the operand signs differ.
        const rhs_mcv = try self.resolveInst(operands.rhs);
        // TODO clean this up!
        const rhs_src = if (rhs_mcv == .load_symbol)
            MCValue{ .indirect = .{
                .reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()),
            } }
        else
            rhs_mcv;
        const rhs_src_lock: ?RegisterLock = if (rhs_src == .indirect)
            self.register_manager.lockReg(rhs_src.indirect.reg)
        else
            null;
        defer if (rhs_src_lock) |lock| self.register_manager.unlockReg(lock);
        if (rhs_src.isBase()) {
            const rhs_hi_mem =
                try rhs_src.address().offset(8).deref().mem(self, .{ .size = .qword });
            try self.asmRegisterMemory(.{ ._, .xor }, sat_reg, rhs_hi_mem);
        } else {
            try self.asmRegisterRegister(.{ ._, .xor }, sat_reg, rhs_src.register_pair[1]);
        }

        // Broadcast the sign bit and invert it; the result is the low limb to saturate to.
        try self.asmRegisterImmediate(.{ ._r, .sa }, sat_reg, .u(63));
        try self.asmRegister(.{ ._, .not }, sat_reg);
        // Compare the overflow flag written by the call against zero.
        const overflow_mem = try overflow_flag.mem(self, .{ .size = .dword });
        try self.asmMemoryImmediate(.{ ._, .cmp }, overflow_mem, .s(0));
        try self.freeValue(overflow_flag);
        try self.asmCmovccRegisterRegister(.ne, product_mcv.register_pair[0], sat_reg);
        // Complement bit 63 to form the high limb; the second cmov reuses the `ne` result of
        // the cmp above.
        try self.asmRegisterImmediate(.{ ._c, .bt }, sat_reg, .u(63));
        try self.asmCmovccRegisterRegister(.ne, product_mcv.register_pair[1], sat_reg);
        break :wide product_mcv;
    } else narrow: {
        if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
            "TODO implement airMulSat for {}",
            .{ty.fmt(pt)},
        );

        try self.spillRegisters(&.{ .rax, .rcx, .rdx });
        const fixed_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
        defer for (fixed_locks) |lock| self.register_manager.unlockReg(lock);

        const lhs_mcv = try self.resolveInst(operands.lhs);
        const lhs_lock: ?RegisterLock = if (lhs_mcv == .register)
            self.register_manager.lockRegAssumeUnused(lhs_mcv.register)
        else
            null;
        defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

        const rhs_mcv = try self.resolveInst(operands.rhs);
        const rhs_lock: ?RegisterLock = if (rhs_mcv == .register)
            self.register_manager.lockReg(rhs_mcv.register)
        else
            null;
        defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

        // Scratch register holding the value to saturate to; filled in before the multiply.
        const sat_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
        const sat_mcv: MCValue = .{ .register = sat_reg };
        const sat_lock = self.register_manager.lockRegAssumeUnused(sat_reg);
        defer self.register_manager.unlockReg(sat_lock);

        const reg_bits = self.regBitSize(ty);
        const cc: Condition = if (ty.isSignedInt(zcu)) signed: {
            // limit = ((lhs ^ rhs) >> (reg_bits - 1), arithmetic) ^ ((1 << (reg_bits - 1)) - 1)
            try self.genSetReg(sat_reg, ty, lhs_mcv, .{});
            try self.genBinOpMir(.{ ._, .xor }, ty, sat_mcv, rhs_mcv);
            try self.genShiftBinOpMir(.{ ._r, .sa }, ty, sat_mcv, .u8, .{ .immediate = reg_bits - 1 });
            const signed_max: u64 = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1;
            try self.genBinOpMir(.{ ._, .xor }, ty, sat_mcv, .{ .immediate = signed_max });
            break :signed .o;
        } else unsigned: {
            // limit = all ones in the low `reg_bits` bits.
            const unsigned_max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - reg_bits);
            try self.genSetReg(sat_reg, ty, .{ .immediate = unsigned_max }, .{});
            break :unsigned .c;
        };

        const product_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
        // cmov is emitted with an operand size of at least 2 bytes.
        const cmov_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
        const cmov_dst = registerAlias(product_mcv.register, cmov_size);
        const cmov_src = registerAlias(sat_reg, cmov_size);
        try self.asmCmovccRegisterRegister(cc, cmov_dst, cmov_src);
        break :narrow product_mcv;
    };
    return self.finishAir(inst, result, .{ operands.lhs, operands.rhs, .none });
}
/// Lowers the AIR `add_with_overflow` / `sub_with_overflow` instruction `inst`.
///
/// The arithmetic itself is delegated to `genBinOp`; the overflow bit is the carry flag for
/// unsigned operands and the overflow flag for signed ones. The result is one of:
/// * a `register_overflow` value (register plus live eflags condition) when the bit width is
///   a power of two of at least 8 and the partial result is in a register,
/// * a frame slot laid out as the result tuple, filled directly for such widths,
/// * a frame slot filled by `genSetFrameTruncatedOverflowCompare` for all other widths.
/// Vector operands fail with a TODO message.
fn airAddSubWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const operands = self.air.extraData(Air.Bin, ty_pl.payload).data;
    const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
    const ty = self.typeOf(operands.lhs);
    switch (ty.zigTypeTag(zcu)) {
        .int => {},
        .vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}),
        else => unreachable,
    }

    const result: MCValue = result: {
        try self.spillEflagsIfOccupied();
        try self.spillRegisters(&.{ .rcx, .rdi, .rsi });
        const fixed_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi });
        defer for (fixed_locks) |lock| self.register_manager.unlockReg(lock);

        const partial_mcv = try self.genBinOp(null, switch (tag) {
            .add_with_overflow => .add,
            .sub_with_overflow => .sub,
            else => unreachable,
        }, operands.lhs, operands.rhs);
        const int_info = ty.intInfo(zcu);
        const cc: Condition = if (int_info.signedness == .signed) .o else .c;

        const tuple_ty = self.typeOfIndex(inst);
        const flags_are_exact = int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits);

        // Cheapest form: leave the value in its register and the overflow bit in eflags.
        if (flags_are_exact and partial_mcv == .register) {
            self.eflags_inst = inst;
            break :result .{ .register_overflow = .{ .reg = partial_mcv.register, .eflags = cc } };
        }

        const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
        if (flags_are_exact) {
            // The overflow bit is stored from eflags first, then the payload.
            try self.genSetMem(
                .{ .frame = frame_index },
                @intCast(tuple_ty.structFieldOffset(1, zcu)),
                .u1,
                .{ .eflags = cc },
                .{},
            );
            try self.genSetMem(
                .{ .frame = frame_index },
                @intCast(tuple_ty.structFieldOffset(0, zcu)),
                ty,
                partial_mcv,
                .{},
            );
        } else {
            try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
        }
        break :result .{ .load_frame = .{ .index = frame_index } };
    };
    return self.finishAir(inst, result, .{ operands.lhs, operands.rhs, .none });
}
/// Lowers the AIR `shl_with_overflow` instruction `inst`.
///
/// The shifted value is produced by `genShiftBinOp(.shl, ...)`. Overflow is detected by
/// shifting that value back right by the same amount and comparing it with the original
/// lhs: a mismatch (`ne`) is the overflow condition. The result takes the same three shapes
/// as in `airAddSubWithOverflow`: `register_overflow`, a directly filled frame slot, or a
/// frame slot filled by `genSetFrameTruncatedOverflowCompare`.
/// Vector operands fail with a TODO message.
fn airShlWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const operands = self.air.extraData(Air.Bin, ty_pl.payload).data;
    const lhs_ty = self.typeOf(operands.lhs);
    const rhs_ty = self.typeOf(operands.rhs);
    switch (lhs_ty.zigTypeTag(zcu)) {
        .int => {},
        .vector => return self.fail("TODO implement shl with overflow for Vector type", .{}),
        else => unreachable,
    }

    const result: MCValue = result: {
        try self.spillEflagsIfOccupied();
        try self.spillRegisters(&.{ .rcx, .rdi, .rsi });
        const fixed_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi });
        defer for (fixed_locks) |lock| self.register_manager.unlockReg(lock);

        const lhs_mcv = try self.resolveInst(operands.lhs);
        const rhs_mcv = try self.resolveInst(operands.rhs);

        const int_info = lhs_ty.intInfo(zcu);

        // shifted = lhs << rhs
        const shifted_mcv = try self.genShiftBinOp(.shl, null, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
        const shifted_lock: ?RegisterLock = if (shifted_mcv == .register)
            self.register_manager.lockRegAssumeUnused(shifted_mcv.register)
        else
            null;
        defer if (shifted_lock) |lock| self.register_manager.unlockReg(lock);

        // round_trip = shifted >> rhs; it differs from lhs exactly when bits were lost.
        const round_trip_mcv =
            try self.genShiftBinOp(.shr, null, shifted_mcv, rhs_mcv, lhs_ty, rhs_ty);
        const round_trip_lock: ?RegisterLock = if (round_trip_mcv == .register)
            self.register_manager.lockRegAssumeUnused(round_trip_mcv.register)
        else
            null;
        defer if (round_trip_lock) |lock| self.register_manager.unlockReg(lock);

        try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, round_trip_mcv, lhs_mcv);
        const cc: Condition = .ne;

        const tuple_ty = self.typeOfIndex(inst);
        const flags_are_exact = int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits);

        // Cheapest form: leave the value in its register and the overflow bit in eflags.
        if (flags_are_exact and shifted_mcv == .register) {
            self.eflags_inst = inst;
            break :result .{ .register_overflow = .{ .reg = shifted_mcv.register, .eflags = cc } };
        }

        const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
        if (flags_are_exact) {
            // The overflow bit is stored from eflags first, then the payload.
            try self.genSetMem(
                .{ .frame = frame_index },
                @intCast(tuple_ty.structFieldOffset(1, zcu)),
                tuple_ty.fieldType(1, zcu),
                .{ .eflags = cc },
                .{},
            );
            try self.genSetMem(
                .{ .frame = frame_index },
                @intCast(tuple_ty.structFieldOffset(0, zcu)),
                tuple_ty.fieldType(0, zcu),
                shifted_mcv,
                .{},
            );
        } else {
            try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, shifted_mcv, cc);
        }
        break :result .{ .load_frame = .{ .index = frame_index } };
    };
    return self.finishAir(inst, result, .{ operands.lhs, operands.rhs, .none });
}
/// Writes an overflow-result tuple of type `tuple_ty` into the frame slot `frame_index`.
///
/// Field 0 receives the integer in `src_mcv` truncated to the bit width of the tuple's
/// payload type; field 1 receives the overflow bit.
///
/// Only the most significant limb can hold bits beyond the payload width, so that limb is
/// loaded into a scratch register, truncated to `hi_ty`, and compared with the untruncated
/// limb. The overflow bit is:
/// * `overflow_cc` OR "truncation changed the limb" when `overflow_cc` is non-null (the
///   condition is captured with `setcc` before any flag-clobbering instruction is emitted),
/// * just "truncation changed the limb" (`ne`) when `overflow_cc` is null.
///
/// `src_mcv` is locked for the duration of the call when it is a register.
fn genSetFrameTruncatedOverflowCompare(
    self: *CodeGen,
    tuple_ty: Type,
    frame_index: FrameIndex,
    src_mcv: MCValue,
    overflow_cc: ?Condition,
) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const src_lock = switch (src_mcv) {
        .register => |reg| self.register_manager.lockReg(reg),
        else => null,
    };
    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

    const ty = tuple_ty.fieldType(0, zcu);
    const int_info = ty.intInfo(zcu);

    // Number of payload bits that live in the most significant limb (1..=64).
    const hi_bits = (int_info.bits - 1) % 64 + 1;
    const hi_ty = try pt.intType(int_info.signedness, hi_bits);

    // Width at which the most significant limb is loaded, compared and stored.
    const limb_bits: u16 = @intCast(if (int_info.bits <= 64) self.regBitSize(ty) else 64);
    const limb_ty = try pt.intType(int_info.signedness, limb_bits);

    // All limbs below the most significant one, copied through unchanged.
    const rest_ty = try pt.intType(.unsigned, int_info.bits - hi_bits);

    const temp_regs =
        try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp);
    const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs);
    defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);

    // Capture the caller's condition first: the cmp below overwrites eflags.
    const overflow_reg = temp_regs[0];
    if (overflow_cc) |cc| try self.asmSetccRegister(cc, overflow_reg.to8());

    const scratch_reg = temp_regs[1];
    // Byte offset of the most significant limb within the integer.
    const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
    // Read the limb from the same offset it is later written to. The previous expression,
    // `int_info.bits / 64 * 8`, pointed one limb past the end of the source whenever the
    // bit width was a multiple of 64 (e.g. 128 or 192 bits).
    const hi_limb_mcv = if (hi_limb_off > 0)
        src_mcv.address().offset(hi_limb_off).deref()
    else
        src_mcv;
    try self.genSetReg(scratch_reg, limb_ty, hi_limb_mcv, .{});
    try self.truncateRegister(hi_ty, scratch_reg);
    try self.genBinOpMir(.{ ._, .cmp }, limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);

    // Fold "truncation changed the limb" into the captured condition.
    const eq_reg = temp_regs[2];
    if (overflow_cc) |_| {
        try self.asmSetccRegister(.ne, eq_reg.to8());
        try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg });
    }

    const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu));
    // Low limbs are copied as-is; only present for integers wider than 64 bits.
    if (hi_limb_off > 0) try self.genSetMem(
        .{ .frame = frame_index },
        payload_off,
        rest_ty,
        src_mcv,
        .{},
    );
    // Truncated most significant limb.
    try self.genSetMem(
        .{ .frame = frame_index },
        payload_off + hi_limb_off,
        limb_ty,
        .{ .register = scratch_reg },
        .{},
    );
    // Overflow bit: the combined register, or the still-live `ne` result of the cmp above.
    try self.genSetMem(
        .{ .frame = frame_index },
        @intCast(tuple_ty.structFieldOffset(1, zcu)),
        tuple_ty.fieldType(1, zcu),
        if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
        .{},
    );
}
eq_reg }); 12102 } 12103 12104 const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu)); 12105 if (hi_limb_off > 0) try self.genSetMem( 12106 .{ .frame = frame_index }, 12107 payload_off, 12108 rest_ty, 12109 src_mcv, 12110 .{}, 12111 ); 12112 try self.genSetMem( 12113 .{ .frame = frame_index }, 12114 payload_off + hi_limb_off, 12115 limb_ty, 12116 .{ .register = scratch_reg }, 12117 .{}, 12118 ); 12119 try self.genSetMem( 12120 .{ .frame = frame_index }, 12121 @intCast(tuple_ty.structFieldOffset(1, zcu)), 12122 tuple_ty.fieldType(1, zcu), 12123 if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, 12124 .{}, 12125 ); 12126 } 12127 12128 fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { 12129 const pt = self.pt; 12130 const zcu = pt.zcu; 12131 const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; 12132 const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; 12133 const tuple_ty = self.typeOfIndex(inst); 12134 const dst_ty = self.typeOf(bin_op.lhs); 12135 const result: MCValue = switch (dst_ty.zigTypeTag(zcu)) { 12136 .vector => return self.fail("TODO implement airMulWithOverflow for {}", .{dst_ty.fmt(pt)}), 12137 .int => result: { 12138 const dst_info = dst_ty.intInfo(zcu); 12139 if (dst_info.bits > 128 and dst_info.signedness == .unsigned) { 12140 const slow_inc = self.hasFeature(.slow_incdec); 12141 const abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); 12142 const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; 12143 12144 try self.spillRegisters(&.{ .rax, .rcx, .rdx }); 12145 const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); 12146 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); 12147 12148 const dst_mcv = try self.allocRegOrMem(inst, false); 12149 try self.genInlineMemset( 12150 dst_mcv.address(), 12151 .{ .immediate = 0 }, 12152 .{ .immediate = tuple_ty.abiSize(zcu) }, 12153 .{}, 12154 ); 12155 const 
lhs_mcv = try self.resolveInst(bin_op.lhs); 12156 const rhs_mcv = try self.resolveInst(bin_op.rhs); 12157 12158 const temp_regs = 12159 try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); 12160 const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); 12161 defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); 12162 12163 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32()); 12164 12165 const outer_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 12166 try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[1].to64(), .{ 12167 .base = .{ .frame = rhs_mcv.load_frame.index }, 12168 .mod = .{ .rm = .{ 12169 .size = .qword, 12170 .index = temp_regs[0].to64(), 12171 .scale = .@"8", 12172 .disp = rhs_mcv.load_frame.off, 12173 } }, 12174 }); 12175 try self.asmRegisterRegister(.{ ._, .@"test" }, temp_regs[1].to64(), temp_regs[1].to64()); 12176 const skip_inner = try self.asmJccReloc(.z, undefined); 12177 12178 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[2].to32(), temp_regs[2].to32()); 12179 try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[3].to32(), temp_regs[0].to32()); 12180 try self.asmRegisterRegister(.{ ._, .xor }, .ecx, .ecx); 12181 try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx); 12182 12183 const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 12184 try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1)); 12185 try self.asmMemoryRegister(.{ ._, .adc }, .{ 12186 .base = .{ .frame = dst_mcv.load_frame.index }, 12187 .mod = .{ .rm = .{ 12188 .size = .qword, 12189 .index = temp_regs[3].to64(), 12190 .scale = .@"8", 12191 .disp = dst_mcv.load_frame.off + 12192 @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))), 12193 } }, 12194 }, .rdx); 12195 try self.asmSetccRegister(.c, .cl); 12196 12197 try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{ 12198 .base = .{ .frame = lhs_mcv.load_frame.index }, 12199 .mod = .{ .rm = .{ 12200 
.size = .qword, 12201 .index = temp_regs[2].to64(), 12202 .scale = .@"8", 12203 .disp = lhs_mcv.load_frame.off, 12204 } }, 12205 }); 12206 try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64()); 12207 12208 try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1)); 12209 try self.asmMemoryRegister(.{ ._, .adc }, .{ 12210 .base = .{ .frame = dst_mcv.load_frame.index }, 12211 .mod = .{ .rm = .{ 12212 .size = .qword, 12213 .index = temp_regs[3].to64(), 12214 .scale = .@"8", 12215 .disp = dst_mcv.load_frame.off + 12216 @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))), 12217 } }, 12218 }, .rax); 12219 try self.asmSetccRegister(.c, .ch); 12220 12221 if (slow_inc) { 12222 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); 12223 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1)); 12224 } else { 12225 try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); 12226 try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32()); 12227 } 12228 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len)); 12229 _ = try self.asmJccReloc(.b, inner_loop); 12230 12231 try self.asmRegisterRegister(.{ ._, .@"or" }, .rdx, .rcx); 12232 const overflow = try self.asmJccReloc(.nz, undefined); 12233 const overflow_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 12234 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[2].to32(), .u(limb_len)); 12235 const no_overflow = try self.asmJccReloc(.nb, undefined); 12236 if (slow_inc) { 12237 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); 12238 } else { 12239 try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); 12240 } 12241 try self.asmMemoryImmediate(.{ ._, .cmp }, .{ 12242 .base = .{ .frame = lhs_mcv.load_frame.index }, 12243 .mod = .{ .rm = .{ 12244 .size = .qword, 12245 .index = temp_regs[2].to64(), 12246 .scale = .@"8", 12247 .disp = lhs_mcv.load_frame.off - 8, 12248 } }, 12249 }, .u(0)); 12250 _ = try self.asmJccReloc(.z, 
overflow_loop); 12251 self.performReloc(overflow); 12252 try self.asmMemoryImmediate(.{ ._, .mov }, .{ 12253 .base = .{ .frame = dst_mcv.load_frame.index }, 12254 .mod = .{ .rm = .{ 12255 .size = .byte, 12256 .disp = dst_mcv.load_frame.off + 12257 @as(i32, @intCast(tuple_ty.structFieldOffset(1, zcu))), 12258 } }, 12259 }, .u(1)); 12260 self.performReloc(no_overflow); 12261 12262 self.performReloc(skip_inner); 12263 if (slow_inc) { 12264 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); 12265 } else { 12266 try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); 12267 } 12268 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len)); 12269 _ = try self.asmJccReloc(.b, outer_loop); 12270 12271 break :result dst_mcv; 12272 } 12273 12274 const lhs_active_bits = self.activeIntBits(bin_op.lhs); 12275 const rhs_active_bits = self.activeIntBits(bin_op.rhs); 12276 const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2); 12277 const src_ty = try pt.intType(dst_info.signedness, src_bits); 12278 if (src_bits > 64 and src_bits <= 128 and 12279 dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) { 12280 .signed => { 12281 const ptr_c_int = try pt.singleMutPtrType(.c_int); 12282 const overflow = try self.allocTempRegOrMem(.c_int, false); 12283 const result = try self.genCall(.{ .lib = .{ 12284 .return_type = .i128_type, 12285 .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() }, 12286 .callee = "__muloti4", 12287 } }, &.{ .i128, .i128, ptr_c_int }, &.{ 12288 .{ .air_ref = bin_op.lhs }, 12289 .{ .air_ref = bin_op.rhs }, 12290 overflow.address(), 12291 }, .{}); 12292 12293 const dst_mcv = try self.allocRegOrMem(inst, false); 12294 try self.genSetMem( 12295 .{ .frame = dst_mcv.load_frame.index }, 12296 @intCast(tuple_ty.structFieldOffset(0, zcu)), 12297 tuple_ty.fieldType(0, zcu), 12298 result, 12299 .{}, 12300 ); 12301 try self.asmMemoryImmediate( 12302 .{ ._, .cmp }, 12303 try 
overflow.mem(self, .{ .size = self.memSize(.c_int) }), 12304 .s(0), 12305 ); 12306 try self.genSetMem( 12307 .{ .frame = dst_mcv.load_frame.index }, 12308 @intCast(tuple_ty.structFieldOffset(1, zcu)), 12309 tuple_ty.fieldType(1, zcu), 12310 .{ .eflags = .ne }, 12311 .{}, 12312 ); 12313 try self.freeValue(overflow); 12314 break :result dst_mcv; 12315 }, 12316 .unsigned => { 12317 try self.spillEflagsIfOccupied(); 12318 try self.spillRegisters(&.{ .rax, .rdx }); 12319 const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx }); 12320 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); 12321 12322 const tmp_regs = 12323 try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); 12324 const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs); 12325 defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); 12326 12327 const lhs_mcv = try self.resolveInst(bin_op.lhs); 12328 const rhs_mcv = try self.resolveInst(bin_op.rhs); 12329 const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) { 12330 .register => |lhs_reg| switch (lhs_reg.class()) { 12331 else => lhs_mcv, 12332 .sse => { 12333 const mat_lhs_mcv: MCValue = .{ 12334 .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), 12335 }; 12336 try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{}); 12337 break :mat_lhs_mcv mat_lhs_mcv; 12338 }, 12339 }, 12340 .load_symbol => { 12341 // TODO clean this up! 
12342 const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); 12343 break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; 12344 }, 12345 else => lhs_mcv, 12346 }; 12347 const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) { 12348 .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs), 12349 .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, 12350 else => @splat(null), 12351 }; 12352 defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); 12353 const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) { 12354 .register => |rhs_reg| switch (rhs_reg.class()) { 12355 else => rhs_mcv, 12356 .sse => { 12357 const mat_rhs_mcv: MCValue = .{ 12358 .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), 12359 }; 12360 try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{}); 12361 break :mat_rhs_mcv mat_rhs_mcv; 12362 }, 12363 }, 12364 .load_symbol => { 12365 // TODO clean this up! 
12366 const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); 12367 break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; 12368 }, 12369 else => rhs_mcv, 12370 }; 12371 const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) { 12372 .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs), 12373 .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, 12374 else => @splat(null), 12375 }; 12376 defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); 12377 12378 if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( 12379 .{ ._, .mov }, 12380 .rax, 12381 try mat_lhs_mcv.mem(self, .{ .size = .qword }), 12382 ) else try self.asmRegisterRegister( 12383 .{ ._, .mov }, 12384 .rax, 12385 mat_lhs_mcv.register_pair[0], 12386 ); 12387 if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( 12388 .{ ._, .mov }, 12389 tmp_regs[0], 12390 try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), 12391 ) else try self.asmRegisterRegister( 12392 .{ ._, .mov }, 12393 tmp_regs[0], 12394 mat_rhs_mcv.register_pair[1], 12395 ); 12396 try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]); 12397 try self.asmSetccRegister(.nz, tmp_regs[1].to8()); 12398 try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax); 12399 try self.asmSetccRegister(.o, tmp_regs[2].to8()); 12400 if (mat_rhs_mcv.isBase()) 12401 try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword })) 12402 else 12403 try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); 12404 try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); 12405 try self.asmSetccRegister(.c, tmp_regs[3].to8()); 12406 try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8()); 12407 if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( 12408 .{ ._, .mov }, 12409 tmp_regs[0], 12410 try 
mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), 12411 ) else try self.asmRegisterRegister( 12412 .{ ._, .mov }, 12413 tmp_regs[0], 12414 mat_lhs_mcv.register_pair[1], 12415 ); 12416 try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]); 12417 try self.asmSetccRegister(.nz, tmp_regs[3].to8()); 12418 try self.asmRegisterRegister( 12419 .{ ._, .@"and" }, 12420 tmp_regs[1].to8(), 12421 tmp_regs[3].to8(), 12422 ); 12423 try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); 12424 if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( 12425 .{ .i_, .mul }, 12426 tmp_regs[0], 12427 try mat_rhs_mcv.mem(self, .{ .size = .qword }), 12428 ) else try self.asmRegisterRegister( 12429 .{ .i_, .mul }, 12430 tmp_regs[0], 12431 mat_rhs_mcv.register_pair[0], 12432 ); 12433 try self.asmSetccRegister(.o, tmp_regs[2].to8()); 12434 try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); 12435 try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); 12436 try self.asmSetccRegister(.c, tmp_regs[2].to8()); 12437 try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); 12438 12439 const dst_mcv = try self.allocRegOrMem(inst, false); 12440 try self.genSetMem( 12441 .{ .frame = dst_mcv.load_frame.index }, 12442 @intCast(tuple_ty.structFieldOffset(0, zcu)), 12443 tuple_ty.fieldType(0, zcu), 12444 .{ .register_pair = .{ .rax, .rdx } }, 12445 .{}, 12446 ); 12447 try self.genSetMem( 12448 .{ .frame = dst_mcv.load_frame.index }, 12449 @intCast(tuple_ty.structFieldOffset(1, zcu)), 12450 tuple_ty.fieldType(1, zcu), 12451 .{ .register = tmp_regs[1] }, 12452 .{}, 12453 ); 12454 break :result dst_mcv; 12455 }, 12456 }; 12457 12458 try self.spillEflagsIfOccupied(); 12459 try self.spillRegisters(&.{ .rax, .rcx, .rdx, .rdi, .rsi }); 12460 const reg_locks = self.register_manager.lockRegsAssumeUnused(5, .{ .rax, .rcx, .rdx, .rdi, .rsi }); 12461 defer for (reg_locks) |lock| 
self.register_manager.unlockReg(lock); 12462 12463 const cc: Condition = switch (dst_info.signedness) { 12464 .unsigned => .c, 12465 .signed => .o, 12466 }; 12467 12468 const lhs = try self.resolveInst(bin_op.lhs); 12469 const rhs = try self.resolveInst(bin_op.rhs); 12470 12471 const extra_bits = if (dst_info.bits <= 64) 12472 self.regExtraBits(dst_ty) 12473 else 12474 dst_info.bits % 64; 12475 const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs); 12476 12477 switch (partial_mcv) { 12478 .register => |reg| if (extra_bits == 0) { 12479 self.eflags_inst = inst; 12480 break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; 12481 } else { 12482 const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); 12483 try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); 12484 break :result .{ .load_frame = .{ .index = frame_index } }; 12485 }, 12486 else => { 12487 // For now, this is the only supported multiply that doesn't fit in a register. 
12488 if (dst_info.bits > 128 or src_bits != 64) 12489 return self.fail("TODO implement airWithOverflow from {} to {}", .{ 12490 src_ty.fmt(pt), dst_ty.fmt(pt), 12491 }); 12492 12493 const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); 12494 if (dst_info.bits >= lhs_active_bits + rhs_active_bits) { 12495 try self.genSetMem( 12496 .{ .frame = frame_index }, 12497 @intCast(tuple_ty.structFieldOffset(0, zcu)), 12498 tuple_ty.fieldType(0, zcu), 12499 partial_mcv, 12500 .{}, 12501 ); 12502 try self.genSetMem( 12503 .{ .frame = frame_index }, 12504 @intCast(tuple_ty.structFieldOffset(1, zcu)), 12505 tuple_ty.fieldType(1, zcu), 12506 .{ .immediate = 0 }, // cc being set is impossible 12507 .{}, 12508 ); 12509 } else try self.genSetFrameTruncatedOverflowCompare( 12510 tuple_ty, 12511 frame_index, 12512 partial_mcv, 12513 null, 12514 ); 12515 break :result .{ .load_frame = .{ .index = frame_index } }; 12516 }, 12517 } 12518 }, 12519 else => unreachable, 12520 }; 12521 return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); 12522 } 12523 12524 /// Generates signed or unsigned integer multiplication/division. 12525 /// Clobbers .rax and .rdx registers. 12526 /// Quotient is saved in .rax and remainder in .rdx. 
fn genIntMulDivOpMir(self: *CodeGen, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
    const zcu = self.pt.zcu;
    const operand_bytes: u32 = @intCast(ty.abiSize(zcu));
    const operand_bits: u32 = @intCast(self.regBitSize(ty));

    // Only operands of at most 8 bytes are handled here; anything wider is reported as a TODO.
    if (operand_bytes > 8)
        return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});

    // The left-hand operand is always placed in rax before the instruction is emitted.
    try self.genSetReg(.rax, ty, lhs, .{});

    // Only `mul` and `div` mnemonics are accepted; any other tag is a caller bug.
    const is_div = switch (tag[1]) {
        .mul => false,
        .div => true,
        else => unreachable,
    };

    if (is_div) {
        // A division needs the upper half of the dividend prepared first.
        switch (tag[0]) {
            // Unprefixed `div`: zero the upper half (ah for 8-bit operands, edx otherwise).
            ._ => {
                const upper_half: Register = switch (operand_bits) {
                    8 => .ah,
                    16, 32, 64 => .edx,
                    else => unreachable,
                };
                try self.asmRegisterRegister(.{ ._, .xor }, upper_half, upper_half);
            },
            // `i_`-prefixed `div`: emit the width-matched cbw/cwd/cdq/cqo instead.
            .i_ => {
                const extend_tag: Mir.Inst.FixedTag = .{ ._, switch (operand_bits) {
                    8 => .cbw,
                    16 => .cwd,
                    32 => .cdq,
                    64 => .cqo,
                    else => unreachable,
                } };
                try self.asmOpOnly(extend_tag);
            },
            else => unreachable,
        }
    }

    // Emit the instruction itself. Register, indirect and frame operands are used
    // in place; every other kind of operand is first copied into a temporary register.
    switch (rhs) {
        .register => |src_reg| try self.asmRegister(tag, registerAlias(src_reg, operand_bytes)),
        .indirect, .load_frame => {
            const src_mem = try rhs.mem(self, .{ .size = .fromSize(operand_bytes) });
            try self.asmMemory(tag, src_mem);
        },
        else => {
            const tmp_reg = try self.copyToTmpRegister(ty, rhs);
            try self.asmRegister(tag, registerAlias(tmp_reg, operand_bytes));
        },
    }

    // After an 8-bit division, copy ah into dl.
    if (is_div and operand_bits == 8)
        try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah);
}

/// Always returns a register.
/// Clobbers .rax and .rdx registers.
12577 fn genInlineIntDivFloor(self: *CodeGen, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { 12578 const pt = self.pt; 12579 const zcu = pt.zcu; 12580 const abi_size: u32 = @intCast(ty.abiSize(zcu)); 12581 const int_info = ty.intInfo(zcu); 12582 const dividend = switch (lhs) { 12583 .register => |reg| reg, 12584 else => try self.copyToTmpRegister(ty, lhs), 12585 }; 12586 const dividend_lock = self.register_manager.lockReg(dividend); 12587 defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock); 12588 12589 const divisor = switch (rhs) { 12590 .register => |reg| reg, 12591 else => try self.copyToTmpRegister(ty, rhs), 12592 }; 12593 const divisor_lock = self.register_manager.lockReg(divisor); 12594 defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock); 12595 12596 try self.genIntMulDivOpMir( 12597 switch (int_info.signedness) { 12598 .signed => .{ .i_, .div }, 12599 .unsigned => .{ ._, .div }, 12600 }, 12601 ty, 12602 .{ .register = dividend }, 12603 .{ .register = divisor }, 12604 ); 12605 12606 try self.asmRegisterRegister( 12607 .{ ._, .xor }, 12608 registerAlias(divisor, abi_size), 12609 registerAlias(dividend, abi_size), 12610 ); 12611 try self.asmRegisterImmediate( 12612 .{ ._r, .sa }, 12613 registerAlias(divisor, abi_size), 12614 .u(int_info.bits - 1), 12615 ); 12616 try self.asmRegisterRegister( 12617 .{ ._, .@"test" }, 12618 registerAlias(.rdx, abi_size), 12619 registerAlias(.rdx, abi_size), 12620 ); 12621 try self.asmCmovccRegisterRegister( 12622 .z, 12623 registerAlias(divisor, @max(abi_size, 2)), 12624 registerAlias(.rdx, @max(abi_size, 2)), 12625 ); 12626 try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax }); 12627 return MCValue{ .register = divisor }; 12628 } 12629 12630 fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { 12631 const pt = self.pt; 12632 const zcu = pt.zcu; 12633 const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; 12634 12635 const 
air_tags = self.air.instructions.items(.tag); 12636 const tag = air_tags[@intFromEnum(inst)]; 12637 const lhs_ty = self.typeOf(bin_op.lhs); 12638 const rhs_ty = self.typeOf(bin_op.rhs); 12639 const result: MCValue = result: { 12640 switch (lhs_ty.zigTypeTag(zcu)) { 12641 .int => { 12642 try self.spillRegisters(&.{.rcx}); 12643 try self.register_manager.getKnownReg(.rcx, null); 12644 const lhs_mcv = try self.resolveInst(bin_op.lhs); 12645 const rhs_mcv = try self.resolveInst(bin_op.rhs); 12646 12647 const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty); 12648 switch (tag) { 12649 .shr, .shr_exact, .shl_exact => {}, 12650 .shl => switch (dst_mcv) { 12651 .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg), 12652 .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]), 12653 .load_frame => |frame_addr| { 12654 const tmp_reg = 12655 try self.register_manager.allocReg(null, abi.RegisterClass.gp); 12656 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 12657 defer self.register_manager.unlockReg(tmp_lock); 12658 12659 const lhs_bits: u31 = @intCast(lhs_ty.bitSize(zcu)); 12660 const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty; 12661 const off = frame_addr.off + (lhs_bits - 1) / 64 * 8; 12662 try self.genSetReg( 12663 tmp_reg, 12664 tmp_ty, 12665 .{ .load_frame = .{ .index = frame_addr.index, .off = off } }, 12666 .{}, 12667 ); 12668 try self.truncateRegister(lhs_ty, tmp_reg); 12669 try self.genSetMem( 12670 .{ .frame = frame_addr.index }, 12671 off, 12672 tmp_ty, 12673 .{ .register = tmp_reg }, 12674 .{}, 12675 ); 12676 }, 12677 else => {}, 12678 }, 12679 else => unreachable, 12680 } 12681 break :result dst_mcv; 12682 }, 12683 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 12684 .int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(zcu).intInfo(zcu).bits) { 12685 else => null, 12686 16 => switch (lhs_ty.vectorLen(zcu)) { 12687 else => null, 12688 1...8 => switch 
(tag) { 12689 else => unreachable, 12690 .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 12691 .signed => if (self.hasFeature(.avx)) 12692 .{ .vp_w, .sra } 12693 else 12694 .{ .p_w, .sra }, 12695 .unsigned => if (self.hasFeature(.avx)) 12696 .{ .vp_w, .srl } 12697 else 12698 .{ .p_w, .srl }, 12699 }, 12700 .shl, .shl_exact => if (self.hasFeature(.avx)) 12701 .{ .vp_w, .sll } 12702 else 12703 .{ .p_w, .sll }, 12704 }, 12705 9...16 => switch (tag) { 12706 else => unreachable, 12707 .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 12708 .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null, 12709 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null, 12710 }, 12711 .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null, 12712 }, 12713 }, 12714 32 => switch (lhs_ty.vectorLen(zcu)) { 12715 else => null, 12716 1...4 => switch (tag) { 12717 else => unreachable, 12718 .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 12719 .signed => if (self.hasFeature(.avx)) 12720 .{ .vp_d, .sra } 12721 else 12722 .{ .p_d, .sra }, 12723 .unsigned => if (self.hasFeature(.avx)) 12724 .{ .vp_d, .srl } 12725 else 12726 .{ .p_d, .srl }, 12727 }, 12728 .shl, .shl_exact => if (self.hasFeature(.avx)) 12729 .{ .vp_d, .sll } 12730 else 12731 .{ .p_d, .sll }, 12732 }, 12733 5...8 => switch (tag) { 12734 else => unreachable, 12735 .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 12736 .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null, 12737 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null, 12738 }, 12739 .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null, 12740 }, 12741 }, 12742 64 => switch (lhs_ty.vectorLen(zcu)) { 12743 else => null, 12744 1...2 => switch (tag) { 12745 else => unreachable, 12746 .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 12747 
.signed => if (self.hasFeature(.avx)) 12748 .{ .vp_q, .sra } 12749 else 12750 .{ .p_q, .sra }, 12751 .unsigned => if (self.hasFeature(.avx)) 12752 .{ .vp_q, .srl } 12753 else 12754 .{ .p_q, .srl }, 12755 }, 12756 .shl, .shl_exact => if (self.hasFeature(.avx)) 12757 .{ .vp_q, .sll } 12758 else 12759 .{ .p_q, .sll }, 12760 }, 12761 3...4 => switch (tag) { 12762 else => unreachable, 12763 .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 12764 .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null, 12765 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null, 12766 }, 12767 .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null, 12768 }, 12769 }, 12770 })) |mir_tag| if (try self.air.value(bin_op.rhs, pt)) |rhs_val| { 12771 switch (zcu.intern_pool.indexToKey(rhs_val.toIntern())) { 12772 .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) { 12773 .repeated_elem => |rhs_elem| { 12774 const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); 12775 12776 const lhs_mcv = try self.resolveInst(bin_op.lhs); 12777 const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and 12778 self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) 12779 .{lhs_mcv.getReg().?} ** 2 12780 else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{ 12781 try self.register_manager.allocReg(inst, abi.RegisterClass.sse), 12782 lhs_mcv.getReg().?, 12783 } else .{(try self.copyToRegisterWithInstTracking( 12784 inst, 12785 lhs_ty, 12786 lhs_mcv, 12787 )).register} ** 2; 12788 const reg_locks = 12789 self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg }); 12790 defer for (reg_locks) |reg_lock| if (reg_lock) |lock| 12791 self.register_manager.unlockReg(lock); 12792 12793 const shift_imm: Immediate = 12794 .u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(zcu))); 12795 if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate( 12796 mir_tag, 12797 registerAlias(dst_reg, abi_size), 12798 registerAlias(lhs_reg, abi_size), 
12799 shift_imm, 12800 ) else { 12801 assert(dst_reg.id() == lhs_reg.id()); 12802 try self.asmRegisterImmediate( 12803 mir_tag, 12804 registerAlias(dst_reg, abi_size), 12805 shift_imm, 12806 ); 12807 } 12808 break :result .{ .register = dst_reg }; 12809 }, 12810 else => {}, 12811 }, 12812 else => {}, 12813 } 12814 } else if (bin_op.rhs.toIndex()) |rhs_inst| switch (air_tags[@intFromEnum(rhs_inst)]) { 12815 .splat => { 12816 const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); 12817 12818 const lhs_mcv = try self.resolveInst(bin_op.lhs); 12819 const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and 12820 self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) 12821 .{lhs_mcv.getReg().?} ** 2 12822 else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{ 12823 try self.register_manager.allocReg(inst, abi.RegisterClass.sse), 12824 lhs_mcv.getReg().?, 12825 } else .{(try self.copyToRegisterWithInstTracking( 12826 inst, 12827 lhs_ty, 12828 lhs_mcv, 12829 )).register} ** 2; 12830 const reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg }); 12831 defer for (reg_locks) |reg_lock| if (reg_lock) |lock| 12832 self.register_manager.unlockReg(lock); 12833 12834 const shift_reg = 12835 try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs }); 12836 const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg); 12837 defer self.register_manager.unlockReg(shift_lock); 12838 12839 const mask_ty = try pt.vectorType(.{ .len = 16, .child = .u8_type }); 12840 const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ 12841 .ty = mask_ty.toIntern(), 12842 .storage = .{ .elems = &([1]InternPool.Index{ 12843 (try rhs_ty.childType(zcu).maxIntScalar(pt, .u8)).toIntern(), 12844 } ++ [1]InternPool.Index{ 12845 (try pt.intValue(.u8, 0)).toIntern(), 12846 } ** 15) }, 12847 } }))); 12848 const mask_addr_reg = try self.copyToTmpRegister(.usize, mask_mcv.address()); 12849 const mask_addr_lock = 
self.register_manager.lockRegAssumeUnused(mask_addr_reg); 12850 defer self.register_manager.unlockReg(mask_addr_lock); 12851 12852 if (self.hasFeature(.avx)) { 12853 try self.asmRegisterRegisterMemory( 12854 .{ .vp_, .@"and" }, 12855 shift_reg.to128(), 12856 shift_reg.to128(), 12857 .{ 12858 .base = .{ .reg = mask_addr_reg }, 12859 .mod = .{ .rm = .{ .size = .xword } }, 12860 }, 12861 ); 12862 try self.asmRegisterRegisterRegister( 12863 mir_tag, 12864 registerAlias(dst_reg, abi_size), 12865 registerAlias(lhs_reg, abi_size), 12866 shift_reg.to128(), 12867 ); 12868 } else { 12869 try self.asmRegisterMemory( 12870 .{ .p_, .@"and" }, 12871 shift_reg.to128(), 12872 .{ 12873 .base = .{ .reg = mask_addr_reg }, 12874 .mod = .{ .rm = .{ .size = .xword } }, 12875 }, 12876 ); 12877 assert(dst_reg.id() == lhs_reg.id()); 12878 try self.asmRegisterRegister( 12879 mir_tag, 12880 registerAlias(dst_reg, abi_size), 12881 shift_reg.to128(), 12882 ); 12883 } 12884 break :result .{ .register = dst_reg }; 12885 }, 12886 else => {}, 12887 }, 12888 else => {}, 12889 }, 12890 else => {}, 12891 } 12892 return self.fail("TODO implement airShlShrBinOp for {}", .{lhs_ty.fmt(pt)}); 12893 }; 12894 return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); 12895 } 12896 12897 fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void { 12898 const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; 12899 _ = bin_op; 12900 return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch}); 12901 //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); 12902 } 12903 12904 fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void { 12905 const zcu = self.pt.zcu; 12906 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 12907 const result: MCValue = result: { 12908 const pl_ty = self.typeOfIndex(inst); 12909 if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; 12910 12911 const opt_mcv = try 
self.resolveInst(ty_op.operand); 12912 if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) { 12913 const pl_mcv: MCValue = switch (opt_mcv) { 12914 .register_overflow => |ro| pl: { 12915 self.eflags_inst = null; // actually stop tracking the overflow part 12916 break :pl .{ .register = ro.reg }; 12917 }, 12918 else => opt_mcv, 12919 }; 12920 switch (pl_mcv) { 12921 .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg), 12922 else => {}, 12923 } 12924 break :result pl_mcv; 12925 } 12926 12927 const pl_mcv = try self.allocRegOrMem(inst, true); 12928 try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) { 12929 else => opt_mcv, 12930 .register_overflow => |ro| .{ .register = ro.reg }, 12931 }, .{}); 12932 break :result pl_mcv; 12933 }; 12934 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 12935 } 12936 12937 fn airOptionalPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void { 12938 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 12939 12940 const dst_ty = self.typeOfIndex(inst); 12941 const opt_mcv = try self.resolveInst(ty_op.operand); 12942 12943 const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) 12944 opt_mcv 12945 else 12946 try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv); 12947 return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); 12948 } 12949 12950 fn airOptionalPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { 12951 const pt = self.pt; 12952 const zcu = pt.zcu; 12953 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 12954 const result = result: { 12955 const dst_ty = self.typeOfIndex(inst); 12956 const src_ty = self.typeOf(ty_op.operand); 12957 const opt_ty = src_ty.childType(zcu); 12958 const src_mcv = try self.resolveInst(ty_op.operand); 12959 12960 if (opt_ty.optionalReprIsPayload(zcu)) { 12961 break :result if (self.liveness.isUnused(inst)) 12962 .unreach 12963 else if (self.reuseOperand(inst, ty_op.operand, 
0, src_mcv)) 12964 src_mcv 12965 else 12966 try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); 12967 } 12968 12969 const dst_mcv: MCValue = if (src_mcv.isRegister() and 12970 self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) 12971 src_mcv 12972 else if (self.liveness.isUnused(inst)) 12973 .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) } 12974 else 12975 try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); 12976 12977 const pl_ty = dst_ty.childType(zcu); 12978 const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu)); 12979 try self.genSetMem( 12980 .{ .reg = dst_mcv.getReg().? }, 12981 pl_abi_size, 12982 .bool, 12983 .{ .immediate = 1 }, 12984 .{}, 12985 ); 12986 break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv; 12987 }; 12988 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 12989 } 12990 12991 fn airUnwrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void { 12992 const pt = self.pt; 12993 const zcu = pt.zcu; 12994 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 12995 const err_union_ty = self.typeOf(ty_op.operand); 12996 const err_ty = err_union_ty.errorUnionSet(zcu); 12997 const payload_ty = err_union_ty.errorUnionPayload(zcu); 12998 const operand = try self.resolveInst(ty_op.operand); 12999 13000 const result: MCValue = result: { 13001 if (err_ty.errorSetIsEmpty(zcu)) { 13002 break :result MCValue{ .immediate = 0 }; 13003 } 13004 13005 if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { 13006 break :result operand; 13007 } 13008 13009 const err_off = codegen.errUnionErrorOffset(payload_ty, zcu); 13010 switch (operand) { 13011 .register => |reg| { 13012 // TODO reuse operand 13013 const eu_lock = self.register_manager.lockReg(reg); 13014 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); 13015 13016 const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand); 13017 if (err_off > 0) try self.genShiftBinOpMir( 13018 .{ 
._r, .sh }, 13019 err_union_ty, 13020 result, 13021 .u8, 13022 .{ .immediate = @as(u6, @intCast(err_off * 8)) }, 13023 ) else try self.truncateRegister(.anyerror, result.register); 13024 break :result result; 13025 }, 13026 .load_frame => |frame_addr| break :result .{ .load_frame = .{ 13027 .index = frame_addr.index, 13028 .off = frame_addr.off + @as(i32, @intCast(err_off)), 13029 } }, 13030 else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}), 13031 } 13032 }; 13033 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 13034 } 13035 13036 fn airUnwrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void { 13037 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 13038 const operand_ty = self.typeOf(ty_op.operand); 13039 const operand = try self.resolveInst(ty_op.operand); 13040 const result = try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand); 13041 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 13042 } 13043 13044 // *(E!T) -> E 13045 fn airUnwrapErrUnionErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { 13046 const pt = self.pt; 13047 const zcu = pt.zcu; 13048 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 13049 13050 const src_ty = self.typeOf(ty_op.operand); 13051 const src_mcv = try self.resolveInst(ty_op.operand); 13052 const src_reg = switch (src_mcv) { 13053 .register => |reg| reg, 13054 else => try self.copyToTmpRegister(src_ty, src_mcv), 13055 }; 13056 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); 13057 defer self.register_manager.unlockReg(src_lock); 13058 13059 const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); 13060 const dst_mcv = MCValue{ .register = dst_reg }; 13061 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); 13062 defer self.register_manager.unlockReg(dst_lock); 13063 13064 const eu_ty = src_ty.childType(zcu); 13065 const pl_ty = 
eu_ty.errorUnionPayload(zcu); 13066 const err_ty = eu_ty.errorUnionSet(zcu); 13067 const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); 13068 const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu)); 13069 try self.asmRegisterMemory( 13070 .{ ._, .mov }, 13071 registerAlias(dst_reg, err_abi_size), 13072 .{ 13073 .base = .{ .reg = src_reg }, 13074 .mod = .{ .rm = .{ 13075 .size = .fromSize(err_abi_size), 13076 .disp = err_off, 13077 } }, 13078 }, 13079 ); 13080 13081 return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); 13082 } 13083 13084 // *(E!T) -> *T 13085 fn airUnwrapErrUnionPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void { 13086 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 13087 const operand_ty = self.typeOf(ty_op.operand); 13088 const operand = try self.resolveInst(ty_op.operand); 13089 const result = try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand); 13090 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 13091 } 13092 13093 fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { 13094 const pt = self.pt; 13095 const zcu = pt.zcu; 13096 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 13097 const result: MCValue = result: { 13098 const src_ty = self.typeOf(ty_op.operand); 13099 const src_mcv = try self.resolveInst(ty_op.operand); 13100 const src_reg = switch (src_mcv) { 13101 .register => |reg| reg, 13102 else => try self.copyToTmpRegister(src_ty, src_mcv), 13103 }; 13104 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); 13105 defer self.register_manager.unlockReg(src_lock); 13106 13107 const eu_ty = src_ty.childType(zcu); 13108 const pl_ty = eu_ty.errorUnionPayload(zcu); 13109 const err_ty = eu_ty.errorUnionSet(zcu); 13110 const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); 13111 const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu)); 13112 try 
self.asmMemoryImmediate( 13113 .{ ._, .mov }, 13114 .{ 13115 .base = .{ .reg = src_reg }, 13116 .mod = .{ .rm = .{ 13117 .size = .fromSize(err_abi_size), 13118 .disp = err_off, 13119 } }, 13120 }, 13121 .u(0), 13122 ); 13123 13124 if (self.liveness.isUnused(inst)) break :result .unreach; 13125 13126 const dst_ty = self.typeOfIndex(inst); 13127 const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) 13128 src_reg 13129 else 13130 try self.register_manager.allocReg(inst, abi.RegisterClass.gp); 13131 const dst_lock = self.register_manager.lockReg(dst_reg); 13132 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 13133 13134 const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); 13135 const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); 13136 try self.asmRegisterMemory( 13137 .{ ._, .lea }, 13138 registerAlias(dst_reg, dst_abi_size), 13139 .{ 13140 .base = .{ .reg = src_reg }, 13141 .mod = .{ .rm = .{ .size = .qword, .disp = pl_off } }, 13142 }, 13143 ); 13144 break :result .{ .register = dst_reg }; 13145 }; 13146 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 13147 } 13148 13149 fn genUnwrapErrUnionPayloadMir( 13150 self: *CodeGen, 13151 maybe_inst: ?Air.Inst.Index, 13152 err_union_ty: Type, 13153 err_union: MCValue, 13154 ) !MCValue { 13155 const pt = self.pt; 13156 const zcu = pt.zcu; 13157 const payload_ty = err_union_ty.errorUnionPayload(zcu); 13158 13159 const result: MCValue = result: { 13160 if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; 13161 13162 const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu)); 13163 switch (err_union) { 13164 .load_frame => |frame_addr| break :result .{ .load_frame = .{ 13165 .index = frame_addr.index, 13166 .off = frame_addr.off + payload_off, 13167 } }, 13168 .register => |reg| { 13169 // TODO reuse operand 13170 const eu_lock = self.register_manager.lockReg(reg); 13171 defer if (eu_lock) |lock| 
self.register_manager.unlockReg(lock); 13172 13173 const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp); 13174 const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null) 13175 try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union) 13176 else 13177 .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; 13178 if (payload_off > 0) try self.genShiftBinOpMir( 13179 .{ ._r, .sh }, 13180 err_union_ty, 13181 result_mcv, 13182 .u8, 13183 .{ .immediate = @as(u6, @intCast(payload_off * 8)) }, 13184 ) else try self.truncateRegister(payload_ty, result_mcv.register); 13185 break :result if (payload_in_gp) 13186 result_mcv 13187 else if (maybe_inst) |inst| 13188 try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv) 13189 else 13190 .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) }; 13191 }, 13192 else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {}", .{err_union}), 13193 } 13194 }; 13195 13196 return result; 13197 } 13198 13199 fn genUnwrapErrUnionPayloadPtrMir( 13200 self: *CodeGen, 13201 maybe_inst: ?Air.Inst.Index, 13202 ptr_ty: Type, 13203 ptr_mcv: MCValue, 13204 ) !MCValue { 13205 const pt = self.pt; 13206 const zcu = pt.zcu; 13207 const err_union_ty = ptr_ty.childType(zcu); 13208 const payload_ty = err_union_ty.errorUnionPayload(zcu); 13209 13210 const result: MCValue = result: { 13211 const payload_off = codegen.errUnionPayloadOffset(payload_ty, zcu); 13212 const result_mcv: MCValue = if (maybe_inst) |inst| 13213 try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv) 13214 else 13215 .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }; 13216 try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off }); 13217 break :result result_mcv; 13218 }; 13219 13220 return result; 13221 } 13222 13223 fn airErrReturnTrace(self: *CodeGen, inst: Air.Inst.Index) !void { 13224 _ = inst; 13225 return 
/// Lowers `wrap_optional`: turns the payload operand into a non-null optional.
///
/// * A payload type without runtime bits yields the immediate `1`.
/// * When `optionalReprIsPayload` reports that the optional type is represented
///   by its payload, the operand is reused if possible and otherwise copied
///   with no further work.
/// * In the remaining case the payload is copied and a flag is written just
///   past it: for a register result, the bit at index `abi_size * 8` is set
///   with `.{ ._s, .bt }`; for a frame result, the byte at displacement
///   `abi_size` is set to `1`.
fn airWrapOptional(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op.operand;
    const result: MCValue = result: {
        const payload_ty = self.typeOf(operand);
        if (!payload_ty.hasRuntimeBits(zcu)) break :result .{ .immediate = 1 };

        const optional_ty = self.typeOfIndex(inst);
        const payload_mcv = try self.resolveInst(operand);
        const payload_is_repr = optional_ty.optionalReprIsPayload(zcu);
        if (payload_is_repr and self.reuseOperand(inst, operand, 0, payload_mcv))
            break :result payload_mcv;

        // Keep a register-resident payload locked while the destination is
        // allocated and filled; the lock is released when this block exits.
        const payload_lock: ?RegisterLock = switch (payload_mcv) {
            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
            else => null,
        };
        defer if (payload_lock) |lock| self.register_manager.unlockReg(lock);

        const wrapped_mcv = try self.allocRegOrMem(inst, true);
        try self.genCopy(payload_ty, wrapped_mcv, payload_mcv, .{});
        if (payload_is_repr) break :result wrapped_mcv;

        const payload_size: i32 = @intCast(payload_ty.abiSize(zcu));
        switch (wrapped_mcv) {
            .load_frame => |frame_addr| {
                // Store the flag byte directly after the payload bytes.
                try self.asmMemoryImmediate(
                    .{ ._, .mov },
                    .{
                        .base = .{ .frame = frame_addr.index },
                        .mod = .{ .rm = .{
                            .size = .byte,
                            .disp = frame_addr.off + payload_size,
                        } },
                    },
                    .u(1),
                );
            },
            .register => |wrapped_reg| {
                try self.truncateRegister(payload_ty, wrapped_reg);
                // The flag is the first bit above the payload's bytes.
                const flag_bit: u6 = @intCast(payload_size * 8);
                try self.asmRegisterImmediate(.{ ._s, .bt }, wrapped_reg, .u(flag_bit));
            },
            else => unreachable,
        }
        break :result wrapped_mcv;
    };
    return self.finishAir(inst, result, .{ operand, .none, .none });
}
/// Lowers `slice_ptr`: extracts the pointer field of a slice operand.
///
/// For a `register_pair` operand the pointer is element `0` of the pair; any
/// other operand value is used as-is. If the operand can be reused, element
/// `1` of a register pair is released with `freeValue` and the pointer value is
/// returned directly. Otherwise the pointer is copied into a freshly allocated
/// register or memory location.
fn airSlicePtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op.operand;
    const slice_mcv = try self.resolveInst(operand);
    const ptr_field: MCValue = switch (slice_mcv) {
        .register_pair => |pair| .{ .register = pair[0] },
        else => slice_mcv,
    };

    const result: MCValue = if (self.reuseOperand(inst, operand, 0, slice_mcv)) reused: {
        // Only the pointer half stays live; give the other half back.
        switch (slice_mcv) {
            .register_pair => |pair| try self.freeValue(.{ .register = pair[1] }),
            else => {},
        }
        break :reused ptr_field;
    } else copied: {
        const fresh_mcv = try self.allocRegOrMem(inst, true);
        try self.genCopy(self.typeOfIndex(inst), fresh_mcv, ptr_field, .{});
        break :copied fresh_mcv;
    };
    return self.finishAir(inst, result, .{ operand, .none, .none });
}
/// Lowers `ptr_slice_len_ptr`: from a pointer to a slice, computes a pointer to
/// the slice's length field.
///
/// The operand pointer is brought into a register if it is not in one already,
/// and the result is formed with `lea dst, [src + 8]`. The destination is the
/// source register when the operand can be reused, otherwise a newly allocated
/// general-purpose register tracked for `inst`.
fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op.operand;

    const slice_ptr_ty = self.typeOf(operand);
    const slice_ptr_mcv = try self.resolveInst(operand);
    const base_reg = switch (slice_ptr_mcv) {
        .register => |reg| reg,
        else => try self.copyToTmpRegister(slice_ptr_ty, slice_ptr_mcv),
    };
    const base_lock = self.register_manager.lockRegAssumeUnused(base_reg);
    defer self.register_manager.unlockReg(base_lock);

    const len_ptr_ty = self.typeOfIndex(inst);
    const len_ptr_reg = if (!self.reuseOperand(inst, operand, 0, slice_ptr_mcv))
        try self.register_manager.allocReg(inst, abi.RegisterClass.gp)
    else
        base_reg;
    // `lockReg` yields an optional lock; presumably it is null when the
    // register is already locked (as when `base_reg` is reused) — TODO confirm.
    const len_ptr_lock = self.register_manager.lockReg(len_ptr_reg);
    defer if (len_ptr_lock) |lock| self.register_manager.unlockReg(lock);

    const len_ptr_size: u32 = @intCast(len_ptr_ty.abiSize(self.pt.zcu));
    try self.asmRegisterMemory(
        .{ ._, .lea },
        registerAlias(len_ptr_reg, len_ptr_size),
        .{
            .base = .{ .reg = base_reg },
            .mod = .{ .rm = .{ .size = .qword, .disp = 8 } },
        },
    );

    const result: MCValue = .{ .register = len_ptr_reg };
    return self.finishAir(inst, result, .{ operand, .none, .none });
}
/// Returns a register holding `index * elem_size`, the byte offset of an element.
///
/// * Immediate index: the product is computed here, while generating code, and
///   loaded into a newly allocated general-purpose register.
/// * Any other index: the index is copied into a temporary register, which is
///   then multiplied by `elem_size` in the emitted code.
///
/// The returned register is not locked by this function.
fn elemOffset(self: *CodeGen, index_ty: Type, index: MCValue, elem_size: u64) !Register {
    switch (index) {
        .immediate => |index_imm| {
            const offset_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            try self.genSetReg(offset_reg, index_ty, .{ .immediate = index_imm * elem_size }, .{});
            return offset_reg;
        },
        else => {
            const offset_reg = try self.copyToTmpRegister(index_ty, index);
            try self.genIntMulComplexOpMir(
                index_ty,
                .{ .register = offset_reg },
                .{ .immediate = elem_size },
            );
            return offset_reg;
        },
    }
}
/// Lowers `slice_elem_val`: loads the element at index `rhs` from slice `lhs`.
///
/// An element type without runtime bits produces `.none`. Otherwise the
/// element address is computed by `genSliceElemPtr` and the value is loaded
/// through it into a newly allocated destination, using the slice's
/// pointer-field type as the pointer type for the load.
fn airSliceElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const operands = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const slice_ref = operands.lhs;
    const index_ref = operands.rhs;

    const result: MCValue = if (self.typeOfIndex(inst).hasRuntimeBitsIgnoreComptime(zcu)) loaded: {
        const ptr_field_ty = self.typeOf(slice_ref).slicePtrFieldType(zcu);
        const elem_addr = try self.genSliceElemPtr(slice_ref, index_ref);
        const loaded_mcv = try self.allocRegOrMem(inst, false);
        try self.load(loaded_mcv, ptr_field_ty, elem_addr);
        break :loaded loaded_mcv;
    } else .none;
    return self.finishAir(inst, result, .{ slice_ref, index_ref, .none });
}
/// Lowers `array_elem_val`: reads the element at index `rhs` out of the array
/// (or vector) value `lhs`.
///
/// Two strategies are used:
/// * Vectors whose element type is 1 bit wide: the selected bit is tested with
///   `bt` and the carry flag is materialized into a new general-purpose
///   register with `setcc`.
/// * Everything else: the address of the array is placed in a scratch
///   register, the scaled index from `elemOffset` is added to it, and the
///   element is copied from that address into the destination.
///
/// Array locations not handled by the switches below are reported through
/// `self.fail` as TODOs.
fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

    const result: MCValue = result: {
        const array_ty = self.typeOf(bin_op.lhs);
        const elem_ty = array_ty.childType(zcu);

        // Lock register-resident operands so the allocations below cannot
        // hand out their registers; both locks are released when this block exits.
        const array_mcv = try self.resolveInst(bin_op.lhs);
        const array_lock: ?RegisterLock = switch (array_mcv) {
            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
            else => null,
        };
        defer if (array_lock) |lock| self.register_manager.unlockReg(lock);

        const index_ty = self.typeOf(bin_op.rhs);
        const index_mcv = try self.resolveInst(bin_op.rhs);
        const index_lock = switch (index_mcv) {
            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
            else => null,
        };
        defer if (index_lock) |lock| self.register_manager.unlockReg(lock);

        // Both strategies below emit flag-writing instructions (`bt` / `add`).
        try self.spillEflagsIfOccupied();
        if (array_ty.isVector(zcu) and elem_ty.bitSize(zcu) == 1) {
            // Bit-vector path. A `register_mask` value is first copied into a
            // temporary register; every other location is used where it is.
            const array_mat_mcv: MCValue = switch (array_mcv) {
                else => array_mcv,
                .register_mask => .{ .register = try self.copyToTmpRegister(array_ty, array_mcv) },
            };
            const array_mat_lock = switch (array_mat_mcv) {
                .register => |reg| self.register_manager.lockReg(reg),
                else => null,
            };
            defer if (array_mat_lock) |lock| self.register_manager.unlockReg(lock);

            // Emit `bt <vector>, <index>`. An immediate index is encoded
            // directly; any other index is used from its register or copied
            // into a temporary one first.
            switch (array_mat_mcv) {
                .register => |array_reg| switch (array_reg.class()) {
                    .general_purpose => switch (index_mcv) {
                        .immediate => |index_imm| try self.asmRegisterImmediate(
                            .{ ._, .bt },
                            array_reg.to64(),
                            .u(index_imm),
                        ),
                        else => try self.asmRegisterRegister(
                            .{ ._, .bt },
                            array_reg.to64(),
                            switch (index_mcv) {
                                .register => |index_reg| index_reg,
                                else => try self.copyToTmpRegister(index_ty, index_mcv),
                            }.to64(),
                        ),
                    },
                    .sse => {
                        // The vector is stored to a fresh frame slot and the
                        // bit test is done against that memory instead.
                        const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
                        try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mat_mcv, .{});
                        switch (index_mcv) {
                            .immediate => |index_imm| try self.asmMemoryImmediate(
                                .{ ._, .bt },
                                .{
                                    .base = .{ .frame = frame_index },
                                    .mod = .{ .rm = .{ .size = .qword } },
                                },
                                .u(index_imm),
                            ),
                            else => try self.asmMemoryRegister(
                                .{ ._, .bt },
                                .{
                                    .base = .{ .frame = frame_index },
                                    .mod = .{ .rm = .{ .size = .qword } },
                                },
                                switch (index_mcv) {
                                    .register => |index_reg| index_reg,
                                    else => try self.copyToTmpRegister(index_ty, index_mcv),
                                }.to64(),
                            ),
                        }
                    },
                    else => unreachable,
                },
                // Vector already lives in a frame slot: test it in place.
                .load_frame => switch (index_mcv) {
                    .immediate => |index_imm| try self.asmMemoryImmediate(
                        .{ ._, .bt },
                        try array_mat_mcv.mem(self, .{ .size = .qword }),
                        .u(index_imm),
                    ),
                    else => try self.asmMemoryRegister(
                        .{ ._, .bt },
                        try array_mat_mcv.mem(self, .{ .size = .qword }),
                        switch (index_mcv) {
                            .register => |index_reg| index_reg,
                            else => try self.copyToTmpRegister(index_ty, index_mcv),
                        }.to64(),
                    ),
                },
                // Other memory-like locations: load the vector's address into a
                // temporary register and test through that register.
                .memory, .load_symbol, .load_direct, .load_got, .load_tlv => switch (index_mcv) {
                    .immediate => |index_imm| try self.asmMemoryImmediate(
                        .{ ._, .bt },
                        .{
                            .base = .{
                                .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
                            },
                            .mod = .{ .rm = .{ .size = .qword } },
                        },
                        .u(index_imm),
                    ),
                    else => try self.asmMemoryRegister(
                        .{ ._, .bt },
                        .{
                            .base = .{
                                .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
                            },
                            .mod = .{ .rm = .{ .size = .qword } },
                        },
                        switch (index_mcv) {
                            .register => |index_reg| index_reg,
                            else => try self.copyToTmpRegister(index_ty, index_mcv),
                        }.to64(),
                    ),
                },
                else => return self.fail("TODO airArrayElemVal for {s} of {}", .{
                    @tagName(array_mat_mcv), array_ty.fmt(pt),
                }),
            }

            // Turn the carry flag produced by `bt` into the result value.
            const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
            try self.asmSetccRegister(.c, dst_reg.to8());
            break :result .{ .register = dst_reg };
        }

        // General path: compute the element's address in `addr_reg`.
        const elem_abi_size = elem_ty.abiSize(zcu);
        const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
        const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
        defer self.register_manager.unlockReg(addr_lock);

        switch (array_mcv) {
            .register => {
                // A register-resident array is stored to a fresh frame slot so
                // that it has an address to index from.
                const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
                try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{});
                try self.asmRegisterMemory(
                    .{ ._, .lea },
                    addr_reg,
                    .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
                );
            },
            .load_frame => |frame_addr| try self.asmRegisterMemory(
                .{ ._, .lea },
                addr_reg,
                .{
                    .base = .{ .frame = frame_addr.index },
                    .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } },
                },
            ),
            .memory,
            .load_symbol,
            .load_direct,
            .load_got,
            .load_tlv,
            => try self.genSetReg(addr_reg, .usize, array_mcv.address(), .{}),
            .lea_symbol, .lea_direct, .lea_tlv => unreachable,
            else => return self.fail("TODO airArrayElemVal_val for {s} of {}", .{
                @tagName(array_mcv), array_ty.fmt(pt),
            }),
        }

        // Byte offset of the element: index scaled by the element's ABI size.
        const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
        const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
        defer self.register_manager.unlockReg(offset_lock);

        // TODO we could allocate register here, but need to expect addr register and potentially
        // offset register.
        const dst_mcv = try self.allocRegOrMem(inst, false);
        // addr_reg += offset_reg, then copy the element from `[addr_reg]`.
        try self.genBinOpMir(.{ ._, .add }, .usize, .{ .register = addr_reg }, .{ .register = offset_reg });
        try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{});
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
/// Lowers `ptr_elem_ptr`: computes a pointer to element `rhs` of the memory
/// that base pointer `lhs` refers to.
///
/// When the result pointer type carries a vector index
/// (`flags.vector_index != .none`), no address arithmetic is emitted: the base
/// pointer is reused if possible, otherwise copied into a tracked register.
/// In every other case the base pointer is copied into a register tracked for
/// `inst` and the scaled index from `elemOffset` is added to it.
fn airPtrElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const payload_index = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
    const operands = self.air.extraData(Air.Bin, payload_index).data;

    const result: MCValue = result: {
        const result_ptr_ty = self.typeOfIndex(inst);
        const base_ty = self.typeOf(operands.lhs);

        // The locks taken in this block are released, in reverse order, when
        // the block exits — before `finishAir` runs.
        const base_mcv = try self.resolveInst(operands.lhs);
        const base_lock: ?RegisterLock = switch (base_mcv) {
            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
            else => null,
        };
        defer if (base_lock) |lock| self.register_manager.unlockReg(lock);

        if (result_ptr_ty.ptrInfo(zcu).flags.vector_index != .none) {
            if (self.reuseOperand(inst, operands.lhs, 0, base_mcv)) break :result base_mcv;
            break :result try self.copyToRegisterWithInstTracking(inst, result_ptr_ty, base_mcv);
        }

        const elem_size = base_ty.elemType2(zcu).abiSize(zcu);
        const idx_ty = self.typeOf(operands.rhs);
        const idx_mcv = try self.resolveInst(operands.rhs);
        const idx_lock: ?RegisterLock = switch (idx_mcv) {
            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
            else => null,
        };
        defer if (idx_lock) |lock| self.register_manager.unlockReg(lock);

        const scaled_reg = try self.elemOffset(idx_ty, idx_mcv, elem_size);
        const scaled_lock = self.register_manager.lockRegAssumeUnused(scaled_reg);
        defer self.register_manager.unlockReg(scaled_lock);

        const elem_ptr_mcv = try self.copyToRegisterWithInstTracking(inst, result_ptr_ty, base_mcv);
        try self.genBinOpMir(
            .{ ._, .add },
            result_ptr_ty,
            elem_ptr_mcv,
            .{ .register = scaled_reg },
        );
        break :result elem_ptr_mcv;
    };
    return self.finishAir(inst, result, .{ operands.lhs, operands.rhs, .none });
}
/// Lowers `get_union_tag`: extracts the tag from a union value.
///
/// A union layout with `tag_size == 0` produces `.none`. Otherwise:
/// * Frame operand: the tag is loaded from the operand's frame address plus
///   `layout.tagOffset()`; a tag whose ABI size exceeds 8 bytes is reported as
///   a TODO failure.
/// * Register operand: the union is copied into a register tracked for `inst`,
///   shifted right by `tagOffset() * 8` bits, and the result is the alias of
///   that register sized to `layout.tag_size`.
/// * Any other operand location is reported as a TODO failure.
fn airGetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op.operand;

    const tag_ty = self.typeOfIndex(inst);
    const union_ty = self.typeOf(operand_ref);
    const layout = union_ty.unionGetLayout(zcu);
    if (layout.tag_size == 0)
        return self.finishAir(inst, .none, .{ operand_ref, .none, .none });

    // TODO reusing the operand
    const operand = try self.resolveInst(operand_ref);
    const operand_lock: ?RegisterLock = switch (operand) {
        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
        else => null,
    };
    defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);

    const tag_abi_size = tag_ty.abiSize(zcu);
    const tag_mcv: MCValue = switch (operand) {
        .load_frame => |frame_addr| frame: {
            if (tag_abi_size > 8) return self.fail(
                "TODO implement get_union_tag for ABI larger than 8 bytes and operand {}",
                .{operand},
            );
            const tag_off: i32 = @intCast(layout.tagOffset());
            break :frame try self.copyToRegisterWithInstTracking(inst, tag_ty, .{
                .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + tag_off },
            });
        },
        .register => reg: {
            // Move the tag down to bit 0 of a copy of the union, then narrow
            // the register to the tag's size.
            const shift_bits: u6 = @intCast(layout.tagOffset() * 8);
            const shifted = try self.copyToRegisterWithInstTracking(inst, union_ty, operand);
            try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, shifted, .u8, .{ .immediate = shift_bits });
            break :reg .{
                .register = registerAlias(shifted.register, @intCast(layout.tag_size)),
            };
        },
        else => return self.fail("TODO implement get_union_tag for {}", .{operand}),
    };

    return self.finishAir(inst, tag_mcv, .{ operand_ref, .none, .none });
}
(has_lzcnt) 128 else 64)) { 13928 const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) { 13929 .load_frame => |src_frame_addr| src_frame_addr, 13930 else => { 13931 const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu)); 13932 try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{}); 13933 break :src_frame_addr .{ .index = src_frame_addr }; 13934 }, 13935 }; 13936 13937 const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; 13938 const extra_bits = abi_size * 8 - src_bits; 13939 13940 const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 13941 const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); 13942 defer self.register_manager.unlockReg(index_lock); 13943 13944 try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .u(limbs_len)); 13945 switch (extra_bits) { 13946 1 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()), 13947 else => try self.asmRegisterImmediate( 13948 .{ ._, .mov }, 13949 dst_reg.to32(), 13950 .s(@as(i32, extra_bits) - 1), 13951 ), 13952 } 13953 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 13954 try self.asmRegisterRegister(.{ ._, .@"test" }, index_reg.to32(), index_reg.to32()); 13955 const zero = try self.asmJccReloc(.z, undefined); 13956 if (self.hasFeature(.slow_incdec)) { 13957 try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1)); 13958 } else { 13959 try self.asmRegister(.{ ._, .dec }, index_reg.to32()); 13960 } 13961 try self.asmMemoryImmediate(.{ ._, .cmp }, .{ 13962 .base = .{ .frame = src_frame_addr.index }, 13963 .mod = .{ .rm = .{ 13964 .size = .qword, 13965 .index = index_reg.to64(), 13966 .scale = .@"8", 13967 .disp = src_frame_addr.off, 13968 } }, 13969 }, .u(0)); 13970 _ = try self.asmJccReloc(.e, loop); 13971 try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{ 13972 .base = .{ .frame = src_frame_addr.index }, 13973 .mod = .{ .rm = .{ 
13974 .size = .qword, 13975 .index = index_reg.to64(), 13976 .scale = .@"8", 13977 .disp = src_frame_addr.off, 13978 } }, 13979 }); 13980 self.performReloc(zero); 13981 try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6)); 13982 try self.asmRegisterRegister(.{ ._, .add }, index_reg.to32(), dst_reg.to32()); 13983 try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(src_bits - 1)); 13984 try self.asmRegisterRegister(.{ ._, .sub }, dst_reg.to32(), index_reg.to32()); 13985 break :result dst_mcv; 13986 } 13987 13988 if (has_lzcnt) { 13989 if (src_bits <= 8) { 13990 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); 13991 try self.truncateRegister(src_ty, wide_reg); 13992 try self.genBinOpMir(.{ ._, .lzcnt }, .u32, dst_mcv, .{ .register = wide_reg }); 13993 try self.genBinOpMir( 13994 .{ ._, .sub }, 13995 dst_ty, 13996 dst_mcv, 13997 .{ .immediate = 32 - src_bits }, 13998 ); 13999 } else if (src_bits <= 64) { 14000 try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv); 14001 const extra_bits = self.regExtraBits(src_ty); 14002 if (extra_bits > 0) { 14003 try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits }); 14004 } 14005 } else { 14006 assert(src_bits <= 128); 14007 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 14008 const tmp_mcv = MCValue{ .register = tmp_reg }; 14009 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 14010 defer self.register_manager.unlockReg(tmp_lock); 14011 14012 try self.genBinOpMir(.{ ._, .lzcnt }, .u64, dst_mcv, if (mat_src_mcv.isBase()) 14013 mat_src_mcv 14014 else 14015 .{ .register = mat_src_mcv.register_pair[0] }); 14016 try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); 14017 try self.genBinOpMir(.{ ._, .lzcnt }, .u64, tmp_mcv, if (mat_src_mcv.isBase()) 14018 mat_src_mcv.address().offset(8).deref() 14019 else 14020 .{ .register = mat_src_mcv.register_pair[1] }); 14021 try 
self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32()); 14022 14023 if (src_bits < 128) try self.genBinOpMir( 14024 .{ ._, .sub }, 14025 dst_ty, 14026 dst_mcv, 14027 .{ .immediate = 128 - src_bits }, 14028 ); 14029 } 14030 break :result dst_mcv; 14031 } 14032 14033 assert(src_bits <= 64); 14034 const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2); 14035 if (std.math.isPowerOfTwo(src_bits)) { 14036 const imm_reg = try self.copyToTmpRegister(dst_ty, .{ 14037 .immediate = src_bits ^ (src_bits - 1), 14038 }); 14039 const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); 14040 defer self.register_manager.unlockReg(imm_lock); 14041 14042 if (src_bits <= 8) { 14043 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); 14044 const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); 14045 defer self.register_manager.unlockReg(wide_lock); 14046 14047 try self.truncateRegister(src_ty, wide_reg); 14048 try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg }); 14049 } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv); 14050 14051 try self.asmCmovccRegisterRegister( 14052 .z, 14053 registerAlias(dst_reg, cmov_abi_size), 14054 registerAlias(imm_reg, cmov_abi_size), 14055 ); 14056 14057 try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); 14058 } else { 14059 const imm_reg = try self.copyToTmpRegister(dst_ty, .{ 14060 .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)), 14061 }); 14062 const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); 14063 defer self.register_manager.unlockReg(imm_lock); 14064 14065 const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); 14066 const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); 14067 defer self.register_manager.unlockReg(wide_lock); 14068 14069 try self.truncateRegister(src_ty, wide_reg); 14070 try self.genBinOpMir( 14071 .{ 
._r, .bs },
                if (src_bits <= 8) .u16 else src_ty,
                dst_mcv,
                .{ .register = wide_reg },
            );

            try self.asmCmovccRegisterRegister(
                .nz,
                registerAlias(imm_reg, cmov_abi_size),
                registerAlias(dst_reg, cmov_abi_size),
            );

            try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }, .{});
            try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg });
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers a count-trailing-zeros AIR instruction for a scalar integer operand.
///
/// Three strategies are used, chosen by operand width and the `bmi` feature:
///  * wider than 128 bits (or wider than 64 bits without `bmi`): a loop over the
///    operand's 64-bit limbs in a stack frame slot;
///  * with `bmi`: `tzcnt`, on one register for up to 64 bits or on two limbs for
///    65...128 bits;
///  * otherwise: a bit-scan-forward followed by a conditional move of the bit
///    width for a zero operand.
/// In every path a zero operand yields the operand's bit width.
/// Vector operands are rejected with a "TODO" failure.
fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const result = result: {
        // Every path below emits flag-writing instructions.
        try self.spillEflagsIfOccupied();

        const dst_ty = self.typeOfIndex(inst);
        const src_ty = self.typeOf(ty_op.operand);
        if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airCtz for {}", .{
            src_ty.fmt(pt),
        });

        const src_mcv = try self.resolveInst(ty_op.operand);
        // Immediates are materialized into a temporary register so that they can be
        // used as (and modified like) an ordinary register operand.
        const mat_src_mcv = switch (src_mcv) {
            .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
            else => src_mcv,
        };
        const mat_src_lock = switch (mat_src_mcv) {
            .register => |reg| self.register_manager.lockReg(reg),
            else => null,
        };
        defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);

        const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
        const dst_mcv = MCValue{ .register = dst_reg };
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        const abi_size: u31 = @intCast(src_ty.abiSize(zcu));
        const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
        const has_bmi = self.hasFeature(.bmi);
        if (src_bits > @as(u32, if (has_bmi) 128 else 64)) {
            // Multi-limb path: the operand has to live in a frame slot so that its
            // limbs can be addressed with an index register.
            const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
                .load_frame => |src_frame_addr| src_frame_addr,
                else => {
                    const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
                    try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
                    break :src_frame_addr .{ .index = src_frame_addr };
                },
            };

            const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
            // Padding bits between the integer's bit width and its ABI size.
            const extra_bits = abi_size * 8 - src_bits;

            const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
            defer self.register_manager.unlockReg(index_lock);

            // index = -1; it is incremented at the top of the loop.
            try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .s(-1));
            // dst = -extra_bits. If every limb is zero the loop exits with
            // index == limbs_len, and `dst + index * 64` below is then `src_bits`.
            switch (extra_bits) {
                0 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()),
                // -1 is already in `index_reg`, so copy it from there. Moving `dst_reg`
                // onto itself would leave the freshly allocated register uninitialized.
                1 => try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to32(), index_reg.to32()),
                else => try self.asmRegisterImmediate(
                    .{ ._, .mov },
                    dst_reg.to32(),
                    .s(-@as(i32, extra_bits)),
                ),
            }
            const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
            if (self.hasFeature(.slow_incdec)) {
                try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
            } else {
                try self.asmRegister(.{ ._, .inc }, index_reg.to32());
            }
            // Leave the loop once all limbs have been inspected.
            try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len));
            const zero = try self.asmJccReloc(.nb, undefined);
            // Skip limbs that are entirely zero.
            try self.asmMemoryImmediate(.{ ._, .cmp }, .{
                .base = .{ .frame = src_frame_addr.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .index = index_reg.to64(),
                    .scale = .@"8",
                    .disp = src_frame_addr.off,
                } },
            }, .u(0));
            _ = try self.asmJccReloc(.e, loop);
            // First non-zero limb: scan it for its lowest set bit.
            try self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{
                .base = .{ .frame = src_frame_addr.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .index = index_reg.to64(),
                    .scale = .@"8",
                    .disp = src_frame_addr.off,
                } },
            });
            self.performReloc(zero);
            // result = dst + index * 64
            try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6));
            try self.asmRegisterRegister(.{ ._, .add }, dst_reg.to32(), index_reg.to32());
            break :result dst_mcv;
        }

        // Operands of at most 8 bits are processed as 16-bit values.
        const wide_ty: Type = if (src_bits <= 8) .u16 else src_ty;
        if (has_bmi) {
            if (src_bits <= 64) {
                const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
                // Set every bit above the operand's width so that `tzcnt` of a zero
                // operand stops at `src_bits`.
                const masked_mcv = if (extra_bits > 0) masked: {
                    const tmp_mcv = tmp: {
                        // The `or` below writes to `tmp_mcv`, so an immediate operand must
                        // be represented by its materialized register copy.
                        if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
                            break :tmp mat_src_mcv;
                        try self.genSetReg(dst_reg, wide_ty, src_mcv, .{});
                        break :tmp dst_mcv;
                    };
                    try self.genBinOpMir(
                        .{ ._, .@"or" },
                        wide_ty,
                        tmp_mcv,
                        .{ .immediate = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - extra_bits)) <<
                            @intCast(src_bits) },
                    );
                    break :masked tmp_mcv;
                } else mat_src_mcv;
                try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv);
            } else {
                assert(src_bits <= 128);
                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const tmp_mcv = MCValue{ .register = tmp_reg };
                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                defer self.register_manager.unlockReg(tmp_lock);

                // Low and high 64-bit limbs of the operand.
                const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
                    mat_src_mcv
                else
                    .{ .register = mat_src_mcv.register_pair[0] };
                const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
                    mat_src_mcv.address().offset(8).deref()
                else
                    .{ .register = mat_src_mcv.register_pair[1] };
                // Set the padding bits of the high limb so that an all-zero operand
                // counts exactly `src_bits - 64` zeros in it.
                const masked_mcv = if (src_bits < 128) masked: {
                    try self.genCopy(.u64, dst_mcv, hi_mat_src_mcv, .{});
                    try self.genBinOpMir(
                        .{ ._, .@"or" },
                        .u64,
                        dst_mcv,
                        .{ .immediate = @as(u64, std.math.maxInt(u64)) << @intCast(src_bits - 64) },
                    );
                    break :masked dst_mcv;
                } else hi_mat_src_mcv;
                // dst = tzcnt(hi) + 64; tmp = tzcnt(lo)
                try self.genBinOpMir(.{ ._, .tzcnt }, .u64, dst_mcv, masked_mcv);
                try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
                try self.genBinOpMir(.{ ._, .tzcnt }, .u64, tmp_mcv, lo_mat_src_mcv);
                // `tzcnt` sets CF when its source is zero: keep the high-limb result only
                // if the low limb was zero, otherwise take the low-limb count.
                try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
            }
            break :result dst_mcv;
        }

        assert(src_bits <= 64);
        // Result for a zero operand, selected by the `cmovz` at the end.
        const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
        const width_lock = self.register_manager.lockRegAssumeUnused(width_reg);
        defer self.register_manager.unlockReg(width_lock);

        if (src_bits <= 8 or !std.math.isPowerOfTwo(src_bits)) {
            // Scan a truncated copy so that bits outside the operand's width cannot
            // influence the result.
            const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
            const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
            defer self.register_manager.unlockReg(wide_lock);

            try self.truncateRegister(src_ty, wide_reg);
            try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg });
        } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv);

        // The conditional move is emitted with an operand size of at least 2 bytes.
        const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2);
        try self.asmCmovccRegisterRegister(
            .z,
            registerAlias(dst_reg, cmov_abi_size),
            registerAlias(width_reg, cmov_abi_size),
        );
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const result: MCValue = result: {
14267 try self.spillEflagsIfOccupied(); 14268 14269 const src_ty = self.typeOf(ty_op.operand); 14270 const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); 14271 if (src_ty.zigTypeTag(zcu) == .vector or src_abi_size > 16) 14272 return self.fail("TODO implement airPopCount for {}", .{src_ty.fmt(pt)}); 14273 const src_mcv = try self.resolveInst(ty_op.operand); 14274 14275 const mat_src_mcv = switch (src_mcv) { 14276 .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, 14277 else => src_mcv, 14278 }; 14279 const mat_src_lock = switch (mat_src_mcv) { 14280 .register => |reg| self.register_manager.lockReg(reg), 14281 else => null, 14282 }; 14283 defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock); 14284 14285 if (src_abi_size <= 8) { 14286 const dst_contains_src = 14287 src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv); 14288 const dst_reg = if (dst_contains_src) 14289 src_mcv.getReg().? 14290 else 14291 try self.register_manager.allocReg(inst, abi.RegisterClass.gp); 14292 const dst_lock = self.register_manager.lockReg(dst_reg); 14293 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 14294 14295 try self.genPopCount(dst_reg, src_ty, mat_src_mcv, dst_contains_src); 14296 break :result .{ .register = dst_reg }; 14297 } 14298 14299 assert(src_abi_size > 8 and src_abi_size <= 16); 14300 const tmp_regs = try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp); 14301 const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); 14302 defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); 14303 14304 try self.genPopCount(tmp_regs[0], .usize, if (mat_src_mcv.isBase()) 14305 mat_src_mcv 14306 else 14307 .{ .register = mat_src_mcv.register_pair[0] }, false); 14308 const src_info = src_ty.intInfo(zcu); 14309 const hi_ty = try pt.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1); 14310 try self.genPopCount(tmp_regs[1], hi_ty, if 
(mat_src_mcv.isBase()) 14311 mat_src_mcv.address().offset(8).deref() 14312 else 14313 .{ .register = mat_src_mcv.register_pair[1] }, false); 14314 try self.asmRegisterRegister(.{ ._, .add }, tmp_regs[0].to8(), tmp_regs[1].to8()); 14315 break :result .{ .register = tmp_regs[0] }; 14316 }; 14317 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 14318 } 14319 14320 fn genPopCount( 14321 self: *CodeGen, 14322 dst_reg: Register, 14323 src_ty: Type, 14324 src_mcv: MCValue, 14325 dst_contains_src: bool, 14326 ) !void { 14327 const pt = self.pt; 14328 14329 const src_abi_size: u32 = @intCast(src_ty.abiSize(pt.zcu)); 14330 if (self.hasFeature(.popcnt)) return self.genBinOpMir( 14331 .{ ._, .popcnt }, 14332 if (src_abi_size > 1) src_ty else .u32, 14333 .{ .register = dst_reg }, 14334 if (src_abi_size > 1) src_mcv else src: { 14335 if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv, .{}); 14336 try self.truncateRegister(try src_ty.toUnsigned(pt), dst_reg); 14337 break :src .{ .register = dst_reg }; 14338 }, 14339 ); 14340 14341 const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8); 14342 const imm_0_1: Immediate = .u(mask / 0b1_1); 14343 const imm_00_11: Immediate = .u(mask / 0b01_01); 14344 const imm_0000_1111: Immediate = .u(mask / 0b0001_0001); 14345 const imm_0000_0001: Immediate = .u(mask / 0b1111_1111); 14346 14347 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 14348 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 14349 defer self.register_manager.unlockReg(tmp_lock); 14350 14351 const dst = registerAlias(dst_reg, src_abi_size); 14352 const tmp = registerAlias(tmp_reg, src_abi_size); 14353 const imm = if (src_abi_size > 4) 14354 try self.register_manager.allocReg(null, abi.RegisterClass.gp) 14355 else 14356 undefined; 14357 14358 if (!dst_contains_src) try self.genSetReg(dst, src_ty, src_mcv, .{}); 14359 // dst = operand 14360 try 
self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); 14361 // tmp = operand 14362 try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1)); 14363 // tmp = operand >> 1 14364 if (src_abi_size > 4) { 14365 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); 14366 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); 14367 } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); 14368 // tmp = (operand >> 1) & 0x55...55 14369 try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp); 14370 // dst = temp1 = operand - ((operand >> 1) & 0x55...55) 14371 try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); 14372 // tmp = temp1 14373 try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2)); 14374 // dst = temp1 >> 2 14375 if (src_abi_size > 4) { 14376 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); 14377 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); 14378 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); 14379 } else { 14380 try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); 14381 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); 14382 } 14383 // tmp = temp1 & 0x33...33 14384 // dst = (temp1 >> 2) & 0x33...33 14385 try self.asmRegisterRegister(.{ ._, .add }, tmp, dst); 14386 // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33) 14387 try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); 14388 // dst = temp2 14389 try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(4)); 14390 // tmp = temp2 >> 4 14391 try self.asmRegisterRegister(.{ ._, .add }, dst, tmp); 14392 // dst = temp2 + (temp2 >> 4) 14393 if (src_abi_size > 4) { 14394 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); 14395 try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001); 14396 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); 14397 try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp); 14398 } else { 14399 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); 
if (src_abi_size > 1) {
            try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001);
        }
    }
    // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
    // dst = temp3 * 0x01...01
    if (src_abi_size > 1) {
        try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u((src_abi_size - 1) * 8));
    }
    // dst = (temp3 * 0x01...01) >> (bits - 8)
}

/// Emits code that reverses the bytes of `src_mcv` across the full ABI size of
/// `src_ty` and returns the location of the result, which is tracked as the
/// result of `inst`.
///
/// `mem_ok` permits a result in memory for operands of up to 8 bytes; when it
/// is false such results are always placed in a register. Operands of
/// 9...16 bytes yield a register pair and larger operands a frame slot.
/// Vector operands are rejected with a "TODO" failure.
fn genByteSwap(
    self: *CodeGen,
    inst: Air.Inst.Index,
    src_ty: Type,
    src_mcv: MCValue,
    mem_ok: bool,
) !MCValue {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const has_movbe = self.hasFeature(.movbe);

    if (src_ty.zigTypeTag(zcu) == .vector) return self.fail(
        "TODO implement genByteSwap for {}",
        .{src_ty.fmt(pt)},
    );

    const src_lock = switch (src_mcv) {
        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
        else => null,
    };
    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

    const abi_size: u32 = @intCast(src_ty.abiSize(zcu));
    // The first three size classes only handle the case where the operand can be
    // reused in place; otherwise they fall through to the code after the switch.
    switch (abi_size) {
        0 => unreachable,
        // A single byte needs no swapping, only a result location.
        1 => return if ((mem_ok or src_mcv.isRegister()) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv),
        // Two bytes are swapped by rotating by 8 bits.
        2 => if ((mem_ok or src_mcv.isRegister()) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
        {
            try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 });
            return src_mcv;
        },
        3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
            try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv);
            return src_mcv;
        },
        9...16 => {
            // Swap the bytes of each limb, then exchange the two limbs.
            switch (src_mcv) {
                .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
                    for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64());
                    return .{ .register_pair = .{ src_regs[1], src_regs[0] } };
                },
                else => {},
            }

            const dst_regs =
                try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
            const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
            defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);

            for (dst_regs, 0..) |dst_reg, limb_index| {
                if (src_mcv.isBase()) {
                    // `movbe` swaps while loading; otherwise load and `bswap`.
                    try self.asmRegisterMemory(
                        .{ ._, if (has_movbe) .movbe else .mov },
                        dst_reg.to64(),
                        try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }),
                    );
                    if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
                } else {
                    try self.asmRegisterRegister(
                        .{ ._, .mov },
                        dst_reg.to64(),
                        src_mcv.register_pair[limb_index].to64(),
                    );
                    try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
                }
            }
            return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } };
        },
        else => {
            const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;

            // The loop below reverses the result slot in place, so the operand has to
            // be copied into it first. This is done before the scratch registers are
            // locked so that the copy is free to use whatever registers it needs.
            const dst_mcv = try self.allocRegOrMem(inst, false);
            try self.genCopy(src_ty, dst_mcv, src_mcv, .{});

            const temp_regs =
                try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
            const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
            defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);

            // temp_regs[0] indexes limbs from the front, temp_regs[1] from the back.
            const front_mem: Memory = .{
                .base = .{ .frame = dst_mcv.load_frame.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .index = temp_regs[0].to64(),
                    .scale = .@"8",
                    .disp = dst_mcv.load_frame.off,
                } },
            };
            const back_mem: Memory = .{
                .base = .{ .frame = dst_mcv.load_frame.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .index = temp_regs[1].to64(),
                    .scale = .@"8",
                    .disp = dst_mcv.load_frame.off,
                } },
            };

            try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());
            try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1));

            const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
            // Load both limbs byte-swapped, then store each at the other's position.
            try self.asmRegisterMemory(
                .{ ._, if (has_movbe) .movbe else .mov },
                temp_regs[2].to64(),
                front_mem,
            );
            try self.asmRegisterMemory(
                .{ ._, if (has_movbe) .movbe else .mov },
                temp_regs[3].to64(),
                back_mem,
            );
            if (!has_movbe) {
                try self.asmRegister(.{ ._, .bswap }, temp_regs[2].to64());
                try self.asmRegister(.{ ._, .bswap }, temp_regs[3].to64());
            }
            try self.asmMemoryRegister(.{ ._, .mov }, front_mem, temp_regs[3].to64());
            try self.asmMemoryRegister(.{ ._, .mov }, back_mem, temp_regs[2].to64());
            if (self.hasFeature(.slow_incdec)) {
                try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
                try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
            } else {
                try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
                try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
            }
            // Continue while the front index has not passed the back index.
            try self.asmRegisterRegister(.{ ._, .cmp }, temp_regs[0].to32(), temp_regs[1].to32());
            _ = try self.asmJccReloc(.be, loop);
            return dst_mcv;
        },
    }

    // 2...8 bytes without operand reuse: swap into a fresh location.
    const dst_mcv: MCValue = if (mem_ok and has_movbe and src_mcv.isRegister())
        try self.allocRegOrMem(inst, true)
    else
        .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) };
    if (dst_mcv.getReg()) |dst_reg| {
        const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
        defer self.register_manager.unlockReg(dst_lock);

        try self.genSetReg(dst_reg, src_ty, src_mcv, .{});
        switch (abi_size) {
            else => unreachable,
            2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
            3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
        }
    } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
    return dst_mcv;
}

/// Lowers a byte-swap AIR instruction.
///
/// `genByteSwap` reverses all bytes of the operand's ABI size, which leaves the
/// value in the most significant bits when the type is narrower than that. The
/// result is then shifted right by the number of padding bits, arithmetically
/// for signed integers and logically otherwise.
fn airByteSwap(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const src_ty = self.typeOf(ty_op.operand);
    const src_bits: u32 = @intCast(src_ty.bitSize(zcu));
    const src_mcv = try self.resolveInst(ty_op.operand);

    const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, true);

    const extra_bits = src_ty.abiSize(zcu) * 8 - src_bits;
    const signedness: std.builtin.Signedness =
        if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned;
    // Nothing to shift out when the type fills its ABI size; `airBitReverse`
    // guards its final shift the same way.
    if (extra_bits > 0) try self.genShiftBinOpMir(
        switch (signedness) {
            .signed => .{ ._r, .sa },
            .unsigned => .{ ._r, .sh },
        },
        src_ty,
        dst_mcv,
        if (src_bits > 256) .u16 else .u8,
        .{ .immediate = extra_bits },
    );
    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}

fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const src_ty = self.typeOf(ty_op.operand);
    const abi_size: u32 = @intCast(src_ty.abiSize(zcu));
    const bit_size: u32 = @intCast(src_ty.bitSize(zcu));
    const src_mcv = try self.resolveInst(ty_op.operand);

    const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, false);
    const
dst_locks: [2]?RegisterLock = switch (dst_mcv) { 14613 .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null }, 14614 .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs), 14615 else => unreachable, 14616 }; 14617 defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); 14618 14619 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 14620 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 14621 defer self.register_manager.unlockReg(tmp_lock); 14622 14623 const limb_abi_size: u32 = @min(abi_size, 8); 14624 const tmp = registerAlias(tmp_reg, limb_abi_size); 14625 const imm = if (limb_abi_size > 4) 14626 try self.register_manager.allocReg(null, abi.RegisterClass.gp) 14627 else 14628 undefined; 14629 14630 const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_size * 8); 14631 const imm_0000_1111: Immediate = .u(mask / 0b0001_0001); 14632 const imm_00_11: Immediate = .u(mask / 0b01_01); 14633 const imm_0_1: Immediate = .u(mask / 0b1_1); 14634 14635 for (dst_mcv.getRegs()) |dst_reg| { 14636 const dst = registerAlias(dst_reg, limb_abi_size); 14637 14638 // dst = temp1 = bswap(operand) 14639 try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); 14640 // tmp = temp1 14641 try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(4)); 14642 // dst = temp1 >> 4 14643 if (limb_abi_size > 4) { 14644 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); 14645 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); 14646 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); 14647 } else { 14648 try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111); 14649 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); 14650 } 14651 // tmp = temp1 & 0x0F...0F 14652 // dst = (temp1 >> 4) & 0x0F...0F 14653 try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, .u(4)); 14654 // tmp = (temp1 & 0x0F...0F) << 4 14655 try 
self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp); 14656 // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4) 14657 try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); 14658 // tmp = temp2 14659 try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2)); 14660 // dst = temp2 >> 2 14661 if (limb_abi_size > 4) { 14662 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); 14663 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); 14664 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); 14665 } else { 14666 try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); 14667 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); 14668 } 14669 // tmp = temp2 & 0x33...33 14670 // dst = (temp2 >> 2) & 0x33...33 14671 try self.asmRegisterMemory( 14672 .{ ._, .lea }, 14673 if (limb_abi_size > 4) tmp.to64() else tmp.to32(), 14674 .{ 14675 .base = .{ .reg = dst.to64() }, 14676 .mod = .{ .rm = .{ 14677 .size = .qword, 14678 .index = tmp.to64(), 14679 .scale = .@"4", 14680 } }, 14681 }, 14682 ); 14683 // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2) 14684 try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); 14685 // dst = temp3 14686 try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1)); 14687 // tmp = temp3 >> 1 14688 if (limb_abi_size > 4) { 14689 try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); 14690 try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); 14691 try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); 14692 } else { 14693 try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1); 14694 try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); 14695 } 14696 // dst = temp3 & 0x55...55 14697 // tmp = (temp3 >> 1) & 0x55...55 14698 try self.asmRegisterMemory( 14699 .{ ._, .lea }, 14700 if (limb_abi_size > 4) dst.to64() else dst.to32(), 14701 .{ 14702 .base = .{ .reg = tmp.to64() }, 14703 .mod = .{ .rm = .{ 14704 .size = .qword, 14705 .index = dst.to64(), 
14706 .scale = .@"2", 14707 } }, 14708 }, 14709 ); 14710 // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1) 14711 } 14712 14713 const extra_bits = abi_size * 8 - bit_size; 14714 const signedness: std.builtin.Signedness = 14715 if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned; 14716 if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) { 14717 .signed => .{ ._r, .sa }, 14718 .unsigned => .{ ._r, .sh }, 14719 }, src_ty, dst_mcv, .u8, .{ .immediate = extra_bits }); 14720 14721 return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); 14722 } 14723 14724 fn floatSign(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) !void { 14725 const pt = self.pt; 14726 const zcu = pt.zcu; 14727 const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)]; 14728 14729 const result = result: { 14730 const scalar_bits = ty.scalarType(zcu).floatBits(self.target.*); 14731 if (scalar_bits == 80) { 14732 if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement floatSign for {}", .{ 14733 ty.fmt(pt), 14734 }); 14735 14736 const src_mcv = try self.resolveInst(operand); 14737 const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; 14738 defer if (src_lock) |lock| self.register_manager.unlockReg(lock); 14739 14740 const dst_mcv: MCValue = .{ .register = .st0 }; 14741 if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv)) 14742 try self.register_manager.getKnownReg(.st0, inst); 14743 14744 try self.genCopy(ty, dst_mcv, src_mcv, .{}); 14745 switch (tag) { 14746 .neg => try self.asmOpOnly(.{ .f_, .chs }), 14747 .abs => try self.asmOpOnly(.{ .f_, .abs }), 14748 else => unreachable, 14749 } 14750 break :result dst_mcv; 14751 } 14752 14753 const abi_size: u32 = switch (ty.abiSize(zcu)) { 14754 1...16 => 16, 14755 17...32 => 32, 14756 else => return self.fail("TODO implement floatSign for {}", .{ 14757 ty.fmt(pt), 14758 }), 14759 }; 14760 
14761 const src_mcv = try self.resolveInst(operand); 14762 const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; 14763 defer if (src_lock) |lock| self.register_manager.unlockReg(lock); 14764 14765 const dst_mcv: MCValue = if (src_mcv.isRegister() and 14766 self.reuseOperand(inst, operand, 0, src_mcv)) 14767 src_mcv 14768 else if (self.hasFeature(.avx)) 14769 .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } 14770 else 14771 try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); 14772 const dst_reg = dst_mcv.getReg().?; 14773 const dst_lock = self.register_manager.lockReg(dst_reg); 14774 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 14775 14776 const vec_ty = try pt.vectorType(.{ 14777 .len = @divExact(abi_size * 8, scalar_bits), 14778 .child = (try pt.intType(.signed, scalar_bits)).ip_index, 14779 }); 14780 14781 const sign_mcv = try self.genTypedValue(switch (tag) { 14782 .neg => try vec_ty.minInt(pt, vec_ty), 14783 .abs => try vec_ty.maxInt(pt, vec_ty), 14784 else => unreachable, 14785 }); 14786 const sign_mem: Memory = if (sign_mcv.isBase()) 14787 try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) }) 14788 else 14789 .{ 14790 .base = .{ .reg = try self.copyToTmpRegister(.usize, sign_mcv.address()) }, 14791 .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, 14792 }; 14793 14794 if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory( 14795 switch (scalar_bits) { 14796 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) { 14797 .neg => .{ .vp_, .xor }, 14798 .abs => .{ .vp_, .@"and" }, 14799 else => unreachable, 14800 } else switch (tag) { 14801 .neg => .{ .v_ps, .xor }, 14802 .abs => .{ .v_ps, .@"and" }, 14803 else => unreachable, 14804 }, 14805 32 => switch (tag) { 14806 .neg => .{ .v_ps, .xor }, 14807 .abs => .{ .v_ps, .@"and" }, 14808 else => unreachable, 14809 }, 14810 64 => switch (tag) { 14811 .neg => .{ .v_pd, .xor }, 14812 .abs => .{ 
.v_pd, .@"and" },
                    else => unreachable,
                },
                80 => return self.fail("TODO implement floatSign for {}", .{ty.fmt(pt)}),
                else => unreachable,
            },
            registerAlias(dst_reg, abi_size),
            registerAlias(if (src_mcv.isRegister())
                src_mcv.getReg().?
            else
                try self.copyToTmpRegister(ty, src_mcv), abi_size),
            sign_mem,
        ) else try self.asmRegisterMemory(
            switch (scalar_bits) {
                16, 128 => switch (tag) {
                    .neg => .{ .p_, .xor },
                    .abs => .{ .p_, .@"and" },
                    else => unreachable,
                },
                32 => switch (tag) {
                    .neg => .{ ._ps, .xor },
                    .abs => .{ ._ps, .@"and" },
                    else => unreachable,
                },
                64 => switch (tag) {
                    .neg => .{ ._pd, .xor },
                    .abs => .{ ._pd, .@"and" },
                    else => unreachable,
                },
                80 => return self.fail("TODO implement floatSign for {}", .{ty.fmt(pt)}),
                else => unreachable,
            },
            registerAlias(dst_reg, abi_size),
            sign_mem,
        );
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ operand, .none, .none });
}

/// Lowers a unary float sign AIR instruction by forwarding the operand and its
/// type to `floatSign`, which selects the operation from the instruction's tag.
fn airFloatSign(self: *CodeGen, inst: Air.Inst.Index) !void {
    const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const ty = self.typeOf(un_op);
    return self.floatSign(inst, un_op, ty);
}

/// Rounding control passed as the immediate operand of the `round` family of
/// instructions; `genRound` bit-casts it to a `u5` and emits it unchanged.
///
/// The layout follows the instruction's imm8 encoding: bits 0-1 select the
/// rounding mode, bit 2 selects MXCSR.RC instead, and bit 3 is the precision
/// mask. `mode` therefore covers exactly three bits so that `precision` lands
/// on bit 3; the remaining bit only pads the struct to the `u5` that callers
/// bit-cast to and must stay zero.
const RoundMode = packed struct(u5) {
    mode: enum(u3) {
        /// Round to nearest (even)
        nearest = 0b0_00,
        /// Round down (toward -∞)
        down = 0b0_01,
        /// Round up (toward +∞)
        up = 0b0_10,
        /// Round toward zero (truncate)
        zero = 0b0_11,
        /// Use current rounding mode of MXCSR.RC
        mxcsr = 0b1_00,
    },
    precision: enum(u1) {
        normal = 0b0,
        inexact = 0b1,
    } = .normal,
    /// Reserved immediate bit; always zero.
    reserved: u1 = 0,
};

fn airRound(self: *CodeGen, inst: Air.Inst.Index, mode: RoundMode) !void {
    const un_op =
self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const ty = self.typeOf(un_op);

    const result = result: {
        switch (try self.genRoundLibcall(ty, .{ .air_ref = un_op }, mode)) {
            .none => {},
            else => |dst_mcv| break :result dst_mcv,
        }

        const src_mcv = try self.resolveInst(un_op);
        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
        const dst_reg = dst_mcv.getReg().?;
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
        try self.genRound(ty, dst_reg, src_mcv, mode);
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ un_op, .none, .none });
}

/// Returns the MIR tag of the `round` instruction to use for `ty`, or `null`
/// when this function does not select one.
///
/// A tag is only returned when the `sse4_1` feature is present. Scalar floats
/// and float vectors with 32- or 64-bit elements get the `v_`-prefixed tag when
/// `avx` is present and the unprefixed tag otherwise; vectors of 5...8 x 32-bit
/// or 3...4 x 64-bit elements get a tag only with `avx`. 16-, 80- and 128-bit
/// floats, longer vectors and non-float vectors yield `null`. Any other type tag
/// is `unreachable`.
fn getRoundTag(self: *CodeGen, ty: Type) ?Mir.Inst.FixedTag {
    const pt = self.pt;
    const zcu = pt.zcu;
    return if (self.hasFeature(.sse4_1)) switch (ty.zigTypeTag(zcu)) {
        .float => switch (ty.floatBits(self.target.*)) {
            32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
            64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
            16, 80, 128 => null,
            else => unreachable,
        },
        .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
            .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
                32 => switch (ty.vectorLen(zcu)) {
                    1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
                    2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
                    5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
                    else => null,
                },
                64 => switch (ty.vectorLen(zcu)) {
                    1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
                    2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
                    3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
                    else => null,
                },
                16, 80, 128 => null,
                else => unreachable,
            },
            else => null,
        },
        else => unreachable,
    } else null;
}

/// Rounds `src_mcv` through a library call when `getRoundTag` has no
/// instruction for `ty`.
///
/// Returns `.none` (emitting nothing) if `getRoundTag(ty)` is non-null.
/// Otherwise `ty` must be a scalar float, or the function fails with a TODO
/// error. The callee name is `floatLibcAbiPrefix(ty)` ++ `floor`/`ceil`/`trunc`
/// (for `.down`/`.up`/`.zero`) ++ `floatLibcAbiSuffix(ty)`; any other
/// `mode.mode` is `unreachable`. Returns whatever `genCall` returns.
fn genRoundLibcall(self: *CodeGen, ty: Type, src_mcv: MCValue, mode: RoundMode) !MCValue {
    const pt = self.pt;
    const zcu = pt.zcu;
    if (self.getRoundTag(ty)) |_| return .none;

    if (ty.zigTypeTag(zcu) != .float)
        return self.fail("TODO implement genRound for {}", .{ty.fmt(pt)});

    // Sized for the longest name built below: 2-char prefix + "trunc" + 1-char
    // suffix. `catch unreachable` relies on the prefix/suffix helpers never
    // returning longer strings — NOTE(review): those helpers are not visible here.
    var callee_buf: ["__trunc?".len]u8 = undefined;
    return try self.genCall(.{ .lib = .{
        .return_type = ty.toIntern(),
        .param_types = &.{ty.toIntern()},
        .callee = std.fmt.bufPrint(&callee_buf, "{s}{s}{s}", .{
            floatLibcAbiPrefix(ty),
            switch (mode.mode) {
                .down => "floor",
                .up => "ceil",
                .zero => "trunc",
                else => unreachable,
            },
            floatLibcAbiSuffix(ty),
        }) catch unreachable,
    } }, &.{ty}, &.{src_mcv}, .{});
}

/// Emits code that rounds `src_mcv` (of type `ty`) into `dst_reg` using `mode`.
///
/// Uses the instruction chosen by `getRoundTag`; when there is none, falls back
/// to `genRoundLibcall` and copies its result into `dst_reg`. `mode` is passed
/// to the instruction as a 5-bit immediate.
fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: RoundMode) !void {
    const pt = self.pt;
    const mir_tag = self.getRoundTag(ty) orelse {
        const result = try self.genRoundLibcall(ty, src_mcv, mode);
        return self.genSetReg(dst_reg, ty, result, .{});
    };
    const abi_size: u32 = @intCast(ty.abiSize(pt.zcu));
    const dst_alias = registerAlias(dst_reg, abi_size);
    switch (mir_tag[0]) {
        // The `v_ss`/`v_sd` forms take an extra source operand; the destination
        // is passed for it. A memory source is used directly when
        // `src_mcv.isBase()`, otherwise the source register (or a temporary
        // copy) is used.
        .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
            mir_tag,
            dst_alias,
            dst_alias,
            try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
            .u(@as(u5, @bitCast(mode))),
        ) else try self.asmRegisterRegisterRegisterImmediate(
            mir_tag,
            dst_alias,
            dst_alias,
            registerAlias(if (src_mcv.isRegister())
                src_mcv.getReg().?
            else
                try self.copyToTmpRegister(ty, src_mcv), abi_size),
            .u(@as(u5, @bitCast(mode))),
        ),
        else => if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
            mir_tag,
            dst_alias,
            try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
            .u(@as(u5, @bitCast(mode))),
        ) else try self.asmRegisterRegisterImmediate(
            mir_tag,
            dst_alias,
            registerAlias(if (src_mcv.isRegister())
                src_mcv.getReg().?
            else
                try self.copyToTmpRegister(ty, src_mcv), abi_size),
            .u(@as(u5, @bitCast(mode))),
        ),
    }
}

/// Lowers an AIR `ty_op` absolute-value instruction.
///
/// Dispatch on the operand type:
/// * integers of 1...8 bytes: copy to a register, negate, then conditionally
///   move the original value back;
/// * integers of 9...16 bytes: operate on a register pair using a mask derived
///   from the high limb;
/// * larger integers: a limb-by-limb loop over the destination in memory,
///   skipped when the top limb is non-negative;
/// * floats and float vectors: delegated to `floatSign`;
/// * integer vectors with 8/16/32-bit elements: a single `abs` instruction when
///   the required feature (`avx`/`ssse3`/`avx2`) is present.
/// Everything else fails with a TODO error.
fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const ty = self.typeOf(ty_op.operand);

    const result: MCValue = result: {
        // The scalar-integer arms emit their code and `break :result` directly;
        // only the integer-vector arms produce a tag for the code after the switch.
        const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) {
            else => null,
            .int => switch (ty.abiSize(zcu)) {
                0 => unreachable,
                1...8 => {
                    try self.spillEflagsIfOccupied();
                    const src_mcv = try self.resolveInst(ty_op.operand);
                    const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);

                    try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);

                    // The cmov below uses at least a 2-byte register alias.
                    const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
                    // Restore the original value with condition `.l`, reading it
                    // from wherever `src_mcv` lives.
                    switch (src_mcv) {
                        .register => |val_reg| try self.asmCmovccRegisterRegister(
                            .l,
                            registerAlias(dst_mcv.register, cmov_abi_size),
                            registerAlias(val_reg, cmov_abi_size),
                        ),
                        .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
                            .l,
                            registerAlias(dst_mcv.register, cmov_abi_size),
                            try src_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }),
                        ),
                        else => {
                            const val_reg = try self.copyToTmpRegister(ty, src_mcv);
                            try self.asmCmovccRegisterRegister(
                                .l,
                                registerAlias(dst_mcv.register, cmov_abi_size),
                                registerAlias(val_reg, cmov_abi_size),
                            );
                        },
                    }
                    break :result dst_mcv;
                },
                9...16 => {
                    try self.spillEflagsIfOccupied();
                    const src_mcv = try self.resolveInst(ty_op.operand);
                    // Reuse the operand's register pair if it dies here,
                    // otherwise allocate a fresh pair and copy the operand in.
                    const dst_mcv = if (src_mcv == .register_pair and
                        self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
                        const dst_regs = try self.register_manager.allocRegs(
                            2,
                            .{ inst, inst },
                            abi.RegisterClass.gp,
                        );
                        const dst_mcv: MCValue = .{ .register_pair = dst_regs };
                        const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
                        defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);

                        try self.genCopy(ty, dst_mcv, src_mcv, .{});
                        break :dst dst_mcv;
                    };
                    const dst_regs = dst_mcv.register_pair;
                    const dst_locks = self.register_manager.lockRegs(2, dst_regs);
                    defer for (dst_locks) |dst_lock| if (dst_lock) |lock|
                        self.register_manager.unlockReg(lock);

                    const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                    defer self.register_manager.unlockReg(tmp_lock);

                    // tmp = high limb shifted right by 63 with the `.{ ._r, .sa }`
                    // tag; both limbs are xor'ed with it and it is then
                    // subtracted (with borrow into the high limb).
                    try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
                    try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63));
                    try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[0], tmp_reg);
                    try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[1], tmp_reg);
                    try self.asmRegisterRegister(.{ ._, .sub }, dst_regs[0], tmp_reg);
                    try self.asmRegisterRegister(.{ ._, .sbb }, dst_regs[1], tmp_reg);

                    break :result dst_mcv;
                },
                else => {
                    const abi_size: u31 = @intCast(ty.abiSize(zcu));
                    const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable;

                    // tmp_regs[0]: limb index, tmp_regs[1]: borrow carried
                    // between iterations, tmp_regs[2]: scratch for the current limb.
                    const tmp_regs =
                        try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp);
                    const tmp_locks = self.register_manager.lockRegsAssumeUnused(3, tmp_regs);
                    defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);

                    try self.spillEflagsIfOccupied();
                    const src_mcv = try self.resolveInst(ty_op.operand);
                    // NOTE(review): when the operand is not reused, `dst_mcv` is
                    // freshly allocated and nothing visible here copies `src_mcv`
                    // into it before it is read below — confirm whether a copy is
                    // missing.
                    const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                        src_mcv
                    else
                        try self.allocRegOrMem(inst, false);

                    // Compare the top limb against zero and skip the negation
                    // loop on condition `.ns`.
                    try self.asmMemoryImmediate(
                        .{ ._, .cmp },
                        try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .{ .size = .qword }),
                        .u(0),
                    );
                    const positive = try self.asmJccReloc(.ns, undefined);

                    try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[0].to32(), tmp_regs[0].to32());
                    try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[1].to8(), tmp_regs[1].to8());

                    // Loop head: zero the scratch, shift the saved borrow right by
                    // one, `sbb` the limb from the scratch, save the new carry
                    // with `setcc(.c)`, and store the scratch back to the limb.
                    // NOTE(review): `dst_mcv.load_frame` is accessed
                    // unconditionally, so this assumes `dst_mcv` is a
                    // `.load_frame` — confirm.
                    const neg_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
                    try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[2].to32(), tmp_regs[2].to32());
                    try self.asmRegisterImmediate(.{ ._r, .sh }, tmp_regs[1].to8(), .u(1));
                    try self.asmRegisterMemory(.{ ._, .sbb }, tmp_regs[2].to64(), .{
                        .base = .{ .frame = dst_mcv.load_frame.index },
                        .mod = .{ .rm = .{
                            .size = .qword,
                            .index = tmp_regs[0].to64(),
                            .scale = .@"8",
                            .disp = dst_mcv.load_frame.off,
                        } },
                    });
                    try self.asmSetccRegister(.c, tmp_regs[1].to8());
                    try self.asmMemoryRegister(.{ ._, .mov }, .{
                        .base = .{ .frame = dst_mcv.load_frame.index },
                        .mod = .{ .rm = .{
                            .size = .qword,
                            .index = tmp_regs[0].to64(),
                            .scale = .@"8",
                            .disp = dst_mcv.load_frame.off,
                        } },
                    }, tmp_regs[2].to64());

                    // Advance the limb index; `add 1` replaces `inc` when the
                    // target has the `slow_incdec` feature.
                    if (self.hasFeature(.slow_incdec)) {
                        try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1));
                    } else {
                        try self.asmRegister(.{ ._, .inc }, tmp_regs[0].to32());
                    }
                    try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len));
                    _ = try self.asmJccReloc(.b, neg_loop);

                    self.performReloc(positive);
                    break :result dst_mcv;
                },
            },
            .float => return self.floatSign(inst, ty_op.operand, ty),
            .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
                else => null,
                .int => switch (ty.childType(zcu).intInfo(zcu).bits) {
                    else => null,
                    8 => switch (ty.vectorLen(zcu)) {
                        else => null,
                        1...16 => if (self.hasFeature(.avx))
                            .{ .vp_b, .abs }
                        else if (self.hasFeature(.ssse3))
                            .{ .p_b, .abs }
                        else
                            null,
                        17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
                    },
                    16 => switch (ty.vectorLen(zcu)) {
                        else => null,
                        1...8 => if (self.hasFeature(.avx))
                            .{ .vp_w, .abs }
                        else if (self.hasFeature(.ssse3))
                            .{ .p_w, .abs }
                        else
                            null,
                        9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
                    },
                    32 => switch (ty.vectorLen(zcu)) {
                        else => null,
                        1...4 => if (self.hasFeature(.avx))
                            .{ .vp_d, .abs }
                        else if (self.hasFeature(.ssse3))
                            .{ .p_d, .abs }
                        else
                            null,
                        5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
                    },
                },
                .float => return self.floatSign(inst, ty_op.operand, ty),
            },
        }) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(pt)});

        // Integer-vector path: one instruction from memory or register into
        // the (possibly reused) destination register.
        const abi_size: u32 = @intCast(ty.abiSize(zcu));
        const src_mcv = try self.resolveInst(ty_op.operand);
        const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
            src_mcv.getReg().?
        else
            try self.register_manager.allocReg(inst, self.regSetForType(ty));
        const dst_alias = registerAlias(dst_reg, abi_size);
        if (src_mcv.isBase()) try self.asmRegisterMemory(
            mir_tag,
            dst_alias,
            try src_mcv.mem(self, .{ .size = self.memSize(ty) }),
        ) else try self.asmRegisterRegister(
            mir_tag,
            dst_alias,
            registerAlias(if (src_mcv.isRegister())
                src_mcv.getReg().?
            else
                try self.copyToTmpRegister(ty, src_mcv), abi_size),
        );
        break :result .{ .register = dst_reg };
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers an AIR `un_op` square-root instruction.
///
/// Scalar floats that have no instruction path here (80/128-bit, or 16-bit
/// without `f16c`) are lowered to a library call named
/// `floatLibcAbiPrefix(ty)` ++ `sqrt` ++ `floatLibcAbiSuffix(ty)`. 16-bit
/// floats with `f16c` are converted with `cvtph2`, square-rooted, and converted
/// back with `cvtps2ph`. 32/64-bit scalars and the supported vector lengths use
/// a single `sqrt` instruction. Unsupported vector shapes fail with a TODO
/// error.
fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const ty = self.typeOf(un_op);
    const abi_size: u32 = @intCast(ty.abiSize(zcu));

    const result: MCValue = result: {
        switch (ty.zigTypeTag(zcu)) {
            .float => {
                const float_bits = ty.floatBits(self.target.*);
                // True when this scalar width must go through a libcall.
                if (switch (float_bits) {
                    16 => !self.hasFeature(.f16c),
                    32, 64 => false,
                    80, 128 => true,
                    else => unreachable,
                }) {
                    var callee_buf: ["__sqrt?".len]u8 = undefined;
                    break :result try self.genCall(.{ .lib = .{
                        .return_type = ty.toIntern(),
                        .param_types = &.{ty.toIntern()},
                        .callee = std.fmt.bufPrint(&callee_buf, "{s}sqrt{s}", .{
                            floatLibcAbiPrefix(ty),
                            floatLibcAbiSuffix(ty),
                        }) catch unreachable,
                    } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{});
                }
            },
            else => {},
        }

        const src_mcv = try self.resolveInst(un_op);
        const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
            src_mcv
        else
            try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
        const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
        const dst_lock = self.register_manager.lockReg(dst_reg);
        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        // The 16-bit arms emit their whole sequence and `break :result`; the
        // other arms only pick a tag for the switch after this expression.
        const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) {
            .float => switch (ty.floatBits(self.target.*)) {
                16 => {
                    // The libcall branch above already handled the no-`f16c` case.
                    assert(self.hasFeature(.f16c));
                    const mat_src_reg = if (src_mcv.isRegister())
                        src_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(ty, src_mcv);
                    try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
                    try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
                    try self.asmRegisterRegisterImmediate(
                        .{ .v_, .cvtps2ph },
                        dst_reg,
                        dst_reg,
                        .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
                    );
                    break :result dst_mcv;
                },
                32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
                64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
                else => unreachable,
            },
            .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
                .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
                    16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(zcu)) {
                        1 => {
                            try self.asmRegisterRegister(
                                .{ .v_ps, .cvtph2 },
                                dst_reg,
                                (if (src_mcv.isRegister())
                                    src_mcv.getReg().?
                                else
                                    try self.copyToTmpRegister(ty, src_mcv)).to128(),
                            );
                            try self.asmRegisterRegisterRegister(
                                .{ .v_ss, .sqrt },
                                dst_reg,
                                dst_reg,
                                dst_reg,
                            );
                            try self.asmRegisterRegisterImmediate(
                                .{ .v_, .cvtps2ph },
                                dst_reg,
                                dst_reg,
                                .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
                            );
                            break :result dst_mcv;
                        },
                        2...8 => {
                            // Work in a register alias twice as wide as the
                            // 16-bit-element vector, then convert back into
                            // `dst_reg`.
                            const wide_reg = registerAlias(dst_reg, abi_size * 2);
                            if (src_mcv.isBase()) try self.asmRegisterMemory(
                                .{ .v_ps, .cvtph2 },
                                wide_reg,
                                try src_mcv.mem(self, .{ .size = .fromSize(
                                    @intCast(@divExact(wide_reg.bitSize(), 16)),
                                ) }),
                            ) else try self.asmRegisterRegister(
                                .{ .v_ps, .cvtph2 },
                                wide_reg,
                                (if (src_mcv.isRegister())
                                    src_mcv.getReg().?
                                else
                                    try self.copyToTmpRegister(ty, src_mcv)).to128(),
                            );
                            try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
                            try self.asmRegisterRegisterImmediate(
                                .{ .v_, .cvtps2ph },
                                dst_reg,
                                wide_reg,
                                .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))),
                            );
                            break :result dst_mcv;
                        },
                        else => null,
                    } else null,
                    32 => switch (ty.vectorLen(zcu)) {
                        1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
                        2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
                        5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
                        else => null,
                    },
                    64 => switch (ty.vectorLen(zcu)) {
                        1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
                        2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
                        3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
                        else => null,
                    },
                    80, 128 => null,
                    else => unreachable,
                },
                else => unreachable,
            },
            else => unreachable,
        }) orelse return self.fail("TODO implement airSqrt for {}", .{ty.fmt(pt)});
        switch (mir_tag[0]) {
            // As in `genRound`, the `v_ss`/`v_sd` forms take the destination
            // as an additional source operand.
            .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
                mir_tag,
                dst_reg,
                dst_reg,
                try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
            ) else try self.asmRegisterRegisterRegister(
                mir_tag,
                dst_reg,
                dst_reg,
                registerAlias(if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(ty, src_mcv), abi_size),
            ),
            else => if (src_mcv.isBase()) try self.asmRegisterMemory(
                mir_tag,
                dst_reg,
                try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
            ) else try self.asmRegisterRegister(
                mir_tag,
                dst_reg,
                registerAlias(if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(ty, src_mcv), abi_size),
            ),
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ un_op, .none, .none });
}

/// Lowers a unary math AIR instruction to a library call.
///
/// `tag` must be one of `sin`, `cos`, `tan`, `exp`, `exp2`, `log`, `log2`,
/// `log10` or `round`; anything else is `unreachable`. The callee name is
/// `floatLibcAbiPrefix(ty)` ++ `@tagName(tag)` ++ `floatLibcAbiSuffix(ty)`.
fn airUnaryMath(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const ty = self.typeOf(un_op);
    var callee_buf: ["__round?".len]u8 = undefined;
    const result = try self.genCall(.{ .lib = .{
        .return_type = ty.toIntern(),
        .param_types = &.{ty.toIntern()},
        .callee = std.fmt.bufPrint(&callee_buf, "{s}{s}{s}", .{
            floatLibcAbiPrefix(ty),
            switch (tag) {
                .sin,
                .cos,
                .tan,
                .exp,
                .exp2,
                .log,
                .log2,
                .log10,
                .round,
                => @tagName(tag),
                else => unreachable,
            },
            floatLibcAbiSuffix(ty),
        }) catch unreachable,
    } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{});
    return self.finishAir(inst, result, .{ un_op, .none, .none });
}

/// Shorthand for `reuseOperandAdvanced` that tracks the reused storage under
/// `inst` itself.
fn reuseOperand(
    self: *CodeGen,
    inst: Air.Inst.Index,
    operand: Air.Inst.Ref,
    op_index: Liveness.OperandInt,
    mcv: MCValue,
) bool {
    return self.reuseOperandAdvanced(inst, operand, op_index, mcv, inst);
}

/// Tries to hand the storage `mcv` of `operand` over to a new value.
///
/// Returns `false` without changing anything when the operand does not die at
/// `inst` (per liveness), when `mcv` is neither one of the register variants
/// nor a `.load_frame`, or when it is a `.load_frame` whose index `isNamed()`.
/// On success the registers are re-associated with `maybe_tracked_inst` (or
/// freed when it is `null`), the operand is marked as reused, and `true` is
/// returned.
fn reuseOperandAdvanced(
    self: *CodeGen,
    inst: Air.Inst.Index,
    operand: Air.Inst.Ref,
    op_index: Liveness.OperandInt,
    mcv: MCValue,
    maybe_tracked_inst: ?Air.Inst.Index,
) bool {
    if (!self.liveness.operandDies(inst, op_index))
        return false;

    switch (mcv) {
        .register, .register_pair, .register_overflow, .register_mask => for (mcv.getRegs()) |reg| {
            // If it's in the registers table, need to associate the register(s) with the
            // new instruction.
            if (maybe_tracked_inst) |tracked_inst| {
                if (!self.register_manager.isRegFree(reg)) {
                    if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
                        self.register_manager.registers[index] = tracked_inst;
                    }
                }
            } else self.register_manager.freeReg(reg);
        },
        .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
        else => return false,
    }
    // Of the two tags listed here only `.register_overflow` can get past the
    // switch above; `.eflags` already returned `false` through its `else` arm.
    switch (mcv) {
        .eflags, .register_overflow => self.eflags_inst = maybe_tracked_inst,
        else => {},
    }

    // Prevent the operand deaths processing code from deallocating it.
    self.reused_operands.set(op_index);
    const op_inst = operand.toIndex().?;
    self.getResolvedInstValue(op_inst).reuse(self, maybe_tracked_inst, op_inst);

    return true;
}

/// Loads a value through a bit-offset pointer (packed field or vector element)
/// into `dst_mcv`.
///
/// The bit offset is the pointer's `packed_offset.bit_offset` plus, for a
/// comptime vector index, `index * bit size of the element`; a `.runtime`
/// vector index is `unreachable`. Byte-aligned offsets are forwarded to `load`
/// with the pointer adjusted by whole bytes, followed by truncation when the
/// ABI size has more bits than the value. Unaligned offsets are only
/// implemented for values of at most 8 bytes; larger ones fail with a TODO
/// error.
fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
    const pt = self.pt;
    const zcu = pt.zcu;

    const ptr_info = ptr_ty.ptrInfo(zcu);
    const val_ty: Type = .fromInterned(ptr_info.child);
    if (!val_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
    const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu));

    const val_bit_size: u32 = @intCast(val_ty.bitSize(zcu));
    const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
        .none => 0,
        .runtime => unreachable,
        else => |vector_index| @intFromEnum(vector_index) * val_bit_size,
    };
    if (ptr_bit_off % 8 == 0) {
        {
            // Materialize the pointer in a form `offset` can be applied to,
            // and keep its register locked for the duration of the load.
            const mat_ptr_mcv: MCValue = switch (ptr_mcv) {
                .immediate, .register, .register_offset, .lea_frame => ptr_mcv,
                else => .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
            };
            const mat_ptr_lock = switch (mat_ptr_mcv) {
                .register => |mat_ptr_reg| self.register_manager.lockReg(mat_ptr_reg),
                else => null,
            };
            defer if (mat_ptr_lock) |lock| self.register_manager.unlockReg(lock);

            try self.load(dst_mcv, ptr_ty, mat_ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8))));
        }

        // The ABI size covers more bits than the value has: truncate the
        // register, or the limb of the in-memory destination that holds the
        // value's top bits.
        if (val_abi_size * 8 > val_bit_size) {
            if (dst_mcv.isRegister()) {
                try self.truncateRegister(val_ty, dst_mcv.getReg().?);
            } else {
                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                defer self.register_manager.unlockReg(tmp_lock);

                const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
                try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
                try self.truncateRegister(val_ty, tmp_reg);
                try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
            }
        }
        return;
    }

    if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{val_ty.fmt(pt)});

    const limb_abi_size: u31 = @min(val_abi_size, 8);
    const limb_abi_bits = limb_abi_size * 8;
    const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
    const val_bit_off = ptr_bit_off % limb_abi_bits;
    const val_extra_bits = self.regExtraBits(val_ty);

    const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
    const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
    defer self.register_manager.unlockReg(ptr_lock);

    const dst_reg = switch (dst_mcv) {
        .register => |reg| reg,
        else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
    };
    const dst_lock = self.register_manager.lockReg(dst_reg);
    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

    // A single limb is loaded when the bit offset is smaller than the spare
    // bits of the value's register; otherwise twice the value's size is needed.
    const load_abi_size =
        if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
    if (load_abi_size <= 8) {
        // One load, then shift the value down to bit 0.
        const load_reg = registerAlias(dst_reg, load_abi_size);
        try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
            .base = .{ .reg = ptr_reg },
            .mod = .{ .rm = .{
                .size = .fromSize(load_abi_size),
                .disp = val_byte_off,
            } },
        });
        try self.spillEflagsIfOccupied();
        try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(val_bit_off));
    } else {
        // Load two adjacent limbs and combine them with a double shift
        // (`.{ ._rd, .sh }`).
        const tmp_reg =
            registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size);
        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
        defer self.register_manager.unlockReg(tmp_lock);

        const dst_alias = registerAlias(dst_reg, val_abi_size);
        try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
            .base = .{ .reg = ptr_reg },
            .mod = .{ .rm = .{
                .size = .fromSize(val_abi_size),
                .disp = val_byte_off,
            } },
        });
        try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
            .base = .{ .reg = ptr_reg },
            .mod = .{ .rm = .{
                .size = .fromSize(val_abi_size),
                .disp = val_byte_off + limb_abi_size,
            } },
        });
        try self.spillEflagsIfOccupied();
        try self.asmRegisterRegisterImmediate(.{ ._rd, .sh }, dst_alias, tmp_reg, .u(val_bit_off));
    }

    if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
    try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }, .{});
}

/// Loads the pointee of `ptr_mcv` (a pointer of type `ptr_ty`) into `dst_mcv`.
///
/// Does nothing when the pointee type has no runtime bits. Pointer values that
/// can be dereferenced in place are copied via `ptr_mcv.deref()`; pointers that
/// themselves live in memory are first copied into a temporary register. An
/// `.air_ref` is resolved and handled recursively. The remaining `MCValue`
/// variants are `unreachable`.
fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const dst_ty = ptr_ty.childType(zcu);
    if (!dst_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
    switch (ptr_mcv) {
        .none,
        .unreach,
        .dead,
        .undef,
        .eflags,
        .register_pair,
        .register_triple,
        .register_quadruple,
        .register_overflow,
        .register_mask,
        .elementwise_regs_then_frame,
        .reserved_frame,
        => unreachable, // not a valid pointer
        .immediate,
        .register,
        .register_offset,
        .lea_symbol,
        .lea_direct,
        .lea_got,
        .lea_tlv,
        .lea_frame,
        => try self.genCopy(dst_ty, dst_mcv, ptr_mcv.deref(), .{}),
        .memory,
        .indirect,
        .load_symbol,
        .load_direct,
        .load_got,
        .load_tlv,
        .load_frame,
        => {
            const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
            defer self.register_manager.unlockReg(addr_lock);

            try self.genCopy(dst_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{});
        },
        .air_ref => |ptr_ref| try self.load(dst_mcv, ptr_ty, try self.resolveInst(ptr_ref)),
    }
}

/// Lowers an AIR `load` instruction.
///
/// The result is `.none` when the element type has no runtime bits. Otherwise
/// the value is loaded with `packedLoad` (pointer has a vector index or a
/// nonzero packed host size) or `load`. For ABI integers whose ABI size has
/// more bits than the type, the limb holding the top bits is truncated
/// afterwards.
fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const elem_ty = self.typeOfIndex(inst);
    const result: MCValue = result: {
        if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;

        // NOTE(review): presumably reserved because a copy emitted further down
        // may need these three registers — the reason is not visible here.
        try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

        const ptr_ty = self.typeOf(ty_op.operand);
        const elem_size = elem_ty.abiSize(zcu);

        const elem_rs = self.regSetForType(elem_ty);
        const ptr_rs = self.regSetForType(ptr_ty);

        const ptr_mcv = try self.resolveInst(ty_op.operand);
        const dst_mcv = if (elem_size <= 8 and std.math.isPowerOfTwo(elem_size) and
            elem_rs.supersetOf(ptr_rs) and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
            // The MCValue that holds the pointer can be re-used as the value.
            ptr_mcv
        else
            try self.allocRegOrMem(inst, true);

        const ptr_info = ptr_ty.ptrInfo(zcu);
        if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
            try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
        } else {
            try self.load(dst_mcv, ptr_ty, ptr_mcv);
        }

        if (elem_ty.isAbiInt(zcu) and elem_size * 8 > elem_ty.bitSize(zcu)) {
            // Locate the limb containing the value's top bits: the register
            // itself, the second register of a pair, or the last 8-byte-aligned
            // limb of a memory destination.
            const high_mcv: MCValue = switch (dst_mcv) {
                .register => |dst_reg| .{ .register = dst_reg },
                .register_pair => |dst_regs| .{ .register = dst_regs[1] },
                else => dst_mcv.address().offset(@intCast((elem_size - 1) / 8 * 8)).deref(),
            };
            const high_reg = if (high_mcv.isRegister())
                high_mcv.getReg().?
            else
                try self.copyToTmpRegister(.usize, high_mcv);
            const high_lock = self.register_manager.lockReg(high_reg);
            defer if (high_lock) |lock| self.register_manager.unlockReg(lock);

            try self.truncateRegister(elem_ty, high_reg);
            // A memory limb was truncated in a temporary; write it back.
            if (!high_mcv.isRegister()) try self.genCopy(
                if (elem_size <= 8) elem_ty else .usize,
                high_mcv,
                .{ .register = high_reg },
                .{},
            );
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Stores `src_mcv` through a bit-offset pointer (packed field or vector
/// element).
///
/// The host integer is processed in limbs of `min(host_size, 8)` bytes. For
/// each limb touched by the value, the target bits are first cleared in memory
/// with an `and` of the inverted mask, then the shifted and masked source bits
/// are merged in with an `or`. Sources of up to 64 bits, and sources of up to
/// 128 bits at a limb-aligned bit offset, are supported; anything else fails
/// with a TODO error.
fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ptr_info = ptr_ty.ptrInfo(zcu);
    const src_ty: Type = .fromInterned(ptr_info.child);
    if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;

    // NOTE(review): `limb_abi_size` is 0 when `host_size` is 0, which would make
    // the divisions below divide by zero — presumably callers only reach this
    // point with a nonzero host size; confirm.
    const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
    const limb_abi_bits = limb_abi_size * 8;
    const limb_ty = try pt.intType(.unsigned, limb_abi_bits);

    const src_bit_size = src_ty.bitSize(zcu);
    const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
        .none => 0,
        .runtime => unreachable,
        else => |vector_index| @intFromEnum(vector_index) * src_bit_size,
    };
    // Split the bit offset into the byte offset of the first limb and the bit
    // offset inside that limb.
    const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
    const src_bit_off = ptr_bit_off % limb_abi_bits;

    const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
    const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
    defer self.register_manager.unlockReg(ptr_lock);

    var limb_i: u16 = 0;
    while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
        // Bit range of the value that falls inside this limb.
        const part_bit_off = if (limb_i == 0) src_bit_off else 0;
        const part_bit_size =
            @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
        const limb_mem: Memory = .{
            .base = .{ .reg = ptr_reg },
            .mod = .{ .rm = .{
                .size = .fromSize(limb_abi_size),
                .disp = src_byte_off + limb_i * limb_abi_size,
            } },
        };

        // `part_mask` selects the destination bits inside the limb;
        // `part_mask_not` is its complement restricted to the limb width.
        const part_mask = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
            @intCast(part_bit_off);
        const part_mask_not = part_mask ^ (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
        // Clear the destination bits, using an immediate when the mask fits
        // and a scratch register otherwise.
        if (limb_abi_size <= 4) {
            try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .u(part_mask_not));
        } else if (std.math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
            try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .s(small));
        } else {
            // NOTE(review): this register is allocated but not locked, unlike
            // the temporaries below — confirm that is intentional.
            const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, .u(part_mask_not));
            try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
        }

        if (src_bit_size <= 64) {
            const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            const tmp_mcv = MCValue{ .register = tmp_reg };
            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
            defer self.register_manager.unlockReg(tmp_lock);

            try self.genSetReg(tmp_reg, limb_ty, src_mcv, .{});
            // First limb: shift the value up to its bit offset. Second limb:
            // shift down to keep only the bits that spilled over.
            switch (limb_i) {
                0 => try self.genShiftBinOpMir(
                    .{ ._l, .sh },
                    limb_ty,
                    tmp_mcv,
                    .u8,
                    .{ .immediate = src_bit_off },
                ),
                1 => try self.genShiftBinOpMir(
                    .{ ._r, .sh },
                    limb_ty,
                    tmp_mcv,
                    .u8,
                    .{ .immediate = limb_abi_bits - src_bit_off },
                ),
                else => unreachable,
            }
            try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
            try self.asmMemoryRegister(
                .{ ._, .@"or" },
                limb_mem,
                registerAlias(tmp_reg, limb_abi_size),
            );
        } else if (src_bit_size <= 128 and src_bit_off == 0) {
            const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            const tmp_mcv = MCValue{ .register = tmp_reg };
            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
            defer self.register_manager.unlockReg(tmp_lock);

            // Limb-aligned: take the matching limb of the source, no shifting.
            try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
                0 => src_mcv,
                else => src_mcv.address().offset(limb_i * limb_abi_size).deref(),
            }, .{});
            try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
            try self.asmMemoryRegister(
                .{ ._, .@"or" },
                limb_mem,
                registerAlias(tmp_reg, limb_abi_size),
            );
        } else return self.fail("TODO: implement packed store of {}", .{src_ty.fmt(pt)});
    }
}

/// Stores `src_mcv` to the location pointed to by `ptr_mcv` (a pointer of type
/// `ptr_ty`), forwarding `opts` to `genCopy`.
///
/// Mirrors `load`: nothing is emitted for pointee types without runtime bits,
/// directly dereferenceable pointers are written via `ptr_mcv.deref()`,
/// pointers living in memory are first copied into a temporary register, and an
/// `.air_ref` is resolved and handled recursively. The remaining `MCValue`
/// variants are `unreachable`.
fn store(
    self: *CodeGen,
    ptr_ty: Type,
    ptr_mcv: MCValue,
    src_mcv: MCValue,
    opts: CopyOptions,
) InnerError!void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const src_ty = ptr_ty.childType(zcu);
    if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
    switch (ptr_mcv) {
        .none,
        .unreach,
        .dead,
        .undef,
        .eflags,
        .register_pair,
        .register_triple,
        .register_quadruple,
        .register_overflow,
        .register_mask,
        .elementwise_regs_then_frame,
        .reserved_frame,
        => unreachable, // not a valid pointer
        .immediate,
        .register,
        .register_offset,
        .lea_symbol,
        .lea_direct,
        .lea_got,
        .lea_tlv,
        .lea_frame,
        => try self.genCopy(src_ty, ptr_mcv.deref(), src_mcv, opts),
        .memory,
        .indirect,
        .load_symbol,
        .load_direct,
        .load_got,
        .load_tlv,
        .load_frame,
        => {
            const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
            defer self.register_manager.unlockReg(addr_lock);

            try self.genCopy(src_ty, .{ .indirect = .{ .reg = addr_reg } }, src_mcv, opts);
        },
        .air_ref => |ptr_ref| try self.store(ptr_ty, try self.resolveInst(ptr_ref), src_mcv, opts),
    }
}

/// Lowers an AIR `bin_op` store instruction (`lhs` is the pointer, `rhs` the
/// value).
///
/// When `safety` is false and the value resolves to `.undef`, nothing is
/// emitted. Otherwise the store goes through `packedStore` (pointer has a
/// vector index or a nonzero packed host size) or `store`, with `safety`
/// forwarded in the copy options.
fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

    result: {
        if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;

        // NOTE(review): same three registers as in `airLoad`; presumably
        // reserved for a copy emitted below — the reason is not visible here.
        try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

        const src_mcv = try self.resolveInst(bin_op.rhs);
        const ptr_mcv = try self.resolveInst(bin_op.lhs);
        const ptr_ty = self.typeOf(bin_op.lhs);

        const ptr_info = ptr_ty.ptrInfo(zcu);
        if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
            try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
        } else {
            try self.store(ptr_ty, ptr_mcv, src_mcv, .{ .safety = safety });
        }
    }
    return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
}

/// Lowers an AIR struct-field-pointer instruction whose operand and field
/// index are stored in an `Air.StructField` extra payload.
fn airStructFieldPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
    const result = try self.fieldPtr(inst, extra.struct_operand, extra.field_index);
    return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none });
}

/// Lowers an AIR struct-field-pointer instruction whose field index is given
/// by the caller as `field_index` rather than read from the instruction data.
fn airStructFieldPtrIndex(self: *CodeGen, inst: Air.Inst.Index, field_index: u8) !void {
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const result = try self.fieldPtr(inst, ty_op.operand, field_index);
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Computes the `MCValue` of a pointer to field `field_index` of the aggregate
/// pointed to by `operand`.
///
/// The base pointer is used as is when it is an `.immediate` or `.lea_frame`,
/// or when it is a register value that can be reused; otherwise it is copied
/// into a register tracked for `inst`. The byte offset from `fieldOffset` is
/// then applied with `MCValue.offset`.
fn fieldPtr(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, field_index: u32) !MCValue {
    const ptr_field_ty = self.typeOfIndex(inst);

    const src_mcv = try self.resolveInst(operand);
    const dst_mcv = if (switch (src_mcv) {
        .immediate, .lea_frame => true,
        .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv),
        else => false,
    }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv);
    return dst_mcv.offset(self.fieldOffset(self.typeOf(operand), ptr_field_ty, field_index));
}

/// Returns the byte offset to add to a pointer of type `ptr_agg_ty` to obtain
/// a pointer of type `ptr_field_ty` to field `field_index`.
///
/// For `auto` and `extern` layouts this is the field's struct offset. For
/// `packed` layouts it is the aggregate pointer's bit offset plus the field's
/// packed bit offset (0 when the aggregate is not a struct), minus the bit
/// offset recorded in the field pointer type, divided exactly by 8.
fn fieldOffset(self: *CodeGen, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u32) i32 {
    const pt = self.pt;
    const zcu = pt.zcu;
    const agg_ty = ptr_agg_ty.childType(zcu);
    return switch (agg_ty.containerLayout(zcu)) {
        .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(field_index, zcu)),
        .@"packed" => @divExact(@as(i32, ptr_agg_ty.ptrInfo(zcu).packed_offset.bit_offset) +
            (if (zcu.typeToStruct(agg_ty)) |loaded_struct| pt.structPackedFieldBitOffset(loaded_struct, field_index) else 0) -
            ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8),
    };
}

fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
    const result: MCValue = result: {
        const operand = extra.struct_operand;
        const index = extra.field_index;

        const container_ty = self.typeOf(operand);
        const container_rc = self.regSetForType(container_ty);
        const field_ty = container_ty.fieldType(index, zcu);
        if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
        const field_rc = self.regSetForType(field_ty);
        const field_is_gp = field_rc.supersetOf(abi.RegisterClass.gp);

        const src_mcv = try self.resolveInst(operand);
        const field_off: u32 = switch (container_ty.containerLayout(zcu)) {
            .auto, .@"extern" => @intCast(container_ty.structFieldOffset(extra.field_index, zcu) * 8),
            .@"packed" => if (zcu.typeToStruct(container_ty)) |loaded_struct|
                pt.structPackedFieldBitOffset(loaded_struct, extra.field_index)
            else
                0,
        };

        switch (src_mcv) {
            .register => |src_reg| {
                const src_reg_lock = self.register_manager.lockRegAssumeUnused(src_reg);
                defer self.register_manager.unlockReg(src_reg_lock);

                const src_in_field_rc =
                    field_rc.isSet(RegisterManager.indexOfRegIntoTracked(src_reg).?);
                const dst_reg = if (src_in_field_rc and self.reuseOperand(inst, operand, 0, src_mcv))
                    src_reg
                else if (field_off == 0)
                    (try self.copyToRegisterWithInstTracking(inst, field_ty, src_mcv)).register
                else
                    try self.copyToTmpRegister(.usize, .{ .register = src_reg });
                const dst_mcv: MCValue = .{ .register = dst_reg };
                const dst_lock = self.register_manager.lockReg(dst_reg);
                defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

                if (field_off > 0) {
                    try self.spillEflagsIfOccupied();
                    try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, dst_mcv, .u8, .{ .immediate = field_off });
                }
                if (abi.RegisterClass.gp.isSet(RegisterManager.indexOfRegIntoTracked(dst_reg).?)
and 15921 container_ty.abiSize(zcu) * 8 > field_ty.bitSize(zcu)) 15922 try self.truncateRegister(field_ty, dst_reg); 15923 15924 break :result if (field_off == 0 or field_rc.supersetOf(abi.RegisterClass.gp)) 15925 dst_mcv 15926 else 15927 try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); 15928 }, 15929 .register_pair => |src_regs| { 15930 const src_regs_lock = self.register_manager.lockRegsAssumeUnused(2, src_regs); 15931 defer for (src_regs_lock) |lock| self.register_manager.unlockReg(lock); 15932 15933 const field_bit_size: u32 = @intCast(field_ty.bitSize(zcu)); 15934 const src_reg = if (field_off + field_bit_size <= 64) 15935 src_regs[0] 15936 else if (field_off >= 64) 15937 src_regs[1] 15938 else { 15939 const dst_regs: [2]Register = if (field_rc.supersetOf(container_rc) and 15940 self.reuseOperand(inst, operand, 0, src_mcv)) src_regs else dst: { 15941 const dst_regs = 15942 try self.register_manager.allocRegs(2, @splat(null), field_rc); 15943 const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); 15944 defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); 15945 15946 try self.genCopy(container_ty, .{ .register_pair = dst_regs }, src_mcv, .{}); 15947 break :dst dst_regs; 15948 }; 15949 const dst_mcv = MCValue{ .register_pair = dst_regs }; 15950 const dst_locks = self.register_manager.lockRegs(2, dst_regs); 15951 defer for (dst_locks) |dst_lock| if (dst_lock) |lock| 15952 self.register_manager.unlockReg(lock); 15953 15954 if (field_off > 0) { 15955 try self.spillEflagsIfOccupied(); 15956 try self.genShiftBinOpMir(.{ ._r, .sh }, .u128, dst_mcv, .u8, .{ .immediate = field_off }); 15957 } 15958 15959 if (field_bit_size <= 64) { 15960 if (self.regExtraBits(field_ty) > 0) 15961 try self.truncateRegister(field_ty, dst_regs[0]); 15962 break :result if (field_rc.supersetOf(abi.RegisterClass.gp)) 15963 .{ .register = dst_regs[0] } 15964 else 15965 try self.copyToRegisterWithInstTracking(inst, field_ty, .{ 15966 .register 
= dst_regs[0], 15967 }); 15968 } 15969 15970 if (field_bit_size < 128) try self.truncateRegister( 15971 try pt.intType(.unsigned, @intCast(field_bit_size - 64)), 15972 dst_regs[1], 15973 ); 15974 break :result if (field_rc.supersetOf(abi.RegisterClass.gp)) 15975 dst_mcv 15976 else 15977 try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); 15978 }; 15979 15980 const dst_reg = try self.copyToTmpRegister(.usize, .{ .register = src_reg }); 15981 const dst_mcv = MCValue{ .register = dst_reg }; 15982 const dst_lock = self.register_manager.lockReg(dst_reg); 15983 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 15984 15985 if (field_off % 64 > 0) { 15986 try self.spillEflagsIfOccupied(); 15987 try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, dst_mcv, .u8, .{ .immediate = field_off % 64 }); 15988 } 15989 if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(field_ty, dst_reg); 15990 15991 break :result if (field_rc.supersetOf(abi.RegisterClass.gp)) 15992 dst_mcv 15993 else 15994 try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); 15995 }, 15996 .register_overflow => |ro| { 15997 switch (index) { 15998 // Get wrapped value for overflow operation. 15999 0 => if (self.reuseOperand(inst, extra.struct_operand, 0, src_mcv)) { 16000 self.eflags_inst = null; // actually stop tracking the overflow part 16001 break :result .{ .register = ro.reg }; 16002 } else break :result try self.copyToRegisterWithInstTracking(inst, .usize, .{ .register = ro.reg }), 16003 // Get overflow bit. 
16004 1 => if (self.reuseOperandAdvanced(inst, extra.struct_operand, 0, src_mcv, null)) { 16005 self.eflags_inst = inst; // actually keep tracking the overflow part 16006 break :result .{ .eflags = ro.eflags }; 16007 } else { 16008 const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); 16009 try self.asmSetccRegister(ro.eflags, dst_reg.to8()); 16010 break :result .{ .register = dst_reg.to8() }; 16011 }, 16012 else => unreachable, 16013 } 16014 }, 16015 .load_frame => |frame_addr| { 16016 const field_abi_size: u32 = @intCast(field_ty.abiSize(zcu)); 16017 if (field_off % 8 == 0) { 16018 const field_byte_off = @divExact(field_off, 8); 16019 const off_mcv = src_mcv.address().offset(@intCast(field_byte_off)).deref(); 16020 const field_bit_size = field_ty.bitSize(zcu); 16021 16022 if (field_abi_size <= 8) { 16023 const int_ty = try pt.intType( 16024 if (field_ty.isAbiInt(zcu)) field_ty.intInfo(zcu).signedness else .unsigned, 16025 @intCast(field_bit_size), 16026 ); 16027 16028 const dst_reg = try self.register_manager.allocReg( 16029 if (field_is_gp) inst else null, 16030 abi.RegisterClass.gp, 16031 ); 16032 const dst_mcv = MCValue{ .register = dst_reg }; 16033 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); 16034 defer self.register_manager.unlockReg(dst_lock); 16035 16036 try self.genCopy(int_ty, dst_mcv, off_mcv, .{}); 16037 if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(int_ty, dst_reg); 16038 break :result if (field_is_gp) 16039 dst_mcv 16040 else 16041 try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); 16042 } 16043 16044 const container_abi_size: u32 = @intCast(container_ty.abiSize(zcu)); 16045 const dst_mcv = if (field_byte_off + field_abi_size <= container_abi_size and 16046 self.reuseOperand(inst, operand, 0, src_mcv)) 16047 off_mcv 16048 else dst: { 16049 const dst_mcv = try self.allocRegOrMem(inst, true); 16050 try self.genCopy(field_ty, dst_mcv, off_mcv, .{}); 16051 break :dst 
dst_mcv; 16052 }; 16053 if (field_abi_size * 8 > field_bit_size and dst_mcv.isBase()) { 16054 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16055 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 16056 defer self.register_manager.unlockReg(tmp_lock); 16057 16058 const hi_mcv = 16059 dst_mcv.address().offset(@intCast(field_bit_size / 64 * 8)).deref(); 16060 try self.genSetReg(tmp_reg, .usize, hi_mcv, .{}); 16061 try self.truncateRegister(field_ty, tmp_reg); 16062 try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{}); 16063 } 16064 break :result dst_mcv; 16065 } 16066 16067 const limb_abi_size: u31 = @min(field_abi_size, 8); 16068 const limb_abi_bits = limb_abi_size * 8; 16069 const field_byte_off: i32 = @intCast(field_off / limb_abi_bits * limb_abi_size); 16070 const field_bit_off = field_off % limb_abi_bits; 16071 16072 if (field_abi_size > 8) { 16073 return self.fail("TODO implement struct_field_val with large packed field", .{}); 16074 } 16075 16076 const dst_reg = try self.register_manager.allocReg( 16077 if (field_is_gp) inst else null, 16078 abi.RegisterClass.gp, 16079 ); 16080 const field_extra_bits = self.regExtraBits(field_ty); 16081 const load_abi_size = 16082 if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2; 16083 if (load_abi_size <= 8) { 16084 const load_reg = registerAlias(dst_reg, load_abi_size); 16085 try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{ 16086 .base = .{ .frame = frame_addr.index }, 16087 .mod = .{ .rm = .{ 16088 .size = .fromSize(load_abi_size), 16089 .disp = frame_addr.off + field_byte_off, 16090 } }, 16091 }); 16092 try self.spillEflagsIfOccupied(); 16093 try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(field_bit_off)); 16094 } else { 16095 const tmp_reg = registerAlias( 16096 try self.register_manager.allocReg(null, abi.RegisterClass.gp), 16097 field_abi_size, 16098 ); 16099 const tmp_lock = 
self.register_manager.lockRegAssumeUnused(tmp_reg); 16100 defer self.register_manager.unlockReg(tmp_lock); 16101 16102 const dst_alias = registerAlias(dst_reg, field_abi_size); 16103 try self.asmRegisterMemory( 16104 .{ ._, .mov }, 16105 dst_alias, 16106 .{ 16107 .base = .{ .frame = frame_addr.index }, 16108 .mod = .{ .rm = .{ 16109 .size = .fromSize(field_abi_size), 16110 .disp = frame_addr.off + field_byte_off, 16111 } }, 16112 }, 16113 ); 16114 try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{ 16115 .base = .{ .frame = frame_addr.index }, 16116 .mod = .{ .rm = .{ 16117 .size = .fromSize(field_abi_size), 16118 .disp = frame_addr.off + field_byte_off + limb_abi_size, 16119 } }, 16120 }); 16121 try self.spillEflagsIfOccupied(); 16122 try self.asmRegisterRegisterImmediate( 16123 .{ ._rd, .sh }, 16124 dst_alias, 16125 tmp_reg, 16126 .u(field_bit_off), 16127 ); 16128 } 16129 16130 if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg); 16131 16132 const dst_mcv = MCValue{ .register = dst_reg }; 16133 break :result if (field_is_gp) 16134 dst_mcv 16135 else 16136 try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); 16137 }, 16138 else => return self.fail("TODO implement airStructFieldVal for {}", .{src_mcv}), 16139 } 16140 }; 16141 return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none }); 16142 } 16143 16144 fn airFieldParentPtr(self: *CodeGen, inst: Air.Inst.Index) !void { 16145 const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; 16146 const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; 16147 16148 const ptr_agg_ty = self.typeOfIndex(inst); 16149 const src_mcv = try self.resolveInst(extra.field_ptr); 16150 const dst_mcv = if (src_mcv.isRegisterOffset() and 16151 self.reuseOperand(inst, extra.field_ptr, 0, src_mcv)) 16152 src_mcv 16153 else 16154 try self.copyToRegisterWithInstTracking(inst, ptr_agg_ty, src_mcv); 16155 const result = 
dst_mcv.offset(-self.fieldOffset(ptr_agg_ty, self.typeOf(extra.field_ptr), extra.field_index)); 16156 return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none }); 16157 } 16158 16159 fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue { 16160 const pt = self.pt; 16161 const zcu = pt.zcu; 16162 const src_ty = self.typeOf(src_air); 16163 if (src_ty.zigTypeTag(zcu) == .vector) 16164 return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(pt)}); 16165 16166 var src_mcv = try self.resolveInst(src_air); 16167 switch (src_mcv) { 16168 .eflags => |cc| switch (tag) { 16169 .not => { 16170 if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) 16171 return .{ .eflags = cc.negate() }; 16172 try self.spillEflagsIfOccupied(); 16173 src_mcv = try self.resolveInst(src_air); 16174 }, 16175 else => {}, 16176 }, 16177 else => {}, 16178 } 16179 16180 const src_lock = switch (src_mcv) { 16181 .register => |reg| self.register_manager.lockRegAssumeUnused(reg), 16182 else => null, 16183 }; 16184 defer if (src_lock) |lock| self.register_manager.unlockReg(lock); 16185 16186 const dst_mcv: MCValue = dst: { 16187 if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv; 16188 16189 const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true); 16190 try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); 16191 break :dst dst_mcv; 16192 }; 16193 const dst_lock = switch (dst_mcv) { 16194 .register => |reg| self.register_manager.lockReg(reg), 16195 else => null, 16196 }; 16197 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 16198 16199 const abi_size: u16 = @intCast(src_ty.abiSize(zcu)); 16200 switch (tag) { 16201 .not => { 16202 const limb_abi_size: u16 = @min(abi_size, 8); 16203 const int_info: InternPool.Key.IntType = if (src_ty.ip_index == .bool_type) 16204 .{ .signedness = .unsigned, .bits = 1 } 16205 else 16206 src_ty.intInfo(zcu); 16207 var 
byte_off: i32 = 0; 16208 while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) { 16209 const limb_bits: u16 = @intCast(@min(switch (int_info.signedness) { 16210 .signed => abi_size * 8, 16211 .unsigned => int_info.bits, 16212 } - byte_off * 8, limb_abi_size * 8)); 16213 const limb_ty = try pt.intType(int_info.signedness, limb_bits); 16214 const limb_mcv = switch (byte_off) { 16215 0 => dst_mcv, 16216 else => dst_mcv.address().offset(byte_off).deref(), 16217 }; 16218 16219 if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) { 16220 const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_bits); 16221 try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask }); 16222 } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); 16223 } 16224 }, 16225 .neg => { 16226 try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv); 16227 const bit_size = src_ty.intInfo(zcu).bits; 16228 if (abi_size * 8 > bit_size) { 16229 if (dst_mcv.isRegister()) { 16230 try self.truncateRegister(src_ty, dst_mcv.getReg().?); 16231 } else { 16232 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16233 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 16234 defer self.register_manager.unlockReg(tmp_lock); 16235 16236 const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref(); 16237 try self.genSetReg(tmp_reg, .usize, hi_mcv, .{}); 16238 try self.truncateRegister(src_ty, tmp_reg); 16239 try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{}); 16240 } 16241 } 16242 }, 16243 else => unreachable, 16244 } 16245 return dst_mcv; 16246 } 16247 16248 fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { 16249 const pt = self.pt; 16250 const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); 16251 if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, dst_ty.fmt(pt) }); 16252 switch (dst_mcv) { 16253 .none, 
16254 .unreach, 16255 .dead, 16256 .undef, 16257 .immediate, 16258 .register_offset, 16259 .eflags, 16260 .register_overflow, 16261 .register_mask, 16262 .lea_symbol, 16263 .lea_direct, 16264 .lea_got, 16265 .lea_tlv, 16266 .lea_frame, 16267 .elementwise_regs_then_frame, 16268 .reserved_frame, 16269 .air_ref, 16270 => unreachable, // unmodifiable destination 16271 .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)), 16272 .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented 16273 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => { 16274 const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16275 const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); 16276 defer self.register_manager.unlockReg(addr_reg_lock); 16277 16278 try self.genSetReg(addr_reg, .usize, dst_mcv.address(), .{}); 16279 try self.asmMemory(mir_tag, .{ .base = .{ .reg = addr_reg }, .mod = .{ .rm = .{ 16280 .size = .fromSize(abi_size), 16281 } } }); 16282 }, 16283 .indirect, .load_frame => try self.asmMemory( 16284 mir_tag, 16285 try dst_mcv.mem(self, .{ .size = .fromSize(abi_size) }), 16286 ), 16287 } 16288 } 16289 16290 /// Clobbers .rcx for non-immediate shift value. 
16291 fn genShiftBinOpMir( 16292 self: *CodeGen, 16293 tag: Mir.Inst.FixedTag, 16294 lhs_ty: Type, 16295 lhs_mcv: MCValue, 16296 rhs_ty: Type, 16297 rhs_mcv: MCValue, 16298 ) !void { 16299 const pt = self.pt; 16300 const zcu = pt.zcu; 16301 const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); 16302 const shift_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu)); 16303 try self.spillEflagsIfOccupied(); 16304 16305 if (abi_size > 16) { 16306 const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; 16307 assert(shift_abi_size >= 1 and shift_abi_size <= 2); 16308 16309 const rcx_lock: ?RegisterLock = switch (rhs_mcv) { 16310 .immediate => |shift_imm| switch (shift_imm) { 16311 0 => return, 16312 else => null, 16313 }, 16314 else => lock: { 16315 if (switch (rhs_mcv) { 16316 .register => |rhs_reg| rhs_reg.id() != Register.rcx.id(), 16317 else => true, 16318 }) { 16319 self.register_manager.getRegAssumeFree(.rcx, null); 16320 try self.genSetReg(.rcx, rhs_ty, rhs_mcv, .{}); 16321 } 16322 break :lock self.register_manager.lockReg(.rcx); 16323 }, 16324 }; 16325 defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock); 16326 16327 const temp_regs = try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); 16328 const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); 16329 defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); 16330 16331 switch (tag[0]) { 16332 ._l => { 16333 try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1)); 16334 switch (rhs_mcv) { 16335 .immediate => |shift_imm| try self.asmRegisterImmediate( 16336 .{ ._, .mov }, 16337 temp_regs[0].to32(), 16338 .u(limbs_len - (shift_imm >> 6) - 1), 16339 ), 16340 else => { 16341 try self.asmRegisterRegister( 16342 .{ ._, .movzx }, 16343 temp_regs[2].to32(), 16344 registerAlias(.rcx, shift_abi_size), 16345 ); 16346 try self.asmRegisterImmediate(.{ ._, .@"and" }, .cl, .u(std.math.maxInt(u6))); 16347 try 
self.asmRegisterImmediate(.{ ._r, .sh }, temp_regs[2].to32(), .u(6)); 16348 try self.asmRegisterRegister( 16349 .{ ._, .mov }, 16350 temp_regs[0].to32(), 16351 temp_regs[1].to32(), 16352 ); 16353 try self.asmRegisterRegister( 16354 .{ ._, .sub }, 16355 temp_regs[0].to32(), 16356 temp_regs[2].to32(), 16357 ); 16358 }, 16359 } 16360 }, 16361 ._r => { 16362 try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[1].to32(), temp_regs[1].to32()); 16363 switch (rhs_mcv) { 16364 .immediate => |shift_imm| try self.asmRegisterImmediate( 16365 .{ ._, .mov }, 16366 temp_regs[0].to32(), 16367 .u(shift_imm >> 6), 16368 ), 16369 else => { 16370 try self.asmRegisterRegister( 16371 .{ ._, .movzx }, 16372 temp_regs[0].to32(), 16373 registerAlias(.rcx, shift_abi_size), 16374 ); 16375 try self.asmRegisterImmediate(.{ ._, .@"and" }, .cl, .u(std.math.maxInt(u6))); 16376 try self.asmRegisterImmediate(.{ ._r, .sh }, temp_regs[0].to32(), .u(6)); 16377 }, 16378 } 16379 }, 16380 else => unreachable, 16381 } 16382 16383 const slow_inc_dec = self.hasFeature(.slow_incdec); 16384 if (switch (rhs_mcv) { 16385 .immediate => |shift_imm| shift_imm >> 6 < limbs_len - 1, 16386 else => true, 16387 }) { 16388 try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[2].to64(), .{ 16389 .base = .{ .frame = lhs_mcv.load_frame.index }, 16390 .mod = .{ .rm = .{ 16391 .size = .qword, 16392 .index = temp_regs[0].to64(), 16393 .scale = .@"8", 16394 .disp = lhs_mcv.load_frame.off, 16395 } }, 16396 }); 16397 const skip = switch (rhs_mcv) { 16398 .immediate => undefined, 16399 else => switch (tag[0]) { 16400 ._l => try self.asmJccReloc(.z, undefined), 16401 ._r => skip: { 16402 try self.asmRegisterImmediate( 16403 .{ ._, .cmp }, 16404 temp_regs[0].to32(), 16405 .u(limbs_len - 1), 16406 ); 16407 break :skip try self.asmJccReloc(.nb, undefined); 16408 }, 16409 else => unreachable, 16410 }, 16411 }; 16412 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 16413 try self.asmRegisterMemory(.{ ._, .mov }, 
temp_regs[3].to64(), .{ 16414 .base = .{ .frame = lhs_mcv.load_frame.index }, 16415 .mod = .{ .rm = .{ 16416 .size = .qword, 16417 .index = temp_regs[0].to64(), 16418 .scale = .@"8", 16419 .disp = switch (tag[0]) { 16420 ._l => lhs_mcv.load_frame.off - 8, 16421 ._r => lhs_mcv.load_frame.off + 8, 16422 else => unreachable, 16423 }, 16424 } }, 16425 }); 16426 switch (rhs_mcv) { 16427 .immediate => |shift_imm| try self.asmRegisterRegisterImmediate( 16428 .{ switch (tag[0]) { 16429 ._l => ._ld, 16430 ._r => ._rd, 16431 else => unreachable, 16432 }, .sh }, 16433 temp_regs[2].to64(), 16434 temp_regs[3].to64(), 16435 .u(shift_imm & std.math.maxInt(u6)), 16436 ), 16437 else => try self.asmRegisterRegisterRegister(.{ switch (tag[0]) { 16438 ._l => ._ld, 16439 ._r => ._rd, 16440 else => unreachable, 16441 }, .sh }, temp_regs[2].to64(), temp_regs[3].to64(), .cl), 16442 } 16443 try self.asmMemoryRegister(.{ ._, .mov }, .{ 16444 .base = .{ .frame = lhs_mcv.load_frame.index }, 16445 .mod = .{ .rm = .{ 16446 .size = .qword, 16447 .index = temp_regs[1].to64(), 16448 .scale = .@"8", 16449 .disp = lhs_mcv.load_frame.off, 16450 } }, 16451 }, temp_regs[2].to64()); 16452 try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[2].to64(), temp_regs[3].to64()); 16453 switch (tag[0]) { 16454 ._l => { 16455 if (slow_inc_dec) { 16456 try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); 16457 try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[0].to32(), .u(1)); 16458 } else { 16459 try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); 16460 try self.asmRegister(.{ ._, .dec }, temp_regs[0].to32()); 16461 } 16462 _ = try self.asmJccReloc(.nz, loop); 16463 }, 16464 ._r => { 16465 if (slow_inc_dec) { 16466 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1)); 16467 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); 16468 } else { 16469 try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32()); 16470 try self.asmRegister(.{ ._, 
.inc }, temp_regs[0].to32()); 16471 } 16472 try self.asmRegisterImmediate( 16473 .{ ._, .cmp }, 16474 temp_regs[0].to32(), 16475 .u(limbs_len - 1), 16476 ); 16477 _ = try self.asmJccReloc(.b, loop); 16478 }, 16479 else => unreachable, 16480 } 16481 switch (rhs_mcv) { 16482 .immediate => {}, 16483 else => self.performReloc(skip), 16484 } 16485 } 16486 switch (rhs_mcv) { 16487 .immediate => |shift_imm| try self.asmRegisterImmediate( 16488 tag, 16489 temp_regs[2].to64(), 16490 .u(shift_imm & std.math.maxInt(u6)), 16491 ), 16492 else => try self.asmRegisterRegister(tag, temp_regs[2].to64(), .cl), 16493 } 16494 try self.asmMemoryRegister(.{ ._, .mov }, .{ 16495 .base = .{ .frame = lhs_mcv.load_frame.index }, 16496 .mod = .{ .rm = .{ 16497 .size = .qword, 16498 .index = temp_regs[1].to64(), 16499 .scale = .@"8", 16500 .disp = lhs_mcv.load_frame.off, 16501 } }, 16502 }, temp_regs[2].to64()); 16503 if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate( 16504 tag, 16505 temp_regs[2].to64(), 16506 .u(63), 16507 ); 16508 if (switch (rhs_mcv) { 16509 .immediate => |shift_imm| shift_imm >> 6 > 0, 16510 else => true, 16511 }) { 16512 const skip = switch (rhs_mcv) { 16513 .immediate => undefined, 16514 else => switch (tag[0]) { 16515 ._l => skip: { 16516 try self.asmRegisterRegister( 16517 .{ ._, .@"test" }, 16518 temp_regs[1].to32(), 16519 temp_regs[1].to32(), 16520 ); 16521 break :skip try self.asmJccReloc(.z, undefined); 16522 }, 16523 ._r => skip: { 16524 try self.asmRegisterImmediate( 16525 .{ ._, .cmp }, 16526 temp_regs[1].to32(), 16527 .u(limbs_len - 1), 16528 ); 16529 break :skip try self.asmJccReloc(.nb, undefined); 16530 }, 16531 else => unreachable, 16532 }, 16533 }; 16534 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 16535 switch (tag[0]) { 16536 ._l => if (slow_inc_dec) { 16537 try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); 16538 } else { 16539 try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); 16540 
}, 16541 ._r => if (slow_inc_dec) { 16542 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1)); 16543 } else { 16544 try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32()); 16545 }, 16546 else => unreachable, 16547 } 16548 if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryRegister(.{ ._, .mov }, .{ 16549 .base = .{ .frame = lhs_mcv.load_frame.index }, 16550 .mod = .{ .rm = .{ 16551 .size = .qword, 16552 .index = temp_regs[1].to64(), 16553 .scale = .@"8", 16554 .disp = lhs_mcv.load_frame.off, 16555 } }, 16556 }, temp_regs[2].to64()) else try self.asmMemoryImmediate(.{ ._, .mov }, .{ 16557 .base = .{ .frame = lhs_mcv.load_frame.index }, 16558 .mod = .{ .rm = .{ 16559 .size = .qword, 16560 .index = temp_regs[1].to64(), 16561 .scale = .@"8", 16562 .disp = lhs_mcv.load_frame.off, 16563 } }, 16564 }, .u(0)); 16565 switch (tag[0]) { 16566 ._l => _ = try self.asmJccReloc(.nz, loop), 16567 ._r => { 16568 try self.asmRegisterImmediate( 16569 .{ ._, .cmp }, 16570 temp_regs[1].to32(), 16571 .u(limbs_len - 1), 16572 ); 16573 _ = try self.asmJccReloc(.b, loop); 16574 }, 16575 else => unreachable, 16576 } 16577 switch (rhs_mcv) { 16578 .immediate => {}, 16579 else => self.performReloc(skip), 16580 } 16581 } 16582 return; 16583 } 16584 16585 assert(shift_abi_size == 1); 16586 const shift_mcv: MCValue = shift: { 16587 switch (rhs_mcv) { 16588 .immediate => |shift_imm| switch (shift_imm) { 16589 0 => return, 16590 else => break :shift rhs_mcv, 16591 }, 16592 .register => |rhs_reg| if (rhs_reg.id() == Register.rcx.id()) 16593 break :shift rhs_mcv, 16594 else => {}, 16595 } 16596 self.register_manager.getRegAssumeFree(.rcx, null); 16597 try self.genSetReg(.cl, rhs_ty, rhs_mcv, .{}); 16598 break :shift .{ .register = .rcx }; 16599 }; 16600 if (abi_size > 8) { 16601 const info: struct { indices: [2]u31, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) { 16602 ._l => .{ .indices = .{ 0, 1 }, .double_tag = .{ ._ld, .sh } }, 16603 ._r => .{ .indices = .{ 1, 0 }, 
.double_tag = .{ ._rd, .sh } }, 16604 else => unreachable, 16605 }; 16606 switch (lhs_mcv) { 16607 .register_pair => |lhs_regs| switch (shift_mcv) { 16608 .immediate => |shift_imm| if (shift_imm > 0 and shift_imm < 64) { 16609 try self.asmRegisterRegisterImmediate( 16610 info.double_tag, 16611 lhs_regs[info.indices[1]], 16612 lhs_regs[info.indices[0]], 16613 .u(shift_imm), 16614 ); 16615 try self.asmRegisterImmediate( 16616 tag, 16617 lhs_regs[info.indices[0]], 16618 .u(shift_imm), 16619 ); 16620 return; 16621 } else { 16622 assert(shift_imm < 128); 16623 try self.asmRegisterRegister( 16624 .{ ._, .mov }, 16625 lhs_regs[info.indices[1]], 16626 lhs_regs[info.indices[0]], 16627 ); 16628 if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate( 16629 tag, 16630 lhs_regs[info.indices[0]], 16631 .u(63), 16632 ) else try self.asmRegisterRegister( 16633 .{ ._, .xor }, 16634 lhs_regs[info.indices[0]], 16635 lhs_regs[info.indices[0]], 16636 ); 16637 if (shift_imm > 64) try self.asmRegisterImmediate( 16638 tag, 16639 lhs_regs[info.indices[1]], 16640 .u(shift_imm - 64), 16641 ); 16642 return; 16643 }, 16644 .register => |shift_reg| { 16645 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16646 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 16647 defer self.register_manager.unlockReg(tmp_lock); 16648 16649 if (tag[0] == ._r and tag[1] == .sa) { 16650 try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, lhs_regs[info.indices[0]]); 16651 try self.asmRegisterImmediate(tag, tmp_reg, .u(63)); 16652 } else try self.asmRegisterRegister( 16653 .{ ._, .xor }, 16654 tmp_reg.to32(), 16655 tmp_reg.to32(), 16656 ); 16657 try self.asmRegisterRegisterRegister( 16658 info.double_tag, 16659 lhs_regs[info.indices[1]], 16660 lhs_regs[info.indices[0]], 16661 registerAlias(shift_reg, 1), 16662 ); 16663 try self.asmRegisterRegister( 16664 tag, 16665 lhs_regs[info.indices[0]], 16666 registerAlias(shift_reg, 1), 16667 ); 16668 try 
self.asmRegisterImmediate(.{ ._, .cmp }, registerAlias(shift_reg, 1), .u(64)); 16669 try self.asmCmovccRegisterRegister( 16670 .ae, 16671 lhs_regs[info.indices[1]], 16672 lhs_regs[info.indices[0]], 16673 ); 16674 try self.asmCmovccRegisterRegister(.ae, lhs_regs[info.indices[0]], tmp_reg); 16675 return; 16676 }, 16677 else => {}, 16678 }, 16679 .load_frame => |dst_frame_addr| { 16680 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16681 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 16682 defer self.register_manager.unlockReg(tmp_lock); 16683 16684 switch (shift_mcv) { 16685 .immediate => |shift_imm| if (shift_imm > 0 and shift_imm < 64) { 16686 try self.asmRegisterMemory( 16687 .{ ._, .mov }, 16688 tmp_reg, 16689 .{ 16690 .base = .{ .frame = dst_frame_addr.index }, 16691 .mod = .{ .rm = .{ 16692 .size = .qword, 16693 .disp = dst_frame_addr.off + info.indices[0] * 8, 16694 } }, 16695 }, 16696 ); 16697 try self.asmMemoryRegisterImmediate( 16698 info.double_tag, 16699 .{ 16700 .base = .{ .frame = dst_frame_addr.index }, 16701 .mod = .{ .rm = .{ 16702 .size = .qword, 16703 .disp = dst_frame_addr.off + info.indices[1] * 8, 16704 } }, 16705 }, 16706 tmp_reg, 16707 .u(shift_imm), 16708 ); 16709 try self.asmMemoryImmediate( 16710 tag, 16711 .{ 16712 .base = .{ .frame = dst_frame_addr.index }, 16713 .mod = .{ .rm = .{ 16714 .size = .qword, 16715 .disp = dst_frame_addr.off + info.indices[0] * 8, 16716 } }, 16717 }, 16718 .u(shift_imm), 16719 ); 16720 return; 16721 } else { 16722 assert(shift_imm < 128); 16723 try self.asmRegisterMemory( 16724 .{ ._, .mov }, 16725 tmp_reg, 16726 .{ 16727 .base = .{ .frame = dst_frame_addr.index }, 16728 .mod = .{ .rm = .{ 16729 .size = .qword, 16730 .disp = dst_frame_addr.off + info.indices[0] * 8, 16731 } }, 16732 }, 16733 ); 16734 if (shift_imm > 64) try self.asmRegisterImmediate( 16735 tag, 16736 tmp_reg, 16737 .u(shift_imm - 64), 16738 ); 16739 try self.asmMemoryRegister( 16740 .{ ._, 
.mov }, 16741 .{ 16742 .base = .{ .frame = dst_frame_addr.index }, 16743 .mod = .{ .rm = .{ 16744 .size = .qword, 16745 .disp = dst_frame_addr.off + info.indices[1] * 8, 16746 } }, 16747 }, 16748 tmp_reg, 16749 ); 16750 if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate( 16751 tag, 16752 .{ 16753 .base = .{ .frame = dst_frame_addr.index }, 16754 .mod = .{ .rm = .{ 16755 .size = .qword, 16756 .disp = dst_frame_addr.off + info.indices[0] * 8, 16757 } }, 16758 }, 16759 .u(63), 16760 ) else { 16761 try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32()); 16762 try self.asmMemoryRegister( 16763 .{ ._, .mov }, 16764 .{ 16765 .base = .{ .frame = dst_frame_addr.index }, 16766 .mod = .{ .rm = .{ 16767 .size = .qword, 16768 .disp = dst_frame_addr.off + info.indices[0] * 8, 16769 } }, 16770 }, 16771 tmp_reg, 16772 ); 16773 } 16774 return; 16775 }, 16776 .register => |shift_reg| { 16777 const first_reg = 16778 try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16779 const first_lock = self.register_manager.lockRegAssumeUnused(first_reg); 16780 defer self.register_manager.unlockReg(first_lock); 16781 16782 const second_reg = 16783 try self.register_manager.allocReg(null, abi.RegisterClass.gp); 16784 const second_lock = self.register_manager.lockRegAssumeUnused(second_reg); 16785 defer self.register_manager.unlockReg(second_lock); 16786 16787 try self.asmRegisterMemory( 16788 .{ ._, .mov }, 16789 first_reg, 16790 .{ 16791 .base = .{ .frame = dst_frame_addr.index }, 16792 .mod = .{ .rm = .{ 16793 .size = .qword, 16794 .disp = dst_frame_addr.off + info.indices[0] * 8, 16795 } }, 16796 }, 16797 ); 16798 try self.asmRegisterMemory( 16799 .{ ._, .mov }, 16800 second_reg, 16801 .{ 16802 .base = .{ .frame = dst_frame_addr.index }, 16803 .mod = .{ .rm = .{ 16804 .size = .qword, 16805 .disp = dst_frame_addr.off + info.indices[1] * 8, 16806 } }, 16807 }, 16808 ); 16809 if (tag[0] == ._r and tag[1] == .sa) { 16810 try 
self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg); 16811 try self.asmRegisterImmediate(tag, tmp_reg, .u(63)); 16812 } else try self.asmRegisterRegister( 16813 .{ ._, .xor }, 16814 tmp_reg.to32(), 16815 tmp_reg.to32(), 16816 ); 16817 try self.asmRegisterRegisterRegister( 16818 info.double_tag, 16819 second_reg, 16820 first_reg, 16821 registerAlias(shift_reg, 1), 16822 ); 16823 try self.asmRegisterRegister(tag, first_reg, registerAlias(shift_reg, 1)); 16824 try self.asmRegisterImmediate( 16825 .{ ._, .cmp }, 16826 registerAlias(shift_reg, 1), 16827 .u(64), 16828 ); 16829 try self.asmCmovccRegisterRegister(.ae, second_reg, first_reg); 16830 try self.asmCmovccRegisterRegister(.ae, first_reg, tmp_reg); 16831 try self.asmMemoryRegister( 16832 .{ ._, .mov }, 16833 .{ 16834 .base = .{ .frame = dst_frame_addr.index }, 16835 .mod = .{ .rm = .{ 16836 .size = .qword, 16837 .disp = dst_frame_addr.off + info.indices[1] * 8, 16838 } }, 16839 }, 16840 second_reg, 16841 ); 16842 try self.asmMemoryRegister( 16843 .{ ._, .mov }, 16844 .{ 16845 .base = .{ .frame = dst_frame_addr.index }, 16846 .mod = .{ .rm = .{ 16847 .size = .qword, 16848 .disp = dst_frame_addr.off + info.indices[0] * 8, 16849 } }, 16850 }, 16851 first_reg, 16852 ); 16853 return; 16854 }, 16855 else => {}, 16856 } 16857 }, 16858 else => {}, 16859 } 16860 } else switch (lhs_mcv) { 16861 .register => |lhs_reg| switch (shift_mcv) { 16862 .immediate => |shift_imm| return self.asmRegisterImmediate( 16863 tag, 16864 registerAlias(lhs_reg, abi_size), 16865 .u(shift_imm), 16866 ), 16867 .register => |shift_reg| return self.asmRegisterRegister( 16868 tag, 16869 registerAlias(lhs_reg, abi_size), 16870 registerAlias(shift_reg, 1), 16871 ), 16872 else => {}, 16873 }, 16874 .memory, .indirect, .load_frame => { 16875 const lhs_mem: Memory = switch (lhs_mcv) { 16876 .memory => |addr| .{ 16877 .base = .{ .reg = .ds }, 16878 .mod = .{ .rm = .{ 16879 .size = .fromSize(abi_size), 16880 .disp = std.math.cast(i32, @as(i64, 
@bitCast(addr))) orelse 16881 return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ 16882 @tagName(lhs_mcv), 16883 @tagName(shift_mcv), 16884 }), 16885 } }, 16886 }, 16887 .indirect => |reg_off| .{ 16888 .base = .{ .reg = reg_off.reg }, 16889 .mod = .{ .rm = .{ 16890 .size = .fromSize(abi_size), 16891 .disp = reg_off.off, 16892 } }, 16893 }, 16894 .load_frame => |frame_addr| .{ 16895 .base = .{ .frame = frame_addr.index }, 16896 .mod = .{ .rm = .{ 16897 .size = .fromSize(abi_size), 16898 .disp = frame_addr.off, 16899 } }, 16900 }, 16901 else => unreachable, 16902 }; 16903 switch (shift_mcv) { 16904 .immediate => |shift_imm| return self.asmMemoryImmediate(tag, lhs_mem, .u(shift_imm)), 16905 .register => |shift_reg| return self.asmMemoryRegister( 16906 tag, 16907 lhs_mem, 16908 registerAlias(shift_reg, 1), 16909 ), 16910 else => {}, 16911 } 16912 }, 16913 else => {}, 16914 } 16915 return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ 16916 @tagName(lhs_mcv), 16917 @tagName(shift_mcv), 16918 }); 16919 } 16920 16921 /// Result is always a register. 16922 /// Clobbers .rcx for non-immediate rhs, therefore care is needed to spill .rcx upfront. 16923 /// Asserts .rcx is free. 
fn genShiftBinOp(
    self: *CodeGen,
    air_tag: Air.Inst.Tag,
    maybe_inst: ?Air.Inst.Index,
    lhs_mcv: MCValue,
    rhs_mcv: MCValue,
    lhs_ty: Type,
    rhs_ty: Type,
) !MCValue {
    const pt = self.pt;
    const zcu = pt.zcu;
    // Vector shifts are not handled by this path.
    if (lhs_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement genShiftBinOp for {}", .{
        lhs_ty.fmt(pt),
    });

    // Take ownership of rcx and keep it locked until this function returns.
    try self.register_manager.getKnownReg(.rcx, null);
    const rcx_lock = self.register_manager.lockReg(.rcx);
    defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock);

    // An lhs held in a non-general-purpose register is first copied into a temporary;
    // that temporary is never offered for operand reuse below.
    const mat_lhs_mcv: MCValue, const can_reuse_lhs = switch (lhs_mcv) {
        .register => |lhs_reg| switch (lhs_reg.class()) {
            .general_purpose => .{ lhs_mcv, true },
            else => lhs: {
                const mat_lhs_mcv = try self.allocTempRegOrMem(lhs_ty, true);
                try self.genCopy(lhs_ty, mat_lhs_mcv, lhs_mcv, .{});
                break :lhs .{ mat_lhs_mcv, false };
            },
        },
        else => .{ lhs_mcv, true },
    };
    const lhs_lock = switch (mat_lhs_mcv) {
        .register => |reg| self.register_manager.lockReg(reg),
        else => null,
    };
    defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

    const rhs_lock = switch (rhs_mcv) {
        .register => |reg| self.register_manager.lockReg(reg),
        else => null,
    };
    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

    // Shift in place when `reuseOperand` allows taking over the lhs location;
    // otherwise allocate a destination and copy the lhs into it.
    const dst_mcv: MCValue = dst: {
        if (can_reuse_lhs) if (maybe_inst) |inst| {
            const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
            if (self.reuseOperand(inst, bin_op.lhs, 0, mat_lhs_mcv)) break :dst mat_lhs_mcv;
        };
        const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
        try self.genCopy(lhs_ty, dst_mcv, mat_lhs_mcv, .{});
        break :dst dst_mcv;
    };

    // `_l` / `_r` selects the direction; `sa` is used for signed and `sh` for unsigned lhs types.
    const signedness = lhs_ty.intInfo(zcu).signedness;
    try self.genShiftBinOpMir(switch (air_tag) {
        .shl, .shl_exact => switch (signedness) {
            .signed => .{ ._l, .sa },
            .unsigned => .{ ._l, .sh },
        },
        .shr, .shr_exact => switch (signedness) {
            .signed => .{ ._r, .sa },
            .unsigned => .{ ._r, .sh },
        },
        else => unreachable,
    }, lhs_ty, dst_mcv, rhs_ty, rhs_mcv);
    return dst_mcv;
}

/// Lowers the integer multiply, divide, remainder or modulo operation `tag` applied to
/// `lhs_mcv` and `rhs_mcv` (both of type `src_ty`), producing a value of type `dst_ty`.
///
/// The result is not always a single register. Depending on the path taken it is an alias
/// of `.rax` or `.rdx`, the `.rax`/`.rdx` register pair (16-byte multiply), the value
/// returned by `genInlineIntDivFloor` or a copy helper, or frame memory obtained from
/// `allocRegOrMemAdvanced`.
///
/// Asserts that `.rax`, `.rcx` and `.rdx` are free and that `eflags_inst` is null, therefore
/// care is needed to spill them upfront.
/// Fails with a TODO error for vector and float destination types and for size/signedness
/// combinations that have no lowering here.
fn genMulDivBinOp(
    self: *CodeGen,
    tag: Air.Inst.Tag,
    maybe_inst: ?Air.Inst.Index,
    dst_ty: Type,
    src_ty: Type,
    lhs_mcv: MCValue,
    rhs_mcv: MCValue,
) !MCValue {
    const pt = self.pt;
    const zcu = pt.zcu;
    if (dst_ty.zigTypeTag(zcu) == .vector or dst_ty.zigTypeTag(zcu) == .float) return self.fail(
        "TODO implement genMulDivBinOp for {s} from {} to {}",
        .{ @tagName(tag), src_ty.fmt(pt), dst_ty.fmt(pt) },
    );
    const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
    const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));

    assert(self.register_manager.isRegFree(.rax));
    assert(self.register_manager.isRegFree(.rcx));
    assert(self.register_manager.isRegFree(.rdx));
    assert(self.eflags_inst == null);

    // 16-byte by 16-byte wrapping multiply, assembled from 64-bit pieces.
    if (dst_abi_size == 16 and src_abi_size == 16) {
        assert(tag == .mul or tag == .mul_wrap);
        const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
        defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);

        const mat_lhs_mcv = switch (lhs_mcv) {
            .load_symbol => mat_lhs_mcv: {
                // TODO clean this up!
                const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
                break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
            },
            else => lhs_mcv,
        };
        const mat_lhs_lock = switch (mat_lhs_mcv) {
            .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
            else => null,
        };
        defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
        const mat_rhs_mcv = switch (rhs_mcv) {
            .load_symbol => mat_rhs_mcv: {
                // TODO clean this up!
                const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
                break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
            },
            else => rhs_mcv,
        };
        const mat_rhs_lock = switch (mat_rhs_mcv) {
            .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
            else => null,
        };
        defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);

        const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
        defer self.register_manager.unlockReg(tmp_lock);

        // In the comments below, "first" is offset 0 / register_pair[0] and "second" is
        // offset 8 / register_pair[1] (presumably the low and high limbs, respectively).
        // rax = first lhs limb
        if (mat_lhs_mcv.isBase())
            try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .{ .size = .qword }))
        else
            try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]);
        // tmp = second rhs limb * first lhs limb
        if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
            .{ ._, .mov },
            tmp_reg,
            try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
        ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]);
        try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax);
        // rdx:rax = rax * first rhs limb (widening), then fold tmp into rdx
        if (mat_rhs_mcv.isBase())
            try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
        else
            try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
        try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
        // tmp = second lhs limb * first rhs limb, folded into rdx as well
        if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
            .{ ._, .mov },
            tmp_reg,
            try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
        ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]);
        if (mat_rhs_mcv.isBase())
            try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
        else
            try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]);
        try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
        return .{ .register_pair = .{ .rax, .rdx } };
    }

    // Sizes that the single-instruction lowering at the bottom cannot handle:
    // mismatched operand/result sizes, or operands wider than 8 bytes.
    if (switch (tag) {
        else => unreachable,
        .mul, .mul_wrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2,
        .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size,
    } or src_abi_size > 8) {
        const src_info = src_ty.intInfo(zcu);
        switch (tag) {
            .mul, .mul_wrap => {
                // Inline limb-by-limb multiply over frame memory. This path reads
                // `load_frame` from lhs, rhs and dst directly, so it relies on all three
                // being frame-resident.
                const slow_inc = self.hasFeature(.slow_incdec);
                const limb_len = std.math.divCeil(u32, src_abi_size, 8) catch unreachable;

                try self.spillRegisters(&.{ .rax, .rcx, .rdx });
                const reg_locks = self.register_manager.lockRegs(3, .{ .rax, .rcx, .rdx });
                defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
                    self.register_manager.unlockReg(lock);

                // The destination accumulates partial products, so it starts out zeroed.
                const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
                try self.genInlineMemset(
                    dst_mcv.address(),
                    .{ .immediate = 0 },
                    .{ .immediate = src_abi_size },
                    .{},
                );

                // temp_regs[0]: rhs limb index (outer loop counter)
                // temp_regs[1]: current rhs limb
                // temp_regs[2]: lhs limb index
                // temp_regs[3]: dst limb index (starts at the rhs limb index)
                const temp_regs =
                    try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
                const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
                defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);

                try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());

                const outer_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
                try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[1].to64(), .{
                    .base = .{ .frame = rhs_mcv.load_frame.index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .index = temp_regs[0].to64(),
                        .scale = .@"8",
                        .disp = rhs_mcv.load_frame.off,
                    } },
                });
                // A zero rhs limb contributes nothing: skip the inner loop entirely.
                try self.asmRegisterRegister(.{ ._, .@"test" }, temp_regs[1].to64(), temp_regs[1].to64());
                const skip_inner = try self.asmJccReloc(.z, undefined);

                try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[2].to32(), temp_regs[2].to32());
                try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[3].to32(), temp_regs[0].to32());
                try self.asmRegisterRegister(.{ ._, .xor }, .ecx, .ecx);
                try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx);

                const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
                // cl and ch each carry one saved carry bit across iterations: the shift
                // moves the saved bit back into CF, and setc stores the new one.
                // First add the upper half of the previous product (rdx)...
                try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1));
                try self.asmMemoryRegister(.{ ._, .adc }, .{
                    .base = .{ .frame = dst_mcv.load_frame.index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .index = temp_regs[3].to64(),
                        .scale = .@"8",
                        .disp = dst_mcv.load_frame.off,
                    } },
                }, .rdx);
                try self.asmSetccRegister(.c, .cl);

                // ...then compute rdx:rax = lhs limb * rhs limb...
                try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{
                    .base = .{ .frame = lhs_mcv.load_frame.index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .index = temp_regs[2].to64(),
                        .scale = .@"8",
                        .disp = lhs_mcv.load_frame.off,
                    } },
                });
                try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64());

                // ...and add its lower half (rax) into the same destination limb.
                try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1));
                try self.asmMemoryRegister(.{ ._, .adc }, .{
                    .base = .{ .frame = dst_mcv.load_frame.index },
                    .mod = .{ .rm = .{
                        .size = .qword,
                        .index = temp_regs[3].to64(),
                        .scale = .@"8",
                        .disp = dst_mcv.load_frame.off,
                    } },
                }, .rax);
                try self.asmSetccRegister(.c, .ch);

                // Advance the lhs and dst limb indices; `add 1` replaces `inc` on targets
                // with the slow_incdec feature.
                if (slow_inc) {
                    try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
                    try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1));
                } else {
                    try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
                    try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32());
                }
                try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len));
                _ = try self.asmJccReloc(.b, inner_loop);

                self.performReloc(skip_inner);
                if (slow_inc) {
                    try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
                } else {
                    try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
                }
                try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len));
                _ = try self.asmJccReloc(.b, outer_loop);

                return dst_mcv;
            },
            .div_trunc, .div_floor, .div_exact, .rem, .mod => switch (src_info.signedness) {
                // Signed wide division falls through to the TODO failure below.
                .signed => {},
                .unsigned => {
                    // Unsigned wide division/remainder is delegated to a library routine
                    // taking (dst pointer, lhs pointer, rhs pointer, bit count).
                    const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
                    const manyptr_u32_ty = try pt.ptrType(.{
                        .child = .u32_type,
                        .flags = .{
                            .size = .many,
                        },
                    });
                    const manyptr_const_u32_ty = try pt.ptrType(.{
                        .child = .u32_type,
                        .flags = .{
                            .size = .many,
                            .is_const = true,
                        },
                    });
                    _ = try self.genCall(.{ .lib = .{
                        .return_type = .void_type,
                        .param_types = &.{
                            manyptr_u32_ty.toIntern(),
                            manyptr_const_u32_ty.toIntern(),
                            manyptr_const_u32_ty.toIntern(),
                            .usize_type,
                        },
                        .callee = switch (tag) {
                            .div_trunc,
                            .div_floor,
                            .div_exact,
                            => "__udivei4",
                            .rem,
                            .mod,
                            => "__umodei4",
                            else => unreachable,
                        },
                    } }, &.{
                        manyptr_u32_ty,
                        manyptr_const_u32_ty,
                        manyptr_const_u32_ty,
                        .usize,
                    }, &.{
                        dst_mcv.address(),
                        lhs_mcv.address(),
                        rhs_mcv.address(),
                        .{ .immediate = src_info.bits },
                    }, .{});
                    return dst_mcv;
                },
            },
            else => {},
        }
        return self.fail(
            "TODO implement genMulDivBinOp for {s} from {} to {}",
            .{ @tagName(tag), src_ty.fmt(pt), dst_ty.fmt(pt) },
        );
    }
    // From here on the operands fit in 8 bytes. A result wider than 8 bytes (widening
    // multiply) is computed using the source type and size.
    const ty = if (dst_abi_size <= 8) dst_ty else src_ty;
    const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size;

    const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
    defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);

    const signedness = ty.intInfo(zcu).signedness;
    switch (tag) {
        .mul,
        .mul_wrap,
        .rem,
        .div_trunc,
        .div_exact,
        => {
            // Track the instruction in whichever of rax/rdx is returned as its result.
            const track_inst_rax = switch (tag) {
                .mul, .mul_wrap => if (dst_abi_size <= 8) maybe_inst else null,
                .div_exact, .div_trunc => maybe_inst,
                else => null,
            };
            const track_inst_rdx = switch (tag) {
                .rem => maybe_inst,
                else => null,
            };
            try self.register_manager.getKnownReg(.rax, track_inst_rax);
            try self.register_manager.getKnownReg(.rdx, track_inst_rdx);

            try self.genIntMulDivOpMir(switch (signedness) {
                .signed => switch (tag) {
                    .mul, .mul_wrap => .{ .i_, .mul },
                    .div_trunc, .div_exact, .rem => .{ .i_, .div },
                    else => unreachable,
                },
                .unsigned => switch (tag) {
                    .mul, .mul_wrap => .{ ._, .mul },
                    .div_trunc, .div_exact, .rem => .{ ._, .div },
                    else => unreachable,
                },
            }, ty, lhs_mcv, rhs_mcv);

            if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
                .mul, .mul_wrap, .div_trunc, .div_exact => .rax,
                .rem => .rdx,
                else => unreachable,
            }, dst_abi_size) };

            // Wider result: store rax at offset 0 and rdx at offset 8 of a frame slot.
            const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
            try self.asmMemoryRegister(.{ ._, .mov }, .{
                .base = .{ .frame = dst_mcv.load_frame.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .disp = dst_mcv.load_frame.off,
                } },
            }, .rax);
            try self.asmMemoryRegister(.{ ._, .mov }, .{
                .base = .{ .frame = dst_mcv.load_frame.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .disp = dst_mcv.load_frame.off + 8,
                } },
            }, .rdx);
            return dst_mcv;
        },

        .mod => {
            try self.register_manager.getKnownReg(.rax, null);
            try self.register_manager.getKnownReg(
                .rdx,
                if (signedness == .unsigned) maybe_inst else null,
            );

            switch (signedness) {
                .signed => {
                    // Signed mod is computed as lhs - divFloor(lhs, rhs) * rhs.
                    const lhs_lock = switch (lhs_mcv) {
                        .register => |reg| self.register_manager.lockReg(reg),
                        else => null,
                    };
                    defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
                    const rhs_lock = switch (rhs_mcv) {
                        .register => |reg| self.register_manager.lockReg(reg),
                        else => null,
                    };
                    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

                    // hack around hazard between rhs and div_floor by copying rhs to another register
                    const rhs_copy = try self.copyToTmpRegister(ty, rhs_mcv);
                    const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy);
                    defer self.register_manager.unlockReg(rhs_copy_lock);

                    const div_floor = try self.genInlineIntDivFloor(ty, lhs_mcv, rhs_mcv);
                    try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy });
                    const div_floor_lock = self.register_manager.lockReg(div_floor.register);
                    defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock);

                    const result: MCValue = if (maybe_inst) |inst|
                        try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv)
                    else
                        .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) };
                    try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor);

                    return result;
                },
                .unsigned => {
                    // Unsigned mod is the remainder of `div`, returned in rdx.
                    try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, rhs_mcv);
                    return .{ .register = registerAlias(.rdx, abi_size) };
                },
            }
        },

        .div_floor => {
            try self.register_manager.getKnownReg(
                .rax,
                if (signedness == .unsigned) maybe_inst else null,
            );
            try self.register_manager.getKnownReg(.rdx, null);

            // Use the tolerant `lockReg` for the operands, as the `.mod` branch above does.
            // lhs and rhs may name the same register (or one that is already locked), in
            // which case `lockRegAssumeUnused` would trip its "not locked" assertion.
            const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
                .register => |reg| self.register_manager.lockReg(reg),
                else => null,
            };
            defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

            // For signed operands the rhs is copied to a register of its own before being
            // handed to `genInlineIntDivFloor`.
            const actual_rhs_mcv: MCValue = blk: {
                switch (signedness) {
                    .signed => {
                        const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
                            .register => |reg| self.register_manager.lockReg(reg),
                            else => null,
                        };
                        defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

                        if (maybe_inst) |inst| {
                            break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs_mcv);
                        }
                        break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs_mcv) };
                    },
                    .unsigned => break :blk rhs_mcv,
                }
            };
            const rhs_lock: ?RegisterLock = switch (actual_rhs_mcv) {
                .register => |reg| self.register_manager.lockReg(reg),
                else => null,
            };
            defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

            switch (signedness) {
                .signed => return try self.genInlineIntDivFloor(ty, lhs_mcv, actual_rhs_mcv),
                .unsigned => {
                    // Unsigned floor division is the quotient of `div`, returned in rax.
                    try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, actual_rhs_mcv);
                    return .{ .register = registerAlias(.rax, abi_size) };
                },
            }
        },

        else => unreachable,
    }
}

fn genBinOp(
    self: *CodeGen,
    maybe_inst: ?Air.Inst.Index,
    air_tag: Air.Inst.Tag,
    lhs_air: Air.Inst.Ref,
    rhs_air: Air.Inst.Ref,
)
!MCValue { 17407 const pt = self.pt; 17408 const zcu = pt.zcu; 17409 const lhs_ty = self.typeOf(lhs_air); 17410 const rhs_ty = self.typeOf(rhs_air); 17411 const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); 17412 17413 if (lhs_ty.isRuntimeFloat()) libcall: { 17414 const float_bits = lhs_ty.floatBits(self.target.*); 17415 const type_needs_libcall = switch (float_bits) { 17416 16 => !self.hasFeature(.f16c), 17417 32, 64 => false, 17418 80, 128 => true, 17419 else => unreachable, 17420 }; 17421 switch (air_tag) { 17422 .rem, .mod => {}, 17423 else => if (!type_needs_libcall) break :libcall, 17424 } 17425 var callee_buf: ["__mod?f3".len]u8 = undefined; 17426 const callee = switch (air_tag) { 17427 .add, 17428 .sub, 17429 .mul, 17430 .div_float, 17431 .div_trunc, 17432 .div_floor, 17433 .div_exact, 17434 => std.fmt.bufPrint(&callee_buf, "__{s}{c}f3", .{ 17435 @tagName(air_tag)[0..3], 17436 floatCompilerRtAbiName(float_bits), 17437 }), 17438 .rem, .mod, .min, .max => std.fmt.bufPrint(&callee_buf, "{s}f{s}{s}", .{ 17439 floatLibcAbiPrefix(lhs_ty), 17440 switch (air_tag) { 17441 .rem, .mod => "mod", 17442 .min => "min", 17443 .max => "max", 17444 else => unreachable, 17445 }, 17446 floatLibcAbiSuffix(lhs_ty), 17447 }), 17448 else => return self.fail("TODO implement genBinOp for {s} {}", .{ 17449 @tagName(air_tag), lhs_ty.fmt(pt), 17450 }), 17451 } catch unreachable; 17452 const result = try self.genCall(.{ .lib = .{ 17453 .return_type = lhs_ty.toIntern(), 17454 .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() }, 17455 .callee = callee, 17456 } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } }, .{}); 17457 return switch (air_tag) { 17458 .mod => result: { 17459 const adjusted: MCValue = if (type_needs_libcall) adjusted: { 17460 var add_callee_buf: ["__add?f3".len]u8 = undefined; 17461 break :adjusted try self.genCall(.{ .lib = .{ 17462 .return_type = lhs_ty.toIntern(), 17463 .param_types = &.{ 17464 lhs_ty.toIntern(), 17465 
rhs_ty.toIntern(), 17466 }, 17467 .callee = std.fmt.bufPrint(&add_callee_buf, "__add{c}f3", .{ 17468 floatCompilerRtAbiName(float_bits), 17469 }) catch unreachable, 17470 } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } }, .{}); 17471 } else switch (float_bits) { 17472 16, 32, 64 => adjusted: { 17473 const dst_reg = switch (result) { 17474 .register => |reg| reg, 17475 else => if (maybe_inst) |inst| 17476 (try self.copyToRegisterWithInstTracking(inst, lhs_ty, result)).register 17477 else 17478 try self.copyToTmpRegister(lhs_ty, result), 17479 }; 17480 const dst_lock = self.register_manager.lockReg(dst_reg); 17481 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 17482 17483 const rhs_mcv = try self.resolveInst(rhs_air); 17484 const src_mcv: MCValue = if (float_bits == 16) src: { 17485 assert(self.hasFeature(.f16c)); 17486 const tmp_reg = (try self.register_manager.allocReg( 17487 null, 17488 abi.RegisterClass.sse, 17489 )).to128(); 17490 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 17491 defer self.register_manager.unlockReg(tmp_lock); 17492 17493 if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( 17494 .{ .vp_w, .insr }, 17495 dst_reg, 17496 dst_reg, 17497 try rhs_mcv.mem(self, .{ .size = .word }), 17498 .u(1), 17499 ) else try self.asmRegisterRegisterRegister( 17500 .{ .vp_, .unpcklwd }, 17501 dst_reg, 17502 dst_reg, 17503 (if (rhs_mcv.isRegister()) 17504 rhs_mcv.getReg().? 
17505 else 17506 try self.copyToTmpRegister(rhs_ty, rhs_mcv)).to128(), 17507 ); 17508 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); 17509 break :src .{ .register = tmp_reg }; 17510 } else rhs_mcv; 17511 17512 if (self.hasFeature(.avx)) { 17513 const mir_tag: Mir.Inst.FixedTag = switch (float_bits) { 17514 16, 32 => .{ .v_ss, .add }, 17515 64 => .{ .v_sd, .add }, 17516 else => unreachable, 17517 }; 17518 if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( 17519 mir_tag, 17520 dst_reg, 17521 dst_reg, 17522 try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), 17523 ) else try self.asmRegisterRegisterRegister( 17524 mir_tag, 17525 dst_reg, 17526 dst_reg, 17527 (if (src_mcv.isRegister()) 17528 src_mcv.getReg().? 17529 else 17530 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 17531 ); 17532 } else { 17533 const mir_tag: Mir.Inst.FixedTag = switch (float_bits) { 17534 32 => .{ ._ss, .add }, 17535 64 => .{ ._sd, .add }, 17536 else => unreachable, 17537 }; 17538 if (src_mcv.isBase()) try self.asmRegisterMemory( 17539 mir_tag, 17540 dst_reg, 17541 try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), 17542 ) else try self.asmRegisterRegister( 17543 mir_tag, 17544 dst_reg, 17545 (if (src_mcv.isRegister()) 17546 src_mcv.getReg().? 
17547 else 17548 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 17549 ); 17550 } 17551 17552 if (float_bits == 16) try self.asmRegisterRegisterImmediate( 17553 .{ .v_, .cvtps2ph }, 17554 dst_reg, 17555 dst_reg, 17556 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), 17557 ); 17558 break :adjusted .{ .register = dst_reg }; 17559 }, 17560 80, 128 => return self.fail("TODO implement genBinOp for {s} of {}", .{ 17561 @tagName(air_tag), lhs_ty.fmt(pt), 17562 }), 17563 else => unreachable, 17564 }; 17565 break :result try self.genCall(.{ .lib = .{ 17566 .return_type = lhs_ty.toIntern(), 17567 .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() }, 17568 .callee = callee, 17569 } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }, .{}); 17570 }, 17571 .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{ 17572 .mode = switch (air_tag) { 17573 .div_trunc => .zero, 17574 .div_floor => .down, 17575 else => unreachable, 17576 }, 17577 .precision = .inexact, 17578 }), 17579 else => result, 17580 }; 17581 } 17582 17583 const sse_op = switch (lhs_ty.zigTypeTag(zcu)) { 17584 else => false, 17585 .float => true, 17586 .vector => switch (lhs_ty.childType(zcu).toIntern()) { 17587 .bool_type, .u1_type => false, 17588 else => true, 17589 }, 17590 }; 17591 if (sse_op and ((lhs_ty.scalarType(zcu).isRuntimeFloat() and 17592 lhs_ty.scalarType(zcu).floatBits(self.target.*) == 80) or 17593 lhs_ty.abiSize(zcu) > self.vectorSize(.float))) 17594 return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(pt) }); 17595 17596 const maybe_mask_reg = switch (air_tag) { 17597 else => null, 17598 .rem, .mod => unreachable, 17599 .max, .min => if (lhs_ty.scalarType(zcu).isRuntimeFloat()) registerAlias( 17600 if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: { 17601 try self.register_manager.getKnownReg(.xmm0, null); 17602 break :mask .xmm0; 17603 } else try self.register_manager.allocReg(null, abi.RegisterClass.sse), 
17604 abi_size, 17605 ) else null, 17606 }; 17607 const mask_lock = 17608 if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; 17609 defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); 17610 17611 const ordered_air: [2]Air.Inst.Ref = if (lhs_ty.isVector(zcu) and 17612 switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 17613 .bool => false, 17614 .int => switch (air_tag) { 17615 .cmp_lt, .cmp_gte => true, 17616 else => false, 17617 }, 17618 .float => switch (air_tag) { 17619 .cmp_gte, .cmp_gt => true, 17620 else => false, 17621 }, 17622 else => unreachable, 17623 }) .{ rhs_air, lhs_air } else .{ lhs_air, rhs_air }; 17624 17625 if (lhs_ty.isAbiInt(zcu)) for (ordered_air) |op_air| { 17626 switch (try self.resolveInst(op_air)) { 17627 .register => |op_reg| switch (op_reg.class()) { 17628 .sse => try self.register_manager.getReg(op_reg, null), 17629 else => {}, 17630 }, 17631 else => {}, 17632 } 17633 }; 17634 17635 const lhs_mcv = try self.resolveInst(ordered_air[0]); 17636 var rhs_mcv = try self.resolveInst(ordered_air[1]); 17637 switch (lhs_mcv) { 17638 .immediate => |imm| switch (imm) { 17639 0 => switch (air_tag) { 17640 .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air[1]), 17641 else => {}, 17642 }, 17643 else => {}, 17644 }, 17645 else => {}, 17646 } 17647 17648 const is_commutative = switch (air_tag) { 17649 .add, 17650 .add_wrap, 17651 .mul, 17652 .bool_or, 17653 .bit_or, 17654 .bool_and, 17655 .bit_and, 17656 .xor, 17657 .min, 17658 .max, 17659 .cmp_eq, 17660 .cmp_neq, 17661 => true, 17662 17663 else => false, 17664 }; 17665 17666 const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { 17667 .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, 17668 .register_pair => |lhs_regs| locks: { 17669 const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); 17670 break :locks .{ locks[0], locks[1] }; 17671 }, 17672 else => @splat(null), 17673 }; 17674 
defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); 17675 17676 const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { 17677 .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, 17678 .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), 17679 else => @splat(null), 17680 }; 17681 defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); 17682 17683 var flipped = false; 17684 var copied_to_dst = true; 17685 const dst_mcv: MCValue = dst: { 17686 const tracked_inst = switch (air_tag) { 17687 else => maybe_inst, 17688 .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null, 17689 }; 17690 if (maybe_inst) |inst| { 17691 if ((!sse_op or lhs_mcv.isRegister()) and 17692 self.reuseOperandAdvanced(inst, ordered_air[0], 0, lhs_mcv, tracked_inst)) 17693 break :dst lhs_mcv; 17694 if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and 17695 self.reuseOperandAdvanced(inst, ordered_air[1], 1, rhs_mcv, tracked_inst)) 17696 { 17697 flipped = true; 17698 break :dst rhs_mcv; 17699 } 17700 } 17701 const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true); 17702 if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) 17703 copied_to_dst = false 17704 else 17705 try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{}); 17706 rhs_mcv = try self.resolveInst(ordered_air[1]); 17707 break :dst dst_mcv; 17708 }; 17709 const dst_locks: [2]?RegisterLock = switch (dst_mcv) { 17710 .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null }, 17711 .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs), 17712 else => @splat(null), 17713 }; 17714 defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); 17715 17716 const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv; 17717 const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg| 17718 if (self.hasFeature(.avx) and 
unmat_src_mcv.isRegister() and maybe_inst != null and 17719 self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: { 17720 try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv, .{}); 17721 break :src .{ .register = mask_reg }; 17722 } 17723 else 17724 unmat_src_mcv; 17725 const src_locks: [2]?RegisterLock = switch (src_mcv) { 17726 .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null }, 17727 .register_pair => |src_regs| self.register_manager.lockRegs(2, src_regs), 17728 else => @splat(null), 17729 }; 17730 defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock); 17731 17732 if (!sse_op) { 17733 switch (air_tag) { 17734 .add, 17735 .add_wrap, 17736 => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv), 17737 17738 .sub, 17739 .sub_wrap, 17740 => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv), 17741 17742 .ptr_add, 17743 .ptr_sub, 17744 => { 17745 const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); 17746 const tmp_mcv = MCValue{ .register = tmp_reg }; 17747 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 17748 defer self.register_manager.unlockReg(tmp_lock); 17749 17750 const elem_size = lhs_ty.elemType2(zcu).abiSize(zcu); 17751 try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); 17752 try self.genBinOpMir( 17753 switch (air_tag) { 17754 .ptr_add => .{ ._, .add }, 17755 .ptr_sub => .{ ._, .sub }, 17756 else => unreachable, 17757 }, 17758 lhs_ty, 17759 dst_mcv, 17760 tmp_mcv, 17761 ); 17762 }, 17763 17764 .bool_or, 17765 .bit_or, 17766 => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv), 17767 17768 .bool_and, 17769 .bit_and, 17770 => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv), 17771 17772 .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv), 17773 17774 .min, 17775 .max, 17776 => { 17777 const resolved_src_mcv = switch (src_mcv) { 17778 else => 
src_mcv, 17779 .air_ref => |src_ref| try self.resolveInst(src_ref), 17780 }; 17781 17782 if (abi_size > 8) { 17783 const dst_regs = switch (dst_mcv) { 17784 .register_pair => |dst_regs| dst_regs, 17785 else => dst: { 17786 const dst_regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); 17787 const dst_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); 17788 defer for (dst_regs_locks) |lock| self.register_manager.unlockReg(lock); 17789 17790 try self.genCopy(lhs_ty, .{ .register_pair = dst_regs }, dst_mcv, .{}); 17791 break :dst dst_regs; 17792 }, 17793 }; 17794 const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs); 17795 defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock| 17796 self.register_manager.unlockReg(lock); 17797 17798 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 17799 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 17800 defer self.register_manager.unlockReg(tmp_lock); 17801 17802 const signed = lhs_ty.isSignedInt(zcu); 17803 const cc: Condition = switch (air_tag) { 17804 .min => if (signed) .nl else .nb, 17805 .max => if (signed) .nge else .nae, 17806 else => unreachable, 17807 }; 17808 17809 try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]); 17810 if (src_mcv.isBase()) { 17811 try self.asmRegisterMemory( 17812 .{ ._, .cmp }, 17813 dst_regs[0], 17814 try src_mcv.mem(self, .{ .size = .qword }), 17815 ); 17816 try self.asmRegisterMemory( 17817 .{ ._, .sbb }, 17818 tmp_reg, 17819 try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), 17820 ); 17821 try self.asmCmovccRegisterMemory( 17822 cc, 17823 dst_regs[0], 17824 try src_mcv.mem(self, .{ .size = .qword }), 17825 ); 17826 try self.asmCmovccRegisterMemory( 17827 cc, 17828 dst_regs[1], 17829 try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), 17830 ); 17831 } else { 17832 try self.asmRegisterRegister( 17833 .{ ._, .cmp }, 17834 
dst_regs[0], 17835 src_mcv.register_pair[0], 17836 ); 17837 try self.asmRegisterRegister( 17838 .{ ._, .sbb }, 17839 tmp_reg, 17840 src_mcv.register_pair[1], 17841 ); 17842 try self.asmCmovccRegisterRegister(cc, dst_regs[0], src_mcv.register_pair[0]); 17843 try self.asmCmovccRegisterRegister(cc, dst_regs[1], src_mcv.register_pair[1]); 17844 } 17845 try self.genCopy(lhs_ty, dst_mcv, .{ .register_pair = dst_regs }, .{}); 17846 } else { 17847 const mat_src_mcv: MCValue = if (switch (resolved_src_mcv) { 17848 .immediate, 17849 .eflags, 17850 .register_offset, 17851 .load_symbol, 17852 .lea_symbol, 17853 .load_direct, 17854 .lea_direct, 17855 .load_got, 17856 .lea_got, 17857 .load_tlv, 17858 .lea_tlv, 17859 .lea_frame, 17860 => true, 17861 .memory => |addr| std.math.cast(i32, @as(i64, @bitCast(addr))) == null, 17862 else => false, 17863 .register_pair, 17864 .register_overflow, 17865 => unreachable, 17866 }) 17867 .{ .register = try self.copyToTmpRegister(rhs_ty, resolved_src_mcv) } 17868 else 17869 resolved_src_mcv; 17870 const mat_mcv_lock = switch (mat_src_mcv) { 17871 .register => |reg| self.register_manager.lockReg(reg), 17872 else => null, 17873 }; 17874 defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock); 17875 17876 try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv); 17877 17878 const int_info = lhs_ty.intInfo(zcu); 17879 const cc: Condition = switch (int_info.signedness) { 17880 .unsigned => switch (air_tag) { 17881 .min => .a, 17882 .max => .b, 17883 else => unreachable, 17884 }, 17885 .signed => switch (air_tag) { 17886 .min => .g, 17887 .max => .l, 17888 else => unreachable, 17889 }, 17890 }; 17891 17892 const cmov_abi_size = @max(@as(u32, @intCast(lhs_ty.abiSize(zcu))), 2); 17893 const tmp_reg = switch (dst_mcv) { 17894 .register => |reg| reg, 17895 else => try self.copyToTmpRegister(lhs_ty, dst_mcv), 17896 }; 17897 const tmp_lock = self.register_manager.lockReg(tmp_reg); 17898 defer if (tmp_lock) |lock| 
self.register_manager.unlockReg(lock); 17899 switch (mat_src_mcv) { 17900 .none, 17901 .unreach, 17902 .dead, 17903 .undef, 17904 .immediate, 17905 .eflags, 17906 .register_pair, 17907 .register_triple, 17908 .register_quadruple, 17909 .register_offset, 17910 .register_overflow, 17911 .register_mask, 17912 .load_symbol, 17913 .lea_symbol, 17914 .load_direct, 17915 .lea_direct, 17916 .load_got, 17917 .lea_got, 17918 .load_tlv, 17919 .lea_tlv, 17920 .lea_frame, 17921 .elementwise_regs_then_frame, 17922 .reserved_frame, 17923 .air_ref, 17924 => unreachable, 17925 .register => |src_reg| try self.asmCmovccRegisterRegister( 17926 cc, 17927 registerAlias(tmp_reg, cmov_abi_size), 17928 registerAlias(src_reg, cmov_abi_size), 17929 ), 17930 .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( 17931 cc, 17932 registerAlias(tmp_reg, cmov_abi_size), 17933 switch (mat_src_mcv) { 17934 .memory => |addr| .{ 17935 .base = .{ .reg = .ds }, 17936 .mod = .{ .rm = .{ 17937 .size = .fromSize(cmov_abi_size), 17938 .disp = @intCast(@as(i64, @bitCast(addr))), 17939 } }, 17940 }, 17941 .indirect => |reg_off| .{ 17942 .base = .{ .reg = reg_off.reg }, 17943 .mod = .{ .rm = .{ 17944 .size = .fromSize(cmov_abi_size), 17945 .disp = reg_off.off, 17946 } }, 17947 }, 17948 .load_frame => |frame_addr| .{ 17949 .base = .{ .frame = frame_addr.index }, 17950 .mod = .{ .rm = .{ 17951 .size = .fromSize(cmov_abi_size), 17952 .disp = frame_addr.off, 17953 } }, 17954 }, 17955 else => unreachable, 17956 }, 17957 ), 17958 } 17959 try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }, .{}); 17960 } 17961 }, 17962 17963 .cmp_eq, .cmp_neq => { 17964 assert(lhs_ty.isVector(zcu) and lhs_ty.childType(zcu).toIntern() == .bool_type); 17965 try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv); 17966 switch (air_tag) { 17967 .cmp_eq => try self.genUnOpMir(.{ ._, .not }, lhs_ty, dst_mcv), 17968 .cmp_neq => {}, 17969 else => unreachable, 17970 } 17971 }, 17972 17973 else => return 
self.fail("TODO implement genBinOp for {s} {}", .{ 17974 @tagName(air_tag), lhs_ty.fmt(pt), 17975 }), 17976 } 17977 return dst_mcv; 17978 } 17979 17980 const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); 17981 const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { 17982 else => unreachable, 17983 .float => switch (lhs_ty.floatBits(self.target.*)) { 17984 16 => { 17985 assert(self.hasFeature(.f16c)); 17986 const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); 17987 17988 const tmp_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); 17989 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 17990 defer self.register_manager.unlockReg(tmp_lock); 17991 17992 if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( 17993 .{ .vp_w, .insr }, 17994 dst_reg, 17995 lhs_reg, 17996 try src_mcv.mem(self, .{ .size = .word }), 17997 .u(1), 17998 ) else try self.asmRegisterRegisterRegister( 17999 .{ .vp_, .unpcklwd }, 18000 dst_reg, 18001 lhs_reg, 18002 (if (src_mcv.isRegister()) 18003 src_mcv.getReg().? 
18004 else 18005 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 18006 ); 18007 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); 18008 try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); 18009 try self.asmRegisterRegisterRegister( 18010 switch (air_tag) { 18011 .add => .{ .v_ss, .add }, 18012 .sub => .{ .v_ss, .sub }, 18013 .mul => .{ .v_ss, .mul }, 18014 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, 18015 .max => .{ .v_ss, .max }, 18016 .min => .{ .v_ss, .max }, 18017 else => unreachable, 18018 }, 18019 dst_reg, 18020 dst_reg, 18021 tmp_reg, 18022 ); 18023 switch (air_tag) { 18024 .div_trunc, .div_floor => try self.asmRegisterRegisterRegisterImmediate( 18025 .{ .v_ss, .round }, 18026 dst_reg, 18027 dst_reg, 18028 dst_reg, 18029 .u(@as(u5, @bitCast(RoundMode{ 18030 .mode = switch (air_tag) { 18031 .div_trunc => .zero, 18032 .div_floor => .down, 18033 else => unreachable, 18034 }, 18035 .precision = .inexact, 18036 }))), 18037 ), 18038 else => {}, 18039 } 18040 try self.asmRegisterRegisterImmediate( 18041 .{ .v_, .cvtps2ph }, 18042 dst_reg, 18043 dst_reg, 18044 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), 18045 ); 18046 return dst_mcv; 18047 }, 18048 32 => switch (air_tag) { 18049 .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, 18050 .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, 18051 .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, 18052 .div_float, 18053 .div_trunc, 18054 .div_floor, 18055 .div_exact, 18056 => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, 18057 .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, 18058 .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, 18059 else => unreachable, 18060 }, 18061 64 => switch (air_tag) { 18062 .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, 18063 .sub => if (self.hasFeature(.avx)) 
.{ .v_sd, .sub } else .{ ._sd, .sub }, 18064 .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, 18065 .div_float, 18066 .div_trunc, 18067 .div_floor, 18068 .div_exact, 18069 => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, 18070 .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, 18071 .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, 18072 else => unreachable, 18073 }, 18074 80, 128 => null, 18075 else => unreachable, 18076 }, 18077 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 18078 else => null, 18079 .int => switch (lhs_ty.childType(zcu).intInfo(zcu).bits) { 18080 8 => switch (lhs_ty.vectorLen(zcu)) { 18081 1...16 => switch (air_tag) { 18082 .add, 18083 .add_wrap, 18084 => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add }, 18085 .sub, 18086 .sub_wrap, 18087 => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, 18088 .bit_and => if (self.hasFeature(.avx)) 18089 .{ .vp_, .@"and" } 18090 else 18091 .{ .p_, .@"and" }, 18092 .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, 18093 .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, 18094 .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18095 .signed => if (self.hasFeature(.avx)) 18096 .{ .vp_b, .mins } 18097 else if (self.hasFeature(.sse4_1)) 18098 .{ .p_b, .mins } 18099 else 18100 null, 18101 .unsigned => if (self.hasFeature(.avx)) 18102 .{ .vp_b, .minu } 18103 else if (self.hasFeature(.sse4_1)) 18104 .{ .p_b, .minu } 18105 else 18106 null, 18107 }, 18108 .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18109 .signed => if (self.hasFeature(.avx)) 18110 .{ .vp_b, .maxs } 18111 else if (self.hasFeature(.sse4_1)) 18112 .{ .p_b, .maxs } 18113 else 18114 null, 18115 .unsigned => if (self.hasFeature(.avx)) 18116 .{ .vp_b, .maxu } 18117 else if (self.hasFeature(.sse4_1)) 18118 .{ .p_b, .maxu } 18119 else 18120 
null, 18121 }, 18122 .cmp_lt, 18123 .cmp_lte, 18124 .cmp_gte, 18125 .cmp_gt, 18126 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18127 .signed => if (self.hasFeature(.avx)) 18128 .{ .vp_b, .cmpgt } 18129 else 18130 .{ .p_b, .cmpgt }, 18131 .unsigned => null, 18132 }, 18133 .cmp_eq, 18134 .cmp_neq, 18135 => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq }, 18136 else => null, 18137 }, 18138 17...32 => switch (air_tag) { 18139 .add, 18140 .add_wrap, 18141 => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null, 18142 .sub, 18143 .sub_wrap, 18144 => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null, 18145 .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, 18146 .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, 18147 .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, 18148 .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18149 .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null, 18150 .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null, 18151 }, 18152 .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18153 .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null, 18154 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null, 18155 }, 18156 .cmp_lt, 18157 .cmp_lte, 18158 .cmp_gte, 18159 .cmp_gt, 18160 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18161 .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null, 18162 .unsigned => null, 18163 }, 18164 .cmp_eq, 18165 .cmp_neq, 18166 => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null, 18167 else => null, 18168 }, 18169 else => null, 18170 }, 18171 16 => switch (lhs_ty.vectorLen(zcu)) { 18172 1...8 => switch (air_tag) { 18173 .add, 18174 .add_wrap, 18175 => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add }, 18176 .sub, 18177 .sub_wrap, 18178 => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub 
}, 18179 .mul, 18180 .mul_wrap, 18181 => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull }, 18182 .bit_and => if (self.hasFeature(.avx)) 18183 .{ .vp_, .@"and" } 18184 else 18185 .{ .p_, .@"and" }, 18186 .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, 18187 .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, 18188 .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18189 .signed => if (self.hasFeature(.avx)) 18190 .{ .vp_w, .mins } 18191 else 18192 .{ .p_w, .mins }, 18193 .unsigned => if (self.hasFeature(.avx)) 18194 .{ .vp_w, .minu } 18195 else 18196 .{ .p_w, .minu }, 18197 }, 18198 .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18199 .signed => if (self.hasFeature(.avx)) 18200 .{ .vp_w, .maxs } 18201 else 18202 .{ .p_w, .maxs }, 18203 .unsigned => if (self.hasFeature(.avx)) 18204 .{ .vp_w, .maxu } 18205 else 18206 .{ .p_w, .maxu }, 18207 }, 18208 .cmp_lt, 18209 .cmp_lte, 18210 .cmp_gte, 18211 .cmp_gt, 18212 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18213 .signed => if (self.hasFeature(.avx)) 18214 .{ .vp_w, .cmpgt } 18215 else 18216 .{ .p_w, .cmpgt }, 18217 .unsigned => null, 18218 }, 18219 .cmp_eq, 18220 .cmp_neq, 18221 => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq }, 18222 else => null, 18223 }, 18224 9...16 => switch (air_tag) { 18225 .add, 18226 .add_wrap, 18227 => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null, 18228 .sub, 18229 .sub_wrap, 18230 => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null, 18231 .mul, 18232 .mul_wrap, 18233 => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null, 18234 .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, 18235 .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, 18236 .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, 18237 .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18238 .signed => if 
(self.hasFeature(.avx2)) .{ .vp_w, .mins } else null, 18239 .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null, 18240 }, 18241 .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18242 .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null, 18243 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null, 18244 }, 18245 .cmp_lt, 18246 .cmp_lte, 18247 .cmp_gte, 18248 .cmp_gt, 18249 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18250 .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null, 18251 .unsigned => null, 18252 }, 18253 .cmp_eq, 18254 .cmp_neq, 18255 => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null, 18256 else => null, 18257 }, 18258 else => null, 18259 }, 18260 32 => switch (lhs_ty.vectorLen(zcu)) { 18261 1...4 => switch (air_tag) { 18262 .add, 18263 .add_wrap, 18264 => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add }, 18265 .sub, 18266 .sub_wrap, 18267 => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub }, 18268 .mul, 18269 .mul_wrap, 18270 => if (self.hasFeature(.avx)) 18271 .{ .vp_d, .mull } 18272 else if (self.hasFeature(.sse4_1)) 18273 .{ .p_d, .mull } 18274 else 18275 null, 18276 .bit_and => if (self.hasFeature(.avx)) 18277 .{ .vp_, .@"and" } 18278 else 18279 .{ .p_, .@"and" }, 18280 .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, 18281 .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, 18282 .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18283 .signed => if (self.hasFeature(.avx)) 18284 .{ .vp_d, .mins } 18285 else if (self.hasFeature(.sse4_1)) 18286 .{ .p_d, .mins } 18287 else 18288 null, 18289 .unsigned => if (self.hasFeature(.avx)) 18290 .{ .vp_d, .minu } 18291 else if (self.hasFeature(.sse4_1)) 18292 .{ .p_d, .minu } 18293 else 18294 null, 18295 }, 18296 .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18297 .signed => if (self.hasFeature(.avx)) 
18298 .{ .vp_d, .maxs } 18299 else if (self.hasFeature(.sse4_1)) 18300 .{ .p_d, .maxs } 18301 else 18302 null, 18303 .unsigned => if (self.hasFeature(.avx)) 18304 .{ .vp_d, .maxu } 18305 else if (self.hasFeature(.sse4_1)) 18306 .{ .p_d, .maxu } 18307 else 18308 null, 18309 }, 18310 .cmp_lt, 18311 .cmp_lte, 18312 .cmp_gte, 18313 .cmp_gt, 18314 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18315 .signed => if (self.hasFeature(.avx)) 18316 .{ .vp_d, .cmpgt } 18317 else 18318 .{ .p_d, .cmpgt }, 18319 .unsigned => null, 18320 }, 18321 .cmp_eq, 18322 .cmp_neq, 18323 => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq }, 18324 else => null, 18325 }, 18326 5...8 => switch (air_tag) { 18327 .add, 18328 .add_wrap, 18329 => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null, 18330 .sub, 18331 .sub_wrap, 18332 => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null, 18333 .mul, 18334 .mul_wrap, 18335 => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null, 18336 .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, 18337 .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, 18338 .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, 18339 .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18340 .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null, 18341 .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null, 18342 }, 18343 .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18344 .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null, 18345 .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null, 18346 }, 18347 .cmp_lt, 18348 .cmp_lte, 18349 .cmp_gte, 18350 .cmp_gt, 18351 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18352 .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null, 18353 .unsigned => null, 18354 }, 18355 .cmp_eq, 18356 .cmp_neq, 18357 => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } 
else null, 18358 else => null, 18359 }, 18360 else => null, 18361 }, 18362 64 => switch (lhs_ty.vectorLen(zcu)) { 18363 1...2 => switch (air_tag) { 18364 .add, 18365 .add_wrap, 18366 => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add }, 18367 .sub, 18368 .sub_wrap, 18369 => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, 18370 .bit_and => if (self.hasFeature(.avx)) 18371 .{ .vp_, .@"and" } 18372 else 18373 .{ .p_, .@"and" }, 18374 .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, 18375 .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, 18376 .cmp_lt, 18377 .cmp_lte, 18378 .cmp_gte, 18379 .cmp_gt, 18380 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18381 .signed => if (self.hasFeature(.avx)) 18382 .{ .vp_q, .cmpgt } 18383 else if (self.hasFeature(.sse4_2)) 18384 .{ .p_q, .cmpgt } 18385 else 18386 null, 18387 .unsigned => null, 18388 }, 18389 .cmp_eq, 18390 .cmp_neq, 18391 => if (self.hasFeature(.avx)) 18392 .{ .vp_q, .cmpeq } 18393 else if (self.hasFeature(.sse4_1)) 18394 .{ .p_q, .cmpeq } 18395 else 18396 null, 18397 else => null, 18398 }, 18399 3...4 => switch (air_tag) { 18400 .add, 18401 .add_wrap, 18402 => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null, 18403 .sub, 18404 .sub_wrap, 18405 => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null, 18406 .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, 18407 .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, 18408 .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, 18409 .cmp_eq, 18410 .cmp_neq, 18411 => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null, 18412 .cmp_lt, 18413 .cmp_lte, 18414 .cmp_gt, 18415 .cmp_gte, 18416 => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { 18417 .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null, 18418 .unsigned => null, 18419 }, 18420 else => null, 18421 }, 18422 else => null, 18423 }, 18424 else => 
null, 18425 }, 18426 .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 18427 16 => tag: { 18428 assert(self.hasFeature(.f16c)); 18429 const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); 18430 switch (lhs_ty.vectorLen(zcu)) { 18431 1 => { 18432 const tmp_reg = 18433 (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); 18434 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 18435 defer self.register_manager.unlockReg(tmp_lock); 18436 18437 if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( 18438 .{ .vp_w, .insr }, 18439 dst_reg, 18440 lhs_reg, 18441 try src_mcv.mem(self, .{ .size = .word }), 18442 .u(1), 18443 ) else try self.asmRegisterRegisterRegister( 18444 .{ .vp_, .unpcklwd }, 18445 dst_reg, 18446 lhs_reg, 18447 (if (src_mcv.isRegister()) 18448 src_mcv.getReg().? 18449 else 18450 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 18451 ); 18452 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); 18453 try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); 18454 try self.asmRegisterRegisterRegister( 18455 switch (air_tag) { 18456 .add => .{ .v_ss, .add }, 18457 .sub => .{ .v_ss, .sub }, 18458 .mul => .{ .v_ss, .mul }, 18459 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, 18460 .max => .{ .v_ss, .max }, 18461 .min => .{ .v_ss, .max }, 18462 else => unreachable, 18463 }, 18464 dst_reg, 18465 dst_reg, 18466 tmp_reg, 18467 ); 18468 try self.asmRegisterRegisterImmediate( 18469 .{ .v_, .cvtps2ph }, 18470 dst_reg, 18471 dst_reg, 18472 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), 18473 ); 18474 return dst_mcv; 18475 }, 18476 2 => { 18477 const tmp_reg = (try self.register_manager.allocReg( 18478 null, 18479 abi.RegisterClass.sse, 18480 )).to128(); 18481 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 18482 defer self.register_manager.unlockReg(tmp_lock); 18483 18484 if 
(src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( 18485 .{ .vp_d, .insr }, 18486 dst_reg, 18487 lhs_reg, 18488 try src_mcv.mem(self, .{ .size = .dword }), 18489 .u(1), 18490 ) else try self.asmRegisterRegisterRegister( 18491 .{ .v_ps, .unpckl }, 18492 dst_reg, 18493 lhs_reg, 18494 (if (src_mcv.isRegister()) 18495 src_mcv.getReg().? 18496 else 18497 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 18498 ); 18499 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); 18500 try self.asmRegisterRegisterRegister( 18501 .{ .v_ps, .movhl }, 18502 tmp_reg, 18503 dst_reg, 18504 dst_reg, 18505 ); 18506 try self.asmRegisterRegisterRegister( 18507 switch (air_tag) { 18508 .add => .{ .v_ps, .add }, 18509 .sub => .{ .v_ps, .sub }, 18510 .mul => .{ .v_ps, .mul }, 18511 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, 18512 .max => .{ .v_ps, .max }, 18513 .min => .{ .v_ps, .max }, 18514 else => unreachable, 18515 }, 18516 dst_reg, 18517 dst_reg, 18518 tmp_reg, 18519 ); 18520 try self.asmRegisterRegisterImmediate( 18521 .{ .v_, .cvtps2ph }, 18522 dst_reg, 18523 dst_reg, 18524 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), 18525 ); 18526 return dst_mcv; 18527 }, 18528 3...4 => { 18529 const tmp_reg = (try self.register_manager.allocReg( 18530 null, 18531 abi.RegisterClass.sse, 18532 )).to128(); 18533 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 18534 defer self.register_manager.unlockReg(tmp_lock); 18535 18536 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, lhs_reg); 18537 if (src_mcv.isBase()) try self.asmRegisterMemory( 18538 .{ .v_ps, .cvtph2 }, 18539 tmp_reg, 18540 try src_mcv.mem(self, .{ .size = .qword }), 18541 ) else try self.asmRegisterRegister( 18542 .{ .v_ps, .cvtph2 }, 18543 tmp_reg, 18544 (if (src_mcv.isRegister()) 18545 src_mcv.getReg().? 
18546 else 18547 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 18548 ); 18549 try self.asmRegisterRegisterRegister( 18550 switch (air_tag) { 18551 .add => .{ .v_ps, .add }, 18552 .sub => .{ .v_ps, .sub }, 18553 .mul => .{ .v_ps, .mul }, 18554 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, 18555 .max => .{ .v_ps, .max }, 18556 .min => .{ .v_ps, .max }, 18557 else => unreachable, 18558 }, 18559 dst_reg, 18560 dst_reg, 18561 tmp_reg, 18562 ); 18563 try self.asmRegisterRegisterImmediate( 18564 .{ .v_, .cvtps2ph }, 18565 dst_reg, 18566 dst_reg, 18567 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), 18568 ); 18569 return dst_mcv; 18570 }, 18571 5...8 => { 18572 const tmp_reg = (try self.register_manager.allocReg( 18573 null, 18574 abi.RegisterClass.sse, 18575 )).to256(); 18576 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 18577 defer self.register_manager.unlockReg(tmp_lock); 18578 18579 try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), lhs_reg); 18580 if (src_mcv.isBase()) try self.asmRegisterMemory( 18581 .{ .v_ps, .cvtph2 }, 18582 tmp_reg, 18583 try src_mcv.mem(self, .{ .size = .xword }), 18584 ) else try self.asmRegisterRegister( 18585 .{ .v_ps, .cvtph2 }, 18586 tmp_reg, 18587 (if (src_mcv.isRegister()) 18588 src_mcv.getReg().? 
18589 else 18590 try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), 18591 ); 18592 try self.asmRegisterRegisterRegister( 18593 switch (air_tag) { 18594 .add => .{ .v_ps, .add }, 18595 .sub => .{ .v_ps, .sub }, 18596 .mul => .{ .v_ps, .mul }, 18597 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, 18598 .max => .{ .v_ps, .max }, 18599 .min => .{ .v_ps, .max }, 18600 else => unreachable, 18601 }, 18602 dst_reg.to256(), 18603 dst_reg.to256(), 18604 tmp_reg, 18605 ); 18606 try self.asmRegisterRegisterImmediate( 18607 .{ .v_, .cvtps2ph }, 18608 dst_reg, 18609 dst_reg.to256(), 18610 .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), 18611 ); 18612 return dst_mcv; 18613 }, 18614 else => break :tag null, 18615 } 18616 }, 18617 32 => switch (lhs_ty.vectorLen(zcu)) { 18618 1 => switch (air_tag) { 18619 .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, 18620 .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, 18621 .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, 18622 .div_float, 18623 .div_trunc, 18624 .div_floor, 18625 .div_exact, 18626 => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, 18627 .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, 18628 .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, 18629 .cmp_lt, 18630 .cmp_lte, 18631 .cmp_eq, 18632 .cmp_gte, 18633 .cmp_gt, 18634 .cmp_neq, 18635 => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp }, 18636 else => unreachable, 18637 }, 18638 2...4 => switch (air_tag) { 18639 .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add }, 18640 .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub }, 18641 .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul }, 18642 .div_float, 18643 .div_trunc, 18644 .div_floor, 18645 .div_exact, 18646 => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, 18647 
.max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, 18648 .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, 18649 .cmp_lt, 18650 .cmp_lte, 18651 .cmp_eq, 18652 .cmp_gte, 18653 .cmp_gt, 18654 .cmp_neq, 18655 => if (self.hasFeature(.avx)) .{ .v_ps, .cmp } else .{ ._ps, .cmp }, 18656 else => unreachable, 18657 }, 18658 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { 18659 .add => .{ .v_ps, .add }, 18660 .sub => .{ .v_ps, .sub }, 18661 .mul => .{ .v_ps, .mul }, 18662 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, 18663 .max => .{ .v_ps, .max }, 18664 .min => .{ .v_ps, .min }, 18665 .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp }, 18666 else => unreachable, 18667 } else null, 18668 else => null, 18669 }, 18670 64 => switch (lhs_ty.vectorLen(zcu)) { 18671 1 => switch (air_tag) { 18672 .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, 18673 .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, 18674 .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, 18675 .div_float, 18676 .div_trunc, 18677 .div_floor, 18678 .div_exact, 18679 => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, 18680 .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, 18681 .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, 18682 .cmp_lt, 18683 .cmp_lte, 18684 .cmp_eq, 18685 .cmp_gte, 18686 .cmp_gt, 18687 .cmp_neq, 18688 => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp }, 18689 else => unreachable, 18690 }, 18691 2 => switch (air_tag) { 18692 .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add }, 18693 .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub }, 18694 .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul }, 18695 .div_float, 18696 .div_trunc, 18697 .div_floor, 18698 .div_exact, 18699 => if 
(self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, 18700 .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, 18701 .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, 18702 .cmp_lt, 18703 .cmp_lte, 18704 .cmp_eq, 18705 .cmp_gte, 18706 .cmp_gt, 18707 .cmp_neq, 18708 => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp }, 18709 else => unreachable, 18710 }, 18711 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { 18712 .add => .{ .v_pd, .add }, 18713 .sub => .{ .v_pd, .sub }, 18714 .mul => .{ .v_pd, .mul }, 18715 .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, 18716 .max => .{ .v_pd, .max }, 18717 .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp }, 18718 .min => .{ .v_pd, .min }, 18719 else => unreachable, 18720 } else null, 18721 else => null, 18722 }, 18723 80, 128 => null, 18724 else => unreachable, 18725 }, 18726 }, 18727 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ 18728 @tagName(air_tag), lhs_ty.fmt(pt), 18729 }); 18730 18731 const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias( 18732 if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?, 18733 abi_size, 18734 ) else null; 18735 const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null; 18736 defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock); 18737 18738 switch (mir_tag[1]) { 18739 else => if (self.hasFeature(.avx)) { 18740 const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); 18741 if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( 18742 mir_tag, 18743 dst_reg, 18744 lhs_reg, 18745 try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { 18746 else => .fromSize(abi_size), 18747 .vector => .fromBitSize(dst_reg.bitSize()), 18748 } }), 18749 ) else try self.asmRegisterRegisterRegister( 18750 mir_tag, 18751 dst_reg, 18752 lhs_reg, 
18753 registerAlias(if (src_mcv.isRegister()) 18754 src_mcv.getReg().? 18755 else 18756 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), 18757 ); 18758 } else { 18759 assert(copied_to_dst); 18760 if (src_mcv.isBase()) try self.asmRegisterMemory( 18761 mir_tag, 18762 dst_reg, 18763 try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { 18764 else => .fromSize(abi_size), 18765 .vector => .fromBitSize(dst_reg.bitSize()), 18766 } }), 18767 ) else try self.asmRegisterRegister( 18768 mir_tag, 18769 dst_reg, 18770 registerAlias(if (src_mcv.isRegister()) 18771 src_mcv.getReg().? 18772 else 18773 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), 18774 ); 18775 }, 18776 .cmp => { 18777 const imm: Immediate = .u(switch (air_tag) { 18778 .cmp_eq => 0, 18779 .cmp_lt, .cmp_gt => 1, 18780 .cmp_lte, .cmp_gte => 2, 18781 .cmp_neq => 4, 18782 else => unreachable, 18783 }); 18784 if (self.hasFeature(.avx)) { 18785 const lhs_reg = 18786 if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); 18787 if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( 18788 mir_tag, 18789 dst_reg, 18790 lhs_reg, 18791 try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { 18792 else => .fromSize(abi_size), 18793 .vector => .fromBitSize(dst_reg.bitSize()), 18794 } }), 18795 imm, 18796 ) else try self.asmRegisterRegisterRegisterImmediate( 18797 mir_tag, 18798 dst_reg, 18799 lhs_reg, 18800 registerAlias(if (src_mcv.isRegister()) 18801 src_mcv.getReg().? 
18802 else 18803 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), 18804 imm, 18805 ); 18806 } else { 18807 assert(copied_to_dst); 18808 if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( 18809 mir_tag, 18810 dst_reg, 18811 try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { 18812 else => .fromSize(abi_size), 18813 .vector => .fromBitSize(dst_reg.bitSize()), 18814 } }), 18815 imm, 18816 ) else try self.asmRegisterRegisterImmediate( 18817 mir_tag, 18818 dst_reg, 18819 registerAlias(if (src_mcv.isRegister()) 18820 src_mcv.getReg().? 18821 else 18822 try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), 18823 imm, 18824 ); 18825 } 18826 }, 18827 } 18828 18829 switch (air_tag) { 18830 .add, .add_wrap, .sub, .sub_wrap, .mul, .mul_wrap, .div_float, .div_exact => {}, 18831 .div_trunc, .div_floor => try self.genRound(lhs_ty, dst_reg, .{ .register = dst_reg }, .{ 18832 .mode = switch (air_tag) { 18833 .div_trunc => .zero, 18834 .div_floor => .down, 18835 else => unreachable, 18836 }, 18837 .precision = .inexact, 18838 }), 18839 .bit_and, .bit_or, .xor => {}, 18840 .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) { 18841 const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size); 18842 18843 try self.asmRegisterRegisterRegisterImmediate( 18844 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { 18845 .float => switch (lhs_ty.floatBits(self.target.*)) { 18846 32 => .{ .v_ss, .cmp }, 18847 64 => .{ .v_sd, .cmp }, 18848 16, 80, 128 => null, 18849 else => unreachable, 18850 }, 18851 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 18852 .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 18853 32 => switch (lhs_ty.vectorLen(zcu)) { 18854 1 => .{ .v_ss, .cmp }, 18855 2...8 => .{ .v_ps, .cmp }, 18856 else => null, 18857 }, 18858 64 => switch (lhs_ty.vectorLen(zcu)) { 18859 1 => .{ .v_sd, .cmp }, 18860 2...4 => .{ .v_pd, .cmp }, 18861 else => null, 18862 }, 18863 16, 80, 128 => null, 18864 
else => unreachable, 18865 }, 18866 else => unreachable, 18867 }, 18868 else => unreachable, 18869 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ 18870 @tagName(air_tag), lhs_ty.fmt(pt), 18871 }), 18872 mask_reg, 18873 rhs_copy_reg, 18874 rhs_copy_reg, 18875 .u(3), // unord 18876 ); 18877 try self.asmRegisterRegisterRegisterRegister( 18878 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { 18879 .float => switch (lhs_ty.floatBits(self.target.*)) { 18880 32 => .{ .v_ps, .blendv }, 18881 64 => .{ .v_pd, .blendv }, 18882 16, 80, 128 => null, 18883 else => unreachable, 18884 }, 18885 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 18886 .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 18887 32 => switch (lhs_ty.vectorLen(zcu)) { 18888 1...8 => .{ .v_ps, .blendv }, 18889 else => null, 18890 }, 18891 64 => switch (lhs_ty.vectorLen(zcu)) { 18892 1...4 => .{ .v_pd, .blendv }, 18893 else => null, 18894 }, 18895 16, 80, 128 => null, 18896 else => unreachable, 18897 }, 18898 else => unreachable, 18899 }, 18900 else => unreachable, 18901 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ 18902 @tagName(air_tag), lhs_ty.fmt(pt), 18903 }), 18904 dst_reg, 18905 dst_reg, 18906 lhs_copy_reg.?, 18907 mask_reg, 18908 ); 18909 } else { 18910 const has_blend = self.hasFeature(.sse4_1); 18911 try self.asmRegisterRegisterImmediate( 18912 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { 18913 .float => switch (lhs_ty.floatBits(self.target.*)) { 18914 32 => .{ ._ss, .cmp }, 18915 64 => .{ ._sd, .cmp }, 18916 16, 80, 128 => null, 18917 else => unreachable, 18918 }, 18919 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 18920 .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 18921 32 => switch (lhs_ty.vectorLen(zcu)) { 18922 1 => .{ ._ss, .cmp }, 18923 2...4 => .{ ._ps, .cmp }, 18924 else => null, 18925 }, 18926 64 => switch (lhs_ty.vectorLen(zcu)) { 18927 1 => .{ ._sd, .cmp }, 
18928 2 => .{ ._pd, .cmp }, 18929 else => null, 18930 }, 18931 16, 80, 128 => null, 18932 else => unreachable, 18933 }, 18934 else => unreachable, 18935 }, 18936 else => unreachable, 18937 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ 18938 @tagName(air_tag), lhs_ty.fmt(pt), 18939 }), 18940 mask_reg, 18941 mask_reg, 18942 .u(if (has_blend) 3 else 7), // unord, ord 18943 ); 18944 if (has_blend) try self.asmRegisterRegisterRegister( 18945 @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { 18946 .float => switch (lhs_ty.floatBits(self.target.*)) { 18947 32 => .{ ._ps, .blendv }, 18948 64 => .{ ._pd, .blendv }, 18949 16, 80, 128 => null, 18950 else => unreachable, 18951 }, 18952 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 18953 .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 18954 32 => switch (lhs_ty.vectorLen(zcu)) { 18955 1...4 => .{ ._ps, .blendv }, 18956 else => null, 18957 }, 18958 64 => switch (lhs_ty.vectorLen(zcu)) { 18959 1...2 => .{ ._pd, .blendv }, 18960 else => null, 18961 }, 18962 16, 80, 128 => null, 18963 else => unreachable, 18964 }, 18965 else => unreachable, 18966 }, 18967 else => unreachable, 18968 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ 18969 @tagName(air_tag), lhs_ty.fmt(pt), 18970 }), 18971 dst_reg, 18972 lhs_copy_reg.?, 18973 mask_reg, 18974 ) else { 18975 const mir_fixes = @as(?Mir.Inst.Fixes, switch (lhs_ty.zigTypeTag(zcu)) { 18976 .float => switch (lhs_ty.floatBits(self.target.*)) { 18977 32 => ._ps, 18978 64 => ._pd, 18979 16, 80, 128 => null, 18980 else => unreachable, 18981 }, 18982 .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 18983 .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 18984 32 => switch (lhs_ty.vectorLen(zcu)) { 18985 1...4 => ._ps, 18986 else => null, 18987 }, 18988 64 => switch (lhs_ty.vectorLen(zcu)) { 18989 1...2 => ._pd, 18990 else => null, 18991 }, 18992 16, 80, 128 => null, 18993 else => 
unreachable, 18994 }, 18995 else => unreachable, 18996 }, 18997 else => unreachable, 18998 }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ 18999 @tagName(air_tag), lhs_ty.fmt(pt), 19000 }); 19001 try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_reg, mask_reg); 19002 try self.asmRegisterRegister(.{ mir_fixes, .andn }, mask_reg, lhs_copy_reg.?); 19003 try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_reg, mask_reg); 19004 } 19005 }, 19006 .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => { 19007 switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { 19008 .int => switch (air_tag) { 19009 .cmp_lt, 19010 .cmp_eq, 19011 .cmp_gt, 19012 => {}, 19013 .cmp_lte, 19014 .cmp_gte, 19015 .cmp_neq, 19016 => { 19017 const unsigned_ty = try lhs_ty.toUnsigned(pt); 19018 const not_mcv = try self.genTypedValue(try unsigned_ty.maxInt(pt, unsigned_ty)); 19019 const not_mem: Memory = if (not_mcv.isBase()) 19020 try not_mcv.mem(self, .{ .size = .fromSize(abi_size) }) 19021 else 19022 .{ .base = .{ 19023 .reg = try self.copyToTmpRegister(.usize, not_mcv.address()), 19024 }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } } }; 19025 switch (mir_tag[0]) { 19026 .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( 19027 .{ .vp_, .xor }, 19028 dst_reg, 19029 dst_reg, 19030 not_mem, 19031 ), 19032 .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory( 19033 .{ .p_, .xor }, 19034 dst_reg, 19035 not_mem, 19036 ), 19037 else => unreachable, 19038 } 19039 }, 19040 else => unreachable, 19041 }, 19042 .float => {}, 19043 else => unreachable, 19044 } 19045 19046 const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp); 19047 const gp_lock = self.register_manager.lockRegAssumeUnused(gp_reg); 19048 defer self.register_manager.unlockReg(gp_lock); 19049 19050 try self.asmRegisterRegister(switch (mir_tag[0]) { 19051 ._pd, ._sd, .p_q => .{ ._pd, .movmsk }, 19052 ._ps, ._ss, .p_d => .{ ._ps, .movmsk }, 19053 .p_b => .{ .p_b, 
.movmsk },
                .p_w => movmsk: {
                    try self.asmRegisterRegister(.{ .p_b, .ackssw }, dst_reg, dst_reg);
                    break :movmsk .{ .p_b, .movmsk };
                },
                .v_pd, .v_sd, .vp_q => .{ .v_pd, .movmsk },
                .v_ps, .v_ss, .vp_d => .{ .v_ps, .movmsk },
                .vp_b => .{ .vp_b, .movmsk },
                .vp_w => movmsk: {
                    try self.asmRegisterRegisterRegister(
                        .{ .vp_b, .ackssw },
                        dst_reg,
                        dst_reg,
                        dst_reg,
                    );
                    break :movmsk .{ .vp_b, .movmsk };
                },
                else => unreachable,
            }, gp_reg.to32(), dst_reg);
            return .{ .register = gp_reg };
        },
        else => unreachable,
    }

    return dst_mcv;
}

/// Emits the two-operand MIR instruction `mir_tag` with `dst_mcv` as the
/// destination and `src_mcv` as the source, both of type `ty`.
///
/// Values wider than one limb are processed limb by limb. The first limb uses
/// `mir_tag` itself; later limbs use `adc` for `add`, `sbb` for `sub`/`cmp`,
/// and the unchanged tag for `or`/`and`/`xor`. Any other tag on a later limb
/// fails with a "TODO genBinOpMir implement large ABI" error.
///
/// EFLAGS is spilled (if occupied) before anything is emitted.
/// `dst_mcv` must be a register-like or memory-like value; every other
/// `MCValue` tag is treated as an unmodifiable destination (`unreachable`).
fn genBinOpMir(
    self: *CodeGen,
    mir_tag: Mir.Inst.FixedTag,
    ty: Type,
    dst_mcv: MCValue,
    src_mcv: MCValue,
) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const abi_size: u32 = @intCast(ty.abiSize(zcu));
    try self.spillEflagsIfOccupied();
    switch (dst_mcv) {
        .none,
        .unreach,
        .dead,
        .undef,
        .immediate,
        .eflags,
        .register_overflow,
        .register_mask,
        .lea_direct,
        .lea_got,
        .lea_tlv,
        .lea_frame,
        .lea_symbol,
        .elementwise_regs_then_frame,
        .reserved_frame,
        .air_ref,
        => unreachable, // unmodifiable destination
        .register, .register_pair, .register_triple, .register_quadruple, .register_offset => {
            switch (dst_mcv) {
                .register, .register_pair, .register_triple, .register_quadruple => {},
                // A register destination with a non-zero offset is not supported.
                .register_offset => |ro| assert(ro.off == 0),
                else => unreachable,
            }
            for (dst_mcv.getRegs(), 0..) |dst_reg, dst_reg_i| {
                const dst_reg_lock = self.register_manager.lockReg(dst_reg);
                defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);

                // Only the first two destination registers are handled here;
                // the second one uses the carry/borrow-propagating form.
                const mir_limb_tag: Mir.Inst.FixedTag = switch (dst_reg_i) {
                    0 => mir_tag,
                    1 => switch (mir_tag[1]) {
                        .add => .{ ._, .adc },
                        .sub, .cmp => .{ ._, .sbb },
                        .@"or", .@"and", .xor => mir_tag,
                        else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
                            @tagName(mir_tag[1]),
                        }),
                    },
                    else => unreachable,
                };
                const off: u4 = @intCast(dst_reg_i * 8);
                const limb_abi_size = @min(abi_size - off, 8);
                const dst_alias = registerAlias(dst_reg, limb_abi_size);
                switch (src_mcv) {
                    .none,
                    .unreach,
                    .dead,
                    .undef,
                    .register_overflow,
                    .register_mask,
                    .elementwise_regs_then_frame,
                    .reserved_frame,
                    => unreachable,
                    .register,
                    .register_pair,
                    .register_triple,
                    .register_quadruple,
                    => try self.asmRegisterRegister(
                        mir_limb_tag,
                        dst_alias,
                        registerAlias(src_mcv.getRegs()[dst_reg_i], limb_abi_size),
                    ),
                    .immediate => |imm| {
                        // Immediates are only accepted for the first limb.
                        assert(off == 0);
                        switch (self.regBitSize(ty)) {
                            8 => try self.asmRegisterImmediate(
                                mir_limb_tag,
                                dst_alias,
                                if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |small|
                                    .s(small)
                                else
                                    .u(@as(u8, @intCast(imm))),
                            ),
                            16 => try self.asmRegisterImmediate(
                                mir_limb_tag,
                                dst_alias,
                                if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |small|
                                    .s(small)
                                else
                                    .u(@as(u16, @intCast(imm))),
                            ),
                            32 => try self.asmRegisterImmediate(
                                mir_limb_tag,
                                dst_alias,
                                if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
                                    .s(small)
                                else
                                    .u(@as(u32, @intCast(imm))),
                            ),
                            // A 64-bit immediate that does not fit in a signed
                            // 32-bit value is first copied to a temporary register.
                            64 => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
                                try self.asmRegisterImmediate(mir_limb_tag, dst_alias, .s(small))
                            else
                                try self.asmRegisterRegister(mir_limb_tag, dst_alias, registerAlias(
                                    try self.copyToTmpRegister(ty, src_mcv),
                                    limb_abi_size,
                                )),
                            else => unreachable,
                        }
                    },
                    .eflags,
                    .register_offset,
                    .memory,
                    .indirect,
                    .load_symbol,
                    .lea_symbol,
                    .load_direct,
                    .lea_direct,
                    .load_got,
                    .lea_got,
                    .load_tlv,
                    .lea_tlv,
                    .load_frame,
                    .lea_frame,
                    => {
                        // Fast path: sources that can be encoded directly as a
                        // memory operand. Anything else leaves the `direct`
                        // block and is handled by the fallback switch below.
                        direct: {
                            try self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) {
                                .memory => |addr| .{
                                    .base = .{ .reg = .ds },
                                    .mod = .{ .rm = .{
                                        .size = .fromSize(limb_abi_size),
                                        .disp = std.math.cast(i32, addr + off) orelse break :direct,
                                    } },
                                },
                                .indirect => |reg_off| .{
                                    .base = .{ .reg = reg_off.reg },
                                    .mod = .{ .rm = .{
                                        .size = .fromSize(limb_abi_size),
                                        .disp = reg_off.off + off,
                                    } },
                                },
                                .load_frame => |frame_addr| .{
                                    .base = .{ .frame = frame_addr.index },
                                    .mod = .{ .rm = .{
                                        .size = .fromSize(limb_abi_size),
                                        .disp = frame_addr.off + off,
                                    } },
                                },
                                else => break :direct,
                            });
                            continue;
                        }

                        switch (src_mcv) {
                            .eflags,
                            .register_offset,
                            .lea_symbol,
                            .lea_direct,
                            .lea_got,
                            .lea_tlv,
                            .lea_frame,
                            => {
                                // Value-like sources: materialize into a
                                // temporary register and retry.
                                assert(off == 0);
                                const reg = try self.copyToTmpRegister(ty, src_mcv);
                                return self.genBinOpMir(
                                    mir_limb_tag,
                                    ty,
                                    dst_mcv,
                                    .{ .register = reg },
                                );
                            },
                            .memory,
                            .load_symbol,
                            .load_direct,
                            .load_got,
                            .load_tlv,
                            => {
                                // Memory-like sources: load the address into a
                                // temporary register and retry as `.indirect`.
                                const ptr_ty = try pt.singleConstPtrType(ty);
                                const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address());
                                return self.genBinOpMir(mir_limb_tag, ty, dst_mcv, .{
                                    .indirect = .{ .reg = addr_reg, .off = off },
                                });
                            },
                            else => unreachable,
                        }
                    },
                    .air_ref => |src_ref| return self.genBinOpMir(
                        mir_tag,
                        ty,
                        dst_mcv,
                        try self.resolveInst(src_ref),
                    ),
                }
            }
        },
        .memory, .indirect, .load_symbol, .load_got, .load_direct, .load_tlv, .load_frame => {
            const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
            const limb_abi_size: u32 = @min(abi_size, 8);

            // Destinations that are not directly addressable get their address
            // loaded into a locked scratch register. `.indirect` and
            // `.load_frame` are addressed through their own base below, so they
            // need no scratch register.
            const dst_info: OpInfo = switch (dst_mcv) {
                else => unreachable,
                .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: {
                    const dst_addr_reg =
                        (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
                    const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
                    errdefer self.register_manager.unlockReg(dst_addr_lock);

                    try self.genSetReg(dst_addr_reg, .usize, dst_mcv.address(), .{});
                    break :dst .{ .addr_reg = dst_addr_reg, .addr_lock = dst_addr_lock };
                },
                // `.indirect` is accepted by the enclosing case and by
                // `dst_limb_mem`; without this arm it would hit `unreachable`.
                // NOTE(review): the base register of an `.indirect` destination
                // is not locked here - presumably the caller keeps it locked.
                .indirect, .load_frame => null,
            };
            defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);

            const resolved_src_mcv = switch (src_mcv) {
                else => src_mcv,
                .air_ref => |src_ref| try self.resolveInst(src_ref),
            };
            const src_info: OpInfo = switch (resolved_src_mcv) {
                .none,
                .unreach,
                .dead,
                .undef,
                .register_overflow,
                .register_mask,
                .elementwise_regs_then_frame,
                .reserved_frame,
                .air_ref,
                => unreachable,
                .immediate,
                .eflags,
                .register,
                .register_pair,
                .register_triple,
                .register_quadruple,
                .register_offset,
                .indirect,
                .lea_direct,
                .lea_got,
                .lea_tlv,
                .load_frame,
                .lea_frame,
                .lea_symbol,
                => null,
                .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: {
                    switch (resolved_src_mcv) {
                        // An absolute address whose first and last limb both fit
                        // in a signed 32-bit displacement needs no address register.
                        .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr))) != null and
                            std.math.cast(i32, @as(i64, @bitCast(addr)) + abi_size - limb_abi_size) != null)
                            break :src null,
                        .load_symbol, .load_got, .load_direct, .load_tlv => {},
                        else => unreachable,
                    }

                    const src_addr_reg =
                        (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
                    const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
                    errdefer self.register_manager.unlockReg(src_addr_lock);

                    try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{});
                    break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
                },
            };
            defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);

            const ty_signedness =
                if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned;
            // NOTE(review): the signed/unsigned mapping below looks inverted
            // (`.signed => .usize`); both limb types are 8 bytes wide, so it is
            // kept as-is - confirm whether signedness matters to the callees.
            const limb_ty: Type = if (abi_size <= 8) ty else switch (ty_signedness) {
                .signed => .usize,
                .unsigned => .isize,
            };
            var limb_i: usize = 0;
            var off: i32 = 0;
            while (off < abi_size) : ({
                limb_i += 1;
                off += 8;
            }) {
                const mir_limb_tag: Mir.Inst.FixedTag = switch (limb_i) {
                    0 => mir_tag,
                    else => switch (mir_tag[1]) {
                        .add => .{ ._, .adc },
                        .sub, .cmp => .{ ._, .sbb },
                        .@"or", .@"and", .xor => mir_tag,
                        else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{
                            @tagName(mir_tag[1]),
                        }),
                    },
                };
                const dst_limb_mem: Memory = switch (dst_mcv) {
                    .memory,
                    .load_symbol,
                    .load_got,
                    .load_direct,
                    .load_tlv,
                    => .{
                        .base = .{ .reg = dst_info.?.addr_reg },
                        .mod = .{ .rm = .{
                            .size = .fromSize(limb_abi_size),
                            .disp = off,
                        } },
                    },
                    .indirect => |reg_off| .{
                        .base = .{ .reg = reg_off.reg },
                        .mod = .{ .rm = .{
                            .size = .fromSize(limb_abi_size),
                            .disp = reg_off.off + off,
                        } },
                    },
                    .load_frame => |frame_addr| .{
                        .base = .{ .frame = frame_addr.index },
                        .mod = .{ .rm = .{
                            .size = .fromSize(limb_abi_size),
                            .disp = frame_addr.off + off,
                        } },
                    },
                    else => unreachable,
                };
                switch (resolved_src_mcv) {
                    .none,
                    .unreach,
                    .dead,
                    .undef,
                    .register_overflow,
                    .register_mask,
                    .elementwise_regs_then_frame,
                    .reserved_frame,
                    .air_ref,
                    => unreachable,
                    .immediate => |src_imm| {
                        // Limbs past the first receive the sign extension of
                        // the immediate (all ones or zero) for signed types and
                        // zero for unsigned types.
                        const imm: u64 = switch (limb_i) {
                            0 => src_imm,
                            else => switch (ty_signedness) {
                                .signed => @bitCast(@as(i64, @bitCast(src_imm)) >> 63),
                                .unsigned => 0,
                            },
                        };
                        switch (self.regBitSize(limb_ty)) {
                            8 => try self.asmMemoryImmediate(
                                mir_limb_tag,
                                dst_limb_mem,
                                if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |small|
                                    .s(small)
                                else
                                    .u(@as(u8, @intCast(imm))),
                            ),
                            16 => try self.asmMemoryImmediate(
                                mir_limb_tag,
                                dst_limb_mem,
                                if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |small|
                                    .s(small)
                                else
                                    .u(@as(u16, @intCast(imm))),
                            ),
                            32 => try self.asmMemoryImmediate(
                                mir_limb_tag,
                                dst_limb_mem,
                                if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
                                    .s(small)
                                else
                                    .u(@as(u32, @intCast(imm))),
                            ),
                            64 => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small|
                                try self.asmMemoryImmediate(mir_limb_tag, dst_limb_mem, .s(small))
                            else
                                try self.asmMemoryRegister(
                                    mir_limb_tag,
                                    dst_limb_mem,
                                    registerAlias(
                                        try self.copyToTmpRegister(limb_ty, .{ .immediate = imm }),
                                        limb_abi_size,
                                    ),
                                ),
                            else => unreachable,
                        }
                    },
                    .register,
                    .register_pair,
                    .register_triple,
                    .register_quadruple,
                    .register_offset,
                    .eflags,
                    .memory,
                    .indirect,
                    .load_symbol,
                    .lea_symbol,
                    .load_direct,
                    .lea_direct,
                    .load_got,
                    .lea_got,
                    .load_tlv,
                    .lea_tlv,
                    .load_frame,
                    .lea_frame,
                    => {
                        // Compute the MCValue describing this limb of the source.
                        const src_limb_mcv: MCValue = if (src_info) |info| .{
                            .indirect = .{ .reg = info.addr_reg, .off = off },
                        } else switch (resolved_src_mcv) {
                            .register, .register_pair, .register_triple, .register_quadruple => .{
                                .register = resolved_src_mcv.getRegs()[limb_i],
                            },
                            // Value-like sources only provide the first limb;
                            // the remaining limbs are treated as zero.
                            .eflags,
                            .register_offset,
                            .lea_symbol,
                            .lea_direct,
                            .lea_got,
                            .lea_tlv,
                            .lea_frame,
                            => switch (limb_i) {
                                0 => resolved_src_mcv,
                                else => .{ .immediate = 0 },
                            },
                            .memory => |addr| .{ .memory = @bitCast(@as(i64, @bitCast(addr)) + off) },
                            .indirect => |reg_off| .{ .indirect = .{
                                .reg = reg_off.reg,
                                .off = reg_off.off + off,
                            } },
                            .load_frame => |frame_addr| .{ .load_frame = .{
                                .index = frame_addr.index,
                                .off = frame_addr.off + off,
                            } },
                            else => unreachable,
                        };
                        // The source limb must be in a register for the
                        // memory-destination form of the instruction.
                        const src_limb_reg = if (src_limb_mcv.isRegister())
                            src_limb_mcv.getReg().?
                        else
                            try self.copyToTmpRegister(limb_ty, src_limb_mcv);
                        try self.asmMemoryRegister(
                            mir_limb_tag,
                            dst_limb_mem,
                            registerAlias(src_limb_reg, limb_abi_size),
                        );
                    },
                }
            }
        },
    }
}

/// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
/// Does not support byte-size operands.
19519 fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { 19520 const pt = self.pt; 19521 const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); 19522 try self.spillEflagsIfOccupied(); 19523 switch (dst_mcv) { 19524 .none, 19525 .unreach, 19526 .dead, 19527 .undef, 19528 .immediate, 19529 .eflags, 19530 .register_offset, 19531 .register_overflow, 19532 .register_mask, 19533 .lea_symbol, 19534 .lea_direct, 19535 .lea_got, 19536 .lea_tlv, 19537 .lea_frame, 19538 .elementwise_regs_then_frame, 19539 .reserved_frame, 19540 .air_ref, 19541 => unreachable, // unmodifiable destination 19542 .register => |dst_reg| { 19543 const dst_alias = registerAlias(dst_reg, abi_size); 19544 const dst_lock = self.register_manager.lockReg(dst_reg); 19545 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 19546 19547 const resolved_src_mcv = switch (src_mcv) { 19548 else => src_mcv, 19549 .air_ref => |src_ref| try self.resolveInst(src_ref), 19550 }; 19551 switch (resolved_src_mcv) { 19552 .none, 19553 .unreach, 19554 .dead, 19555 .undef, 19556 .register_pair, 19557 .register_triple, 19558 .register_quadruple, 19559 .register_overflow, 19560 .register_mask, 19561 .elementwise_regs_then_frame, 19562 .reserved_frame, 19563 .air_ref, 19564 => unreachable, 19565 .register => |src_reg| try self.asmRegisterRegister( 19566 .{ .i_, .mul }, 19567 dst_alias, 19568 registerAlias(src_reg, abi_size), 19569 ), 19570 .immediate => |imm| { 19571 if (std.math.cast(i32, imm)) |small| { 19572 try self.asmRegisterRegisterImmediate( 19573 .{ .i_, .mul }, 19574 dst_alias, 19575 dst_alias, 19576 .s(small), 19577 ); 19578 } else { 19579 const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); 19580 return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); 19581 } 19582 }, 19583 .register_offset, 19584 .eflags, 19585 .load_symbol, 19586 .lea_symbol, 19587 .load_direct, 19588 .lea_direct, 19589 .load_got, 
19590 .lea_got, 19591 .load_tlv, 19592 .lea_tlv, 19593 .lea_frame, 19594 => try self.asmRegisterRegister( 19595 .{ .i_, .mul }, 19596 dst_alias, 19597 registerAlias(try self.copyToTmpRegister(dst_ty, resolved_src_mcv), abi_size), 19598 ), 19599 .memory, .indirect, .load_frame => try self.asmRegisterMemory( 19600 .{ .i_, .mul }, 19601 dst_alias, 19602 switch (resolved_src_mcv) { 19603 .memory => |addr| .{ 19604 .base = .{ .reg = .ds }, 19605 .mod = .{ .rm = .{ 19606 .size = .fromSize(abi_size), 19607 .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse 19608 return self.asmRegisterRegister( 19609 .{ .i_, .mul }, 19610 dst_alias, 19611 registerAlias( 19612 try self.copyToTmpRegister(dst_ty, resolved_src_mcv), 19613 abi_size, 19614 ), 19615 ), 19616 } }, 19617 }, 19618 .indirect => |reg_off| .{ 19619 .base = .{ .reg = reg_off.reg }, 19620 .mod = .{ .rm = .{ 19621 .size = .fromSize(abi_size), 19622 .disp = reg_off.off, 19623 } }, 19624 }, 19625 .load_frame => |frame_addr| .{ 19626 .base = .{ .frame = frame_addr.index }, 19627 .mod = .{ .rm = .{ 19628 .size = .fromSize(abi_size), 19629 .disp = frame_addr.off, 19630 } }, 19631 }, 19632 else => unreachable, 19633 }, 19634 ), 19635 } 19636 }, 19637 .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented 19638 .memory, .indirect, .load_symbol, .load_direct, .load_got, .load_tlv, .load_frame => { 19639 const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); 19640 const tmp_mcv = MCValue{ .register = tmp_reg }; 19641 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 19642 defer self.register_manager.unlockReg(tmp_lock); 19643 19644 try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv); 19645 try self.genCopy(dst_ty, dst_mcv, tmp_mcv, .{}); 19646 }, 19647 } 19648 } 19649 19650 fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void { 19651 const pt = self.pt; 19652 const zcu = pt.zcu; 19653 // skip zero-bit arguments as they don't have a corresponding arg 
instruction 19654 var arg_index = self.arg_index; 19655 while (self.args[arg_index] == .none) arg_index += 1; 19656 self.arg_index = arg_index + 1; 19657 19658 const result: MCValue = if (self.debug_output == .none and self.liveness.isUnused(inst)) .unreach else result: { 19659 const arg_ty = self.typeOfIndex(inst); 19660 const src_mcv = self.args[arg_index]; 19661 switch (src_mcv) { 19662 .register, .register_pair, .load_frame => { 19663 for (src_mcv.getRegs()) |reg| self.register_manager.getRegAssumeFree(reg, inst); 19664 break :result src_mcv; 19665 }, 19666 .indirect => |reg_off| { 19667 self.register_manager.getRegAssumeFree(reg_off.reg, inst); 19668 const dst_mcv = try self.allocRegOrMem(inst, false); 19669 try self.genCopy(arg_ty, dst_mcv, src_mcv, .{}); 19670 break :result dst_mcv; 19671 }, 19672 .elementwise_regs_then_frame => |regs_frame_addr| { 19673 try self.spillEflagsIfOccupied(); 19674 19675 const fn_info = zcu.typeToFunc(self.fn_type).?; 19676 const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc); 19677 var prev_reg: Register = undefined; 19678 for ( 19679 param_int_regs[param_int_regs.len - regs_frame_addr.regs ..], 19680 0.., 19681 ) |dst_reg, elem_index| { 19682 assert(self.register_manager.isRegFree(dst_reg)); 19683 if (elem_index > 0) { 19684 try self.asmRegisterImmediate(.{ ._l, .sh }, dst_reg.to8(), .u(elem_index)); 19685 try self.asmRegisterRegister( 19686 .{ ._, .@"or" }, 19687 dst_reg.to8(), 19688 prev_reg.to8(), 19689 ); 19690 } 19691 prev_reg = dst_reg; 19692 } 19693 19694 const prev_lock = if (regs_frame_addr.regs > 0) 19695 self.register_manager.lockRegAssumeUnused(prev_reg) 19696 else 19697 null; 19698 defer if (prev_lock) |lock| self.register_manager.unlockReg(lock); 19699 19700 const dst_mcv = try self.allocRegOrMem(inst, false); 19701 if (regs_frame_addr.regs > 0) try self.asmMemoryRegister( 19702 .{ ._, .mov }, 19703 try dst_mcv.mem(self, .{ .size = .byte }), 19704 prev_reg.to8(), 19705 ); 19706 try self.genInlineMemset( 
19707 dst_mcv.address().offset(@intFromBool(regs_frame_addr.regs > 0)), 19708 .{ .immediate = 0 }, 19709 .{ .immediate = arg_ty.abiSize(zcu) - @intFromBool(regs_frame_addr.regs > 0) }, 19710 .{}, 19711 ); 19712 19713 const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 19714 const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); 19715 defer self.register_manager.unlockReg(index_lock); 19716 19717 try self.asmRegisterImmediate( 19718 .{ ._, .mov }, 19719 index_reg.to32(), 19720 .u(regs_frame_addr.regs), 19721 ); 19722 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 19723 try self.asmMemoryImmediate(.{ ._, .cmp }, .{ 19724 .base = .{ .frame = regs_frame_addr.frame_index }, 19725 .mod = .{ .rm = .{ 19726 .size = .byte, 19727 .index = index_reg.to64(), 19728 .scale = .@"8", 19729 .disp = regs_frame_addr.frame_off - @as(u6, regs_frame_addr.regs) * 8, 19730 } }, 19731 }, Immediate.u(0)); 19732 const unset = try self.asmJccReloc(.e, undefined); 19733 try self.asmMemoryRegister( 19734 .{ ._s, .bt }, 19735 try dst_mcv.mem(self, .{ .size = .dword }), 19736 index_reg.to32(), 19737 ); 19738 self.performReloc(unset); 19739 if (self.hasFeature(.slow_incdec)) { 19740 try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); 19741 } else { 19742 try self.asmRegister(.{ ._, .inc }, index_reg.to32()); 19743 } 19744 try self.asmRegisterImmediate( 19745 .{ ._, .cmp }, 19746 index_reg.to32(), 19747 .u(arg_ty.vectorLen(zcu)), 19748 ); 19749 _ = try self.asmJccReloc(.b, loop); 19750 19751 break :result dst_mcv; 19752 }, 19753 else => return self.fail("TODO implement arg for {}", .{src_mcv}), 19754 } 19755 }; 19756 return self.finishAir(inst, result, .{ .none, .none, .none }); 19757 } 19758 19759 fn airDbgArg(self: *CodeGen, inst: Air.Inst.Index) !void { 19760 // skip zero-bit arguments as they don't have a corresponding arg instruction 19761 var arg_index = self.arg_index; 19762 while (self.args[arg_index] == 
/// Emits the `pseudo_dbg_var_args_none` pseudo instruction when the function
/// being compiled is variadic; does nothing otherwise.
fn airDbgVarArgs(self: *CodeGen) !void {
    const fn_info = self.pt.zcu.typeToFunc(self.fn_type).?;
    if (!fn_info.is_var_args) return;
    try self.asmPseudo(.pseudo_dbg_var_args_none);
}

/// Emits a `dbg_local` MIR instruction describing where the local associated
/// with `inst` lives, given its current value `mcv`.
///
/// Does nothing when debug output is disabled. `inst` must be an `arg`,
/// `dbg_arg_inline`, `dbg_var_val` or `dbg_var_ptr` instruction; any other tag
/// is treated as unreachable.
fn genLocalDebugInfo(
    self: *CodeGen,
    inst: Air.Inst.Index,
    mcv: MCValue,
) !void {
    if (self.debug_output == .none) return;

    const inst_i = @intFromEnum(inst);
    const inst_tag = self.air.instructions.items(.tag)[inst_i];
    switch (inst_tag) {
        // The local is described by its value.
        .arg, .dbg_arg_inline, .dbg_var_val => switch (mcv) {
            .unreach, .dead, .elementwise_regs_then_frame, .reserved_frame, .air_ref => unreachable,
            .none => try self.asmAir(.dbg_local, inst),
            .immediate => |imm| try self.asmAirImmediate(.dbg_local, inst, .u(imm)),
            .lea_symbol => |sym_off| try self.asmAirImmediate(.dbg_local, inst, .rel(sym_off)),
            .lea_frame => |frame_addr| try self.asmAirFrameAddress(.dbg_local, inst, frame_addr),
            else => {
                // Any other value is first copied into a fresh spill slot,
                // and the debug local then refers to that slot.
                const local_ty = if (inst_tag == .arg)
                    self.typeOfIndex(inst)
                else
                    self.typeOf(self.air.instructions.items(.data)[inst_i].pl_op.operand);
                const spill_slot = try self.allocFrameIndex(.initSpill(local_ty, self.pt.zcu));
                try self.genSetMem(.{ .frame = spill_slot }, 0, local_ty, mcv, .{});
                try self.asmAirMemory(.dbg_local, inst, .{
                    .base = .{ .frame = spill_slot },
                    .mod = .{ .rm = .{ .size = .qword } },
                });
            },
        },
        // The local is described through an address: only the `lea_*`
        // forms are accepted, everything else is unreachable.
        .dbg_var_ptr => switch (mcv) {
            .lea_direct, .lea_got, .lea_tlv => |sym_index| try self.asmAirMemory(.dbg_local, inst, .{
                .base = .{ .reloc = sym_index },
                .mod = .{ .rm = .{ .size = .qword } },
            }),
            .lea_symbol => |sym_off| try self.asmAirMemory(.dbg_local, inst, .{
                .base = .{ .reloc = sym_off.sym_index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .disp = sym_off.off,
                } },
            }),
            .lea_frame => |frame_addr| try self.asmAirMemory(.dbg_local, inst, .{
                .base = .{ .frame = frame_addr.index },
                .mod = .{ .rm = .{
                    .size = .qword,
                    .disp = frame_addr.off,
                } },
            }),
            else => unreachable,
        },
        else => unreachable,
    }
}
/// Lowers the return-address instruction: copies the `.ret_addr` frame slot
/// into a freshly allocated register or memory location for `inst`.
fn airRetAddr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ret_addr_slot: MCValue = .{ .load_frame = .{ .index = .ret_addr } };
    const result = try self.allocRegOrMem(inst, true);
    try self.genCopy(.usize, result, ret_addr_slot, .{});
    return self.finishAir(inst, result, .{ .none, .none, .none });
}

/// Lowers the frame-address instruction: materializes the address of the
/// `.base_ptr` frame index into a freshly allocated register or memory
/// location for `inst`.
fn airFrameAddress(self: *CodeGen, inst: Air.Inst.Index) !void {
    const base_ptr_addr: MCValue = .{ .lea_frame = .{ .index = .base_ptr } };
    const result = try self.allocRegOrMem(inst, true);
    try self.genCopy(.usize, result, base_ptr_addr, .{});
    return self.finishAir(inst, result, .{ .none, .none, .none });
}

/// Lowers an AIR call instruction.
///
/// Collects the type and an `.air_ref` value for every call argument,
/// delegates code generation to `genCall`, then feeds the callee and all
/// arguments to the liveness "big tomb" iterator. The call result is replaced
/// by `.unreach` when liveness reports the instruction as unused.
/// Tail calls (`.always_tail`) are rejected with a TODO failure.
fn airCall(self: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier, opts: CopyOptions) !void {
    switch (modifier) {
        .always_tail => return self.fail("TODO implement tail calls for x86_64", .{}),
        else => {},
    }

    const call_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const callee_ref = call_op.operand;
    const call_extra = self.air.extraData(Air.Call, call_op.payload);
    const raw_args = self.air.extra[call_extra.end..][0..call_extra.data.args_len];
    const arg_refs: []const Air.Inst.Ref = @ptrCast(raw_args);

    // Scratch storage sized for up to 16 arguments on the stack; larger
    // calls fall back to `self.gpa`.
    const ScratchLayout = extern struct {
        tys: [16][@sizeOf(Type)]u8 align(@alignOf(Type)),
        vals: [16][@sizeOf(MCValue)]u8 align(@alignOf(MCValue)),
    };
    var fallback align(@max(@alignOf(ScratchLayout), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        std.heap.stackFallback(@sizeOf(ScratchLayout), self.gpa);
    const scratch = fallback.get();

    const arg_tys = try scratch.alloc(Type, arg_refs.len);
    defer scratch.free(arg_tys);
    const arg_vals = try scratch.alloc(MCValue, arg_refs.len);
    defer scratch.free(arg_vals);
    for (arg_refs, arg_tys, arg_vals) |arg_ref, *ty_slot, *val_slot| {
        ty_slot.* = self.typeOf(arg_ref);
        val_slot.* = .{ .air_ref = arg_ref };
    }

    const call_result = try self.genCall(.{ .air = callee_ref }, arg_tys, arg_vals, opts);

    var tomb = self.liveness.iterateBigTomb(inst);
    try self.feed(&tomb, callee_ref);
    for (arg_refs) |arg_ref| try self.feed(&tomb, arg_ref);

    const result: MCValue = if (self.liveness.isUnused(inst)) .unreach else call_result;
    return self.finishAirResult(inst, result);
}
/// Emits the MIR for a function call and returns the location of its result
/// (`call_info.return_value.short`).
///
/// `info` selects the callee:
/// * `.air` - an AIR operand whose type is a function or a pointer to one.
/// * `.lib` - a symbol named `callee` (optionally in library `lib`) with the
///   given parameter and return types, called using the target's C calling
///   convention.
///
/// `arg_types` and `args` are parallel slices. Entries past the callee's
/// declared parameter count are passed on as variadic argument types when
/// resolving the calling convention. `opts` is forwarded to the copies that
/// move argument values into place.
fn genCall(self: *CodeGen, info: union(enum) {
    air: Air.Inst.Ref,
    lib: struct {
        return_type: InternPool.Index,
        param_types: []const InternPool.Index,
        lib: ?[]const u8 = null,
        callee: []const u8,
    },
}, arg_types: []const Type, args: []const MCValue, opts: CopyOptions) !MCValue {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ip = &zcu.intern_pool;

    // Determine the function type being called: the callee's own type (or
    // its pointee) for `.air`, or a function type built from the supplied
    // signature for `.lib`.
    const fn_ty = switch (info) {
        .air => |callee| fn_info: {
            const callee_ty = self.typeOf(callee);
            break :fn_info switch (callee_ty.zigTypeTag(zcu)) {
                .@"fn" => callee_ty,
                .pointer => callee_ty.childType(zcu),
                else => unreachable,
            };
        },
        .lib => |lib| try pt.funcType(.{
            .param_types = lib.param_types,
            .return_type = lib.return_type,
            .cc = self.target.cCallingConvention().?,
        }),
    };
    const fn_info = zcu.typeToFunc(fn_ty).?;

    // Stack-backed scratch allocator sized for 16 entries of each temporary
    // list below; anything larger falls back to `self.gpa`.
    const ExpectedContents = extern struct {
        var_args: [16][@sizeOf(Type)]u8 align(@alignOf(Type)),
        frame_indices: [16]FrameIndex,
        reg_locks: [16][@sizeOf(?RegisterLock)]u8 align(@alignOf(?RegisterLock)),
    };
    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
    const allocator = stack.get();

    // Types of the arguments beyond the declared parameters.
    const var_args = try allocator.alloc(Type, args.len - fn_info.param_types.len);
    defer allocator.free(var_args);
    for (var_args, arg_types[fn_info.param_types.len..]) |*var_arg, arg_ty| var_arg.* = arg_ty;

    // One slot per argument; only written and read for `.indirect` arguments.
    const frame_indices = try allocator.alloc(FrameIndex, args.len);
    defer allocator.free(frame_indices);

    // Every register lock taken while setting up the call is collected here
    // and released when this function returns.
    var reg_locks: std.ArrayList(?RegisterLock) = .init(allocator);
    defer reg_locks.deinit();
    try reg_locks.ensureTotalCapacity(16);
    defer for (reg_locks.items) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);

    var call_info = try self.resolveCallingConventionValues(fn_info, var_args, .call_frame);
    defer call_info.deinit(self);

    // We need a properly aligned and sized call frame to be able to call this function.
    // The shared `.call_frame` entry only ever grows: it keeps the maximum
    // size and alignment over the calls processed so far.
    {
        const needed_call_frame: FrameAlloc = .init(.{
            .size = call_info.stack_byte_count,
            .alignment = call_info.stack_align,
        });
        const frame_allocs_slice = self.frame_allocs.slice();
        const stack_frame_size =
            &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)];
        stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
        const stack_frame_align =
            &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)];
        stack_frame_align.* = stack_frame_align.max(needed_call_frame.abi_align);
    }

    try self.spillEflagsIfOccupied();
    try self.spillCallerPreservedRegs(fn_info.cc);

    // set stack arguments first because this can clobber registers
    // also clobber spill arguments as we go
    switch (call_info.return_value.long) {
        .none, .unreach => {},
        // Acquire the register that will carry the indirect return location.
        .indirect => |reg_off| try self.register_manager.getReg(reg_off.reg, null),
        else => unreachable,
    }
    // First pass over the arguments: write everything that lives in memory
    // and acquire + lock every register an argument will occupy. Register
    // contents themselves are only written in the second pass below.
    for (call_info.args, arg_types, args, frame_indices) |dst_arg, arg_ty, src_arg, *frame_index|
        switch (dst_arg) {
            .none => {},
            .register => |reg| {
                try self.register_manager.getReg(reg, null);
                try reg_locks.append(self.register_manager.lockReg(reg));
            },
            .register_pair => |regs| {
                for (regs) |reg| try self.register_manager.getReg(reg, null);
                try reg_locks.appendSlice(&self.register_manager.lockRegs(2, regs));
            },
            .indirect => |reg_off| {
                // Copy the argument into a fresh frame slot now; the second
                // pass loads that slot's address into the register.
                frame_index.* = try self.allocFrameIndex(.initType(arg_ty, zcu));
                try self.genSetMem(.{ .frame = frame_index.* }, 0, arg_ty, src_arg, opts);
                try self.register_manager.getReg(reg_off.reg, null);
                try reg_locks.append(self.register_manager.lockReg(reg_off.reg));
            },
            .load_frame => {
                try self.genCopy(arg_ty, dst_arg, src_arg, opts);
                try self.freeValue(src_arg);
            },
            .elementwise_regs_then_frame => |regs_frame_addr| {
                // Elements with index >= `regs` go to the frame, one per
                // 8-byte slot; the first `regs` elements are placed in
                // registers by the second pass.
                const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
                defer self.register_manager.unlockReg(index_lock);

                // Use the source directly when it is addressable; otherwise
                // load its address into a temporary register.
                const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
                    .base = .{ .reg = try self.copyToTmpRegister(.usize, switch (src_arg) {
                        else => src_arg,
                        .air_ref => |src_ref| try self.resolveInst(src_ref),
                    }.address()) },
                    .mod = .{ .rm = .{ .size = .dword } },
                };
                const src_lock = switch (src_mem.base) {
                    .reg => |src_reg| self.register_manager.lockReg(src_reg),
                    else => null,
                };
                defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

                // index = regs
                try self.asmRegisterImmediate(
                    .{ ._, .mov },
                    index_reg.to32(),
                    .u(regs_frame_addr.regs),
                );
                // Emitted loop body: test bit `index` of the source, store
                // the carry flag as a byte at frame_off + (index - regs) * 8,
                // increment index, and repeat while index < vector length.
                const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
                try self.asmMemoryRegister(.{ ._, .bt }, src_mem, index_reg.to32());
                try self.asmSetccMemory(.c, .{
                    .base = .{ .frame = regs_frame_addr.frame_index },
                    .mod = .{ .rm = .{
                        .size = .byte,
                        .index = index_reg.to64(),
                        .scale = .@"8",
                        .disp = regs_frame_addr.frame_off - @as(u6, regs_frame_addr.regs) * 8,
                    } },
                });
                if (self.hasFeature(.slow_incdec)) {
                    try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
                } else {
                    try self.asmRegister(.{ ._, .inc }, index_reg.to32());
                }
                try self.asmRegisterImmediate(
                    .{ ._, .cmp },
                    index_reg.to32(),
                    .u(arg_ty.vectorLen(zcu)),
                );
                _ = try self.asmJccReloc(.b, loop);

                // Reserve the last `regs` integer parameter registers for
                // the register-passed elements.
                const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc);
                for (param_int_regs[param_int_regs.len - regs_frame_addr.regs ..]) |dst_reg| {
                    try self.register_manager.getReg(dst_reg, null);
                    try reg_locks.append(self.register_manager.lockReg(dst_reg));
                }
            },
            else => unreachable,
        };

    // now we are free to set register arguments
    switch (call_info.return_value.long) {
        .none, .unreach => {},
        .indirect => |reg_off| {
            // Allocate a frame slot for the result, pass its address in the
            // designated register, and report that slot as the result.
            const ret_ty: Type = .fromInterned(fn_info.return_type);
            const frame_index = try self.allocFrameIndex(.initSpill(ret_ty, zcu));
            try self.genSetReg(reg_off.reg, .usize, .{
                .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
            }, .{});
            call_info.return_value.short = .{ .load_frame = .{ .index = frame_index } };
            try reg_locks.append(self.register_manager.lockReg(reg_off.reg));
        },
        else => unreachable,
    }

    // Second pass over the arguments: fill the registers reserved above.
    for (call_info.args, arg_types, args, frame_indices) |dst_arg, arg_ty, src_arg, frame_index|
        switch (dst_arg) {
            .none, .load_frame => {},
            .register => |dst_reg| switch (fn_info.cc) {
                else => try self.genSetReg(registerAlias(
                    dst_reg,
                    @intCast(arg_ty.abiSize(zcu)),
                ), arg_ty, src_arg, opts),
                .x86_64_sysv, .x86_64_win => {
                    // Set the register using the promoted integer type, then
                    // truncate back to `arg_ty` if promotion changed the type.
                    const promoted_ty = self.promoteInt(arg_ty);
                    const promoted_abi_size: u32 = @intCast(promoted_ty.abiSize(zcu));
                    const dst_alias = registerAlias(dst_reg, promoted_abi_size);
                    try self.genSetReg(dst_alias, promoted_ty, src_arg, opts);
                    if (promoted_ty.toIntern() != arg_ty.toIntern())
                        try self.truncateRegister(arg_ty, dst_alias);
                },
            },
            .register_pair => try self.genCopy(arg_ty, dst_arg, src_arg, opts),
            // Pass the address of the frame slot filled in the first pass.
            .indirect => |reg_off| try self.genSetReg(reg_off.reg, .usize, .{
                .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
            }, .{}),
            .elementwise_regs_then_frame => |regs_frame_addr| {
                const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{
                    .base = .{ .reg = try self.copyToTmpRegister(
                        .usize,
                        switch (src_arg) {
                            else => src_arg,
                            .air_ref => |src_ref| try self.resolveInst(src_ref),
                        }.address(),
                    ) },
                    .mod = .{ .rm = .{ .size = .dword } },
                };
                const src_lock = switch (src_mem.base) {
                    .reg => |src_reg| self.register_manager.lockReg(src_reg),
                    else => null,
                };
                defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

                // For each register-passed element: zero the register, test
                // the element's bit in the source, and set the register's low
                // byte from the carry flag.
                const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc);
                for (
                    param_int_regs[param_int_regs.len - regs_frame_addr.regs ..],
                    0..,
                ) |dst_reg, elem_index| {
                    try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
                    try self.asmMemoryImmediate(.{ ._, .bt }, src_mem, .u(elem_index));
                    try self.asmSetccRegister(.c, dst_reg.to8());
                }
            },
            else => unreachable,
        };

    // Variadic callees receive `call_info.fp_count` in AL.
    // NOTE(review): presumably the SysV convention of passing the number of
    // vector registers used in AL - confirm against the ABI.
    if (fn_info.is_var_args) try self.asmRegisterImmediate(.{ ._, .mov }, .al, .u(call_info.fp_count));

    // Due to incremental compilation, how function calls are generated depends
    // on linking.
    switch (info) {
        // Callee known at compile time: emit a call that references its
        // symbol, in the form each linker backend expects.
        .air => |callee| if (try self.air.value(callee, pt)) |func_value| {
            const func_key = ip.indexToKey(func_value.ip_index);
            // A zero-offset pointer to a nav is looked through to the nav's
            // value, so pointers to functions are handled like the function.
            switch (switch (func_key) {
                else => func_key,
                .ptr => |ptr| if (ptr.byte_offset == 0) switch (ptr.base_addr) {
                    .nav => |nav| ip.indexToKey(zcu.navValue(nav).toIntern()),
                    else => func_key,
                } else func_key,
            }) {
                .func => |func| {
                    if (self.bin_file.cast(.elf)) |elf_file| {
                        const zo = elf_file.zigObjectPtr().?;
                        const sym_index = try zo.getOrCreateMetadataForNav(zcu, func.owner_nav);
                        try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym_index }));
                    } else if (self.bin_file.cast(.coff)) |coff_file| {
                        // COFF: load the target from the GOT into the linker
                        // scratch register and call through it.
                        const atom = try coff_file.getOrCreateAtomForNav(func.owner_nav);
                        const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
                        const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc);
                        try self.genSetReg(scratch_reg, .usize, .{ .lea_got = sym_index }, .{});
                        try self.asmRegister(.{ ._, .call }, scratch_reg);
                    } else if (self.bin_file.cast(.macho)) |macho_file| {
                        const zo = macho_file.getZigObject().?;
                        const sym_index = try zo.getOrCreateMetadataForNav(macho_file, func.owner_nav);
                        const sym = zo.symbols.items[sym_index];
                        try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym.nlist_idx }));
                    } else if (self.bin_file.cast(.plan9)) |p9| {
                        // Plan 9: call through the atom's offset-table entry.
                        const atom_index = try p9.seeNav(pt, func.owner_nav);
                        const atom = p9.getAtom(atom_index);
                        try self.asmMemory(.{ ._, .call }, .{
                            .base = .{ .reg = .ds },
                            .mod = .{ .rm = .{
                                .size = .qword,
                                .disp = @intCast(atom.getOffsetTableAddress(p9)),
                            } },
                        });
                    } else unreachable;
                },
                .@"extern" => |@"extern"| if (self.bin_file.cast(.elf)) |elf_file| {
                    const target_sym_index = try elf_file.getGlobalSymbol(
                        @"extern".name.toSlice(ip),
                        @"extern".lib_name.toSlice(ip),
                    );
                    try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
                } else if (self.bin_file.cast(.macho)) |macho_file| {
                    const target_sym_index = try macho_file.getGlobalSymbol(
                        @"extern".name.toSlice(ip),
                        @"extern".lib_name.toSlice(ip),
                    );
                    try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
                } else try self.genExternSymbolRef(
                    .call,
                    @"extern".lib_name.toSlice(ip),
                    @"extern".name.toSlice(ip),
                ),
                else => return self.fail("TODO implement calling bitcasted functions", .{}),
            }
        } else {
            // Callee only known at runtime: it must be a function pointer.
            // Load it into the linker scratch register and call through it.
            assert(self.typeOf(callee).zigTypeTag(zcu) == .pointer);
            const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc);
            try self.genSetReg(scratch_reg, .usize, .{ .air_ref = callee }, .{});
            try self.asmRegister(.{ ._, .call }, scratch_reg);
        },
        // Library routine: call the named global symbol.
        .lib => |lib| if (self.bin_file.cast(.elf)) |elf_file| {
            const target_sym_index = try elf_file.getGlobalSymbol(lib.callee, lib.lib);
            try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
        } else if (self.bin_file.cast(.macho)) |macho_file| {
            const target_sym_index = try macho_file.getGlobalSymbol(lib.callee, lib.lib);
            try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index }));
        } else try self.genExternSymbolRef(.call, lib.lib, lib.callee),
    }
    return call_info.return_value.short;
}
/// Lowers a `ret` instruction.
///
/// Moves the operand into the location described by `self.ret_mcv`: directly
/// into the return register(s), or through the pointer held in
/// `self.ret_mcv.long` for an indirect return. It then emits a jump that is
/// recorded in `self.epilogue_relocs` for later patching. `safety` is
/// forwarded to the copy as the `.safety` copy option.
fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
    const zcu = self.pt.zcu;
    const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand: MCValue = .{ .air_ref = operand_ref };
    const copy_opts: CopyOptions = .{ .safety = safety };
    const ret_ty = self.fn_type.fnReturnType(zcu);

    const ret_short = self.ret_mcv.short;
    switch (ret_short) {
        .none => {},
        .indirect => |reg_off| {
            // Claim and lock the pointer register, load the result pointer
            // into it, then store the operand through that pointer.
            const ptr_reg = reg_off.reg;
            try self.register_manager.getReg(ptr_reg, null);
            const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
            defer self.register_manager.unlockReg(ptr_lock);

            try self.genSetReg(ptr_reg, .usize, self.ret_mcv.long, .{});
            try self.genSetMem(.{ .reg = ptr_reg }, reg_off.off, ret_ty, operand, copy_opts);
        },
        .register,
        .register_pair,
        .register_triple,
        .register_quadruple,
        => try self.genCopy(ret_ty, ret_short, operand, copy_opts),
        else => unreachable,
    }
    self.ret_mcv.liveOut(self, inst);
    try self.finishAir(inst, .unreach, .{ operand_ref, .none, .none });

    // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
    // which is available if the jump is 127 bytes or less forward.
    const epilogue_jmp = try self.asmJmpReloc(undefined);
    try self.epilogue_relocs.append(self.gpa, epilogue_jmp);
}
/// Lowers a `ret_load` instruction, whose operand is a pointer to the value
/// being returned.
///
/// For register returns the value is loaded through the pointer into the
/// return register(s). For an indirect return the pointer itself is placed in
/// the return register. It then emits a jump that is recorded in
/// `self.epilogue_relocs` for later patching.
fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand_ref = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const ptr_mcv = try self.resolveInst(operand_ref);
    const ptr_ty = self.typeOf(operand_ref);

    const ret_short = self.ret_mcv.short;
    switch (ret_short) {
        .none => {},
        .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr_mcv, .{}),
        .register, .register_pair => try self.load(ret_short, ptr_ty, ptr_mcv),
        else => unreachable,
    }
    self.ret_mcv.liveOut(self, inst);
    try self.finishAir(inst, .unreach, .{ operand_ref, .none, .none });

    // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
    // which is available if the jump is 127 bytes or less forward.
    const epilogue_jmp = try self.asmJmpReloc(undefined);
    try self.epilogue_relocs.append(self.gpa, epilogue_jmp);
}
20235 const jmp_reloc = try self.asmJmpReloc(undefined); 20236 try self.epilogue_relocs.append(self.gpa, jmp_reloc); 20237 } 20238 20239 fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void { 20240 const pt = self.pt; 20241 const zcu = pt.zcu; 20242 const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; 20243 var ty = self.typeOf(bin_op.lhs); 20244 var null_compare: ?Mir.Inst.Index = null; 20245 20246 const result: Condition = result: { 20247 try self.spillEflagsIfOccupied(); 20248 20249 const lhs_mcv = try self.resolveInst(bin_op.lhs); 20250 const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { 20251 .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, 20252 .register_pair => |lhs_regs| locks: { 20253 const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); 20254 break :locks .{ locks[0], locks[1] }; 20255 }, 20256 .register_offset => |lhs_ro| .{ 20257 self.register_manager.lockRegAssumeUnused(lhs_ro.reg), 20258 null, 20259 }, 20260 else => @splat(null), 20261 }; 20262 defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); 20263 20264 const rhs_mcv = try self.resolveInst(bin_op.rhs); 20265 const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { 20266 .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, 20267 .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), 20268 .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null }, 20269 else => @splat(null), 20270 }; 20271 defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); 20272 20273 switch (ty.zigTypeTag(zcu)) { 20274 .float => { 20275 const float_bits = ty.floatBits(self.target.*); 20276 if (switch (float_bits) { 20277 16 => !self.hasFeature(.f16c), 20278 32, 64 => false, 20279 80, 128 => true, 20280 else => unreachable, 20281 }) { 20282 var callee_buf: ["__???f2".len]u8 = 
undefined; 20283 const ret = try self.genCall(.{ .lib = .{ 20284 .return_type = .i32_type, 20285 .param_types = &.{ ty.toIntern(), ty.toIntern() }, 20286 .callee = std.fmt.bufPrint(&callee_buf, "__{s}{c}f2", .{ 20287 switch (op) { 20288 .eq => "eq", 20289 .neq => "ne", 20290 .lt => "lt", 20291 .lte => "le", 20292 .gt => "gt", 20293 .gte => "ge", 20294 }, 20295 floatCompilerRtAbiName(float_bits), 20296 }) catch unreachable, 20297 } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, .{}); 20298 try self.genBinOpMir(.{ ._, .@"test" }, .i32, ret, ret); 20299 break :result switch (op) { 20300 .eq => .e, 20301 .neq => .ne, 20302 .lt => .l, 20303 .lte => .le, 20304 .gt => .g, 20305 .gte => .ge, 20306 }; 20307 } 20308 }, 20309 .optional => if (!ty.optionalReprIsPayload(zcu)) { 20310 const opt_ty = ty; 20311 const opt_abi_size: u31 = @intCast(opt_ty.abiSize(zcu)); 20312 ty = opt_ty.optionalChild(zcu); 20313 const payload_abi_size: u31 = @intCast(ty.abiSize(zcu)); 20314 20315 const temp_lhs_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 20316 const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg); 20317 defer self.register_manager.unlockReg(temp_lhs_lock); 20318 20319 if (lhs_mcv.isBase()) try self.asmRegisterMemory( 20320 .{ ._, .mov }, 20321 temp_lhs_reg.to8(), 20322 try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }), 20323 ) else { 20324 try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{}); 20325 try self.asmRegisterImmediate( 20326 .{ ._r, .sh }, 20327 registerAlias(temp_lhs_reg, opt_abi_size), 20328 .u(payload_abi_size * 8), 20329 ); 20330 } 20331 20332 const payload_compare = payload_compare: { 20333 if (rhs_mcv.isBase()) { 20334 const rhs_mem = 20335 try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }); 20336 try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8()); 20337 const payload_compare = try 
self.asmJccReloc(.nz, undefined); 20338 try self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem); 20339 break :payload_compare payload_compare; 20340 } 20341 20342 const temp_rhs_reg = try self.copyToTmpRegister(opt_ty, rhs_mcv); 20343 const temp_rhs_lock = self.register_manager.lockRegAssumeUnused(temp_rhs_reg); 20344 defer self.register_manager.unlockReg(temp_rhs_lock); 20345 20346 try self.asmRegisterImmediate( 20347 .{ ._r, .sh }, 20348 registerAlias(temp_rhs_reg, opt_abi_size), 20349 .u(payload_abi_size * 8), 20350 ); 20351 try self.asmRegisterRegister( 20352 .{ ._, .@"test" }, 20353 temp_lhs_reg.to8(), 20354 temp_rhs_reg.to8(), 20355 ); 20356 const payload_compare = try self.asmJccReloc(.nz, undefined); 20357 try self.asmRegisterRegister( 20358 .{ ._, .cmp }, 20359 temp_lhs_reg.to8(), 20360 temp_rhs_reg.to8(), 20361 ); 20362 break :payload_compare payload_compare; 20363 }; 20364 null_compare = try self.asmJmpReloc(undefined); 20365 self.performReloc(payload_compare); 20366 }, 20367 else => {}, 20368 } 20369 20370 switch (ty.zigTypeTag(zcu)) { 20371 else => { 20372 const abi_size: u16 = @intCast(ty.abiSize(zcu)); 20373 const may_flip: enum { 20374 may_flip, 20375 must_flip, 20376 must_not_flip, 20377 } = if (abi_size > 8) switch (op) { 20378 .lt, .gte => .must_not_flip, 20379 .lte, .gt => .must_flip, 20380 .eq, .neq => .may_flip, 20381 } else .may_flip; 20382 20383 const flipped = switch (may_flip) { 20384 .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isBase(), 20385 .must_flip => true, 20386 .must_not_flip => false, 20387 }; 20388 const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv; 20389 const dst_mcv = if (unmat_dst_mcv.isRegister() or 20390 (abi_size <= 8 and unmat_dst_mcv.isBase())) unmat_dst_mcv else dst: { 20391 const dst_mcv = try self.allocTempRegOrMem(ty, true); 20392 try self.genCopy(ty, dst_mcv, unmat_dst_mcv, .{}); 20393 break :dst dst_mcv; 20394 }; 20395 const dst_lock = 20396 if (dst_mcv.getReg()) |reg| 
self.register_manager.lockReg(reg) else null; 20397 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 20398 20399 const src_mcv = try self.resolveInst(if (flipped) bin_op.lhs else bin_op.rhs); 20400 const src_lock = 20401 if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; 20402 defer if (src_lock) |lock| self.register_manager.unlockReg(lock); 20403 20404 break :result .fromCompareOperator( 20405 if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned, 20406 result_op: { 20407 const flipped_op = if (flipped) op.reverse() else op; 20408 if (abi_size > 8) switch (flipped_op) { 20409 .lt, .gte => {}, 20410 .lte, .gt => unreachable, 20411 .eq, .neq => { 20412 const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock }; 20413 20414 const resolved_dst_mcv = switch (dst_mcv) { 20415 else => dst_mcv, 20416 .air_ref => |dst_ref| try self.resolveInst(dst_ref), 20417 }; 20418 const dst_info: OpInfo = switch (resolved_dst_mcv) { 20419 .none, 20420 .unreach, 20421 .dead, 20422 .undef, 20423 .immediate, 20424 .eflags, 20425 .register_offset, 20426 .register_overflow, 20427 .register_mask, 20428 .indirect, 20429 .lea_direct, 20430 .lea_got, 20431 .lea_tlv, 20432 .lea_frame, 20433 .lea_symbol, 20434 .elementwise_regs_then_frame, 20435 .reserved_frame, 20436 .air_ref, 20437 => unreachable, 20438 .register, .register_pair, .register_triple, .register_quadruple, .load_frame => null, 20439 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: { 20440 switch (resolved_dst_mcv) { 20441 .memory => |addr| if (std.math.cast( 20442 i32, 20443 @as(i64, @bitCast(addr)), 20444 ) != null and std.math.cast( 20445 i32, 20446 @as(i64, @bitCast(addr)) + abi_size - 8, 20447 ) != null) break :dst null, 20448 .load_symbol, .load_got, .load_direct, .load_tlv => {}, 20449 else => unreachable, 20450 } 20451 20452 const dst_addr_reg = (try self.register_manager.allocReg( 20453 null, 20454 abi.RegisterClass.gp, 20455 )).to64(); 20456 
const dst_addr_lock = 20457 self.register_manager.lockRegAssumeUnused(dst_addr_reg); 20458 errdefer self.register_manager.unlockReg(dst_addr_lock); 20459 20460 try self.genSetReg(dst_addr_reg, .usize, resolved_dst_mcv.address(), .{}); 20461 break :dst .{ 20462 .addr_reg = dst_addr_reg, 20463 .addr_lock = dst_addr_lock, 20464 }; 20465 }, 20466 }; 20467 defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock); 20468 20469 const resolved_src_mcv = switch (src_mcv) { 20470 else => src_mcv, 20471 .air_ref => |src_ref| try self.resolveInst(src_ref), 20472 }; 20473 const src_info: OpInfo = switch (resolved_src_mcv) { 20474 .none, 20475 .unreach, 20476 .dead, 20477 .undef, 20478 .immediate, 20479 .eflags, 20480 .register, 20481 .register_offset, 20482 .register_overflow, 20483 .register_mask, 20484 .indirect, 20485 .lea_symbol, 20486 .lea_direct, 20487 .lea_got, 20488 .lea_tlv, 20489 .lea_frame, 20490 .elementwise_regs_then_frame, 20491 .reserved_frame, 20492 .air_ref, 20493 => unreachable, 20494 .register_pair, .register_triple, .register_quadruple, .load_frame => null, 20495 .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: { 20496 switch (resolved_src_mcv) { 20497 .memory => |addr| if (std.math.cast( 20498 i32, 20499 @as(i64, @bitCast(addr)), 20500 ) != null and std.math.cast( 20501 i32, 20502 @as(i64, @bitCast(addr)) + abi_size - 8, 20503 ) != null) break :src null, 20504 .load_symbol, .load_got, .load_direct, .load_tlv => {}, 20505 else => unreachable, 20506 } 20507 20508 const src_addr_reg = (try self.register_manager.allocReg( 20509 null, 20510 abi.RegisterClass.gp, 20511 )).to64(); 20512 const src_addr_lock = 20513 self.register_manager.lockRegAssumeUnused(src_addr_reg); 20514 errdefer self.register_manager.unlockReg(src_addr_lock); 20515 20516 try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{}); 20517 break :src .{ 20518 .addr_reg = src_addr_reg, 20519 .addr_lock = src_addr_lock, 20520 }; 20521 }, 20522 }; 
20523 defer if (src_info) |info| 20524 self.register_manager.unlockReg(info.addr_lock); 20525 20526 const regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); 20527 const acc_reg = regs[0].to64(); 20528 const locks = self.register_manager.lockRegsAssumeUnused(2, regs); 20529 defer for (locks) |lock| self.register_manager.unlockReg(lock); 20530 20531 const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable; 20532 var limb_i: u16 = 0; 20533 while (limb_i < limbs_len) : (limb_i += 1) { 20534 const off = limb_i * 8; 20535 const tmp_reg = regs[@min(limb_i, 1)].to64(); 20536 20537 try self.genSetReg(tmp_reg, .usize, if (dst_info) |info| .{ 20538 .indirect = .{ .reg = info.addr_reg, .off = off }, 20539 } else switch (resolved_dst_mcv) { 20540 inline .register_pair, 20541 .register_triple, 20542 .register_quadruple, 20543 => |dst_regs| .{ .register = dst_regs[limb_i] }, 20544 .memory => |dst_addr| .{ 20545 .memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off), 20546 }, 20547 .indirect => |reg_off| .{ .indirect = .{ 20548 .reg = reg_off.reg, 20549 .off = reg_off.off + off, 20550 } }, 20551 .load_frame => |frame_addr| .{ .load_frame = .{ 20552 .index = frame_addr.index, 20553 .off = frame_addr.off + off, 20554 } }, 20555 else => unreachable, 20556 }, .{}); 20557 20558 try self.genBinOpMir( 20559 .{ ._, .xor }, 20560 .usize, 20561 .{ .register = tmp_reg }, 20562 if (src_info) |info| .{ 20563 .indirect = .{ .reg = info.addr_reg, .off = off }, 20564 } else switch (resolved_src_mcv) { 20565 inline .register_pair, 20566 .register_triple, 20567 .register_quadruple, 20568 => |src_regs| .{ .register = src_regs[limb_i] }, 20569 .memory => |src_addr| .{ 20570 .memory = @bitCast(@as(i64, @bitCast(src_addr)) + off), 20571 }, 20572 .indirect => |reg_off| .{ .indirect = .{ 20573 .reg = reg_off.reg, 20574 .off = reg_off.off + off, 20575 } }, 20576 .load_frame => |frame_addr| .{ .load_frame = .{ 20577 .index = frame_addr.index, 20578 .off = 
frame_addr.off + off, 20579 } }, 20580 else => unreachable, 20581 }, 20582 ); 20583 20584 if (limb_i > 0) 20585 try self.asmRegisterRegister(.{ ._, .@"or" }, acc_reg, tmp_reg); 20586 } 20587 assert(limbs_len >= 2); // use flags from or 20588 break :result_op flipped_op; 20589 }, 20590 }; 20591 try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv); 20592 break :result_op flipped_op; 20593 }, 20594 ); 20595 }, 20596 .float => { 20597 const flipped = switch (op) { 20598 .lt, .lte => true, 20599 .eq, .gte, .gt, .neq => false, 20600 }; 20601 20602 const dst_mcv = if (flipped) rhs_mcv else lhs_mcv; 20603 const dst_reg = if (dst_mcv.isRegister()) 20604 dst_mcv.getReg().? 20605 else 20606 try self.copyToTmpRegister(ty, dst_mcv); 20607 const dst_lock = self.register_manager.lockReg(dst_reg); 20608 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 20609 const src_mcv = if (flipped) lhs_mcv else rhs_mcv; 20610 20611 switch (ty.floatBits(self.target.*)) { 20612 16 => { 20613 assert(self.hasFeature(.f16c)); 20614 const tmp1_reg = 20615 (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); 20616 const tmp1_mcv = MCValue{ .register = tmp1_reg }; 20617 const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg); 20618 defer self.register_manager.unlockReg(tmp1_lock); 20619 20620 const tmp2_reg = 20621 (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); 20622 const tmp2_mcv = MCValue{ .register = tmp2_reg }; 20623 const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg); 20624 defer self.register_manager.unlockReg(tmp2_lock); 20625 20626 if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( 20627 .{ .vp_w, .insr }, 20628 tmp1_reg, 20629 dst_reg.to128(), 20630 try src_mcv.mem(self, .{ .size = .word }), 20631 .u(1), 20632 ) else try self.asmRegisterRegisterRegister( 20633 .{ .vp_, .unpcklwd }, 20634 tmp1_reg, 20635 dst_reg.to128(), 20636 (if (src_mcv.isRegister()) 20637 
src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(ty, src_mcv)).to128(),
                        );
                        try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
                        try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
                        try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
                    },
                    32 => try self.genBinOpMir(
                        .{ ._ss, .ucomi },
                        ty,
                        .{ .register = dst_reg },
                        src_mcv,
                    ),
                    64 => try self.genBinOpMir(
                        .{ ._sd, .ucomi },
                        ty,
                        .{ .register = dst_reg },
                        src_mcv,
                    ),
                    else => unreachable,
                }

                break :result switch (if (flipped) op.reverse() else op) {
                    .lt, .lte => unreachable, // required to have been canonicalized to gt(e)
                    .gt => .a,
                    .gte => .ae,
                    .eq => .z_and_np,
                    .neq => .nz_or_p,
                };
            },
        }
    };

    if (null_compare) |reloc| self.performReloc(reloc);
    self.eflags_inst = inst;
    return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none });
}

/// Lowers the vector comparison `inst`.
///
/// The operator and both operands are read from the instruction's `Air.VectorCmp`
/// payload; the comparison itself is delegated to `genBinOp` and its result is
/// handed to `finishAir` together with the two operands.
fn airCmpVector(self: *CodeGen, inst: Air.Inst.Index) !void {
    const payload_index = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl.payload;
    const vector_cmp = self.air.extraData(Air.VectorCmp, payload_index).data;
    const cmp_result = try self.genBinOp(
        inst,
        .fromCmpOp(vector_cmp.compareOperator(), false),
        vector_cmp.lhs,
        vector_cmp.rhs,
    );
    return self.finishAir(inst, cmp_result, .{ vector_cmp.lhs, vector_cmp.rhs, .none });
}

/// Compares the operand of `inst` with the value stored at the address of the lazy
/// `anyerror` const-data symbol.
///
/// The outcome is left in the flags and reported as the `.b` (below) condition.
fn airCmpLtErrorsLen(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;

    // Load the address of the lazy symbol into a scratch register that stays locked
    // until this function returns.
    const sym_addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
    const sym_addr_lock = self.register_manager.lockRegAssumeUnused(sym_addr_reg);
    defer self.register_manager.unlockReg(sym_addr_lock);
    const lazy_sym: link.File.LazySymbol = .{ .kind = .const_data, .ty = .anyerror_type };
    try self.genLazySymbolRef(.lea, sym_addr_reg, lazy_sym);

    // The `cmp` emitted below overwrites the flags.
    try self.spillEflagsIfOccupied();

    const operand_ty = self.typeOf(operand);
    const operand_size: u32 = @intCast(operand_ty.abiSize(zcu));
    const operand_mcv = try self.resolveInst(operand);
    const operand_reg = if (operand_mcv == .register)
        operand_mcv.register
    else
        try self.copyToTmpRegister(operand_ty, operand_mcv);
    try self.asmRegisterMemory(
        .{ ._, .cmp },
        registerAlias(operand_reg, operand_size),
        .{
            .base = .{ .reg = sym_addr_reg },
            .mod = .{ .rm = .{ .size = .fromSize(operand_size) } },
        },
    );

    self.eflags_inst = inst;
    return self.finishAir(inst, .{ .eflags = .b }, .{ operand, .none, .none });
}

/// Lowers a `try` whose operand is an error-union value.
///
/// The body that trails the `Air.Try` payload is the code `genTry` emits on the
/// error path.
fn airTry(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const try_extra = self.air.extraData(Air.Try, pl_op.payload);
    const err_body: []const Air.Inst.Index =
        @ptrCast(self.air.extra[try_extra.end..][0..try_extra.data.body_len]);
    const eu_ty = self.typeOf(pl_op.operand);
    const unwrapped = try self.genTry(inst, pl_op.operand, err_body, eu_ty, false);
    return self.finishAir(inst, unwrapped, .{ .none, .none, .none });
}

/// Lowers a `try` whose operand is a pointer to an error union.
///
/// Same as `airTry`, except that the operand comes from the `Air.TryPtr` payload and
/// `genTry` is told to go through the pointer.
fn airTryPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const try_extra = self.air.extraData(Air.TryPtr, ty_pl.payload);
    const err_body: []const Air.Inst.Index =
        @ptrCast(self.air.extra[try_extra.end..][0..try_extra.data.body_len]);
    const ptr_ty = self.typeOf(try_extra.data.ptr);
    const unwrapped = try self.genTry(inst, try_extra.data.ptr, err_body, ptr_ty, true);
    return self.finishAir(inst, unwrapped, .{ .none, .none, .none });
}

/// Shared lowering for `airTry` and `airTryPtr`.
///
/// Emits the "is error" check, a conditional jump over `body`, then `body` itself.
/// After the jump target, returns the unwrapped payload (or payload pointer when
/// `operand_is_ptr`), or `.unreach` when the result of `inst` is unused.
fn genTry(
    self: *CodeGen,
    inst: Air.Inst.Index,
    operand: Air.Inst.Ref,
    body: []const Air.Inst.Index,
    operand_ty: Type,
    operand_is_ptr: bool,
) !MCValue {
    const branch_liveness = self.liveness.getCondBr(inst);

    const operand_mcv = try self.resolveInst(operand);
    const is_err = if (operand_is_ptr)
        try self.isErrPtr(null, operand_ty, operand_mcv)
    else
        try self.isErr(null, operand_ty, operand_mcv);

    // This jump is taken when the error check is false, i.e. it skips `body`.
    const skip_body_reloc = try self.genCondBrMir(.anyerror, is_err);

    if (self.liveness.operandDies(inst, 0)) {
        if (operand.toIndex()) |operand_inst| try self.processDeath(operand_inst);
    }

    self.scope_generation += 1;
    const saved_state = try self.saveState();

    for (branch_liveness.else_deaths) |dying| try self.processDeath(dying);
    try self.genBodyBlock(body);
    try self.restoreState(saved_state, &.{}, .{
        .emit_instructions = false,
        .update_tracking = true,
        .resurrect = true,
        .close_scope = true,
    });

    self.performReloc(skip_body_reloc);

    for (branch_liveness.then_deaths) |dying| try self.processDeath(dying);

    if (self.liveness.isUnused(inst)) return .unreach;
    return if (operand_is_ptr)
        try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand_mcv)
    else
        try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand_mcv);
}

/// Emits a `pseudo_dbg_line_stmt_line_column` pseudo instruction carrying the line and
/// column recorded in `inst`.
fn airDbgStmt(self: *CodeGen, inst: Air.Inst.Index) !void {
    const stmt = self.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt;
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = .pseudo_dbg_line_stmt_line_column,
        .data = .{ .line_column = .{
            .line = stmt.line,
            .column = stmt.column,
        } },
    });
}

/// Emits a `nop` for an empty statement.
///
/// If the most recently emitted MIR instruction is a
/// `pseudo_dbg_line_stmt_line_column`, it is first rewritten in place to
/// `pseudo_dbg_line_line_column`.
fn airDbgEmptyStmt(self: *CodeGen) !void {
    const mir_len = self.mir_instructions.len;
    if (mir_len > 0) {
        const last_ops = &self.mir_instructions.items(.ops)[mir_len - 1];
        if (last_ops.* == .pseudo_dbg_line_stmt_line_column)
            last_ops.* = .pseudo_dbg_line_line_column;
    }
    try self.asmOpOnly(.{ ._, .nop });
}

/// Lowers an inline block, bracketing its body with enter/leave inline-function
/// pseudo instructions.
///
/// `self.inline_func` is switched to the inlined function while the body is lowered
/// and restored on every exit path, including errors.
fn airDbgInlineBlock(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const inline_extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
    const inlined_func = inline_extra.data.func;

    const outer_func = self.inline_func;
    defer self.inline_func = outer_func;
    self.inline_func = inlined_func;

    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = .pseudo_dbg_enter_inline_func,
        .data = .{ .func = inlined_func },
    });
    try self.lowerBlock(
        inst,
        @ptrCast(self.air.extra[inline_extra.end..][0..inline_extra.data.body_len]),
    );
    // Leaving reports the function we are returning into.
    _ = try self.addInst(.{
        .tag = .pseudo,
        .ops = .pseudo_dbg_leave_inline_func,
        .data = .{ .func = outer_func },
    });
}

/// Emits local debug info for the operand of `inst`; the instruction itself produces
/// no value (`.unreach`).
fn airDbgVar(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op.operand;
    const operand_mcv = try self.resolveInst(operand);
    try self.genLocalDebugInfo(inst, operand_mcv);
    return self.finishAir(inst, .unreach, .{ operand, .none, .none });
}

/// Emits a conditional jump that is taken when the condition `mcv` is false.
///
/// Returns the index of the jump so the caller can patch its target later with
/// `performReloc`. Only `.eflags`, `.register`, and (for ABI sizes up to 8 bytes)
/// `.immediate` / `.load_frame` conditions are handled; anything else fails with a
/// TODO message.
fn genCondBrMir(self: *CodeGen, ty: Type, mcv: MCValue) !Mir.Inst.Index {
    const zcu = self.pt.zcu;
    const cond_size = ty.abiSize(zcu);
    switch (mcv) {
        // The jump goes to the false branch, hence the negated condition.
        .eflags => |cc| return self.asmJccReloc(cc.negate(), undefined),
        .register => |cond_reg| {
            try self.spillEflagsIfOccupied();
            // Only the lowest bit of the low byte is tested.
            try self.asmRegisterImmediate(.{ ._, .@"test" }, cond_reg.to8(), .u(1));
            return self.asmJccReloc(.z, undefined);
        },
        .immediate,
        .load_frame,
        => {
            try self.spillEflagsIfOccupied();
            if (cond_size > 8)
                return self.fail("TODO implement condbr when condition is {} with abi larger than 8 bytes", .{mcv});
            // Move the condition into a register and retry with the register form.
            const tmp_reg = try self.copyToTmpRegister(ty, mcv);
            return self.genCondBrMir(ty, .{ .register = tmp_reg });
        },
        else => return self.fail("TODO implement condbr when condition is {s}", .{@tagName(mcv)}),
    }
}

/// Lowers a conditional branch: the `then` body is emitted on the fall-through path
/// and the `else` body at the target of the conditional jump.
///
/// Both bodies start from the same saved state, which is restored (tracking only, no
/// instructions emitted) after each of them.
fn airCondBr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const cond_mcv = try self.resolveInst(pl_op.operand);
    const cond_ty = self.typeOf(pl_op.operand);
    const br_extra = self.air.extraData(Air.CondBr, pl_op.payload);
    // The two bodies are stored back to back after the payload.
    const then_body: []const Air.Inst.Index =
        @ptrCast(self.air.extra[br_extra.end..][0..br_extra.data.then_body_len]);
    const else_body: []const Air.Inst.Index =
        @ptrCast(self.air.extra[br_extra.end + then_body.len ..][0..br_extra.data.else_body_len]);
    const branch_liveness = self.liveness.getCondBr(inst);

    // When the condition dies at this instruction, record the death before lowering
    // the branches, because it influences whether the condition has to be spilled
    // inside them.
    if (self.liveness.operandDies(inst, 0)) {
        if (pl_op.operand.toIndex()) |cond_inst| try self.processDeath(cond_inst);
    }

    self.scope_generation += 1;
    const saved_state = try self.saveState();
    const else_reloc = try self.genCondBrMir(cond_ty, cond_mcv);

    for (branch_liveness.then_deaths) |dying| try self.processDeath(dying);
    try self.genBodyBlock(then_body);
    try self.restoreState(saved_state, &.{}, .{
        .emit_instructions = false,
        .update_tracking = true,
        .resurrect = true,
        .close_scope = true,
    });

    self.performReloc(else_reloc);

    for (branch_liveness.else_deaths) |dying| try self.processDeath(dying);
    try self.genBodyBlock(else_body);
    try self.restoreState(saved_state, &.{}, .{
        .emit_instructions = false,
        .update_tracking = true,
        .resurrect = true,
        .close_scope = true,
    });

    // The operand's death was handled above, so there is nothing left to finish.
}

/// Emits a null check for the optional `opt_mcv` of type `opt_ty`.
///
/// Returns the condition that is true when the optional is null: normally an
/// `.eflags` value owned by `inst`, or the constant `false` for a `.lea_frame`
/// operand. A `.register_overflow` operand is answered from its own condition
/// without emitting code.
fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue {
    const zcu = self.pt.zcu;

    if (opt_mcv == .register_overflow)
        return .{ .eflags = opt_mcv.register_overflow.eflags.negate() };

    try self.spillEflagsIfOccupied();

    const payload_ty = opt_ty.optionalChild(zcu);

    // Byte offset and type of the field that is inspected: the payload itself (or a
    // slice's pointer field) when the optional is represented by its payload,
    // otherwise a `bool` placed right after the payload.
    const some: struct { off: u31, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu)) .{
        .off = 0,
        .ty = if (payload_ty.isSlice(zcu)) payload_ty.slicePtrFieldType(zcu) else payload_ty,
    } else .{
        .off = @intCast(payload_ty.abiSize(zcu)),
        .ty = .bool,
    };

    self.eflags_inst = inst;
    switch (opt_mcv) {
        .none,
        .unreach,
        .dead,
        .undef,
        .immediate,
        .eflags,
        .register_triple,
        .register_quadruple,
        .register_offset,
        .register_overflow,
        .register_mask,
        .lea_direct,
        .lea_got,
        .lea_tlv,
        .lea_symbol,
        .elementwise_regs_then_frame,
        .reserved_frame,
        .air_ref,
        => unreachable,

        .lea_frame => {
            // No flags are produced on this path, so release the claim made above.
            self.eflags_inst = null;
            return .{ .immediate = @intFromBool(false) };
        },

        .register => |opt_reg| {
            if (some.off != 0) {
                // Bit-test the lowest bit of the trailing `bool`.
                assert(some.ty.ip_index == .bool_type);
                const opt_size: u32 = @intCast(opt_ty.abiSize(zcu));
                try self.asmRegisterImmediate(
                    .{ ._, .bt },
                    registerAlias(opt_reg, opt_size),
                    .u(@as(u6, @intCast(some.off * 8))),
                );
                return .{ .eflags = .nc };
            }
            const some_size: u32 = @intCast(some.ty.abiSize(zcu));
            const some_reg = registerAlias(opt_reg, some_size);
            assert(some_size * 8 == some_reg.bitSize());
            try self.asmRegisterRegister(.{ ._, .@"test" }, some_reg, some_reg);
            return .{ .eflags = .z };
        },

        .register_pair => |opt_regs| {
            if (some.off != 0) {
                // Pick the register holding the `bool` and the bit index within it.
                assert(some.ty.ip_index == .bool_type);
                const opt_size: u32 = @intCast(opt_ty.abiSize(zcu));
                try self.asmRegisterImmediate(
                    .{ ._, .bt },
                    registerAlias(opt_regs[some.off / 8], opt_size),
                    .u(@as(u6, @truncate(some.off * 8))),
                );
                return .{ .eflags = .nc };
            }
            const some_size: u32 = @intCast(some.ty.abiSize(zcu));
            const some_reg = registerAlias(opt_regs[0], some_size);
            assert(some_size * 8 == some_reg.bitSize());
            try self.asmRegisterRegister(.{ ._, .@"test" }, some_reg, some_reg);
            return .{ .eflags = .z };
        },

        .memory,
        .load_symbol,
        .load_got,
        .load_direct,
        .load_tlv,
        => {
            // Compute the operand's address into a scratch register first, then
            // compare the inspected field with zero through it.
            const base_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
            const base_lock = self.register_manager.lockRegAssumeUnused(base_reg);
            defer self.register_manager.unlockReg(base_lock);

            try self.genSetReg(base_reg, .usize, opt_mcv.address(), .{});
            const some_size: u32 = @intCast(some.ty.abiSize(zcu));
            try self.asmMemoryImmediate(
                .{ ._, .cmp },
                .{
                    .base = .{ .reg = base_reg },
                    .mod = .{ .rm = .{
                        .size = .fromSize(some_size),
                        .disp = some.off,
                    } },
                },
                .u(0),
            );
            return .{ .eflags = .e };
        },

        .indirect, .load_frame => {
            const some_size: u32 = @intCast(some.ty.abiSize(zcu));
            try self.asmMemoryImmediate(
                .{ ._, .cmp },
                switch (opt_mcv) {
                    .indirect => |reg_off| .{
                        .base = .{ .reg = reg_off.reg },
                        .mod = .{ .rm = .{
                            .size = .fromSize(some_size),
                            .disp = reg_off.off + some.off,
                        } },
                    },
                    .load_frame => |frame_addr| .{
                        .base = .{ .frame = frame_addr.index },
                        .mod = .{ .rm = .{
                            .size = .fromSize(some_size),
                            .disp = frame_addr.off + some.off,
                        } },
                    },
                    else => unreachable,
                },
                .u(0),
            );
            return .{ .eflags = .e };
        },
    }
}

/// Emits a null check for the optional that `ptr_mcv` (of pointer type `ptr_ty`)
/// points to.
///
/// The inspected field is compared with zero through the pointer; the result is the
/// `.e` eflags condition, owned by `inst`.
fn isNullPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
    const zcu = self.pt.zcu;
    const opt_ty = ptr_ty.childType(zcu);
    const payload_ty = opt_ty.optionalChild(zcu);

    try self.spillEflagsIfOccupied();

    // Same field selection as in `isNull`: the payload (or slice pointer) itself, or
    // the `bool` that follows the payload.
    const some: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu)) .{
        .off = 0,
        .ty = if (payload_ty.isSlice(zcu)) payload_ty.slicePtrFieldType(zcu) else payload_ty,
    } else .{
        .off = @intCast(payload_ty.abiSize(zcu)),
        .ty = .bool,
    };

    const base_reg = if (ptr_mcv == .register)
        ptr_mcv.register
    else
        try self.copyToTmpRegister(ptr_ty, ptr_mcv);
    const base_lock = self.register_manager.lockReg(base_reg);
    defer if (base_lock) |lock| self.register_manager.unlockReg(lock);

    const some_size: u32 = @intCast(some.ty.abiSize(zcu));
    try self.asmMemoryImmediate(
        .{ ._, .cmp },
        .{
            .base = .{ .reg = base_reg },
            .mod = .{ .rm = .{
                .size = .fromSize(some_size),
                .disp = some.off,
            } },
        },
        .u(0),
    );

    self.eflags_inst = inst;
    return .{ .eflags = .e };
}

/// Emits an "is error" check for the error-union value `eu_mcv` of type `eu_ty`.
///
/// Returns the constant `0` when the error set is empty; otherwise the error code is
/// compared with zero and the `.a` (above) eflags condition is returned. When
/// `maybe_inst` is non-null it becomes the owner of the flags.
fn isErr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
    const zcu = self.pt.zcu;
    const error_set_ty = eu_ty.errorUnionSet(zcu);
    if (error_set_ty.errorSetIsEmpty(zcu)) return .{ .immediate = 0 }; // always false

    try self.spillEflagsIfOccupied();

    const err_off: u31 = @intCast(codegen.errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu));
    switch (eu_mcv) {
        .register => |eu_reg| {
            const eu_lock = self.register_manager.lockReg(eu_reg);
            defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);

            // Work on a copy so the original value stays intact.
            const err_reg = try self.copyToTmpRegister(eu_ty, eu_mcv);
            if (err_off == 0) {
                try self.truncateRegister(.anyerror, err_reg);
            } else {
                // Shift the error code down to bit 0.
                try self.genShiftBinOpMir(
                    .{ ._r, .sh },
                    eu_ty,
                    .{ .register = err_reg },
                    .u8,
                    .{ .immediate = @as(u6, @intCast(err_off * 8)) },
                );
            }
            try self.genBinOpMir(.{ ._, .cmp }, .anyerror, .{ .register = err_reg }, .{ .immediate = 0 });
        },
        .load_frame => |frame_addr| try self.genBinOpMir(
            .{ ._, .cmp },
            .anyerror,
            .{ .load_frame = .{
                .index = frame_addr.index,
                .off = frame_addr.off + err_off,
            } },
            .{ .immediate = 0 },
        ),
        else => return self.fail("TODO implement isErr for {}", .{eu_mcv}),
    }

    if (maybe_inst) |inst| self.eflags_inst = inst;
    return .{ .eflags = .a };
}

/// Emits an "is error" check for the error union that `ptr_mcv` (of pointer type
/// `ptr_ty`) points to.
///
/// Mirrors `isErr`, but compares the error code with zero directly in memory through
/// the pointer.
fn isErrPtr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
    const zcu = self.pt.zcu;
    const eu_ty = ptr_ty.childType(zcu);
    const error_set_ty = eu_ty.errorUnionSet(zcu);
    if (error_set_ty.errorSetIsEmpty(zcu)) return .{ .immediate = 0 }; // always false

    try self.spillEflagsIfOccupied();

    const base_reg = if (ptr_mcv == .register)
        ptr_mcv.register
    else
        try self.copyToTmpRegister(ptr_ty, ptr_mcv);
    const base_lock = self.register_manager.lockReg(base_reg);
    defer if (base_lock) |lock| self.register_manager.unlockReg(lock);

    const err_off: u31 = @intCast(codegen.errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu));
    try self.asmMemoryImmediate(
        .{ ._, .cmp },
        .{
            .base = .{ .reg = base_reg },
            .mod = .{ .rm = .{
                .size = self.memSize(.anyerror),
                .disp = err_off,
            } },
        },
        .u(0),
    );

    if (maybe_inst) |inst| self.eflags_inst = inst;
    return .{ .eflags = .a };
}

/// Logical negation of `isErr`: inverts the returned condition, or turns the constant
/// `0` into the constant `1`.
fn isNonErr(self: *CodeGen, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
    return switch (try self.isErr(inst, eu_ty, eu_mcv)) {
        .eflags => |cc| negated: {
            assert(cc == .a);
            break :negated .{ .eflags = cc.negate() };
        },
        .immediate => |imm| negated: {
            assert(imm == 0);
            break :negated .{ .immediate = @intFromBool(imm == 0) };
        },
        else => unreachable,
    };
}

/// Logical negation of `isErrPtr`: inverts the returned condition, or turns the
/// constant `0` into the constant `1`.
fn isNonErrPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
    return switch (try self.isErrPtr(inst, ptr_ty, ptr_mcv)) {
        .eflags => |cc| negated: {
            assert(cc == .a);
            break :negated .{ .eflags = cc.negate() };
        },
        .immediate => |imm| negated: {
            assert(imm == 0);
            break :negated .{ .immediate = @intFromBool(imm == 0) };
        },
        else => unreachable,
    };
}

/// AIR entry point: null check on an optional value, via `isNull`.
fn airIsNull(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_null = try self.isNull(inst, operand_ty, operand_mcv);
    return self.finishAir(inst, is_null, .{ operand, .none, .none });
}

/// AIR entry point: null check through a pointer to an optional, via `isNullPtr`.
fn airIsNullPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_null = try self.isNullPtr(inst, operand_ty, operand_mcv);
    return self.finishAir(inst, is_null, .{ operand, .none, .none });
}

/// AIR entry point: non-null check on an optional value; the negation of whatever
/// `isNull` reports.
fn airIsNonNull(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_non_null: MCValue = switch (try self.isNull(inst, operand_ty, operand_mcv)) {
        .eflags => |cc| .{ .eflags = cc.negate() },
        .immediate => |imm| .{ .immediate = @intFromBool(imm == 0) },
        else => unreachable,
    };
    return self.finishAir(inst, is_non_null, .{ operand, .none, .none });
}

/// AIR entry point: non-null check through a pointer to an optional; the negation of
/// the condition `isNullPtr` reports.
fn airIsNonNullPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_non_null: MCValue = switch (try self.isNullPtr(inst, operand_ty, operand_mcv)) {
        .eflags => |cc| .{ .eflags = cc.negate() },
        else => unreachable,
    };
    return self.finishAir(inst, is_non_null, .{ operand, .none, .none });
}

/// AIR entry point: error check on an error-union value, via `isErr`.
fn airIsErr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_err = try self.isErr(inst, operand_ty, operand_mcv);
    return self.finishAir(inst, is_err, .{ operand, .none, .none });
}

/// AIR entry point: error check through a pointer to an error union, via `isErrPtr`.
fn airIsErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_err = try self.isErrPtr(inst, operand_ty, operand_mcv);
    return self.finishAir(inst, is_err, .{ operand, .none, .none });
}

/// AIR entry point: non-error check on an error-union value, via `isNonErr`.
fn airIsNonErr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_non_err = try self.isNonErr(inst, operand_ty, operand_mcv);
    return self.finishAir(inst, is_non_err, .{ operand, .none, .none });
}

/// AIR entry point: non-error check through a pointer to an error union, via
/// `isNonErrPtr`.
fn airIsNonErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const operand = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const operand_mcv = try self.resolveInst(operand);
    const operand_ty = self.typeOf(operand);
    const is_non_err = try self.isNonErrPtr(inst, operand_ty, operand_mcv);
    return self.finishAir(inst, is_non_err, .{ operand, .none, .none });
}

/// Lowers a loop.
///
/// Records the state saved at loop entry together with the index of the next MIR
/// instruction as the loop's jump-back target, then lowers the body. The record is
/// removed from `self.loops` again when this function exits.
fn airLoop(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const loop_extra = self.air.extraData(Air.Block, ty_pl.payload);
    const loop_body: []const Air.Inst.Index =
        @ptrCast(self.air.extra[loop_extra.end..][0..loop_extra.data.body_len]);

    self.scope_generation += 1;
    const entry_state = try self.saveState();

    // All a loop needs is a place to jump back to: the instruction emitted next.
    try self.loops.putNoClobber(self.gpa, inst, .{
        .state = entry_state,
        .target = @intCast(self.mir_instructions.len),
    });
    defer assert(self.loops.remove(inst));

    try self.genBodyBlock(loop_body);
}

/// Lowers a plain block, bracketing it with enter/leave block debug pseudo
/// instructions.
fn airBlock(self: *CodeGen, inst: Air.Inst.Index) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const block_extra = self.air.extraData(Air.Block, ty_pl.payload);
    const block_body: []const Air.Inst.Index =
        @ptrCast(self.air.extra[block_extra.end..][0..block_extra.data.body_len]);

    try self.asmPseudo(.pseudo_dbg_enter_block_none);
    try self.lowerBlock(inst, block_body);
    try self.asmPseudo(.pseudo_dbg_leave_block_none);
}

/// Lowers the body of block `inst`.
///
/// A tracking entry (initially `.unreach`) and a `self.blocks` record are registered
/// for the block before `body` is generated. Afterwards, if any jump relocations
/// were collected for the block, the block's state is restored (tracking only) and
/// every relocation is resolved to the current position.
fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index) !void {
    // All a block needs is a place to jump to: its end.
    const tracking_index = self.inst_tracking.count();
    self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(.unreach));

    self.scope_generation += 1;
    try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() });
    const block_liveness = self.liveness.getBlock(inst);

    try self.genBody(body);

    var block = self.blocks.fetchRemove(inst).?.value;
    defer block.deinit(self.gpa);
    if (block.relocs.items.len > 0) {
        try self.restoreState(block.state, block_liveness.deaths, .{
            .emit_instructions = false,
            .update_tracking = true,
            .resurrect = true,
            .close_scope = true,
        });
        for (block.relocs.items) |reloc| self.performReloc(reloc);
    }

    // The entry registered above must still sit at the index it was inserted at.
    if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == tracking_index);
    const block_tracking = &self.inst_tracking.values()[tracking_index];
    if (self.liveness.isUnused(inst)) try block_tracking.die(self, inst);
    self.getValueIfFree(block_tracking.short, inst);
}

fn lowerSwitchBr(
    self: *CodeGen,
    inst: Air.Inst.Index,
    switch_br: Air.UnwrappedSwitch,
    condition: MCValue,
    condition_dies: bool,
    is_loop: bool,
) !void {
    const zcu = self.pt.zcu;
    const condition_ty = self.typeOf(switch_br.operand);

    const ExpectedContents = extern struct {
        liveness_deaths: [1 << 8 | 1]Air.Inst.Index,
        bigint_limbs: [std.math.big.int.calcTwosCompLimbCount(1 << 8)]std.math.big.Limb,
        relocs: [1 << 6]Mir.Inst.Index,
    };
    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
    const allocator = stack.get();

    self.scope_generation += 1;
    const state = try self.saveState();

    const liveness = try self.liveness.getSwitchBr(allocator, inst, switch_br.cases_len + 1);
    defer allocator.free(liveness.deaths);

    if (!self.mod.pic and self.target.ofmt == .elf) table: {
        var prong_items: u32 = 0;
        var min: ?Value = null;
        var max: ?Value = null;
        {
            var cases_it = switch_br.iterateCases();
            while (cases_it.next()) |case| {
                prong_items += @intCast(case.items.len + case.ranges.len);
                for (case.items) |item| {
                    const val = Value.fromInterned(item.toInterned().?);
                    if (min == null or val.compareHetero(.lt, min.?, zcu)) min = val;
                    if (max == null or val.compareHetero(.gt, max.?, zcu)) max = val;
                }
                for (case.ranges) |range| {
                    const low = Value.fromInterned(range[0].toInterned().?);
                    if (min == null or low.compareHetero(.lt, min.?, zcu)) min = low;
                    const high = Value.fromInterned(range[1].toInterned().?);
                    if
(max == null or high.compareHetero(.gt, max.?, zcu)) max = high; 21350 } 21351 } 21352 } 21353 // This condition also triggers for switches with no non-else prongs and switches on bool. 21354 if (prong_items < 1 << 2 or prong_items > 1 << 8) break :table; 21355 21356 var min_space: Value.BigIntSpace = undefined; 21357 const min_bigint = min.?.toBigInt(&min_space, zcu); 21358 var max_space: Value.BigIntSpace = undefined; 21359 const max_bigint = max.?.toBigInt(&max_space, zcu); 21360 const limbs = try allocator.alloc( 21361 std.math.big.Limb, 21362 @max(min_bigint.limbs.len, max_bigint.limbs.len) + 1, 21363 ); 21364 defer allocator.free(limbs); 21365 const table_len = table_len: { 21366 var table_len_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; 21367 table_len_bigint.sub(max_bigint, min_bigint); 21368 assert(table_len_bigint.positive); // min <= max 21369 break :table_len @as(u11, table_len_bigint.toConst().to(u10) catch break :table) + 1; // no more than a 1024 entry table 21370 }; 21371 assert(prong_items <= table_len); // each prong item introduces at least one unique integer to the range 21372 if (prong_items < table_len >> 2) break :table; // no more than 75% waste 21373 21374 const condition_index = if (condition_dies and condition.isModifiable()) condition else condition_index: { 21375 const condition_index = try self.allocTempRegOrMem(condition_ty, true); 21376 try self.genCopy(condition_ty, condition_index, condition, .{}); 21377 break :condition_index condition_index; 21378 }; 21379 try self.spillEflagsIfOccupied(); 21380 if (min.?.orderAgainstZero(zcu).compare(.neq)) try self.genBinOpMir( 21381 .{ ._, .sub }, 21382 condition_ty, 21383 condition_index, 21384 .{ .air_ref = Air.internedToRef(min.?.toIntern()) }, 21385 ); 21386 const else_reloc = if (switch_br.else_body_len > 0) else_reloc: { 21387 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table_len - 1 }); 21388 
break :else_reloc try self.asmJccReloc(.a, undefined); 21389 } else undefined; 21390 const table_start: u31 = @intCast(self.mir_table.items.len); 21391 { 21392 const condition_index_reg = if (condition_index.isRegister()) 21393 condition_index.getReg().? 21394 else 21395 try self.copyToTmpRegister(.usize, condition_index); 21396 const condition_index_lock = self.register_manager.lockReg(condition_index_reg); 21397 defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock); 21398 try self.truncateRegister(condition_ty, condition_index_reg); 21399 const ptr_size = @divExact(self.target.ptrBitWidth(), 8); 21400 try self.asmMemory(.{ ._, .jmp }, .{ 21401 .base = .table, 21402 .mod = .{ .rm = .{ 21403 .size = .ptr, 21404 .index = registerAlias(condition_index_reg, ptr_size), 21405 .scale = .fromFactor(@intCast(ptr_size)), 21406 .disp = table_start * ptr_size, 21407 } }, 21408 }); 21409 } 21410 const else_reloc_marker: u32 = 0; 21411 assert(self.mir_instructions.len > else_reloc_marker); 21412 try self.mir_table.appendNTimes(self.gpa, else_reloc_marker, table_len); 21413 if (is_loop) try self.loop_switches.putNoClobber(self.gpa, inst, .{ 21414 .start = table_start, 21415 .len = table_len, 21416 .min = min.?, 21417 .else_relocs = if (switch_br.else_body_len > 0) .{ .forward = .empty } else .@"unreachable", 21418 }); 21419 defer if (is_loop) { 21420 var loop_switch_data = self.loop_switches.fetchRemove(inst).?.value; 21421 switch (loop_switch_data.else_relocs) { 21422 .@"unreachable", .backward => {}, 21423 .forward => |*else_relocs| else_relocs.deinit(self.gpa), 21424 } 21425 }; 21426 var cases_it = switch_br.iterateCases(); 21427 while (cases_it.next()) |case| { 21428 { 21429 const table = self.mir_table.items[table_start..][0..table_len]; 21430 for (case.items) |item| { 21431 const val = Value.fromInterned(item.toInterned().?); 21432 var val_space: Value.BigIntSpace = undefined; 21433 const val_bigint = val.toBigInt(&val_space, zcu); 21434 var 
index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; 21435 index_bigint.sub(val_bigint, min_bigint); 21436 table[index_bigint.toConst().to(u10) catch unreachable] = @intCast(self.mir_instructions.len); 21437 } 21438 for (case.ranges) |range| { 21439 var low_space: Value.BigIntSpace = undefined; 21440 const low_bigint = Value.fromInterned(range[0].toInterned().?).toBigInt(&low_space, zcu); 21441 var high_space: Value.BigIntSpace = undefined; 21442 const high_bigint = Value.fromInterned(range[1].toInterned().?).toBigInt(&high_space, zcu); 21443 var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; 21444 index_bigint.sub(low_bigint, min_bigint); 21445 const start = index_bigint.toConst().to(u10) catch unreachable; 21446 index_bigint.sub(high_bigint, min_bigint); 21447 const end = @as(u11, index_bigint.toConst().to(u10) catch unreachable) + 1; 21448 @memset(table[start..end], @intCast(self.mir_instructions.len)); 21449 } 21450 } 21451 21452 for (liveness.deaths[case.idx]) |operand| try self.processDeath(operand); 21453 21454 try self.genBodyBlock(case.body); 21455 try self.restoreState(state, &.{}, .{ 21456 .emit_instructions = false, 21457 .update_tracking = true, 21458 .resurrect = true, 21459 .close_scope = true, 21460 }); 21461 } 21462 if (switch_br.else_body_len > 0) { 21463 const else_body = cases_it.elseBody(); 21464 21465 const else_deaths = liveness.deaths.len - 1; 21466 for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand); 21467 21468 self.performReloc(else_reloc); 21469 if (is_loop) { 21470 const loop_switch_data = self.loop_switches.getPtr(inst).?; 21471 for (loop_switch_data.else_relocs.forward.items) |reloc| self.performReloc(reloc); 21472 loop_switch_data.else_relocs.forward.deinit(self.gpa); 21473 loop_switch_data.else_relocs = .{ .backward = @intCast(self.mir_instructions.len) }; 21474 } 21475 for 
(self.mir_table.items[table_start..][0..table_len]) |*entry| if (entry.* == else_reloc_marker) { 21476 entry.* = @intCast(self.mir_instructions.len); 21477 }; 21478 21479 try self.genBodyBlock(else_body); 21480 try self.restoreState(state, &.{}, .{ 21481 .emit_instructions = false, 21482 .update_tracking = true, 21483 .resurrect = true, 21484 .close_scope = true, 21485 }); 21486 } 21487 return; 21488 } 21489 21490 const signedness = if (condition_ty.isAbiInt(zcu)) condition_ty.intInfo(zcu).signedness else .unsigned; 21491 var cases_it = switch_br.iterateCases(); 21492 while (cases_it.next()) |case| { 21493 var relocs = try allocator.alloc(Mir.Inst.Index, case.items.len + case.ranges.len); 21494 defer allocator.free(relocs); 21495 21496 try self.spillEflagsIfOccupied(); 21497 for (case.items, relocs[0..case.items.len]) |item, *reloc| { 21498 const item_mcv = try self.resolveInst(item); 21499 const cc: Condition = switch (condition) { 21500 .eflags => |cc| switch (item_mcv.immediate) { 21501 0 => cc.negate(), 21502 1 => cc, 21503 else => unreachable, 21504 }, 21505 else => cc: { 21506 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv); 21507 break :cc .e; 21508 }, 21509 }; 21510 reloc.* = try self.asmJccReloc(cc, undefined); 21511 } 21512 21513 for (case.ranges, relocs[case.items.len..]) |range, *reloc| { 21514 const min_mcv = try self.resolveInst(range[0]); 21515 const max_mcv = try self.resolveInst(range[1]); 21516 // `null` means always false. 
21517 const lt_min: ?Condition = switch (condition) { 21518 .eflags => |cc| switch (min_mcv.immediate) { 21519 0 => null, // condition never <0 21520 1 => cc.negate(), 21521 else => unreachable, 21522 }, 21523 else => cc: { 21524 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, min_mcv); 21525 break :cc switch (signedness) { 21526 .unsigned => .b, 21527 .signed => .l, 21528 }; 21529 }, 21530 }; 21531 const lt_min_reloc = if (lt_min) |cc| r: { 21532 break :r try self.asmJccReloc(cc, undefined); 21533 } else null; 21534 // `null` means always true. 21535 const lte_max: ?Condition = switch (condition) { 21536 .eflags => |cc| switch (max_mcv.immediate) { 21537 0 => cc.negate(), 21538 1 => null, // condition always >=1 21539 else => unreachable, 21540 }, 21541 else => cc: { 21542 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, max_mcv); 21543 break :cc switch (signedness) { 21544 .unsigned => .be, 21545 .signed => .le, 21546 }; 21547 }, 21548 }; 21549 // "Success" case is in `reloc`.... 21550 if (lte_max) |cc| { 21551 reloc.* = try self.asmJccReloc(cc, undefined); 21552 } else { 21553 reloc.* = try self.asmJmpReloc(undefined); 21554 } 21555 // ...and "fail" case falls through to next checks. 21556 if (lt_min_reloc) |r| self.performReloc(r); 21557 } 21558 21559 // The jump to skip this case if the conditions all failed. 21560 const skip_case_reloc = try self.asmJmpReloc(undefined); 21561 21562 for (liveness.deaths[case.idx]) |operand| try self.processDeath(operand); 21563 21564 // Relocate all success cases to the body we're about to generate. 21565 for (relocs) |reloc| self.performReloc(reloc); 21566 try self.genBodyBlock(case.body); 21567 try self.restoreState(state, &.{}, .{ 21568 .emit_instructions = false, 21569 .update_tracking = true, 21570 .resurrect = true, 21571 .close_scope = true, 21572 }); 21573 21574 // Relocate the "skip" branch to fall through to the next case. 
21575 self.performReloc(skip_case_reloc); 21576 } 21577 if (switch_br.else_body_len > 0) { 21578 const else_body = cases_it.elseBody(); 21579 21580 const else_deaths = liveness.deaths.len - 1; 21581 for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand); 21582 21583 try self.genBodyBlock(else_body); 21584 try self.restoreState(state, &.{}, .{ 21585 .emit_instructions = false, 21586 .update_tracking = true, 21587 .resurrect = true, 21588 .close_scope = true, 21589 }); 21590 } 21591 } 21592 21593 fn airSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void { 21594 const switch_br = self.air.unwrapSwitch(inst); 21595 const condition = try self.resolveInst(switch_br.operand); 21596 21597 // If the condition dies here in this switch instruction, process 21598 // that death now instead of later as this has an effect on 21599 // whether it needs to be spilled in the branches 21600 const condition_dies = self.liveness.operandDies(inst, 0); 21601 if (condition_dies) { 21602 if (switch_br.operand.toIndex()) |op_inst| try self.processDeath(op_inst); 21603 } 21604 try self.lowerSwitchBr(inst, switch_br, condition, condition_dies, false); 21605 21606 // We already took care of pl_op.operand earlier, so there's nothing left to do 21607 } 21608 21609 fn airLoopSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void { 21610 const switch_br = self.air.unwrapSwitch(inst); 21611 const condition = try self.resolveInst(switch_br.operand); 21612 21613 const mat_cond = if (condition.isModifiable() and 21614 self.reuseOperand(inst, switch_br.operand, 0, condition)) 21615 condition 21616 else mat_cond: { 21617 const mat_cond = try self.allocRegOrMem(inst, true); 21618 try self.genCopy(self.typeOf(switch_br.operand), mat_cond, condition, .{}); 21619 break :mat_cond mat_cond; 21620 }; 21621 self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(mat_cond)); 21622 21623 // If the condition dies here in this switch instruction, process 21624 // that death now instead of later 
as this has an effect on 21625 // whether it needs to be spilled in the branches 21626 if (self.liveness.operandDies(inst, 0)) { 21627 if (switch_br.operand.toIndex()) |op_inst| try self.processDeath(op_inst); 21628 } 21629 21630 self.scope_generation += 1; 21631 const state = try self.saveState(); 21632 21633 try self.loops.putNoClobber(self.gpa, inst, .{ 21634 .state = state, 21635 .target = @intCast(self.mir_instructions.len), 21636 }); 21637 defer assert(self.loops.remove(inst)); 21638 21639 // Stop tracking block result without forgetting tracking info 21640 try self.freeValue(mat_cond); 21641 21642 try self.lowerSwitchBr(inst, switch_br, mat_cond, true, true); 21643 21644 try self.processDeath(inst); 21645 } 21646 21647 fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void { 21648 const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br; 21649 21650 const block_ty = self.typeOfIndex(br.block_inst); 21651 const loop_data = self.loops.getPtr(br.block_inst).?; 21652 if (self.loop_switches.getPtr(br.block_inst)) |table| { 21653 // Process operand death so that it is properly accounted for in the State below. 
21654 const condition_dies = self.liveness.operandDies(inst, 0); 21655 21656 try self.restoreState(loop_data.state, &.{}, .{ 21657 .emit_instructions = true, 21658 .update_tracking = false, 21659 .resurrect = false, 21660 .close_scope = false, 21661 }); 21662 21663 const condition_ty = self.typeOf(br.operand); 21664 const condition = try self.resolveInst(br.operand); 21665 const condition_index = if (condition_dies and condition.isModifiable()) condition else condition_index: { 21666 const condition_index = try self.allocTempRegOrMem(condition_ty, true); 21667 try self.genCopy(condition_ty, condition_index, condition, .{}); 21668 break :condition_index condition_index; 21669 }; 21670 try self.spillEflagsIfOccupied(); 21671 if (table.min.orderAgainstZero(self.pt.zcu).compare(.neq)) try self.genBinOpMir( 21672 .{ ._, .sub }, 21673 condition_ty, 21674 condition_index, 21675 .{ .air_ref = Air.internedToRef(table.min.toIntern()) }, 21676 ); 21677 switch (table.else_relocs) { 21678 .@"unreachable" => {}, 21679 .forward => |*else_relocs| { 21680 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table.len - 1 }); 21681 try else_relocs.append(self.gpa, try self.asmJccReloc(.a, undefined)); 21682 }, 21683 .backward => |else_reloc| { 21684 try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table.len - 1 }); 21685 _ = try self.asmJccReloc(.a, else_reloc); 21686 }, 21687 } 21688 { 21689 const condition_index_reg = if (condition_index.isRegister()) 21690 condition_index.getReg().? 
21691 else 21692 try self.copyToTmpRegister(.usize, condition_index); 21693 const condition_index_lock = self.register_manager.lockReg(condition_index_reg); 21694 defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock); 21695 try self.truncateRegister(condition_ty, condition_index_reg); 21696 const ptr_size = @divExact(self.target.ptrBitWidth(), 8); 21697 try self.asmMemory(.{ ._, .jmp }, .{ 21698 .base = .table, 21699 .mod = .{ .rm = .{ 21700 .size = .ptr, 21701 .index = registerAlias(condition_index_reg, ptr_size), 21702 .scale = .fromFactor(@intCast(ptr_size)), 21703 .disp = @intCast(table.start * ptr_size), 21704 } }, 21705 }); 21706 } 21707 21708 return self.finishAir(inst, .none, .{ br.operand, .none, .none }); 21709 } 21710 21711 const block_tracking = self.inst_tracking.getPtr(br.block_inst).?; 21712 done: { 21713 try self.getValue(block_tracking.short, null); 21714 const src_mcv = try self.resolveInst(br.operand); 21715 21716 if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) { 21717 try self.getValue(block_tracking.short, br.block_inst); 21718 // .long = .none to avoid merging operand and block result stack frames. 21719 const current_tracking: InstTracking = .{ .long = .none, .short = src_mcv }; 21720 try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*); 21721 for (current_tracking.getRegs()) |src_reg| self.register_manager.freeReg(src_reg); 21722 break :done; 21723 } 21724 21725 try self.getValue(block_tracking.short, br.block_inst); 21726 const dst_mcv = block_tracking.short; 21727 try self.genCopy(block_ty, dst_mcv, try self.resolveInst(br.operand), .{}); 21728 break :done; 21729 } 21730 21731 // Process operand death so that it is properly accounted for in the State below. 
21732 if (self.liveness.operandDies(inst, 0)) { 21733 if (br.operand.toIndex()) |op_inst| try self.processDeath(op_inst); 21734 } 21735 21736 try self.restoreState(loop_data.state, &.{}, .{ 21737 .emit_instructions = true, 21738 .update_tracking = false, 21739 .resurrect = false, 21740 .close_scope = false, 21741 }); 21742 21743 // Emit a jump with a relocation. It will be patched up after the block ends. 21744 // Leave the jump offset undefined 21745 _ = try self.asmJmpReloc(loop_data.target); 21746 21747 // Stop tracking block result without forgetting tracking info 21748 try self.freeValue(block_tracking.short); 21749 } 21750 21751 fn performReloc(self: *CodeGen, reloc: Mir.Inst.Index) void { 21752 const next_inst: u32 = @intCast(self.mir_instructions.len); 21753 switch (self.mir_instructions.items(.tag)[reloc]) { 21754 .j, .jmp => {}, 21755 .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) { 21756 .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {}, 21757 else => unreachable, 21758 }, 21759 else => unreachable, 21760 } 21761 self.mir_instructions.items(.data)[reloc].inst.inst = next_inst; 21762 } 21763 21764 fn airBr(self: *CodeGen, inst: Air.Inst.Index) !void { 21765 const zcu = self.pt.zcu; 21766 const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br; 21767 21768 const block_ty = self.typeOfIndex(br.block_inst); 21769 const block_unused = 21770 !block_ty.hasRuntimeBitsIgnoreComptime(zcu) or self.liveness.isUnused(br.block_inst); 21771 const block_tracking = self.inst_tracking.getPtr(br.block_inst).?; 21772 const block_data = self.blocks.getPtr(br.block_inst).?; 21773 const first_br = block_data.relocs.items.len == 0; 21774 const block_result = result: { 21775 if (block_unused) break :result .none; 21776 21777 if (!first_br) try self.getValue(block_tracking.short, null); 21778 const src_mcv = try self.resolveInst(br.operand); 21779 21780 if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) { 21781 if (first_br) 
break :result src_mcv; 21782 21783 try self.getValue(block_tracking.short, br.block_inst); 21784 // .long = .none to avoid merging operand and block result stack frames. 21785 const current_tracking: InstTracking = .{ .long = .none, .short = src_mcv }; 21786 try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*); 21787 for (current_tracking.getRegs()) |src_reg| self.register_manager.freeReg(src_reg); 21788 break :result block_tracking.short; 21789 } 21790 21791 const dst_mcv = if (first_br) try self.allocRegOrMem(br.block_inst, true) else dst: { 21792 try self.getValue(block_tracking.short, br.block_inst); 21793 break :dst block_tracking.short; 21794 }; 21795 try self.genCopy(block_ty, dst_mcv, try self.resolveInst(br.operand), .{}); 21796 break :result dst_mcv; 21797 }; 21798 21799 // Process operand death so that it is properly accounted for in the State below. 21800 if (self.liveness.operandDies(inst, 0)) { 21801 if (br.operand.toIndex()) |op_inst| try self.processDeath(op_inst); 21802 } 21803 21804 if (first_br) { 21805 block_tracking.* = .init(block_result); 21806 try self.saveRetroactiveState(&block_data.state); 21807 } else try self.restoreState(block_data.state, &.{}, .{ 21808 .emit_instructions = true, 21809 .update_tracking = false, 21810 .resurrect = false, 21811 .close_scope = false, 21812 }); 21813 21814 // Emit a jump with a relocation. It will be patched up after the block ends. 
21815 // Leave the jump offset undefined 21816 const jmp_reloc = try self.asmJmpReloc(undefined); 21817 try block_data.relocs.append(self.gpa, jmp_reloc); 21818 21819 // Stop tracking block result without forgetting tracking info 21820 try self.freeValue(block_tracking.short); 21821 } 21822 21823 fn airRepeat(self: *CodeGen, inst: Air.Inst.Index) !void { 21824 const loop_inst = self.air.instructions.items(.data)[@intFromEnum(inst)].repeat.loop_inst; 21825 const repeat_info = self.loops.get(loop_inst).?; 21826 try self.restoreState(repeat_info.state, &.{}, .{ 21827 .emit_instructions = true, 21828 .update_tracking = false, 21829 .resurrect = false, 21830 .close_scope = true, 21831 }); 21832 _ = try self.asmJmpReloc(repeat_info.target); 21833 } 21834 21835 fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { 21836 const pt = self.pt; 21837 const zcu = pt.zcu; 21838 const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; 21839 const extra = self.air.extraData(Air.Asm, ty_pl.payload); 21840 const clobbers_len: u31 = @truncate(extra.data.flags); 21841 var extra_i: usize = extra.end; 21842 const outputs: []const Air.Inst.Ref = 21843 @ptrCast(self.air.extra[extra_i..][0..extra.data.outputs_len]); 21844 extra_i += outputs.len; 21845 const inputs: []const Air.Inst.Ref = @ptrCast(self.air.extra[extra_i..][0..extra.data.inputs_len]); 21846 extra_i += inputs.len; 21847 21848 var result: MCValue = .none; 21849 var args: std.ArrayList(MCValue) = .init(self.gpa); 21850 try args.ensureTotalCapacity(outputs.len + inputs.len); 21851 defer { 21852 for (args.items) |arg| if (arg.getReg()) |reg| self.register_manager.unlockReg(.{ 21853 .tracked_index = RegisterManager.indexOfRegIntoTracked(reg) orelse continue, 21854 }); 21855 args.deinit(); 21856 } 21857 var arg_map: std.StringHashMap(u8) = .init(self.gpa); 21858 try arg_map.ensureTotalCapacity(@intCast(outputs.len + inputs.len)); 21859 defer arg_map.deinit(); 21860 21861 var outputs_extra_i = extra_i; 21862 for 
(outputs) |output| { 21863 const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); 21864 const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); 21865 const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); 21866 // This equation accounts for the fact that even if we have exactly 4 bytes 21867 // for the string, we still use the next u32 for the null terminator. 21868 extra_i += (constraint.len + name.len + (2 + 3)) / 4; 21869 21870 const maybe_inst = switch (output) { 21871 .none => inst, 21872 else => null, 21873 }; 21874 const ty = switch (output) { 21875 .none => self.typeOfIndex(inst), 21876 else => self.typeOf(output).childType(zcu), 21877 }; 21878 const is_read = switch (constraint[0]) { 21879 '=' => false, 21880 '+' => read: { 21881 if (output == .none) return self.fail( 21882 "read-write constraint unsupported for asm result: '{s}'", 21883 .{constraint}, 21884 ); 21885 break :read true; 21886 }, 21887 else => return self.fail("invalid constraint: '{s}'", .{constraint}), 21888 }; 21889 const is_early_clobber = constraint[1] == '&'; 21890 const rest = constraint[@as(usize, 1) + @intFromBool(is_early_clobber) ..]; 21891 const arg_mcv: MCValue = arg_mcv: { 21892 const arg_maybe_reg: ?Register = if (std.mem.eql(u8, rest, "r") or 21893 std.mem.eql(u8, rest, "f") or std.mem.eql(u8, rest, "x")) 21894 registerAlias( 21895 self.register_manager.tryAllocReg(maybe_inst, switch (rest[0]) { 21896 'r' => abi.RegisterClass.gp, 21897 'f' => abi.RegisterClass.x87, 21898 'x' => abi.RegisterClass.sse, 21899 else => unreachable, 21900 }) orelse return self.fail("ran out of registers lowering inline asm", .{}), 21901 @intCast(ty.abiSize(zcu)), 21902 ) 21903 else if (std.mem.eql(u8, rest, "m")) 21904 if (output != .none) null else return self.fail( 21905 "memory constraint unsupported for asm result: '{s}'", 21906 .{constraint}, 21907 ) 21908 else if (std.mem.eql(u8, rest, "g") or 21909 std.mem.eql(u8, rest, "rm") or 
std.mem.eql(u8, rest, "mr") or 21910 std.mem.eql(u8, rest, "r,m") or std.mem.eql(u8, rest, "m,r")) 21911 self.register_manager.tryAllocReg(maybe_inst, abi.RegisterClass.gp) orelse 21912 if (output != .none) 21913 null 21914 else 21915 return self.fail("ran out of registers lowering inline asm", .{}) 21916 else if (std.mem.startsWith(u8, rest, "{") and std.mem.endsWith(u8, rest, "}")) 21917 parseRegName(rest["{".len .. rest.len - "}".len]) orelse 21918 return self.fail("invalid register constraint: '{s}'", .{constraint}) 21919 else if (rest.len == 1 and std.ascii.isDigit(rest[0])) { 21920 const index = std.fmt.charToDigit(rest[0], 10) catch unreachable; 21921 if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{ 21922 constraint, 21923 }); 21924 break :arg_mcv args.items[index]; 21925 } else return self.fail("invalid constraint: '{s}'", .{constraint}); 21926 break :arg_mcv if (arg_maybe_reg) |reg| .{ .register = reg } else arg: { 21927 const ptr_mcv = try self.resolveInst(output); 21928 switch (ptr_mcv) { 21929 .immediate => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_| 21930 break :arg ptr_mcv.deref(), 21931 .register, .register_offset, .lea_frame => break :arg ptr_mcv.deref(), 21932 else => {}, 21933 } 21934 break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(.usize, ptr_mcv) } }; 21935 }; 21936 }; 21937 if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { 21938 _ = self.register_manager.lockReg(reg); 21939 }; 21940 if (!std.mem.eql(u8, name, "_")) 21941 arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len)); 21942 args.appendAssumeCapacity(arg_mcv); 21943 if (output == .none) result = arg_mcv; 21944 if (is_read) try self.load(arg_mcv, self.typeOf(output), .{ .air_ref = output }); 21945 } 21946 21947 for (inputs) |input| { 21948 const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); 21949 const constraint = std.mem.sliceTo(input_bytes, 0); 21950 const 
name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); 21951 // This equation accounts for the fact that even if we have exactly 4 bytes 21952 // for the string, we still use the next u32 for the null terminator. 21953 extra_i += (constraint.len + name.len + (2 + 3)) / 4; 21954 21955 const ty = self.typeOf(input); 21956 const input_mcv = try self.resolveInst(input); 21957 const arg_mcv: MCValue = if (std.mem.eql(u8, constraint, "r") or 21958 std.mem.eql(u8, constraint, "f") or std.mem.eql(u8, constraint, "x")) 21959 arg: { 21960 const rc = switch (constraint[0]) { 21961 'r' => abi.RegisterClass.gp, 21962 'f' => abi.RegisterClass.x87, 21963 'x' => abi.RegisterClass.sse, 21964 else => unreachable, 21965 }; 21966 if (input_mcv.isRegister() and 21967 rc.isSet(RegisterManager.indexOfRegIntoTracked(input_mcv.getReg().?).?)) 21968 break :arg input_mcv; 21969 const reg = try self.register_manager.allocReg(null, rc); 21970 try self.genSetReg(reg, ty, input_mcv, .{}); 21971 break :arg .{ .register = registerAlias(reg, @intCast(ty.abiSize(zcu))) }; 21972 } else if (std.mem.eql(u8, constraint, "i") or std.mem.eql(u8, constraint, "n")) 21973 switch (input_mcv) { 21974 .immediate => |imm| .{ .immediate = imm }, 21975 else => return self.fail("immediate operand requires comptime value: '{s}'", .{ 21976 constraint, 21977 }), 21978 } 21979 else if (std.mem.eql(u8, constraint, "m")) arg: { 21980 switch (input_mcv) { 21981 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_| 21982 break :arg input_mcv, 21983 .indirect, .load_frame => break :arg input_mcv, 21984 .load_symbol, .load_direct, .load_got, .load_tlv => {}, 21985 else => { 21986 const temp_mcv = try self.allocTempRegOrMem(ty, false); 21987 try self.genCopy(ty, temp_mcv, input_mcv, .{}); 21988 break :arg temp_mcv; 21989 }, 21990 } 21991 const addr_reg = self.register_manager.tryAllocReg(null, abi.RegisterClass.gp) orelse { 21992 const temp_mcv = try self.allocTempRegOrMem(ty, false); 21993 try 
self.genCopy(ty, temp_mcv, input_mcv, .{}); 21994 break :arg temp_mcv; 21995 }; 21996 try self.genSetReg(addr_reg, .usize, input_mcv.address(), .{}); 21997 break :arg .{ .indirect = .{ .reg = addr_reg } }; 21998 } else if (std.mem.eql(u8, constraint, "g") or 21999 std.mem.eql(u8, constraint, "rm") or std.mem.eql(u8, constraint, "mr") or 22000 std.mem.eql(u8, constraint, "r,m") or std.mem.eql(u8, constraint, "m,r")) 22001 arg: { 22002 switch (input_mcv) { 22003 .register, .indirect, .load_frame => break :arg input_mcv, 22004 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_| 22005 break :arg input_mcv, 22006 else => {}, 22007 } 22008 const temp_mcv = try self.allocTempRegOrMem(ty, true); 22009 try self.genCopy(ty, temp_mcv, input_mcv, .{}); 22010 break :arg temp_mcv; 22011 } else if (std.mem.eql(u8, constraint, "X")) 22012 input_mcv 22013 else if (std.mem.startsWith(u8, constraint, "{") and std.mem.endsWith(u8, constraint, "}")) arg: { 22014 const reg = parseRegName(constraint["{".len .. 
constraint.len - "}".len]) orelse 22015 return self.fail("invalid register constraint: '{s}'", .{constraint}); 22016 try self.register_manager.getReg(reg, null); 22017 try self.genSetReg(reg, ty, input_mcv, .{}); 22018 break :arg .{ .register = reg }; 22019 } else if (constraint.len == 1 and std.ascii.isDigit(constraint[0])) arg: { 22020 const index = std.fmt.charToDigit(constraint[0], 10) catch unreachable; 22021 if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{constraint}); 22022 try self.genCopy(ty, args.items[index], input_mcv, .{}); 22023 break :arg args.items[index]; 22024 } else return self.fail("invalid constraint: '{s}'", .{constraint}); 22025 if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { 22026 _ = self.register_manager.lockReg(reg); 22027 }; 22028 if (!std.mem.eql(u8, name, "_")) 22029 arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len)); 22030 args.appendAssumeCapacity(arg_mcv); 22031 } 22032 22033 { 22034 var clobber_i: u32 = 0; 22035 while (clobber_i < clobbers_len) : (clobber_i += 1) { 22036 const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); 22037 // This equation accounts for the fact that even if we have exactly 4 bytes 22038 // for the string, we still use the next u32 for the null terminator. 
22039 extra_i += clobber.len / 4 + 1; 22040 22041 if (std.mem.eql(u8, clobber, "") or std.mem.eql(u8, clobber, "memory")) { 22042 // ok, sure 22043 } else if (std.mem.eql(u8, clobber, "cc") or 22044 std.mem.eql(u8, clobber, "flags") or 22045 std.mem.eql(u8, clobber, "eflags") or 22046 std.mem.eql(u8, clobber, "rflags")) 22047 { 22048 try self.spillEflagsIfOccupied(); 22049 } else { 22050 try self.register_manager.getReg(parseRegName(clobber) orelse 22051 return self.fail("invalid clobber: '{s}'", .{clobber}), null); 22052 } 22053 } 22054 } 22055 22056 const Label = struct { 22057 target: Mir.Inst.Index = undefined, 22058 pending_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty, 22059 22060 const Kind = enum { definition, reference }; 22061 22062 fn isValid(kind: Kind, name: []const u8) bool { 22063 for (name, 0..) |c, i| switch (c) { 22064 else => return false, 22065 '$' => if (i == 0) return false, 22066 '.' => {}, 22067 '0'...'9' => if (i == 0) switch (kind) { 22068 .definition => if (name.len != 1) return false, 22069 .reference => { 22070 if (name.len != 2) return false; 22071 switch (name[1]) { 22072 else => return false, 22073 'B', 'F', 'b', 'f' => {}, 22074 } 22075 }, 22076 }, 22077 '@', 'A'...'Z', '_', 'a'...'z' => {}, 22078 }; 22079 return name.len > 0; 22080 } 22081 }; 22082 var labels: std.StringHashMapUnmanaged(Label) = .empty; 22083 defer { 22084 var label_it = labels.valueIterator(); 22085 while (label_it.next()) |label| label.pending_relocs.deinit(self.gpa); 22086 labels.deinit(self.gpa); 22087 } 22088 22089 const asm_source = std.mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len]; 22090 var line_it = std.mem.tokenizeAny(u8, asm_source, "\n\r;"); 22091 next_line: while (line_it.next()) |line| { 22092 var mnem_it = std.mem.tokenizeAny(u8, line, " \t"); 22093 var prefix: encoder.Instruction.Prefix = .none; 22094 const mnem_str = while (mnem_it.next()) |mnem_str| { 22095 if (mnem_str[0] == '#') continue :next_line; 22096 if 
(std.mem.startsWith(u8, mnem_str, "//")) continue :next_line; 22097 if (std.meta.stringToEnum(encoder.Instruction.Prefix, mnem_str)) |pre| { 22098 if (prefix != .none) return self.fail("extra prefix: '{s}'", .{mnem_str}); 22099 prefix = pre; 22100 continue; 22101 } 22102 if (!std.mem.endsWith(u8, mnem_str, ":")) break mnem_str; 22103 const label_name = mnem_str[0 .. mnem_str.len - ":".len]; 22104 if (!Label.isValid(.definition, label_name)) 22105 return self.fail("invalid label: '{s}'", .{label_name}); 22106 const label_gop = try labels.getOrPut(self.gpa, label_name); 22107 if (!label_gop.found_existing) label_gop.value_ptr.* = .{} else { 22108 const anon = std.ascii.isDigit(label_name[0]); 22109 if (!anon and label_gop.value_ptr.pending_relocs.items.len == 0) 22110 return self.fail("redefined label: '{s}'", .{label_name}); 22111 for (label_gop.value_ptr.pending_relocs.items) |pending_reloc| 22112 self.performReloc(pending_reloc); 22113 if (anon) 22114 label_gop.value_ptr.pending_relocs.clearRetainingCapacity() 22115 else 22116 label_gop.value_ptr.pending_relocs.clearAndFree(self.gpa); 22117 } 22118 label_gop.value_ptr.target = @intCast(self.mir_instructions.len); 22119 } else continue; 22120 if (mnem_str[0] == '.') { 22121 if (prefix != .none) return self.fail("prefixed directive: '{s} {s}'", .{ @tagName(prefix), mnem_str }); 22122 prefix = .directive; 22123 } 22124 22125 var mnem_size: ?Memory.Size = if (prefix == .directive) 22126 null 22127 else if (std.mem.endsWith(u8, mnem_str, "b")) 22128 .byte 22129 else if (std.mem.endsWith(u8, mnem_str, "w")) 22130 .word 22131 else if (std.mem.endsWith(u8, mnem_str, "l")) 22132 .dword 22133 else if (std.mem.endsWith(u8, mnem_str, "q") and 22134 (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq"))) 22135 .qword 22136 else if (std.mem.endsWith(u8, mnem_str, "t")) 22137 .tbyte 22138 else 22139 null; 22140 const mnem_tag = while (true) break std.meta.stringToEnum( 22141 
encoder.Instruction.Mnemonic, 22142 mnem_str[0 .. mnem_str.len - @intFromBool(mnem_size != null)], 22143 ) orelse if (mnem_size) |_| { 22144 mnem_size = null; 22145 continue; 22146 } else return self.fail("invalid mnemonic: '{s}'", .{mnem_str}); 22147 if (@as(?Memory.Size, switch (mnem_tag) { 22148 .clflush => .byte, 22149 .fldenv, .fnstenv, .fstenv => .none, 22150 .ldmxcsr, .stmxcsr, .vldmxcsr, .vstmxcsr => .dword, 22151 else => null, 22152 })) |fixed_mnem_size| { 22153 if (mnem_size) |size| if (size != fixed_mnem_size) 22154 return self.fail("invalid size: '{s}'", .{mnem_str}); 22155 mnem_size = fixed_mnem_size; 22156 } 22157 const mnem_name = @tagName(mnem_tag); 22158 const mnem_fixed_tag: Mir.Inst.FixedTag = if (prefix == .directive) 22159 .{ ._, .pseudo } 22160 else for (std.enums.values(Mir.Inst.Fixes)) |fixes| { 22161 const fixes_name = @tagName(fixes); 22162 const space_i = std.mem.indexOfScalar(u8, fixes_name, ' '); 22163 const fixes_prefix = if (space_i) |i| 22164 std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).? 22165 else 22166 .none; 22167 if (fixes_prefix != prefix) continue; 22168 const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..]; 22169 const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; 22170 const mnem_prefix = pattern[0..wildcard_i]; 22171 const mnem_suffix = pattern[wildcard_i + "_".len ..]; 22172 if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue; 22173 if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue; 22174 break .{ fixes, std.meta.stringToEnum( 22175 Mir.Inst.Tag, 22176 mnem_name[mnem_prefix.len .. 
mnem_name.len - mnem_suffix.len], 22177 ) orelse continue }; 22178 } else { 22179 assert(prefix != .none); // no combination of fixes produced a known mnemonic 22180 return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{ 22181 @tagName(prefix), mnem_name, 22182 }); 22183 }; 22184 22185 var ops: [4]Operand = @splat(.none); 22186 var ops_len: usize = 0; 22187 22188 var last_op = false; 22189 var op_it = std.mem.splitScalar(u8, mnem_it.rest(), ','); 22190 next_op: for (&ops) |*op| { 22191 const op_str = while (!last_op) { 22192 const full_str = op_it.next() orelse break :next_op; 22193 const code_str = if (std.mem.indexOfScalar(u8, full_str, '#') orelse 22194 std.mem.indexOf(u8, full_str, "//")) |comment| 22195 code: { 22196 last_op = true; 22197 break :code full_str[0..comment]; 22198 } else full_str; 22199 const trim_str = std.mem.trim(u8, code_str, " \t*"); 22200 if (trim_str.len > 0) break trim_str; 22201 } else break; 22202 if (std.mem.startsWith(u8, op_str, "%%")) { 22203 const colon = std.mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':'); 22204 const reg = parseRegName(op_str["%%".len .. 
colon orelse op_str.len]) orelse 22205 return self.fail("invalid register: '{s}'", .{op_str}); 22206 if (colon) |colon_pos| { 22207 const disp = std.fmt.parseInt(i32, op_str[colon_pos + ":".len ..], 0) catch 22208 return self.fail("invalid displacement: '{s}'", .{op_str}); 22209 op.* = .{ .mem = .{ 22210 .base = .{ .reg = reg }, 22211 .mod = .{ .rm = .{ 22212 .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}), 22213 .disp = disp, 22214 } }, 22215 } }; 22216 } else { 22217 if (mnem_size) |size| if (reg.bitSize() != size.bitSize(self.target)) 22218 return self.fail("invalid register size: '{s}'", .{op_str}); 22219 op.* = .{ .reg = reg }; 22220 } 22221 } else if (std.mem.startsWith(u8, op_str, "%[") and std.mem.endsWith(u8, op_str, "]")) { 22222 const colon = std.mem.indexOfScalarPos(u8, op_str, "%[".len, ':'); 22223 const modifier = if (colon) |colon_pos| 22224 op_str[colon_pos + ":".len .. op_str.len - "]".len] 22225 else 22226 ""; 22227 op.* = switch (args.items[ 22228 arg_map.get(op_str["%[".len .. 
colon orelse op_str.len - "]".len]) orelse 22229 return self.fail("no matching constraint: '{s}'", .{op_str}) 22230 ]) { 22231 .immediate => |imm| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "c")) 22232 .{ .imm = .u(imm) } 22233 else 22234 return self.fail("invalid modifier: '{s}'", .{modifier}), 22235 .register => |reg| if (std.mem.eql(u8, modifier, "")) 22236 .{ .reg = reg } 22237 else 22238 return self.fail("invalid modifier: '{s}'", .{modifier}), 22239 .memory => |addr| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "P")) 22240 .{ .mem = .{ 22241 .base = .{ .reg = .ds }, 22242 .mod = .{ .rm = .{ 22243 .size = mnem_size orelse 22244 return self.fail("unknown size: '{s}'", .{op_str}), 22245 .disp = @intCast(@as(i64, @bitCast(addr))), 22246 } }, 22247 } } 22248 else 22249 return self.fail("invalid modifier: '{s}'", .{modifier}), 22250 .indirect => |reg_off| if (std.mem.eql(u8, modifier, "")) 22251 .{ .mem = .{ 22252 .base = .{ .reg = reg_off.reg }, 22253 .mod = .{ .rm = .{ 22254 .size = mnem_size orelse 22255 return self.fail("unknown size: '{s}'", .{op_str}), 22256 .disp = reg_off.off, 22257 } }, 22258 } } 22259 else 22260 return self.fail("invalid modifier: '{s}'", .{modifier}), 22261 .load_frame => |frame_addr| if (std.mem.eql(u8, modifier, "")) 22262 .{ .mem = .{ 22263 .base = .{ .frame = frame_addr.index }, 22264 .mod = .{ .rm = .{ 22265 .size = mnem_size orelse 22266 return self.fail("unknown size: '{s}'", .{op_str}), 22267 .disp = frame_addr.off, 22268 } }, 22269 } } 22270 else 22271 return self.fail("invalid modifier: '{s}'", .{modifier}), 22272 .lea_got => |sym_index| if (std.mem.eql(u8, modifier, "P")) 22273 .{ .reg = try self.copyToTmpRegister(.usize, .{ .lea_got = sym_index }) } 22274 else 22275 return self.fail("invalid modifier: '{s}'", .{modifier}), 22276 .lea_symbol => |sym_off| if (std.mem.eql(u8, modifier, "P")) 22277 .{ .reg = try self.copyToTmpRegister(.usize, .{ .lea_symbol = sym_off }) } 22278 else 22279 
return self.fail("invalid modifier: '{s}'", .{modifier}), 22280 else => return self.fail("invalid constraint: '{s}'", .{op_str}), 22281 }; 22282 } else if (std.mem.startsWith(u8, op_str, "$")) { 22283 if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| { 22284 if (mnem_size) |size| { 22285 const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize(self.target) - 1)); 22286 if ((if (s < 0) ~s else s) > max) 22287 return self.fail("invalid immediate size: '{s}'", .{op_str}); 22288 } 22289 op.* = .{ .imm = .s(s) }; 22290 } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| { 22291 if (mnem_size) |size| { 22292 const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize(self.target)); 22293 if (u > max) 22294 return self.fail("invalid immediate size: '{s}'", .{op_str}); 22295 } 22296 op.* = .{ .imm = .u(u) }; 22297 } else |_| return self.fail("invalid immediate: '{s}'", .{op_str}); 22298 } else if (std.mem.endsWith(u8, op_str, ")")) { 22299 const open = std.mem.indexOfScalar(u8, op_str, '(') orelse 22300 return self.fail("invalid operand: '{s}'", .{op_str}); 22301 var sib_it = std.mem.splitScalar(u8, op_str[open + "(".len .. 
op_str.len - ")".len], ','); 22302 const base_str = sib_it.next() orelse 22303 return self.fail("invalid memory operand: '{s}'", .{op_str}); 22304 if (base_str.len > 0 and !std.mem.startsWith(u8, base_str, "%%")) 22305 return self.fail("invalid memory operand: '{s}'", .{op_str}); 22306 const index_str = sib_it.next() orelse ""; 22307 if (index_str.len > 0 and !std.mem.startsWith(u8, base_str, "%%")) 22308 return self.fail("invalid memory operand: '{s}'", .{op_str}); 22309 const scale_str = sib_it.next() orelse ""; 22310 if (index_str.len == 0 and scale_str.len > 0) 22311 return self.fail("invalid memory operand: '{s}'", .{op_str}); 22312 const scale: Memory.Scale = if (scale_str.len > 0) 22313 switch (std.fmt.parseInt(u4, scale_str, 10) catch 22314 return self.fail("invalid scale: '{s}'", .{op_str})) { 22315 1 => .@"1", 22316 2 => .@"2", 22317 4 => .@"4", 22318 8 => .@"8", 22319 else => return self.fail("invalid scale: '{s}'", .{op_str}), 22320 } 22321 else 22322 .@"1"; 22323 if (sib_it.next()) |_| return self.fail("invalid memory operand: '{s}'", .{op_str}); 22324 op.* = .{ 22325 .mem = .{ 22326 .base = if (base_str.len > 0) 22327 .{ .reg = parseRegName(base_str["%%".len..]) orelse 22328 return self.fail("invalid base register: '{s}'", .{base_str}) } 22329 else 22330 .none, 22331 .mod = .{ .rm = .{ 22332 .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}), 22333 .index = if (index_str.len > 0) 22334 parseRegName(index_str["%%".len..]) orelse 22335 return self.fail("invalid index register: '{s}'", .{op_str}) 22336 else 22337 .none, 22338 .scale = scale, 22339 .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and 22340 std.mem.endsWith(u8, op_str[0..open], "]")) 22341 disp: { 22342 const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':'); 22343 const modifier = if (colon) |colon_pos| 22344 op_str[colon_pos + ":".len .. 
open - "]".len] 22345 else 22346 ""; 22347 break :disp switch (args.items[ 22348 arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse 22349 return self.fail("no matching constraint: '{s}'", .{op_str}) 22350 ]) { 22351 .immediate => |imm| if (std.mem.eql(u8, modifier, "") or 22352 std.mem.eql(u8, modifier, "c")) 22353 std.math.cast(i32, @as(i64, @bitCast(imm))) orelse 22354 return self.fail("invalid displacement: '{s}'", .{op_str}) 22355 else 22356 return self.fail("invalid modifier: '{s}'", .{modifier}), 22357 else => return self.fail("invalid constraint: '{s}'", .{op_str}), 22358 }; 22359 } else if (open > 0) 22360 std.fmt.parseInt(i32, op_str[0..open], 0) catch 22361 return self.fail("invalid displacement: '{s}'", .{op_str}) 22362 else 22363 0, 22364 } }, 22365 }, 22366 }; 22367 } else if (Label.isValid(.reference, op_str)) { 22368 const anon = std.ascii.isDigit(op_str[0]); 22369 const label_gop = try labels.getOrPut(self.gpa, op_str[0..if (anon) 1 else op_str.len]); 22370 if (!label_gop.found_existing) label_gop.value_ptr.* = .{}; 22371 if (anon and (op_str[1] == 'b' or op_str[1] == 'B') and !label_gop.found_existing) 22372 return self.fail("undefined label: '{s}'", .{op_str}); 22373 const pending_relocs = &label_gop.value_ptr.pending_relocs; 22374 if (if (anon) 22375 op_str[1] == 'f' or op_str[1] == 'F' 22376 else 22377 !label_gop.found_existing or pending_relocs.items.len > 0) 22378 try pending_relocs.append(self.gpa, @intCast(self.mir_instructions.len)); 22379 op.* = .{ .inst = label_gop.value_ptr.target }; 22380 } else return self.fail("invalid operand: '{s}'", .{op_str}); 22381 ops_len += 1; 22382 } else if (op_it.next()) |op_str| return self.fail("extra operand: '{s}'", .{op_str}); 22383 22384 // convert from att syntax to intel syntax 22385 std.mem.reverse(Operand, ops[0..ops_len]); 22386 22387 (if (prefix == .directive) switch (mnem_tag) { 22388 .@".cfi_def_cfa" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none) 22389 
self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, ops[0].reg, ops[1].imm) 22390 else 22391 error.InvalidInstruction, 22392 .@".cfi_def_cfa_register" => if (ops[0] == .reg and ops[1] == .none) 22393 self.asmPseudoRegister(.pseudo_cfi_def_cfa_register_r, ops[0].reg) 22394 else 22395 error.InvalidInstruction, 22396 .@".cfi_def_cfa_offset" => if (ops[0] == .imm and ops[1] == .none) 22397 self.asmPseudoImmediate(.pseudo_cfi_def_cfa_offset_i_s, ops[0].imm) 22398 else 22399 error.InvalidInstruction, 22400 .@".cfi_adjust_cfa_offset" => if (ops[0] == .imm and ops[1] == .none) 22401 self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, ops[0].imm) 22402 else 22403 error.InvalidInstruction, 22404 .@".cfi_offset" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none) 22405 self.asmPseudoRegisterImmediate(.pseudo_cfi_offset_ri_s, ops[0].reg, ops[1].imm) 22406 else 22407 error.InvalidInstruction, 22408 .@".cfi_val_offset" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none) 22409 self.asmPseudoRegisterImmediate(.pseudo_cfi_val_offset_ri_s, ops[0].reg, ops[1].imm) 22410 else 22411 error.InvalidInstruction, 22412 .@".cfi_rel_offset" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none) 22413 self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, ops[0].reg, ops[1].imm) 22414 else 22415 error.InvalidInstruction, 22416 .@".cfi_register" => if (ops[0] == .reg and ops[1] == .reg and ops[2] == .none) 22417 self.asmPseudoRegisterRegister(.pseudo_cfi_register_rr, ops[0].reg, ops[1].reg) 22418 else 22419 error.InvalidInstruction, 22420 .@".cfi_restore" => if (ops[0] == .reg and ops[1] == .none) 22421 self.asmPseudoRegister(.pseudo_cfi_restore_r, ops[0].reg) 22422 else 22423 error.InvalidInstruction, 22424 .@".cfi_undefined" => if (ops[0] == .reg and ops[1] == .none) 22425 self.asmPseudoRegister(.pseudo_cfi_undefined_r, ops[0].reg) 22426 else 22427 error.InvalidInstruction, 22428 .@".cfi_same_value" => if (ops[0] == .reg and ops[1] == .none) 22429 
self.asmPseudoRegister(.pseudo_cfi_same_value_r, ops[0].reg) 22430 else 22431 error.InvalidInstruction, 22432 .@".cfi_remember_state" => if (ops[0] == .none) 22433 self.asmPseudo(.pseudo_cfi_remember_state_none) 22434 else 22435 error.InvalidInstruction, 22436 .@".cfi_restore_state" => if (ops[0] == .none) 22437 self.asmPseudo(.pseudo_cfi_restore_state_none) 22438 else 22439 error.InvalidInstruction, 22440 .@".cfi_escape" => error.InvalidInstruction, 22441 else => unreachable, 22442 } else self.asmOps(mnem_fixed_tag, ops)) catch |err| switch (err) { 22443 error.InvalidInstruction => return self.fail( 22444 "invalid instruction: '{s} {s} {s} {s} {s}'", 22445 .{ 22446 mnem_str, 22447 @tagName(ops[0]), 22448 @tagName(ops[1]), 22449 @tagName(ops[2]), 22450 @tagName(ops[3]), 22451 }, 22452 ), 22453 else => |e| return e, 22454 }; 22455 } 22456 22457 var label_it = labels.iterator(); 22458 while (label_it.next()) |label| if (label.value_ptr.pending_relocs.items.len > 0) 22459 return self.fail("undefined label: '{s}'", .{label.key_ptr.*}); 22460 22461 for (outputs, args.items[0..outputs.len]) |output, arg_mcv| { 22462 const extra_bytes = std.mem.sliceAsBytes(self.air.extra[outputs_extra_i..]); 22463 const constraint = 22464 std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[outputs_extra_i..]), 0); 22465 const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); 22466 // This equation accounts for the fact that even if we have exactly 4 bytes 22467 // for the string, we still use the next u32 for the null terminator. 
outputs_extra_i += (constraint.len + name.len + (2 + 3)) / 4;

        if (output == .none) continue;
        if (arg_mcv != .register) continue;
        if (constraint.len == 2 and std.ascii.isDigit(constraint[1])) continue;
        try self.store(self.typeOf(output), .{ .air_ref = output }, arg_mcv, .{});
    }

    simple: {
        var buf: [Liveness.bpi - 1]Air.Inst.Ref = @splat(.none);
        var buf_index: usize = 0;
        for (outputs) |output| {
            if (output == .none) continue;

            if (buf_index >= buf.len) break :simple;
            buf[buf_index] = output;
            buf_index += 1;
        }
        if (buf_index + inputs.len > buf.len) break :simple;
        @memcpy(buf[buf_index..][0..inputs.len], inputs);
        return self.finishAir(inst, result, buf);
    }
    var bt = self.liveness.iterateBigTomb(inst);
    for (outputs) |output| if (output != .none) try self.feed(&bt, output);
    for (inputs) |input| try self.feed(&bt, input);
    return self.finishAirResult(inst, result);
}

/// Describes which instruction(s) are used to transfer a value between a
/// register and memory. Produced by `moveStrategy`; consumed through `read`
/// (memory -> register) and `write` (register -> memory).
const MoveStrategy = union(enum) {
    /// A single move-style instruction works in both directions.
    move: Mir.Inst.FixedTag,
    /// The value is transferred through the x87 register stack.
    x87_load_store,
    /// Legacy-encoded element insert (load) / extract (store) pair.
    insert_extract: InsertExtract,
    /// VEX-encoded element insert (load) / extract (store) pair.
    vex_insert_extract: InsertExtract,

    const InsertExtract = struct {
        insert: Mir.Inst.FixedTag,
        extract: Mir.Inst.FixedTag,
    };

    /// Emits the instruction(s) that load `src_mem` into `dst_reg`.
    pub fn read(strat: MoveStrategy, self: *CodeGen, dst_reg: Register, src_mem: Memory) !void {
        switch (strat) {
            .move => |tag| try self.asmRegisterMemory(tag, switch (tag[1]) {
                else => dst_reg,
                // `lea` is never emitted with a destination narrower than 32 bits.
                .lea => if (dst_reg.bitSize() >= 32) dst_reg else dst_reg.to32(),
            }, src_mem),
            .x87_load_store => {
                // Load from memory, then store-and-pop into the register one
                // past `dst_reg`; the preceding load is expected to have
                // shifted the x87 stack by one slot, hence the `+ 1`.
                try self.asmMemory(.{ .f_, .ld }, src_mem);
                assert(dst_reg != .st7);
                try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1));
            },
            // Insert the memory element into lane 0 of `dst_reg`.
            .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
                ie.insert,
                dst_reg,
                src_mem,
                .u(0),
            ),
            // The VEX form takes an extra source register; `dst_reg` is used for both.
            .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                ie.insert,
                dst_reg,
                dst_reg,
                src_mem,
                .u(0),
            ),
        }
    }

    /// Emits the instruction(s) that store `src_reg` into `dst_mem`.
    pub fn write(strat: MoveStrategy, self: *CodeGen, dst_mem: Memory, src_reg: Register) !void {
        switch (strat) {
            .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_reg),
            .x87_load_store => {
                try self.asmRegister(.{ .f_, .ld }, src_reg);
                try self.asmMemory(.{ .f_p, .st }, dst_mem);
            },
            // Everything except the legacy word extract can store straight to
            // memory; the legacy word extract can only do so with SSE4.1.
            .insert_extract, .vex_insert_extract => |ie| if (ie.extract[0] != .p_w or self.hasFeature(.sse4_1))
                try self.asmMemoryRegisterImmediate(ie.extract, dst_mem, src_reg, .u(0))
            else if (self.hasFeature(.sse2)) {
                // Extract lane 0 into a general purpose temporary, then store 16 bits.
                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                try self.asmRegisterRegisterImmediate(ie.extract, tmp_reg.to32(), src_reg.to128(), .u(0));
                try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16());
            } else {
                // No usable extract instruction: spill the whole vector register
                // to a 16-byte aligned frame slot and copy its low word through
                // a general purpose temporary.
                const tmp_frame_index = try self.allocFrameIndex(.init(.{
                    .size = 16,
                    .alignment = .@"16",
                }));
                try self.asmMemoryRegister(.{ ._ps, .mova }, .{
                    .base = .{ .frame = tmp_frame_index },
                    .mod = .{ .rm = .{ .size = .xword } },
                }, src_reg.to128());
                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg.to16(), .{
                    .base = .{ .frame = tmp_frame_index },
                    .mod = .{ .rm = .{ .size = .word } },
                });
                try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16());
            },
        }
    }
};

/// Selects the `MoveStrategy` for moving a value of type `ty` between memory
/// and a register of class `class`.
///
/// `aligned` selects the aligned variant of a vector move where one exists.
/// Combinations that are not handled (including everything for the `.mmx` and
/// `.ip` classes) fall through to a "TODO moveStrategy" failure.
fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) !MoveStrategy {
    const pt = self.pt;
    const zcu = pt.zcu;
    switch (class) {
        .general_purpose, .segment => return .{ .move = .{ ._, .mov } },
        .x87 => return .x87_load_store,
        .mmx => {},
        .sse => switch (ty.zigTypeTag(zcu)) {
            else => {
                const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
                assert(std.mem.indexOfNone(abi.Class, classes, &.{
                    .integer, .sse, .sseup, .memory, .float, .float_combine,
                }) == null);
                const abi_size = ty.abiSize(zcu);
                // Values smaller than 4 bytes, or containing an integer class,
                // use integer-domain moves; everything else uses float-domain moves.
                if (abi_size < 4 or
                    std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) {
                    // The legacy byte insert/extract instructions are SSE4.1
                    // instructions, matching the `.sse4_1` check in
                    // `MoveStrategy.write`.
                    1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
                        .insert = .{ .vp_b, .insr },
                        .extract = .{ .vp_b, .extr },
                    } } else if (self.hasFeature(.sse4_1)) return .{ .insert_extract = .{
                        .insert = .{ .p_b, .insr },
                        .extract = .{ .p_b, .extr },
                    } },
                    2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
                        .insert = .{ .vp_w, .insr },
                        .extract = .{ .vp_w, .extr },
                    } } else .{ .insert_extract = .{
                        .insert = .{ .p_w, .insr },
                        .extract = .{ .p_w, .extr },
                    } },
                    3...4 => return .{ .move = if (self.hasFeature(.avx))
                        .{ .v_d, .mov }
                    else
                        .{ ._d, .mov } },
                    5...8 => return .{ .move = if (self.hasFeature(.avx))
                        .{ .v_q, .mov }
                    else
                        .{ ._q, .mov } },
                    9...16 => return .{ .move = if (self.hasFeature(.avx))
                        .{ if (aligned) .v_dqa else .v_dqu, .mov }
                    else if (self.hasFeature(.sse2))
                        .{ if (aligned) ._dqa else ._dqu, .mov }
                    else
                        .{ ._ps, if (aligned) .mova else .movu } },
                    17...32 => if (self.hasFeature(.avx))
                        return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                    else => {},
                } else switch (abi_size) {
                    4 => return .{ .move = if (self.hasFeature(.avx))
                        .{ .v_ss, .mov }
                    else
                        .{ ._ss, .mov } },
                    5...8 => return .{ .move = if (self.hasFeature(.avx))
                        .{ .v_sd, .mov }
                    else if (self.hasFeature(.sse2))
                        .{ ._sd, .mov }
                    else
                        .{ ._ps, .movl } },
                    9...16 => return .{ .move = if (self.hasFeature(.avx))
                        .{ .v_pd, if (aligned) .mova else .movu }
                    else if (self.hasFeature(.sse2))
                        .{ ._pd, if (aligned) .mova else .movu }
                    else
                        .{ ._ps, if (aligned) .mova else .movu } },
                    17...32 => if (self.hasFeature(.avx))
                        return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
                    else => {},
                }
            },
            .float => switch (ty.floatBits(self.target.*)) {
                16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
                    .insert = .{ .vp_w, .insr },
                    .extract = .{ .vp_w, .extr },
                } } else .{ .insert_extract = .{
                    .insert = .{ .p_w, .insr },
                    .extract = .{ .p_w, .extr },
                } },
                32 => return .{ .move = if (self.hasFeature(.avx))
                    .{ .v_ss, .mov }
                else
                    .{ ._ss, .mov } },
                64 => return .{ .move = if (self.hasFeature(.avx))
                    .{ .v_sd, .mov }
                else if (self.hasFeature(.sse2))
                    .{ ._sd, .mov }
                else
                    .{ ._ps, .movl } },
                128 => return .{ .move = if (self.hasFeature(.avx))
                    .{ if (aligned) .v_dqa else .v_dqu, .mov }
                else if (self.hasFeature(.sse2))
                    .{ if (aligned) ._dqa else ._dqu, .mov }
                else
                    .{ ._ps, if (aligned) .mova else .movu } },
                else => {},
            },
            // For vectors, each (element width, length) pair below either fits
            // in 128 bits (always handled) or in 256 bits (handled only with AVX).
            .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
                .bool => switch (ty.vectorLen(zcu)) {
                    33...64 => return .{ .move = if (self.hasFeature(.avx))
                        .{ .v_q, .mov }
                    else
                        .{ ._q, .mov } },
                    else => {},
                },
                .int => switch (ty.childType(zcu).intInfo(zcu).bits) {
                    1...8 => switch (ty.vectorLen(zcu)) {
                        1...16 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        17...32 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    9...16 => switch (ty.vectorLen(zcu)) {
                        1...8 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        9...16 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    17...32 => switch (ty.vectorLen(zcu)) {
                        1...4 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        5...8 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    33...64 => switch (ty.vectorLen(zcu)) {
                        1...2 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        3...4 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    65...128 => switch (ty.vectorLen(zcu)) {
                        1 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        2 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    129...256 => switch (ty.vectorLen(zcu)) {
                        1 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    else => {},
                },
                .pointer, .optional => if (ty.childType(zcu).isPtrAtRuntime(zcu))
                    switch (ty.vectorLen(zcu)) {
                        1...2 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        3...4 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    }
                else
                    unreachable,
                .float => switch (ty.childType(zcu).floatBits(self.target.*)) {
                    16 => switch (ty.vectorLen(zcu)) {
                        1...8 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        9...16 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    32 => switch (ty.vectorLen(zcu)) {
                        1...4 => return .{ .move = if (self.hasFeature(.avx))
                            .{ .v_ps, if (aligned) .mova else .movu }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        5...8 => if (self.hasFeature(.avx))
                            return .{ .move = .{ .v_ps, if (aligned) .mova else .movu } },
                        else => {},
                    },
                    64 => switch (ty.vectorLen(zcu)) {
                        1...2 => return .{ .move = if (self.hasFeature(.avx))
                            .{ .v_pd, if (aligned) .mova else .movu }
                        else
                            .{ ._pd, if (aligned) .mova else .movu } },
                        3...4 => if (self.hasFeature(.avx))
                            return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
                        else => {},
                    },
                    128 => switch (ty.vectorLen(zcu)) {
                        1 => return .{ .move = if (self.hasFeature(.avx))
                            .{ if (aligned) .v_dqa else .v_dqu, .mov }
                        else if (self.hasFeature(.sse2))
                            .{ if (aligned) ._dqa else ._dqu, .mov }
                        else
                            .{ ._ps, if (aligned) .mova else .movu } },
                        2 => if (self.hasFeature(.avx))
                            return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } },
                        else => {},
                    },
                    else => {},
                },
                else => {},
            },
        },
        .ip => {},
    }
    return self.fail("TODO moveStrategy for {}", .{ty.fmt(pt)});
}

/// Options that are threaded through the copy helpers.
const CopyOptions = struct {
    /// When set, `undef` sources are materialized instead of being left
    /// untouched (see the `.undef` handling in `genSetReg`).
    safety: bool = false,
};

/// Copies a value of type `ty` from `src_mcv` into `dst_mcv`.
///
/// If the source lives in a register, that register is locked for the
/// duration of the copy. Destinations that cannot be written to (immediates,
/// flags, address-of values, ...) are `unreachable`.
fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: CopyOptions) InnerError!void {
    const pt = self.pt;

    const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

    switch (dst_mcv) {
        .none,
        .unreach,
        .dead,
        .undef,
        .immediate,
        .eflags,
        .register_overflow,
        .register_mask,
        .lea_direct,
        .lea_got,
        .lea_tlv,
        .lea_frame,
        .lea_symbol,
        .elementwise_regs_then_frame,
        .reserved_frame,
        .air_ref,
        => unreachable, // unmodifiable destination
        .register => |reg| try self.genSetReg(reg, ty, src_mcv, opts),
        // The destination is `reg + off`, so the register itself must receive
        // the source value with `off` subtracted.
        .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) {
            .none,
            .unreach,
            .dead,
            .undef,
            .register_overflow,
            .elementwise_regs_then_frame,
            .reserved_frame,
            => unreachable,
            .immediate,
            .register,
            .register_offset,
            .lea_frame,
            => src_mcv.offset(-dst_reg_off.off),
            else => .{ .register_offset = .{
                .reg = try self.copyToTmpRegister(ty, src_mcv),
                .off = -dst_reg_off.off,
            } },
        }, opts),
        inline .register_pair, .register_triple, .register_quadruple => |dst_regs, dst_tag| {
            // Sources that are handled completely here `return`; the remaining
            // ones yield the (optional) locked address register used by the
            // part-wise copy loop below.
            const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = src_info: switch (src_mcv) {
                .undef, .memory, .indirect, .load_frame => null,
                .register => |src_reg| switch (dst_regs[0].class()) {
                    .general_purpose => switch (src_reg.class()) {
                        else => unreachable,
                        // Split one vector register into two 64-bit general
                        // purpose registers: low qword first, then high qword.
                        .sse => if (ty.abiSize(pt.zcu) <= 16) {
                            if (self.hasFeature(.avx)) {
                                try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128());
                                try self.asmRegisterRegisterImmediate(.{ .vp_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1));
                            } else if (self.hasFeature(.sse4_1)) {
                                try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
                                try self.asmRegisterRegisterImmediate(.{ .p_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1));
                            } else {
                                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
                                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                                defer self.register_manager.unlockReg(tmp_lock);

                                try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
                                // Bring the high qword of the source down into
                                // the low qword of the temporary, and read the
                                // second half from the temporary (reading the
                                // source again would yield the low qword twice).
                                try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128());
                                try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), tmp_reg.to128());
                            }
                            return;
                        } else unreachable,
                    },
                    else => unreachable,
                },
                // Register tuple to register tuple of the same arity.
                dst_tag => |src_regs| {
                    var hazard_regs = src_regs;
                    for (dst_regs, &hazard_regs, 1..) |dst_reg, src_reg, hazard_index| {
                        const dst_id = dst_reg.id();
                        if (dst_id == src_reg.id()) continue;
                        var mir_tag: Mir.Inst.Tag = .mov;
                        // If this destination is still needed as a later source,
                        // swap instead of overwriting it and remember that the
                        // later source now lives in `src_reg`.
                        for (hazard_regs[hazard_index..]) |*hazard_reg| {
                            if (dst_id != hazard_reg.id()) continue;
                            mir_tag = .xchg;
                            hazard_reg.* = src_reg;
                        }
                        try self.asmRegisterRegister(.{ ._, mir_tag }, dst_reg.to64(), src_reg.to64());
                    }
                    return;
                },
                // Materialize the source address in a locked temporary so that
                // each part can be loaded relative to it.
                .load_symbol, .load_direct, .load_got, .load_tlv => {
                    const src_addr_reg =
                        (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64();
                    const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
                    errdefer self.register_manager.unlockReg(src_addr_lock);

                    try self.genSetReg(src_addr_reg, .usize, src_mcv.address(), opts);
                    break :src_info .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
                },
                .air_ref => |src_ref| return self.genCopy(ty, dst_mcv, try self.resolveInst(src_ref), opts),
                else => return self.fail("TODO implement genCopy for {s} of {}", .{
                    @tagName(src_mcv), ty.fmt(pt),
                }),
            };
            defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);

            // Two passes over the parts: the first pass writes every destination
            // register that does not alias the source (or address) register, the
            // second pass writes the single aliasing one, so the source is only
            // clobbered after all other parts have been read from it.
            for ([_]bool{ false, true }) |emit_hazard| {
                var hazard_count: u3 = 0;
                var part_disp: i32 = 0;
                for (dst_regs, try self.splitType(dst_regs.len, ty), 0..) |dst_reg, dst_ty, part_i| {
                    defer part_disp += @intCast(dst_ty.abiSize(pt.zcu));
                    const is_hazard = if (src_mcv.getReg()) |src_reg|
                        dst_reg.id() == src_reg.id()
                    else if (src_info) |info|
                        dst_reg.id() == info.addr_reg.id()
                    else
                        false;
                    if (is_hazard) hazard_count += 1;
                    if (is_hazard != emit_hazard) continue;
                    try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) {
                        .undef => if (opts.safety and part_i > 0) .{ .register = dst_regs[0] } else .undef,
                        dst_tag => |src_regs| .{ .register = src_regs[part_i] },
                        .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(),
                        .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{
                            .reg = src_info.?.addr_reg,
                            .off = part_disp,
                        } },
                        else => unreachable,
                    }, opts);
                }
                switch (hazard_count) {
                    0 => break, // nothing was deferred, the second pass is not needed
                    1 => continue,
                    else => unreachable,
                }
            }
        },
        .indirect => |reg_off| try self.genSetMem(
            .{ .reg = reg_off.reg },
            reg_off.off,
            ty,
            src_mcv,
            opts,
        ),
        .memory, .load_symbol, .load_direct, .load_got, .load_tlv => {
            switch (dst_mcv) {
                // Absolute addresses that fit in a signed 32-bit displacement
                // are addressed directly, without an address register.
                .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr|
                    return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv, opts),
                .load_symbol, .load_direct, .load_got, .load_tlv => {},
                else => unreachable,
            }

            const addr_reg = try self.copyToTmpRegister(.usize, dst_mcv.address());
            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
            defer self.register_manager.unlockReg(addr_lock);

            try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv, opts);
        },
        .load_frame => |frame_addr| try self.genSetMem(
            .{ .frame = frame_addr.index },
            frame_addr.off,
            ty,
            src_mcv,
            opts,
        ),
    }
}

fn genSetReg(
    self:
*CodeGen, 22966 dst_reg: Register, 22967 ty: Type, 22968 src_mcv: MCValue, 22969 opts: CopyOptions, 22970 ) InnerError!void { 22971 const pt = self.pt; 22972 const zcu = pt.zcu; 22973 const abi_size: u32 = @intCast(ty.abiSize(zcu)); 22974 if (ty.bitSize(zcu) > dst_reg.bitSize()) 22975 return self.fail("genSetReg called with a value larger than dst_reg", .{}); 22976 switch (src_mcv) { 22977 .none, 22978 .unreach, 22979 .dead, 22980 .register_overflow, 22981 .elementwise_regs_then_frame, 22982 .reserved_frame, 22983 => unreachable, 22984 .undef => if (opts.safety) switch (dst_reg.class()) { 22985 .general_purpose => switch (abi_size) { 22986 1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), .u(0xAA)), 22987 2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), .u(0xAAAA)), 22988 3...4 => try self.asmRegisterImmediate( 22989 .{ ._, .mov }, 22990 dst_reg.to32(), 22991 .s(@as(i32, @bitCast(@as(u32, 0xAAAAAAAA)))), 22992 ), 22993 5...8 => try self.asmRegisterImmediate( 22994 .{ ._, .mov }, 22995 dst_reg.to64(), 22996 .u(0xAAAAAAAAAAAAAAAA), 22997 ), 22998 else => unreachable, 22999 }, 23000 .segment, .x87, .mmx, .sse => try self.genSetReg(dst_reg, ty, try self.genTypedValue(try pt.undefValue(ty)), opts), 23001 .ip => unreachable, 23002 }, 23003 .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()), 23004 .immediate => |imm| { 23005 if (imm == 0) { 23006 // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit 23007 // register is the fastest way to zero a register. 23008 try self.spillEflagsIfOccupied(); 23009 try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()); 23010 } else if (abi_size > 4 and std.math.cast(u32, imm) != null) { 23011 // 32-bit moves zero-extend to 64-bit. 
23012 try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(imm)); 23013 } else if (abi_size <= 4 and @as(i64, @bitCast(imm)) < 0) { 23014 try self.asmRegisterImmediate( 23015 .{ ._, .mov }, 23016 registerAlias(dst_reg, abi_size), 23017 .s(@intCast(@as(i64, @bitCast(imm)))), 23018 ); 23019 } else { 23020 try self.asmRegisterImmediate( 23021 .{ ._, .mov }, 23022 registerAlias(dst_reg, abi_size), 23023 .u(imm), 23024 ); 23025 } 23026 }, 23027 .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) { 23028 .general_purpose => switch (src_reg.class()) { 23029 .general_purpose => try self.asmRegisterRegister( 23030 .{ ._, .mov }, 23031 registerAlias(dst_reg, abi_size), 23032 registerAlias(src_reg, abi_size), 23033 ), 23034 .segment => try self.asmRegisterRegister( 23035 .{ ._, .mov }, 23036 registerAlias(dst_reg, abi_size), 23037 src_reg, 23038 ), 23039 .x87, .mmx, .ip => unreachable, 23040 .sse => if (self.hasFeature(.sse2)) try self.asmRegisterRegister( 23041 switch (abi_size) { 23042 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, 23043 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, 23044 else => unreachable, 23045 }, 23046 registerAlias(dst_reg, @max(abi_size, 4)), 23047 src_reg.to128(), 23048 ) else { 23049 const frame_index = try self.allocFrameIndex(.init(.{ 23050 .size = 4, 23051 .alignment = .@"4", 23052 })); 23053 try self.asmMemoryRegister(.{ ._ss, .mov }, .{ 23054 .base = .{ .frame = frame_index }, 23055 .mod = .{ .rm = .{ .size = .dword } }, 23056 }, src_reg.to128()); 23057 try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{ 23058 .base = .{ .frame = frame_index }, 23059 .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, 23060 }); 23061 }, 23062 }, 23063 .segment => try self.asmRegisterRegister( 23064 .{ ._, .mov }, 23065 dst_reg, 23066 switch (src_reg.class()) { 23067 .general_purpose, .segment => registerAlias(src_reg, abi_size), 23068 .x87, 
.mmx, .ip => unreachable, 23069 .sse => try self.copyToTmpRegister(ty, src_mcv), 23070 }, 23071 ), 23072 .x87 => switch (src_reg.class()) { 23073 .general_purpose, .segment => unreachable, 23074 .x87 => switch (src_reg) { 23075 .st0 => try self.asmRegister(.{ .f_, .st }, dst_reg), 23076 .st1, .st2, .st3, .st4, .st5, .st6 => { 23077 try self.asmRegister(.{ .f_, .ld }, src_reg); 23078 assert(dst_reg != .st7); 23079 try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1)); 23080 }, 23081 else => unreachable, 23082 }, 23083 .mmx, .sse, .ip => unreachable, 23084 }, 23085 .mmx => unreachable, 23086 .sse => switch (src_reg.class()) { 23087 .general_purpose => try self.asmRegisterRegister( 23088 switch (abi_size) { 23089 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, 23090 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, 23091 else => unreachable, 23092 }, 23093 dst_reg.to128(), 23094 registerAlias(src_reg, @max(abi_size, 4)), 23095 ), 23096 .segment => try self.genSetReg( 23097 dst_reg, 23098 ty, 23099 .{ .register = try self.copyToTmpRegister(ty, src_mcv) }, 23100 opts, 23101 ), 23102 .x87, .mmx, .ip => unreachable, 23103 .sse => try self.asmRegisterRegister( 23104 @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) { 23105 else => switch (abi_size) { 23106 1...16 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else .{ ._dqa, .mov }, 23107 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null, 23108 else => null, 23109 }, 23110 .float => switch (ty.scalarType(zcu).floatBits(self.target.*)) { 23111 16, 128 => switch (abi_size) { 23112 2...16 => if (self.hasFeature(.avx)) 23113 .{ .v_dqa, .mov } 23114 else 23115 .{ ._dqa, .mov }, 23116 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null, 23117 else => null, 23118 }, 23119 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova }, 23120 64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ 
._pd, .mova }, 23121 80 => null, 23122 else => unreachable, 23123 }, 23124 }) orelse return self.fail("TODO implement genSetReg for {}", .{ty.fmt(pt)}), 23125 registerAlias(dst_reg, abi_size), 23126 registerAlias(src_reg, abi_size), 23127 ), 23128 }, 23129 .ip => unreachable, 23130 }, 23131 inline .register_pair, 23132 .register_triple, 23133 .register_quadruple, 23134 => |src_regs| switch (dst_reg.class()) { 23135 .general_purpose => switch (src_regs[0].class()) { 23136 .general_purpose => try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), 23137 else => unreachable, 23138 }, 23139 .sse => switch (src_regs[0].class()) { 23140 .general_purpose => if (abi_size <= 16) { 23141 if (self.hasFeature(.avx)) { 23142 try self.asmRegisterRegister(.{ .v_q, .mov }, dst_reg.to128(), src_regs[0].to64()); 23143 try self.asmRegisterRegisterRegisterImmediate( 23144 .{ .vp_q, .insr }, 23145 dst_reg.to128(), 23146 dst_reg.to128(), 23147 src_regs[1].to64(), 23148 .u(1), 23149 ); 23150 } else if (self.hasFeature(.sse4_1)) { 23151 try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64()); 23152 try self.asmRegisterRegisterImmediate(.{ .p_q, .insr }, dst_reg.to128(), src_regs[1].to64(), .u(1)); 23153 } else { 23154 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); 23155 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 23156 defer self.register_manager.unlockReg(tmp_lock); 23157 23158 try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64()); 23159 try self.asmRegisterRegister(.{ ._q, .mov }, tmp_reg.to128(), src_regs[1].to64()); 23160 try self.asmRegisterRegister(.{ ._ps, .movlh }, dst_reg.to128(), tmp_reg.to128()); 23161 } 23162 } else unreachable, 23163 else => unreachable, 23164 }, 23165 else => unreachable, 23166 }, 23167 .register_offset, 23168 .indirect, 23169 .load_frame, 23170 .lea_frame, 23171 => try @as(MoveStrategy, switch (src_mcv) { 23172 .register_offset => 
|reg_off| switch (reg_off.off) { 23173 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }, opts), 23174 else => .{ .move = .{ ._, .lea } }, 23175 }, 23176 .indirect => try self.moveStrategy(ty, dst_reg.class(), false), 23177 .load_frame => |frame_addr| try self.moveStrategy( 23178 ty, 23179 dst_reg.class(), 23180 self.getFrameAddrAlignment(frame_addr).compare(.gte, .fromLog2Units( 23181 std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), 23182 )), 23183 ), 23184 .lea_frame => .{ .move = .{ ._, .lea } }, 23185 else => unreachable, 23186 }).read(self, registerAlias(dst_reg, abi_size), switch (src_mcv) { 23187 .register_offset, .indirect => |reg_off| .{ 23188 .base = .{ .reg = reg_off.reg.to64() }, 23189 .mod = .{ .rm = .{ 23190 .size = self.memSize(ty), 23191 .disp = reg_off.off, 23192 } }, 23193 }, 23194 .load_frame, .lea_frame => |frame_addr| .{ 23195 .base = .{ .frame = frame_addr.index }, 23196 .mod = .{ .rm = .{ 23197 .size = self.memSize(ty), 23198 .disp = frame_addr.off, 23199 } }, 23200 }, 23201 else => unreachable, 23202 }), 23203 .register_mask => |src_reg_mask| { 23204 assert(src_reg_mask.reg.class() == .sse); 23205 const has_avx = self.hasFeature(.avx); 23206 const bits_reg = switch (dst_reg.class()) { 23207 .general_purpose => dst_reg, 23208 else => try self.register_manager.allocReg(null, abi.RegisterClass.gp), 23209 }; 23210 const bits_lock = self.register_manager.lockReg(bits_reg); 23211 defer if (bits_lock) |lock| self.register_manager.unlockReg(lock); 23212 23213 const pack_reg = switch (src_reg_mask.info.scalar) { 23214 else => src_reg_mask.reg, 23215 .word => try self.register_manager.allocReg(null, abi.RegisterClass.sse), 23216 }; 23217 const pack_lock = self.register_manager.lockReg(pack_reg); 23218 defer if (pack_lock) |lock| self.register_manager.unlockReg(lock); 23219 23220 var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(self.target), 8)); 23221 switch 
(src_reg_mask.info.scalar) { 23222 else => {}, 23223 .word => { 23224 const src_alias = registerAlias(src_reg_mask.reg, mask_size); 23225 const pack_alias = registerAlias(pack_reg, mask_size); 23226 if (has_avx) { 23227 try self.asmRegisterRegisterRegister(.{ .vp_b, .ackssw }, pack_alias, src_alias, src_alias); 23228 } else { 23229 try self.asmRegisterRegister(.{ ._dqa, .mov }, pack_alias, src_alias); 23230 try self.asmRegisterRegister(.{ .p_b, .ackssw }, pack_alias, pack_alias); 23231 } 23232 mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable; 23233 }, 23234 } 23235 try self.asmRegisterRegister(.{ switch (src_reg_mask.info.scalar) { 23236 .byte, .word => if (has_avx) .vp_b else .p_b, 23237 .dword => if (has_avx) .v_ps else ._ps, 23238 .qword => if (has_avx) .v_pd else ._pd, 23239 else => unreachable, 23240 }, .movmsk }, bits_reg.to32(), registerAlias(pack_reg, mask_size)); 23241 if (src_reg_mask.info.inverted) try self.asmRegister(.{ ._, .not }, registerAlias(bits_reg, abi_size)); 23242 try self.genSetReg(dst_reg, ty, .{ .register = bits_reg }, .{}); 23243 }, 23244 .memory, .load_symbol, .load_direct, .load_got, .load_tlv => { 23245 switch (src_mcv) { 23246 .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| 23247 return (try self.moveStrategy( 23248 ty, 23249 dst_reg.class(), 23250 ty.abiAlignment(zcu).check(@as(u32, @bitCast(small_addr))), 23251 )).read(self, registerAlias(dst_reg, abi_size), .{ 23252 .base = .{ .reg = .ds }, 23253 .mod = .{ .rm = .{ 23254 .size = self.memSize(ty), 23255 .disp = small_addr, 23256 } }, 23257 }), 23258 .load_symbol => |sym_off| switch (dst_reg.class()) { 23259 .general_purpose => { 23260 assert(sym_off.off == 0); 23261 try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{ 23262 .base = .{ .reloc = sym_off.sym_index }, 23263 .mod = .{ .rm = .{ 23264 .size = self.memSize(ty), 23265 .disp = sym_off.off, 23266 } }, 23267 }); 23268 return; 23269 }, 23270 .segment, 
.mmx, .ip => unreachable, 23271 .x87, .sse => {}, 23272 }, 23273 .load_direct => |sym_index| switch (dst_reg.class()) { 23274 .general_purpose => { 23275 _ = try self.addInst(.{ 23276 .tag = .mov, 23277 .ops = .direct_reloc, 23278 .data = .{ .rx = .{ 23279 .r1 = registerAlias(dst_reg, abi_size), 23280 .payload = try self.addExtra(bits.SymbolOffset{ .sym_index = sym_index }), 23281 } }, 23282 }); 23283 return; 23284 }, 23285 .segment, .mmx, .ip => unreachable, 23286 .x87, .sse => {}, 23287 }, 23288 .load_got, .load_tlv => {}, 23289 else => unreachable, 23290 } 23291 23292 const addr_reg = try self.copyToTmpRegister(.usize, src_mcv.address()); 23293 const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); 23294 defer self.register_manager.unlockReg(addr_lock); 23295 23296 try (try self.moveStrategy(ty, dst_reg.class(), false)).read(self, registerAlias(dst_reg, abi_size), .{ 23297 .base = .{ .reg = addr_reg.to64() }, 23298 .mod = .{ .rm = .{ .size = self.memSize(ty) } }, 23299 }); 23300 }, 23301 .lea_symbol => |sym_off| switch (self.bin_file.tag) { 23302 .elf, .macho => try self.asmRegisterMemory( 23303 .{ ._, .lea }, 23304 dst_reg.to64(), 23305 .{ 23306 .base = .{ .reloc = sym_off.sym_index }, 23307 .mod = .{ .rm = .{ 23308 .size = .qword, 23309 .disp = sym_off.off, 23310 } }, 23311 }, 23312 ), 23313 else => return self.fail("TODO emit symbol sequence on {s}", .{ 23314 @tagName(self.bin_file.tag), 23315 }), 23316 }, 23317 .lea_direct, .lea_got => |sym_index| _ = try self.addInst(.{ 23318 .tag = switch (src_mcv) { 23319 .lea_direct => .lea, 23320 .lea_got => .mov, 23321 else => unreachable, 23322 }, 23323 .ops = switch (src_mcv) { 23324 .lea_direct => .direct_reloc, 23325 .lea_got => .got_reloc, 23326 else => unreachable, 23327 }, 23328 .data = .{ .rx = .{ 23329 .r1 = dst_reg.to64(), 23330 .payload = try self.addExtra(bits.SymbolOffset{ .sym_index = sym_index }), 23331 } }, 23332 }), 23333 .lea_tlv => unreachable, // TODO: remove this 23334 .air_ref => 
|src_ref| try self.genSetReg(dst_reg, ty, try self.resolveInst(src_ref), opts), 23335 } 23336 } 23337 23338 fn genSetMem( 23339 self: *CodeGen, 23340 base: Memory.Base, 23341 disp: i32, 23342 ty: Type, 23343 src_mcv: MCValue, 23344 opts: CopyOptions, 23345 ) InnerError!void { 23346 const pt = self.pt; 23347 const zcu = pt.zcu; 23348 const abi_size: u32 = @intCast(ty.abiSize(zcu)); 23349 const dst_ptr_mcv: MCValue = switch (base) { 23350 .none => .{ .immediate = @bitCast(@as(i64, disp)) }, 23351 .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } }, 23352 .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } }, 23353 .table => unreachable, 23354 .reloc => |sym_index| .{ .lea_symbol = .{ .sym_index = sym_index, .off = disp } }, 23355 }; 23356 switch (src_mcv) { 23357 .none, 23358 .unreach, 23359 .dead, 23360 .elementwise_regs_then_frame, 23361 .reserved_frame, 23362 => unreachable, 23363 .undef => if (opts.safety) try self.genInlineMemset( 23364 dst_ptr_mcv, 23365 src_mcv, 23366 .{ .immediate = abi_size }, 23367 opts, 23368 ), 23369 .immediate => |imm| switch (abi_size) { 23370 1, 2, 4 => { 23371 const immediate: Immediate = switch (if (ty.isAbiInt(zcu)) 23372 ty.intInfo(zcu).signedness 23373 else 23374 .unsigned) { 23375 .signed => .s(@truncate(@as(i64, @bitCast(imm)))), 23376 .unsigned => .u(@as(u32, @intCast(imm))), 23377 }; 23378 try self.asmMemoryImmediate( 23379 .{ ._, .mov }, 23380 .{ .base = base, .mod = .{ .rm = .{ 23381 .size = .fromSize(abi_size), 23382 .disp = disp, 23383 } } }, 23384 immediate, 23385 ); 23386 }, 23387 3, 5...7 => unreachable, 23388 else => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| { 23389 try self.asmMemoryImmediate( 23390 .{ ._, .mov }, 23391 .{ .base = base, .mod = .{ .rm = .{ 23392 .size = .fromSize(abi_size), 23393 .disp = disp, 23394 } } }, 23395 .s(small), 23396 ); 23397 } else { 23398 var offset: i32 = 0; 23399 while (offset < abi_size) : (offset += 4) try 
self.asmMemoryImmediate(
                    .{ ._, .mov },
                    .{ .base = base, .mod = .{ .rm = .{
                        .size = .dword,
                        .disp = disp + offset,
                    } } },
                    // Shift amounts that do not fit in a u6 (64 or more) yield
                    // the sign bits for signed types and zero otherwise.
                    if (ty.isSignedInt(zcu)) .s(
                        @truncate(@as(i64, @bitCast(imm)) >> (std.math.cast(u6, offset * 8) orelse 63)),
                    ) else .u(
                        @as(u32, @truncate(if (std.math.cast(u6, offset * 8)) |shift| imm >> shift else 0)),
                    ),
                );
            },
        },
        .eflags => |cc| try self.asmSetccMemory(cc, .{ .base = base, .mod = .{
            .rm = .{ .size = .byte, .disp = disp },
        } }),
        .register => |src_reg| {
            // For a frame destination whose remaining bytes (after the spill
            // pad) equal `abi_size`, the whole frame ABI size counts as writable.
            const mem_size = switch (base) {
                .frame => |base_fi| mem_size: {
                    assert(disp >= 0);
                    const frame_abi_size = self.frame_allocs.items(.abi_size)[@intFromEnum(base_fi)];
                    const frame_spill_pad = self.frame_allocs.items(.spill_pad)[@intFromEnum(base_fi)];
                    assert(frame_abi_size - frame_spill_pad - disp >= abi_size);
                    break :mem_size if (frame_abi_size - frame_spill_pad - disp == abi_size)
                        frame_abi_size
                    else
                        abi_size;
                },
                else => abi_size,
            };
            const src_alias = registerAlias(src_reg, abi_size);
            const src_size: u32 = @intCast(switch (src_alias.class()) {
                .general_purpose, .segment, .x87, .ip => @divExact(src_alias.bitSize(), 8),
                .mmx, .sse => abi_size,
            });
            const src_align: InternPool.Alignment = .fromNonzeroByteUnits(
                std.math.ceilPowerOfTwoAssert(u32, src_size),
            );
            if (src_size > mem_size) {
                // The register alias is wider than the destination: spill it
                // to a temporary frame slot and copy from there.
                const frame_index = try self.allocFrameIndex(.init(.{
                    .size = src_size,
                    .alignment = src_align,
                }));
                const frame_mcv: MCValue = .{ .load_frame = .{ .index = frame_index } };
                try (try self.moveStrategy(ty, src_alias.class(), true)).write(
                    self,
                    .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{
                        .size = .fromSize(src_size),
                    } } },
                    src_alias,
                );
                try self.genSetMem(base, disp, ty, frame_mcv, opts);
                try self.freeValue(frame_mcv);
            } else try (try self.moveStrategy(ty, src_alias.class(), switch (base) {
                // Third argument: whether the destination is known to be
                // aligned to `src_align`.
                .none => src_align.check(@as(u32, @bitCast(disp))),
                .reg => |reg| switch (reg) {
                    .es, .cs, .ss, .ds => src_align.check(@as(u32, @bitCast(disp))),
                    else => false,
                },
                .frame => |frame_index| self.getFrameAddrAlignment(.{
                    .index = frame_index,
                    .off = disp,
                }).compare(.gte, src_align),
                .table => unreachable,
                .reloc => false,
            })).write(
                self,
                .{ .base = base, .mod = .{ .rm = .{
                    .size = .fromBitSize(@min(
                        self.memSize(ty).bitSize(self.target),
                        src_alias.bitSize(),
                    )),
                    .disp = disp,
                } } },
                src_alias,
            );
        },
        // Multi-register values are stored part by part at increasing offsets.
        inline .register_pair, .register_triple, .register_quadruple => |src_regs| {
            var part_disp: i32 = disp;
            for (try self.splitType(src_regs.len, ty), src_regs) |src_ty, src_reg| {
                try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg }, opts);
                part_disp += @intCast(src_ty.abiSize(zcu));
            }
        },
        // The register goes to the first field / payload, the flag to the
        // second field / the byte following the payload.
        .register_overflow => |ro| switch (ty.zigTypeTag(zcu)) {
            .@"struct" => {
                try self.genSetMem(
                    base,
                    disp + @as(i32, @intCast(ty.structFieldOffset(0, zcu))),
                    ty.fieldType(0, zcu),
                    .{ .register = ro.reg },
                    opts,
                );
                try self.genSetMem(
                    base,
                    disp + @as(i32, @intCast(ty.structFieldOffset(1, zcu))),
                    ty.fieldType(1, zcu),
                    .{ .eflags = ro.eflags },
                    opts,
                );
            },
            .optional => {
                assert(!ty.optionalReprIsPayload(zcu));
                const child_ty = ty.optionalChild(zcu);
                try self.genSetMem(base, disp, child_ty, .{ .register = ro.reg }, opts);
                try self.genSetMem(
                    base,
                    disp + @as(i32, @intCast(child_ty.abiSize(zcu))),
                    .bool,
                    .{ .eflags = ro.eflags },
                    opts,
                );
            },
            else => return self.fail("TODO implement genSetMem for {s} of {}", .{
                @tagName(src_mcv), ty.fmt(pt),
            }),
        },
        .register_offset => |reg_off| {
            const src_reg = self.copyToTmpRegister(ty, src_mcv) catch |err| switch (err) {
                error.OutOfRegisters => {
                    // No temporary available: add the offset to the source
                    // register in place, store it, then subtract it again.
                    // NOTE(review): the lea base here is the `abi_size` alias,
                    // whereas genSetReg uses `.to64()` for its base; confirm
                    // this is intended for values narrower than 8 bytes.
                    const src_reg = registerAlias(reg_off.reg, abi_size);
                    try self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
                        .base = .{ .reg = src_reg },
                        .mod = .{ .rm = .{
                            .size = .qword,
                            .disp = reg_off.off,
                        } },
                    });
                    try self.genSetMem(base, disp, ty, .{ .register = reg_off.reg }, opts);
                    return self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{
                        .base = .{ .reg = src_reg },
                        .mod = .{ .rm = .{
                            .size = .qword,
                            .disp = -reg_off.off,
                        } },
                    });
                },
                else => |e| return e,
            };
            const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
            defer self.register_manager.unlockReg(src_lock);

            try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
        },
        .register_mask => {
            const src_reg = try self.copyToTmpRegister(ty, src_mcv);
            const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
            defer self.register_manager.unlockReg(src_lock);

            try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
        },
        // Register-sized values go through a temporary register; every other
        // non-zero size is copied with genInlineMemcpy.
        .memory,
        .indirect,
        .load_direct,
        .lea_direct,
        .load_got,
        .lea_got,
        .load_tlv,
        .lea_tlv,
        .load_frame,
        .lea_frame,
        .load_symbol,
        .lea_symbol,
        => switch (abi_size) {
            0 => {},
            1, 2, 4, 8 => {
                const src_reg = try self.copyToTmpRegister(ty, src_mcv);
                const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
                defer self.register_manager.unlockReg(src_lock);

                try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts);
            },
            else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }, .{ .no_alias = true }),
        },
        .air_ref => |src_ref| try self.genSetMem(base, disp, ty, try self.resolveInst(src_ref), opts),
    }
}

/// Emits code that copies `len` bytes from the address `src_ptr` to the
/// address `dst_ptr`.
///
/// When `opts.no_alias` is set, both pointers are address values, and `len` is
/// an immediate of 1, 2, 4, 8, 16 or 32, a single load/store pair through a
/// scratch register is tried first (subject to the target checks below and to
/// a scratch register being available). Otherwise rsi, rdi and rcx are spilled
/// and a `rep movsb` is emitted.
fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue, opts: struct {
    no_alias: bool,
}) InnerError!void {
    if (opts.no_alias and dst_ptr.isAddress() and src_ptr.isAddress()) switch (len) {
        else => {},
        .immediate => |len_imm| switch (len_imm) {
            else => {},
            1 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
                try self.asmRegisterMemory(.{ ._, .mov }, reg.to8(), try src_ptr.deref().mem(self, .{ .size = .byte }));
                try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .byte }), reg.to8());
                return;
            },
            2 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
                try self.asmRegisterMemory(.{ ._, .mov }, reg.to16(), try src_ptr.deref().mem(self, .{ .size = .word }));
                try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .word }), reg.to16());
                return;
            },
            4 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
                try self.asmRegisterMemory(.{ ._, .mov }, reg.to32(), try src_ptr.deref().mem(self, .{ .size = .dword }));
                try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .dword }), reg.to32());
                return;
            },
            8 => if (self.target.cpu.arch == .x86_64) {
                if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
                    try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword }));
                    try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword }), reg.to64());
                    return;
                }
            },
            // 16 bytes: unaligned vector move, picked by available feature set.
            16 => if (self.hasFeature(.avx)) {
                if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
                    try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword }));
                    try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
                    return;
                }
            } else if (self.hasFeature(.sse2)) {
                if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
                    try self.asmRegisterMemory(.{ ._dqu, .mov }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword }));
                    try self.asmMemoryRegister(.{ ._dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
                    return;
                }
            } else if (self.hasFeature(.sse)) {
                if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
                    try self.asmRegisterMemory(.{ ._ps, .movu }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword }));
                    try self.asmMemoryRegister(.{ ._ps, .movu }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
                    return;
                }
            },
            32 => if (self.hasFeature(.avx)) {
                if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
                    try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to256(), try src_ptr.deref().mem(self, .{ .size = .yword }));
                    try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .yword }), reg.to256());
                    return;
                }
            },
        },
    };
    // General path, also reached when a fast path could not get a register.
    try self.spillRegisters(&.{ .rsi, .rdi, .rcx });
    try self.genSetReg(.rsi, .usize, src_ptr, .{});
    try self.genSetReg(.rdi, .usize, dst_ptr, .{});
    try self.genSetReg(.rcx, .usize, len, .{});
    try self.asmOpOnly(.{ .@"rep _sb", .mov });
}

/// Emits code that fills `len` bytes at the address `dst_ptr` with the byte
/// `value`: rdi, al and rcx are spilled and loaded, then a `rep stosb` is
/// emitted. `opts` is forwarded only to the load of `value`.
fn genInlineMemset(
    self: *CodeGen,
    dst_ptr: MCValue,
    value: MCValue,
    len: MCValue,
    opts: CopyOptions,
) InnerError!void {
    try self.spillRegisters(&.{ .rdi, .al, .rcx });
    try self.genSetReg(.rdi, .usize, dst_ptr, .{});
    try self.genSetReg(.al, .u8, value, opts);
try self.genSetReg(.rcx, .usize, len, .{});
    try self.asmOpOnly(.{ .@"rep _sb", .sto });
}

/// Emits a reference to the extern symbol `callee` (optionally from `lib`).
///
/// Only COFF output is handled: the import is loaded into the calling
/// convention's linker scratch register, and for `tag == .call` an indirect
/// call through that register follows. `tag` must be `.mov` or `.call`.
/// Every other output format fails with a TODO error.
fn genExternSymbolRef(
    self: *CodeGen,
    comptime tag: Mir.Inst.Tag,
    lib: ?[]const u8,
    callee: []const u8,
) InnerError!void {
    if (self.bin_file.cast(.coff)) |coff_file| {
        const global_index = try coff_file.getGlobalSymbol(callee, lib);
        const scratch_reg = abi.getCAbiLinkerScratchReg(self.fn_type.fnCallingConvention(self.pt.zcu));
        _ = try self.addInst(.{
            .tag = .mov,
            .ops = .import_reloc,
            .data = .{ .rx = .{
                .r1 = scratch_reg,
                .payload = try self.addExtra(bits.SymbolOffset{
                    .sym_index = link.File.Coff.global_symbol_bit | global_index,
                }),
            } },
        });
        switch (tag) {
            .mov => {},
            .call => try self.asmRegister(.{ ._, .call }, scratch_reg),
            else => unreachable,
        }
    } else return self.fail("TODO implement calling extern functions", .{});
}

/// Emits a reference to the lazily created symbol `lazy_sym`, using `reg` as
/// the destination or scratch register.
///
/// `tag` must be `.lea`, `.mov` or `.call`; anything else is unreachable. In
/// the paths that go through `genSetReg`, `.lea` and `.call` request the
/// symbol's address, `.mov` requests a load from it, and `.call` then calls
/// through `reg`. Handles ELF, Plan 9, COFF and Mach-O outputs; other formats
/// fail with a TODO error.
fn genLazySymbolRef(
    self: *CodeGen,
    comptime tag: Mir.Inst.Tag,
    reg: Register,
    lazy_sym: link.File.LazySymbol,
) InnerError!void {
    const pt = self.pt;
    if (self.bin_file.cast(.elf)) |elf_file| {
        const zo = elf_file.zigObjectPtr().?;
        const sym_index = zo.getOrCreateMetadataForLazySymbol(elf_file, pt, lazy_sym) catch |err|
            return self.fail("{s} creating lazy symbol", .{@errorName(err)});
        if (self.mod.pic) {
            switch (tag) {
                .lea, .call => try self.genSetReg(reg, .usize, .{
                    .lea_symbol = .{ .sym_index = sym_index },
                }, .{}),
                .mov => try self.genSetReg(reg, .usize, .{
                    .load_symbol = .{ .sym_index = sym_index },
                }, .{}),
                else => unreachable,
            }
            switch (tag) {
                .lea, .mov => {},
                .call => try self.asmRegister(.{ ._, .call }, reg),
                else => unreachable,
            }
        } else switch (tag) {
            // Non-PIC: reference the symbol directly through a relocation.
            .lea, .mov => try self.asmRegisterMemory(.{ ._, tag }, reg.to64(), .{
                .base = .{ .reloc = sym_index },
                .mod = .{ .rm = .{ .size = .qword } },
            }),
            .call => try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym_index })),
            else => unreachable,
        }
    } else if (self.bin_file.cast(.plan9)) |p9_file| {
        const atom_index = p9_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
            return self.fail("{s} creating lazy symbol", .{@errorName(err)});
        var atom = p9_file.getAtom(atom_index);
        _ = atom.getOrCreateOffsetTableEntry(p9_file);
        const got_addr = atom.getOffsetTableAddress(p9_file);
        const got_mem: Memory = .{
            .base = .{ .reg = .ds },
            .mod = .{ .rm = .{
                .size = .qword,
                .disp = @intCast(got_addr),
            } },
        };
        switch (tag) {
            .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem),
            .call => try self.asmMemory(.{ ._, .call }, got_mem),
            else => unreachable,
        }
        switch (tag) {
            .lea, .call => {},
            // `.mov` additionally loads through the address now held in `reg`.
            .mov => try self.asmRegisterMemory(
                .{ ._, tag },
                reg.to64(),
                .initSib(.qword, .{ .base = .{ .reg = reg.to64() } }),
            ),
            else => unreachable,
        }
    } else if (self.bin_file.cast(.coff)) |coff_file| {
        const atom_index = coff_file.getOrCreateAtomForLazySymbol(pt, lazy_sym) catch |err|
            return self.fail("{s} creating lazy symbol", .{@errorName(err)});
        const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
        switch (tag) {
            .lea, .call => try self.genSetReg(reg, .usize, .{ .lea_got = sym_index }, .{}),
            .mov => try self.genSetReg(reg, .usize, .{ .load_got = sym_index }, .{}),
            else => unreachable,
        }
        switch (tag) {
            .lea, .mov => {},
            .call => try self.asmRegister(.{ ._, .call }, reg),
            else => unreachable,
        }
    } else if (self.bin_file.cast(.macho)) |macho_file| {
        const zo = macho_file.getZigObject().?;
        const sym_index = zo.getOrCreateMetadataForLazySymbol(macho_file, pt, lazy_sym) catch |err|
            return self.fail("{s} creating lazy symbol", .{@errorName(err)});
        const sym = zo.symbols.items[sym_index];
        switch (tag) {
            .lea, .call => try self.genSetReg(reg, .usize, .{
                .lea_symbol = .{ .sym_index = sym.nlist_idx },
            }, .{}),
            .mov => try self.genSetReg(reg, .usize, .{
                .load_symbol = .{ .sym_index = sym.nlist_idx },
            }, .{}),
            else => unreachable,
        }
        switch (tag) {
            .lea, .mov => {},
            .call => try self.asmRegister(.{ ._, .call }, reg),
            else => unreachable,
        }
    } else {
        return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)});
    }
}

/// Lowers the AIR int-from-pointer instruction `inst`: the operand's location
/// is reused when `reuseOperand` allows it, otherwise the operand is copied
/// into a freshly allocated register or memory location.
fn airIntFromPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
    const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
    const result = result: {
        // TODO: handle case where the operand is a slice not a raw pointer
        const src_mcv = try self.resolveInst(un_op);
        if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv;

        const dst_mcv = try self.allocRegOrMem(inst, true);
        const dst_ty = self.typeOfIndex(inst);
        try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ un_op, .none, .none });
}

/// Lowers the AIR bitcast instruction `inst`.
///
/// Pointer-to-pointer casts reuse the operand where possible. Otherwise the
/// operand is reused or copied into a new location, after which the bits of
/// the highest limb beyond the destination's bit size are truncated and any
/// remaining limbs up to the ABI size are filled with the sign or zero
/// extension.
fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const dst_ty = self.typeOfIndex(inst);
    const src_ty = self.typeOf(ty_op.operand);

    const result = result: {
        const src_mcv = try self.resolveInst(ty_op.operand);
        if (dst_ty.isPtrAtRuntime(zcu) and src_ty.isPtrAtRuntime(zcu)) switch (src_mcv) {
            .lea_frame => break :result src_mcv,
            else => if
(self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv,
        };

        const dst_rc = self.regSetForType(dst_ty);
        const src_rc = self.regSetForType(src_ty);

        const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
        defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

        // Reuse the operand when the destination register set covers the
        // source's and the destination is not larger; otherwise copy, using
        // the smaller of the two types as the copy type.
        const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and
            self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
            const dst_mcv = try self.allocRegOrMem(inst, true);
            try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) {
                .lt => dst_ty,
                .eq => if (!dst_mcv.isBase() or src_mcv.isBase()) dst_ty else src_ty,
                .gt => src_ty,
            }, dst_mcv, src_mcv, .{});
            break :dst dst_mcv;
        };

        if (dst_ty.isRuntimeFloat()) break :result dst_mcv;

        if (dst_ty.isAbiInt(zcu) and src_ty.isAbiInt(zcu) and
            dst_ty.intInfo(zcu).signedness == src_ty.intInfo(zcu).signedness) break :result dst_mcv;

        // No fix-up is needed when the type has no padding bits or is a vector.
        const abi_size = dst_ty.abiSize(zcu);
        const bit_size = dst_ty.bitSize(zcu);
        if (abi_size * 8 <= bit_size or dst_ty.isVector(zcu)) break :result dst_mcv;

        // Truncate the highest 64-bit limb that holds value bits.
        const dst_limbs_len = std.math.divCeil(u31, @intCast(bit_size), 64) catch unreachable;
        const high_mcv: MCValue = switch (dst_mcv) {
            .register => |dst_reg| .{ .register = dst_reg },
            .register_pair => |dst_regs| .{ .register = dst_regs[1] },
            else => dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
        };
        const high_reg = if (high_mcv.isRegister())
            high_mcv.getReg().?
        else
            try self.copyToTmpRegister(.usize, high_mcv);
        const high_lock = self.register_manager.lockReg(high_reg);
        defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
        try self.truncateRegister(dst_ty, high_reg);
        // A limb that lives in memory is written back after truncation.
        if (!high_mcv.isRegister()) try self.genCopy(
            if (abi_size <= 8) dst_ty else .usize,
            high_mcv,
            .{ .register = high_reg },
            .{},
        );
        var offset = dst_limbs_len * 8;
        if (offset < abi_size) {
            // Fill the limbs past the value bits with the extension value.
            const dst_signedness: std.builtin.Signedness = if (dst_ty.isAbiInt(zcu))
                dst_ty.intInfo(zcu).signedness
            else
                .unsigned;
            const ext_mcv: MCValue = ext_mcv: switch (dst_signedness) {
                .signed => {
                    // Arithmetic shift by 63 turns the high limb into its sign fill.
                    try self.asmRegisterImmediate(.{ ._r, .sa }, high_reg, .u(63));
                    break :ext_mcv .{ .register = high_reg };
                },
                .unsigned => .{ .immediate = 0 },
            };
            while (offset < abi_size) : (offset += 8) {
                const limb_mcv: MCValue = switch (dst_mcv) {
                    .register => |dst_reg| .{ .register = dst_reg },
                    .register_pair => |dst_regs| .{ .register = dst_regs[@divExact(offset, 8)] },
                    else => dst_mcv.address().offset(offset).deref(),
                };
                const limb_lock = if (limb_mcv.isRegister())
                    self.register_manager.lockReg(limb_mcv.getReg().?)
                else
                    null;
                defer if (limb_lock) |lock| self.register_manager.unlockReg(lock);
                try self.genCopy(.usize, limb_mcv, ext_mcv, .{});
            }
        }
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

/// Lowers the AIR array-to-slice instruction `inst`.
///
/// A frame slot sized for the slice type is allocated; the operand pointer is
/// stored at offset 0 and the array length, as an immediate, at the offset
/// equal to the pointer type's ABI size. The result is that frame slot.
fn airArrayToSlice(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const slice_ty = self.typeOfIndex(inst);
    const ptr_ty = self.typeOf(ty_op.operand);
    const ptr = try self.resolveInst(ty_op.operand);
    const array_ty = ptr_ty.childType(zcu);
    const array_len = array_ty.arrayLen(zcu);

    const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu));
    try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr, .{});
    try self.genSetMem(
        .{ .frame = frame_index },
        @intCast(ptr_ty.abiSize(zcu)),
        .usize,
        .{ .immediate = array_len },
        .{},
    );

    const result = MCValue{ .load_frame = .{ .index = frame_index } };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

fn airFloatFromInt(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;

    const dst_ty = self.typeOfIndex(inst);
    const dst_bits = dst_ty.floatBits(self.target.*);

    const src_ty = self.typeOf(ty_op.operand);
    const src_bits: u32 = @intCast(src_ty.bitSize(zcu));
    const src_signedness =
        if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned;
    const src_size = std.math.divCeil(u32, @max(switch (src_signedness) {
        .signed => src_bits,
        .unsigned => src_bits + 1,
    }, 32), 8) catch unreachable;

    const result = result: {
        if
(switch (dst_bits) { 23932 16, 80, 128 => true, 23933 32, 64 => src_size > 8, 23934 else => unreachable, 23935 }) { 23936 if (src_bits > 128) return self.fail("TODO implement airFloatFromInt from {} to {}", .{ 23937 src_ty.fmt(pt), dst_ty.fmt(pt), 23938 }); 23939 23940 var callee_buf: ["__floatun?i?f".len]u8 = undefined; 23941 break :result try self.genCall(.{ .lib = .{ 23942 .return_type = dst_ty.toIntern(), 23943 .param_types = &.{src_ty.toIntern()}, 23944 .callee = std.fmt.bufPrint(&callee_buf, "__float{s}{c}i{c}f", .{ 23945 switch (src_signedness) { 23946 .signed => "", 23947 .unsigned => "un", 23948 }, 23949 intCompilerRtAbiName(src_bits), 23950 floatCompilerRtAbiName(dst_bits), 23951 }) catch unreachable, 23952 } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); 23953 } 23954 23955 const src_mcv = try self.resolveInst(ty_op.operand); 23956 const src_reg = if (src_mcv.isRegister()) 23957 src_mcv.getReg().? 23958 else 23959 try self.copyToTmpRegister(src_ty, src_mcv); 23960 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); 23961 defer self.register_manager.unlockReg(src_lock); 23962 23963 if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg); 23964 23965 const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty)); 23966 const dst_mcv = MCValue{ .register = dst_reg }; 23967 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); 23968 defer self.register_manager.unlockReg(dst_lock); 23969 23970 const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(zcu)) { 23971 .float => switch (dst_ty.floatBits(self.target.*)) { 23972 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 }, 23973 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 }, 23974 16, 80, 128 => null, 23975 else => unreachable, 23976 }, 23977 else => null, 23978 }) orelse return self.fail("TODO implement airFloatFromInt from {} to {}", .{ 23979 src_ty.fmt(pt), 
dst_ty.fmt(pt), 23980 }); 23981 const dst_alias = dst_reg.to128(); 23982 const src_alias = registerAlias(src_reg, src_size); 23983 switch (mir_tag[0]) { 23984 .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias), 23985 else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias), 23986 } 23987 23988 break :result dst_mcv; 23989 }; 23990 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 23991 } 23992 23993 fn airIntFromFloat(self: *CodeGen, inst: Air.Inst.Index) !void { 23994 const pt = self.pt; 23995 const zcu = pt.zcu; 23996 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 23997 23998 const dst_ty = self.typeOfIndex(inst); 23999 const dst_bits: u32 = @intCast(dst_ty.bitSize(zcu)); 24000 const dst_signedness = 24001 if (dst_ty.isAbiInt(zcu)) dst_ty.intInfo(zcu).signedness else .unsigned; 24002 const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) { 24003 .signed => dst_bits, 24004 .unsigned => dst_bits + 1, 24005 }, 32), 8) catch unreachable; 24006 24007 const src_ty = self.typeOf(ty_op.operand); 24008 const src_bits = src_ty.floatBits(self.target.*); 24009 24010 const result = result: { 24011 if (switch (src_bits) { 24012 16, 80, 128 => true, 24013 32, 64 => dst_size > 8, 24014 else => unreachable, 24015 }) { 24016 if (dst_bits > 128) return self.fail("TODO implement airIntFromFloat from {} to {}", .{ 24017 src_ty.fmt(pt), dst_ty.fmt(pt), 24018 }); 24019 24020 var callee_buf: ["__fixuns?f?i".len]u8 = undefined; 24021 break :result try self.genCall(.{ .lib = .{ 24022 .return_type = dst_ty.toIntern(), 24023 .param_types = &.{src_ty.toIntern()}, 24024 .callee = std.fmt.bufPrint(&callee_buf, "__fix{s}{c}f{c}i", .{ 24025 switch (dst_signedness) { 24026 .signed => "", 24027 .unsigned => "uns", 24028 }, 24029 floatCompilerRtAbiName(src_bits), 24030 intCompilerRtAbiName(dst_bits), 24031 }) catch unreachable, 24032 } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand 
}}, .{}); 24033 } 24034 24035 const src_mcv = try self.resolveInst(ty_op.operand); 24036 const src_reg = if (src_mcv.isRegister()) 24037 src_mcv.getReg().? 24038 else 24039 try self.copyToTmpRegister(src_ty, src_mcv); 24040 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); 24041 defer self.register_manager.unlockReg(src_lock); 24042 24043 const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty)); 24044 const dst_mcv = MCValue{ .register = dst_reg }; 24045 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); 24046 defer self.register_manager.unlockReg(dst_lock); 24047 24048 try self.asmRegisterRegister( 24049 switch (src_bits) { 24050 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si }, 24051 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si }, 24052 else => unreachable, 24053 }, 24054 registerAlias(dst_reg, dst_size), 24055 src_reg.to128(), 24056 ); 24057 24058 if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg); 24059 24060 break :result dst_mcv; 24061 }; 24062 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 24063 } 24064 24065 fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void { 24066 const pt = self.pt; 24067 const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; 24068 const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data; 24069 24070 const ptr_ty = self.typeOf(extra.ptr); 24071 const val_ty = self.typeOf(extra.expected_value); 24072 const val_abi_size: u32 = @intCast(val_ty.abiSize(pt.zcu)); 24073 24074 try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx }); 24075 const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx }); 24076 defer for (regs_lock) |lock| self.register_manager.unlockReg(lock); 24077 24078 const exp_mcv = try self.resolveInst(extra.expected_value); 24079 if (val_abi_size > 8) { 24080 const exp_addr_mcv: MCValue = 
switch (exp_mcv) { 24081 .memory, .indirect, .load_frame => exp_mcv.address(), 24082 else => .{ .register = try self.copyToTmpRegister(.usize, exp_mcv.address()) }, 24083 }; 24084 const exp_addr_lock = 24085 if (exp_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; 24086 defer if (exp_addr_lock) |lock| self.register_manager.unlockReg(lock); 24087 24088 try self.genSetReg(.rax, .usize, exp_addr_mcv.deref(), .{}); 24089 try self.genSetReg(.rdx, .usize, exp_addr_mcv.offset(8).deref(), .{}); 24090 } else try self.genSetReg(.rax, val_ty, exp_mcv, .{}); 24091 24092 const new_mcv = try self.resolveInst(extra.new_value); 24093 const new_reg = if (val_abi_size > 8) new: { 24094 const new_addr_mcv: MCValue = switch (new_mcv) { 24095 .memory, .indirect, .load_frame => new_mcv.address(), 24096 else => .{ .register = try self.copyToTmpRegister(.usize, new_mcv.address()) }, 24097 }; 24098 const new_addr_lock = 24099 if (new_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; 24100 defer if (new_addr_lock) |lock| self.register_manager.unlockReg(lock); 24101 24102 try self.genSetReg(.rbx, .usize, new_addr_mcv.deref(), .{}); 24103 try self.genSetReg(.rcx, .usize, new_addr_mcv.offset(8).deref(), .{}); 24104 break :new null; 24105 } else try self.copyToTmpRegister(val_ty, new_mcv); 24106 const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null; 24107 defer if (new_lock) |lock| self.register_manager.unlockReg(lock); 24108 24109 const ptr_mcv = try self.resolveInst(extra.ptr); 24110 const mem_size: Memory.Size = .fromSize(val_abi_size); 24111 const ptr_mem: Memory = switch (ptr_mcv) { 24112 .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, .{ .size = mem_size }), 24113 else => .{ 24114 .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, 24115 .mod = .{ .rm = .{ .size = mem_size } }, 24116 }, 24117 }; 24118 switch (ptr_mem.mod) { 24119 .rm => {}, 24120 .off => 
return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}), 24121 } 24122 const ptr_lock = switch (ptr_mem.base) { 24123 .none, .frame, .reloc => null, 24124 .reg => |reg| self.register_manager.lockReg(reg), 24125 .table => unreachable, 24126 }; 24127 defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); 24128 24129 try self.spillEflagsIfOccupied(); 24130 if (val_abi_size <= 8) try self.asmMemoryRegister( 24131 .{ .@"lock _", .cmpxchg }, 24132 ptr_mem, 24133 registerAlias(new_reg.?, val_abi_size), 24134 ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); 24135 24136 const result: MCValue = result: { 24137 if (self.liveness.isUnused(inst)) break :result .unreach; 24138 24139 if (val_abi_size <= 8) { 24140 self.eflags_inst = inst; 24141 break :result .{ .register_overflow = .{ .reg = .rax, .eflags = .ne } }; 24142 } 24143 24144 const dst_mcv = try self.allocRegOrMem(inst, false); 24145 try self.genCopy(.usize, dst_mcv, .{ .register = .rax }, .{}); 24146 try self.genCopy(.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx }, .{}); 24147 try self.genCopy(.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne }, .{}); 24148 break :result dst_mcv; 24149 }; 24150 return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value }); 24151 } 24152 24153 fn atomicOp( 24154 self: *CodeGen, 24155 ptr_mcv: MCValue, 24156 val_mcv: MCValue, 24157 ptr_ty: Type, 24158 val_ty: Type, 24159 unused: bool, 24160 rmw_op: ?std.builtin.AtomicRmwOp, 24161 order: std.builtin.AtomicOrder, 24162 ) InnerError!MCValue { 24163 const pt = self.pt; 24164 const zcu = pt.zcu; 24165 const ptr_lock = switch (ptr_mcv) { 24166 .register => |reg| self.register_manager.lockReg(reg), 24167 else => null, 24168 }; 24169 defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); 24170 24171 const val_lock = switch (val_mcv) { 24172 .register => |reg| self.register_manager.lockReg(reg), 24173 else => null, 24174 }; 24175 defer if 
(val_lock) |lock| self.register_manager.unlockReg(lock); 24176 24177 const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu)); 24178 const mem_size: Memory.Size = .fromSize(val_abi_size); 24179 const ptr_mem: Memory = switch (ptr_mcv) { 24180 .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, .{ .size = mem_size }), 24181 else => .{ 24182 .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, 24183 .mod = .{ .rm = .{ .size = mem_size } }, 24184 }, 24185 }; 24186 switch (ptr_mem.mod) { 24187 .rm => {}, 24188 .off => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}), 24189 } 24190 const mem_lock = switch (ptr_mem.base) { 24191 .none, .frame, .reloc => null, 24192 .reg => |reg| self.register_manager.lockReg(reg), 24193 .table => unreachable, 24194 }; 24195 defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); 24196 24197 const use_sse = rmw_op orelse .Xchg != .Xchg and val_ty.isRuntimeFloat(); 24198 const strat: enum { lock, loop, libcall } = if (use_sse) .loop else switch (rmw_op orelse .Xchg) { 24199 .Xchg, 24200 .Add, 24201 .Sub, 24202 => if (val_abi_size <= 8) .lock else if (val_abi_size <= 16) .loop else .libcall, 24203 .And, 24204 .Or, 24205 .Xor, 24206 => if (val_abi_size <= 8 and unused) .lock else if (val_abi_size <= 16) .loop else .libcall, 24207 .Nand, 24208 .Max, 24209 .Min, 24210 => if (val_abi_size <= 16) .loop else .libcall, 24211 }; 24212 switch (strat) { 24213 .lock => { 24214 const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) { 24215 .Xchg => if (unused) .mov else .xchg, 24216 .Add => if (unused) .add else .xadd, 24217 .Sub => if (unused) .sub else .xadd, 24218 .And => .@"and", 24219 .Or => .@"or", 24220 .Xor => .xor, 24221 else => unreachable, 24222 } else switch (order) { 24223 .unordered, .monotonic, .release, .acq_rel => .mov, 24224 .acquire => unreachable, 24225 .seq_cst => .xchg, 24226 }; 24227 24228 const dst_reg = try self.register_manager.allocReg(null, 
abi.RegisterClass.gp); 24229 const dst_mcv = MCValue{ .register = dst_reg }; 24230 const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); 24231 defer self.register_manager.unlockReg(dst_lock); 24232 24233 try self.genSetReg(dst_reg, val_ty, val_mcv, .{}); 24234 if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { 24235 try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv); 24236 } 24237 try self.asmMemoryRegister( 24238 switch (tag) { 24239 .mov, .xchg => .{ ._, tag }, 24240 .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag }, 24241 else => unreachable, 24242 }, 24243 ptr_mem, 24244 registerAlias(dst_reg, val_abi_size), 24245 ); 24246 24247 return if (unused) .unreach else dst_mcv; 24248 }, 24249 .loop => _ = if (val_abi_size <= 8) { 24250 const sse_reg: Register = if (use_sse) 24251 try self.register_manager.allocReg(null, abi.RegisterClass.sse) 24252 else 24253 undefined; 24254 const sse_lock = 24255 if (use_sse) self.register_manager.lockRegAssumeUnused(sse_reg) else undefined; 24256 defer if (use_sse) self.register_manager.unlockReg(sse_lock); 24257 24258 const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24259 const tmp_mcv = MCValue{ .register = tmp_reg }; 24260 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 24261 defer self.register_manager.unlockReg(tmp_lock); 24262 24263 try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem); 24264 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 24265 if (!use_sse and rmw_op orelse .Xchg != .Xchg) { 24266 try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }, .{}); 24267 } 24268 if (rmw_op) |op| if (use_sse) { 24269 const mir_tag = @as(?Mir.Inst.FixedTag, switch (op) { 24270 .Add => switch (val_ty.floatBits(self.target.*)) { 24271 32 => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, 24272 64 => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, 24273 
else => null, 24274 }, 24275 .Sub => switch (val_ty.floatBits(self.target.*)) { 24276 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, 24277 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, 24278 else => null, 24279 }, 24280 .Min => switch (val_ty.floatBits(self.target.*)) { 24281 32 => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, 24282 64 => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, 24283 else => null, 24284 }, 24285 .Max => switch (val_ty.floatBits(self.target.*)) { 24286 32 => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, 24287 64 => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, 24288 else => null, 24289 }, 24290 else => unreachable, 24291 }) orelse return self.fail("TODO implement atomicOp of {s} for {}", .{ 24292 @tagName(op), val_ty.fmt(pt), 24293 }); 24294 try self.genSetReg(sse_reg, val_ty, .{ .register = .rax }, .{}); 24295 switch (mir_tag[0]) { 24296 .v_ss, .v_sd => if (val_mcv.isBase()) try self.asmRegisterRegisterMemory( 24297 mir_tag, 24298 sse_reg.to128(), 24299 sse_reg.to128(), 24300 try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }), 24301 ) else try self.asmRegisterRegisterRegister( 24302 mir_tag, 24303 sse_reg.to128(), 24304 sse_reg.to128(), 24305 (if (val_mcv.isRegister()) 24306 val_mcv.getReg().? 24307 else 24308 try self.copyToTmpRegister(val_ty, val_mcv)).to128(), 24309 ), 24310 ._ss, ._sd => if (val_mcv.isBase()) try self.asmRegisterMemory( 24311 mir_tag, 24312 sse_reg.to128(), 24313 try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }), 24314 ) else try self.asmRegisterRegister( 24315 mir_tag, 24316 sse_reg.to128(), 24317 (if (val_mcv.isRegister()) 24318 val_mcv.getReg().? 
24319 else 24320 try self.copyToTmpRegister(val_ty, val_mcv)).to128(), 24321 ), 24322 else => unreachable, 24323 } 24324 try self.genSetReg(tmp_reg, val_ty, .{ .register = sse_reg }, .{}); 24325 } else switch (op) { 24326 .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv, .{}), 24327 .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv), 24328 .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv), 24329 .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv), 24330 .Nand => { 24331 try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv); 24332 try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv); 24333 }, 24334 .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv), 24335 .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv), 24336 .Min, .Max => { 24337 const cc: Condition = switch (if (val_ty.isAbiInt(zcu)) 24338 val_ty.intInfo(zcu).signedness 24339 else 24340 .unsigned) { 24341 .unsigned => switch (op) { 24342 .Min => .a, 24343 .Max => .b, 24344 else => unreachable, 24345 }, 24346 .signed => switch (op) { 24347 .Min => .g, 24348 .Max => .l, 24349 else => unreachable, 24350 }, 24351 }; 24352 24353 const cmov_abi_size = @max(val_abi_size, 2); 24354 switch (val_mcv) { 24355 .register => |val_reg| { 24356 try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv); 24357 try self.asmCmovccRegisterRegister( 24358 cc, 24359 registerAlias(tmp_reg, cmov_abi_size), 24360 registerAlias(val_reg, cmov_abi_size), 24361 ); 24362 }, 24363 .memory, .indirect, .load_frame => { 24364 try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv); 24365 try self.asmCmovccRegisterMemory( 24366 cc, 24367 registerAlias(tmp_reg, cmov_abi_size), 24368 try val_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }), 24369 ); 24370 }, 24371 else => { 24372 const mat_reg = try self.copyToTmpRegister(val_ty, val_mcv); 24373 const mat_lock = self.register_manager.lockRegAssumeUnused(mat_reg); 24374 
defer self.register_manager.unlockReg(mat_lock); 24375 24376 try self.genBinOpMir( 24377 .{ ._, .cmp }, 24378 val_ty, 24379 tmp_mcv, 24380 .{ .register = mat_reg }, 24381 ); 24382 try self.asmCmovccRegisterRegister( 24383 cc, 24384 registerAlias(tmp_reg, cmov_abi_size), 24385 registerAlias(mat_reg, cmov_abi_size), 24386 ); 24387 }, 24388 } 24389 }, 24390 }; 24391 try self.asmMemoryRegister( 24392 .{ .@"lock _", .cmpxchg }, 24393 ptr_mem, 24394 registerAlias(tmp_reg, val_abi_size), 24395 ); 24396 _ = try self.asmJccReloc(.ne, loop); 24397 return if (unused) .unreach else .{ .register = .rax }; 24398 } else { 24399 try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{ 24400 .base = ptr_mem.base, 24401 .mod = .{ .rm = .{ 24402 .size = .qword, 24403 .index = ptr_mem.mod.rm.index, 24404 .scale = ptr_mem.mod.rm.scale, 24405 .disp = ptr_mem.mod.rm.disp + 0, 24406 } }, 24407 }); 24408 try self.asmRegisterMemory(.{ ._, .mov }, .rdx, .{ 24409 .base = ptr_mem.base, 24410 .mod = .{ .rm = .{ 24411 .size = .qword, 24412 .index = ptr_mem.mod.rm.index, 24413 .scale = ptr_mem.mod.rm.scale, 24414 .disp = ptr_mem.mod.rm.disp + 8, 24415 } }, 24416 }); 24417 const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); 24418 const val_mem_mcv: MCValue = switch (val_mcv) { 24419 .memory, .indirect, .load_frame => val_mcv, 24420 else => .{ .indirect = .{ 24421 .reg = try self.copyToTmpRegister(.usize, val_mcv.address()), 24422 } }, 24423 }; 24424 const val_lo_mem = try val_mem_mcv.mem(self, .{ .size = .qword }); 24425 const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }); 24426 if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { 24427 try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax); 24428 try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx); 24429 } 24430 if (rmw_op) |op| switch (op) { 24431 .Xchg => { 24432 try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem); 24433 try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem); 24434 }, 
24435 .Add => { 24436 try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem); 24437 try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem); 24438 }, 24439 .Sub => { 24440 try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem); 24441 try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem); 24442 }, 24443 .And => { 24444 try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); 24445 try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); 24446 }, 24447 .Nand => { 24448 try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); 24449 try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); 24450 try self.asmRegister(.{ ._, .not }, .rbx); 24451 try self.asmRegister(.{ ._, .not }, .rcx); 24452 }, 24453 .Or => { 24454 try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem); 24455 try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem); 24456 }, 24457 .Xor => { 24458 try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem); 24459 try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem); 24460 }, 24461 .Min, .Max => { 24462 const cc: Condition = switch (if (val_ty.isAbiInt(zcu)) 24463 val_ty.intInfo(zcu).signedness 24464 else 24465 .unsigned) { 24466 .unsigned => switch (op) { 24467 .Min => .a, 24468 .Max => .b, 24469 else => unreachable, 24470 }, 24471 .signed => switch (op) { 24472 .Min => .g, 24473 .Max => .l, 24474 else => unreachable, 24475 }, 24476 }; 24477 24478 const tmp_reg = try self.copyToTmpRegister(.usize, .{ .register = .rcx }); 24479 const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); 24480 defer self.register_manager.unlockReg(tmp_lock); 24481 24482 try self.asmRegisterMemory(.{ ._, .cmp }, .rbx, val_lo_mem); 24483 try self.asmRegisterMemory(.{ ._, .sbb }, tmp_reg, val_hi_mem); 24484 try self.asmCmovccRegisterMemory(cc, .rbx, val_lo_mem); 24485 try self.asmCmovccRegisterMemory(cc, .rcx, val_hi_mem); 24486 }, 24487 }; 24488 try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, 
ptr_mem); 24489 _ = try self.asmJccReloc(.ne, loop); 24490 24491 if (unused) return .unreach; 24492 const dst_mcv = try self.allocTempRegOrMem(val_ty, false); 24493 try self.asmMemoryRegister(.{ ._, .mov }, .{ 24494 .base = .{ .frame = dst_mcv.load_frame.index }, 24495 .mod = .{ .rm = .{ 24496 .size = .qword, 24497 .disp = dst_mcv.load_frame.off + 0, 24498 } }, 24499 }, .rax); 24500 try self.asmMemoryRegister(.{ ._, .mov }, .{ 24501 .base = .{ .frame = dst_mcv.load_frame.index }, 24502 .mod = .{ .rm = .{ 24503 .size = .qword, 24504 .disp = dst_mcv.load_frame.off + 8, 24505 } }, 24506 }, .rdx); 24507 return dst_mcv; 24508 }, 24509 .libcall => return self.fail("TODO implement x86 atomic libcall", .{}), 24510 } 24511 } 24512 24513 fn airAtomicRmw(self: *CodeGen, inst: Air.Inst.Index) !void { 24514 const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; 24515 const extra = self.air.extraData(Air.AtomicRmw, pl_op.payload).data; 24516 24517 try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx }); 24518 const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx }); 24519 defer for (regs_lock) |lock| self.register_manager.unlockReg(lock); 24520 24521 const unused = self.liveness.isUnused(inst); 24522 24523 const ptr_ty = self.typeOf(pl_op.operand); 24524 const ptr_mcv = try self.resolveInst(pl_op.operand); 24525 24526 const val_ty = self.typeOf(extra.operand); 24527 const val_mcv = try self.resolveInst(extra.operand); 24528 24529 const result = 24530 try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering()); 24531 return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none }); 24532 } 24533 24534 fn airAtomicLoad(self: *CodeGen, inst: Air.Inst.Index) !void { 24535 const atomic_load = self.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load; 24536 24537 const ptr_ty = self.typeOf(atomic_load.ptr); 24538 const ptr_mcv = try self.resolveInst(atomic_load.ptr); 24539 const 
ptr_lock = switch (ptr_mcv) { 24540 .register => |reg| self.register_manager.lockRegAssumeUnused(reg), 24541 else => null, 24542 }; 24543 defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); 24544 24545 const dst_mcv = 24546 if (self.reuseOperand(inst, atomic_load.ptr, 0, ptr_mcv)) 24547 ptr_mcv 24548 else 24549 try self.allocRegOrMem(inst, true); 24550 24551 try self.load(dst_mcv, ptr_ty, ptr_mcv); 24552 return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none }); 24553 } 24554 24555 fn airAtomicStore(self: *CodeGen, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void { 24556 const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; 24557 24558 const ptr_ty = self.typeOf(bin_op.lhs); 24559 const ptr_mcv = try self.resolveInst(bin_op.lhs); 24560 24561 const val_ty = self.typeOf(bin_op.rhs); 24562 const val_mcv = try self.resolveInst(bin_op.rhs); 24563 24564 const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order); 24565 return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); 24566 } 24567 24568 fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { 24569 const pt = self.pt; 24570 const zcu = pt.zcu; 24571 const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; 24572 24573 result: { 24574 if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result; 24575 24576 try self.spillRegisters(&.{ .rax, .rdi, .rsi, .rcx }); 24577 const reg_locks = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdi, .rsi, .rcx }); 24578 defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); 24579 24580 const dst = try self.resolveInst(bin_op.lhs); 24581 const dst_ty = self.typeOf(bin_op.lhs); 24582 const dst_locks: [2]?RegisterLock = switch (dst) { 24583 .register => |dst_reg| .{ self.register_manager.lockRegAssumeUnused(dst_reg), null }, 24584 .register_pair => |dst_regs| .{ 24585 
self.register_manager.lockRegAssumeUnused(dst_regs[0]), 24586 self.register_manager.lockRegAssumeUnused(dst_regs[1]), 24587 }, 24588 else => @splat(null), 24589 }; 24590 for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); 24591 24592 const src_val = try self.resolveInst(bin_op.rhs); 24593 const elem_ty = self.typeOf(bin_op.rhs); 24594 const src_val_lock: ?RegisterLock = switch (src_val) { 24595 .register => |reg| self.register_manager.lockRegAssumeUnused(reg), 24596 else => null, 24597 }; 24598 defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock); 24599 24600 const elem_abi_size: u31 = @intCast(elem_ty.abiSize(zcu)); 24601 24602 if (elem_abi_size == 1) { 24603 const dst_ptr: MCValue = switch (dst_ty.ptrSize(zcu)) { 24604 .slice => switch (dst) { 24605 .register_pair => |dst_regs| .{ .register = dst_regs[0] }, 24606 else => dst, 24607 }, 24608 .one => dst, 24609 .c, .many => unreachable, 24610 }; 24611 const len: MCValue = switch (dst_ty.ptrSize(zcu)) { 24612 .slice => switch (dst) { 24613 .register_pair => |dst_regs| .{ .register = dst_regs[1] }, 24614 else => dst.address().offset(8).deref(), 24615 }, 24616 .one => .{ .immediate = dst_ty.childType(zcu).arrayLen(zcu) }, 24617 .c, .many => unreachable, 24618 }; 24619 const len_lock: ?RegisterLock = switch (len) { 24620 .register => |reg| self.register_manager.lockRegAssumeUnused(reg), 24621 else => null, 24622 }; 24623 defer if (len_lock) |lock| self.register_manager.unlockReg(lock); 24624 24625 try self.genInlineMemset(dst_ptr, src_val, len, .{ .safety = safety }); 24626 break :result; 24627 } 24628 24629 // Store the first element, and then rely on memcpy copying forwards. 24630 // Length zero requires a runtime check - so we handle arrays specially 24631 // here to elide it. 
24632 switch (dst_ty.ptrSize(zcu)) { 24633 .slice => { 24634 const slice_ptr_ty = dst_ty.slicePtrFieldType(zcu); 24635 24636 const dst_ptr: MCValue = switch (dst) { 24637 .register_pair => |dst_regs| .{ .register = dst_regs[0] }, 24638 else => dst, 24639 }; 24640 const len: MCValue = switch (dst) { 24641 .register_pair => |dst_regs| .{ .register = dst_regs[1] }, 24642 else => dst.address().offset(8).deref(), 24643 }; 24644 24645 // Used to store the number of elements for comparison. 24646 // After comparison, updated to store number of bytes needed to copy. 24647 const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24648 const len_mcv: MCValue = .{ .register = len_reg }; 24649 const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); 24650 defer self.register_manager.unlockReg(len_lock); 24651 24652 try self.genSetReg(len_reg, .usize, len, .{}); 24653 try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg); 24654 24655 const skip_reloc = try self.asmJccReloc(.z, undefined); 24656 try self.store(slice_ptr_ty, dst_ptr, src_val, .{ .safety = safety }); 24657 24658 const second_elem_ptr_reg = 24659 try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24660 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; 24661 const second_elem_ptr_lock = 24662 self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); 24663 defer self.register_manager.unlockReg(second_elem_ptr_lock); 24664 24665 try self.genSetReg(second_elem_ptr_reg, .usize, .{ .register_offset = .{ 24666 .reg = try self.copyToTmpRegister(.usize, dst_ptr), 24667 .off = elem_abi_size, 24668 } }, .{}); 24669 24670 try self.genBinOpMir(.{ ._, .sub }, .usize, len_mcv, .{ .immediate = 1 }); 24671 try self.asmRegisterRegisterImmediate( 24672 .{ .i_, .mul }, 24673 len_reg, 24674 len_reg, 24675 .s(elem_abi_size), 24676 ); 24677 try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, len_mcv, .{ .no_alias = false }); 24678 24679 
self.performReloc(skip_reloc); 24680 }, 24681 .one => { 24682 const elem_ptr_ty = try pt.singleMutPtrType(elem_ty); 24683 24684 const len = dst_ty.childType(zcu).arrayLen(zcu); 24685 24686 assert(len != 0); // prevented by Sema 24687 try self.store(elem_ptr_ty, dst, src_val, .{ .safety = safety }); 24688 24689 const second_elem_ptr_reg = 24690 try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24691 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; 24692 const second_elem_ptr_lock = 24693 self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); 24694 defer self.register_manager.unlockReg(second_elem_ptr_lock); 24695 24696 try self.genSetReg(second_elem_ptr_reg, .usize, .{ .register_offset = .{ 24697 .reg = try self.copyToTmpRegister(.usize, dst), 24698 .off = elem_abi_size, 24699 } }, .{}); 24700 24701 const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) }; 24702 try self.genInlineMemcpy(second_elem_ptr_mcv, dst, bytes_to_copy, .{ .no_alias = false }); 24703 }, 24704 .c, .many => unreachable, 24705 } 24706 } 24707 return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); 24708 } 24709

/// Lowers the AIR `memcpy` instruction (`bin_op.lhs` is the destination,
/// `bin_op.rhs` is the source) by emitting an inline copy via `genInlineMemcpy`.
///
/// The number of bytes to copy is derived from the destination type:
/// * slice: the runtime length (second register of a register pair, or the
///   qword at offset 8 of the in-memory value) multiplied by the element ABI size;
/// * single-item pointer: `arrayLen * elemAbiSize` of the pointed-to array,
///   emitted as an immediate;
/// * `.c` / `.many` pointers are unreachable here.
///
/// The instruction produces no value; it is finished with `.unreach`.
fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

    // Evict whatever lives in rdi/rsi/rcx and reserve them for the whole lowering.
    // NOTE(review): presumably these are the registers `genInlineMemcpy` writes to - confirm.
    try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
    const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
    defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

    // The operand locks live in their own scope so that they are dropped before
    // `finishAir` processes the operands.
    {
        // Keep the operand registers locked until the copy has been emitted.
        // Without the `defer`, the locks would be dropped immediately and the
        // `allocReg` below could hand out a register that `dst`/`src` still name.
        // `lockReg` (rather than `lockRegAssumeUnused`) is used throughout so that
        // an operand register that is already locked - e.g. because both operands
        // live in the same register - does not trip an assertion; such a register
        // simply yields `null` and is left for its owner to unlock.
        const dst = try self.resolveInst(bin_op.lhs);
        const dst_ty = self.typeOf(bin_op.lhs);
        const dst_locks: [2]?RegisterLock = switch (dst) {
            .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
            .register_pair => |dst_regs| .{
                self.register_manager.lockReg(dst_regs[0]),
                self.register_manager.lockReg(dst_regs[1]),
            },
            else => @splat(null),
        };
        defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);

        const src = try self.resolveInst(bin_op.rhs);
        const src_locks: [2]?RegisterLock = switch (src) {
            .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null },
            .register_pair => |src_regs| .{
                self.register_manager.lockReg(src_regs[0]),
                self.register_manager.lockReg(src_regs[1]),
            },
            else => @splat(null),
        };
        defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);

        // Byte count of the copy.
        const len: MCValue = switch (dst_ty.ptrSize(zcu)) {
            .slice => len: {
                const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
                defer self.register_manager.unlockReg(len_lock);

                // len_reg = slice.len * @sizeOf(elem)
                switch (dst) {
                    .register_pair => |dst_regs| try self.asmRegisterRegisterImmediate(
                        .{ .i_, .mul },
                        len_reg,
                        dst_regs[1],
                        .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))),
                    ),
                    else => try self.asmRegisterMemoryImmediate(
                        .{ .i_, .mul },
                        len_reg,
                        try dst.address().offset(8).deref().mem(self, .{ .size = .qword }),
                        .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))),
                    ),
                }
                break :len .{ .register = len_reg };
            },
            .one => len: {
                const array_ty = dst_ty.childType(zcu);
                break :len .{ .immediate = array_ty.arrayLen(zcu) * array_ty.childType(zcu).abiSize(zcu) };
            },
            .c, .many => unreachable,
        };
        const len_lock: ?RegisterLock = switch (len) {
            .register => |reg| self.register_manager.lockReg(reg),
            else => null,
        };
        defer if (len_lock) |lock| self.register_manager.unlockReg(lock);

        // For slices held in a register pair only the pointer half is passed on.
        const dst_ptr: MCValue = switch (dst) {
            .register_pair => |dst_regs| .{ .register = dst_regs[0] },
            else => dst,
        };
        const src_ptr: MCValue = switch (src) {
            .register_pair => |src_regs| .{ .register = src_regs[0] },
            else => src,
        };

        try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{ .no_alias = true });
    }

    return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
}

24789 24790 fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void { 24791 const pt = self.pt; 24792 const zcu = pt.zcu; 24793 const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; 24794 const inst_ty = self.typeOfIndex(inst); 24795 const enum_ty = self.typeOf(un_op); 24796 24797 // We need a properly aligned and sized call frame to be able to call this function. 24798 { 24799 const needed_call_frame: FrameAlloc = .init(.{ 24800 .size = inst_ty.abiSize(zcu), 24801 .alignment = inst_ty.abiAlignment(zcu), 24802 }); 24803 const frame_allocs_slice = self.frame_allocs.slice(); 24804 const stack_frame_size = 24805 &frame_allocs_slice.items(.abi_size)[@intFromEnum(FrameIndex.call_frame)]; 24806 stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size); 24807 const stack_frame_align = 24808 &frame_allocs_slice.items(.abi_align)[@intFromEnum(FrameIndex.call_frame)]; 24809 stack_frame_align.* = stack_frame_align.max(needed_call_frame.abi_align); 24810 } 24811 24812 try self.spillEflagsIfOccupied(); 24813 try self.spillCallerPreservedRegs(.auto); 24814 24815 const param_regs = abi.getCAbiIntParamRegs(.auto); 24816 24817 const dst_mcv = try self.allocRegOrMem(inst, false); 24818 try self.genSetReg(param_regs[0], .usize, dst_mcv.address(), .{}); 24819 24820 const operand = try self.resolveInst(un_op); 24821 try self.genSetReg(param_regs[1], enum_ty, operand, .{}); 24822 24823 const enum_lazy_sym: link.File.LazySymbol = .{ .kind = .code, .ty = enum_ty.toIntern() }; 24824 try self.genLazySymbolRef(.call,
abi.getCAbiLinkerScratchReg(self.fn_type.fnCallingConvention(zcu)), enum_lazy_sym); 24825 24826 return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); 24827 } 24828 24829 fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void { 24830 const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; 24831 24832 const err_ty = self.typeOf(un_op); 24833 const err_mcv = try self.resolveInst(un_op); 24834 const err_reg = try self.copyToTmpRegister(err_ty, err_mcv); 24835 const err_lock = self.register_manager.lockRegAssumeUnused(err_reg); 24836 defer self.register_manager.unlockReg(err_lock); 24837 24838 const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24839 const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); 24840 defer self.register_manager.unlockReg(addr_lock); 24841 const anyerror_lazy_sym: link.File.LazySymbol = .{ .kind = .const_data, .ty = .anyerror_type }; 24842 try self.genLazySymbolRef(.lea, addr_reg, anyerror_lazy_sym); 24843 24844 const start_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24845 const start_lock = self.register_manager.lockRegAssumeUnused(start_reg); 24846 defer self.register_manager.unlockReg(start_lock); 24847 24848 const end_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); 24849 const end_lock = self.register_manager.lockRegAssumeUnused(end_reg); 24850 defer self.register_manager.unlockReg(end_lock); 24851 24852 try self.truncateRegister(err_ty, err_reg.to32()); 24853 24854 try self.asmRegisterMemory( 24855 .{ ._, .mov }, 24856 start_reg.to32(), 24857 .{ 24858 .base = .{ .reg = addr_reg.to64() }, 24859 .mod = .{ .rm = .{ 24860 .size = .dword, 24861 .index = err_reg.to64(), 24862 .scale = .@"4", 24863 .disp = (1 - 1) * 4, 24864 } }, 24865 }, 24866 ); 24867 try self.asmRegisterMemory( 24868 .{ ._, .mov }, 24869 end_reg.to32(), 24870 .{ 24871 .base = .{ .reg = addr_reg.to64() }, 24872 .mod = .{ .rm = .{ 24873 .size = .dword, 
24874 .index = err_reg.to64(), 24875 .scale = .@"4", 24876 .disp = (2 - 1) * 4, 24877 } }, 24878 }, 24879 ); 24880 try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32()); 24881 try self.asmRegisterMemory( 24882 .{ ._, .lea }, 24883 start_reg.to64(), 24884 .{ 24885 .base = .{ .reg = addr_reg.to64() }, 24886 .mod = .{ .rm = .{ 24887 .size = .dword, 24888 .index = start_reg.to64(), 24889 } }, 24890 }, 24891 ); 24892 try self.asmRegisterMemory( 24893 .{ ._, .lea }, 24894 end_reg.to32(), 24895 .{ 24896 .base = .{ .reg = end_reg.to64() }, 24897 .mod = .{ .rm = .{ 24898 .size = .byte, 24899 .disp = -1, 24900 } }, 24901 }, 24902 ); 24903 24904 const dst_mcv = try self.allocRegOrMem(inst, false); 24905 try self.asmMemoryRegister( 24906 .{ ._, .mov }, 24907 .{ 24908 .base = .{ .frame = dst_mcv.load_frame.index }, 24909 .mod = .{ .rm = .{ 24910 .size = .qword, 24911 .disp = dst_mcv.load_frame.off, 24912 } }, 24913 }, 24914 start_reg.to64(), 24915 ); 24916 try self.asmMemoryRegister( 24917 .{ ._, .mov }, 24918 .{ 24919 .base = .{ .frame = dst_mcv.load_frame.index }, 24920 .mod = .{ .rm = .{ 24921 .size = .qword, 24922 .disp = dst_mcv.load_frame.off + 8, 24923 } }, 24924 }, 24925 end_reg.to64(), 24926 ); 24927 24928 return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); 24929 } 24930

/// Lowers the AIR `splat` instruction: produces a vector (the instruction's
/// result type) whose lanes all hold the scalar operand `ty_op.operand`.
///
/// Dispatches on the scalar's type tag:
/// * `.bool`  - builds the packed bit mask in a general purpose register by
///   conditionally selecting between 0 and "`vector_len` low bits set";
/// * `.int`   - uses an AVX2 broadcast when a matching form exists for the
///   scalar width / lane count, otherwise an unpack/shuffle sequence;
/// * `.float` - per-width (32/64/128 bit) shuffle, `movddup`, broadcast or
///   `insert` sequences depending on lane count and available features.
///
/// Every combination that is not handled falls out of the switch and fails
/// with "TODO implement airSplat for {}".
fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
    const vector_ty = self.typeOfIndex(inst);
    const vector_len = vector_ty.vectorLen(zcu);
    const dst_rc = self.regSetForType(vector_ty);
    const scalar_ty = self.typeOf(ty_op.operand);

    const result: MCValue = result: {
        switch (scalar_ty.zigTypeTag(zcu)) {
            else => {},
            .bool => {
                // regs[0] is the result register (tracked for `inst`), regs[1] is a temporary.
                const regs =
                    try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
                const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
                defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

                // regs[0] = all lanes false, regs[1] = all lanes true.
                // The result register must be zeroed here: the conditional move
                // below only overwrites it when the operand is true, so it has to
                // already hold the "false" splat otherwise.
                try self.genSetReg(regs[0], vector_ty, .{ .immediate = 0 }, .{});
                try self.genSetReg(
                    regs[1],
                    vector_ty,
                    .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) },
                    .{},
                );
                // The operand is resolved only after both registers have been set up.
                // NOTE(review): presumably so that an `.eflags` operand is looked up
                // after any flag-clobbering done by `genSetReg` - confirm.
                const src_mcv = try self.resolveInst(ty_op.operand);
                const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4);
                try self.asmCmovccRegisterRegister(
                    switch (src_mcv) {
                        .eflags => |cc| cc,
                        .register => |src_reg| cc: {
                            try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1));
                            break :cc .nz;
                        },
                        else => cc: {
                            try self.asmMemoryImmediate(
                                .{ ._, .@"test" },
                                try src_mcv.mem(self, .{ .size = .byte }),
                                .u(1),
                            );
                            break :cc .nz;
                        },
                    },
                    registerAlias(regs[0], abi_size),
                    registerAlias(regs[1], abi_size),
                );
                break :result .{ .register = regs[0] };
            },
            .int => if (self.hasFeature(.avx2)) avx2: {
                // Pick the broadcast form matching the scalar width; `null` (and thus
                // `break :avx2`) when the lane count does not fit the table below.
                const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(zcu).bits) {
                    else => null,
                    1...8 => switch (vector_len) {
                        else => null,
                        1...32 => .{ .vp_b, .broadcast },
                    },
                    9...16 => switch (vector_len) {
                        else => null,
                        1...16 => .{ .vp_w, .broadcast },
                    },
                    17...32 => switch (vector_len) {
                        else => null,
                        1...8 => .{ .vp_d, .broadcast },
                    },
                    33...64 => switch (vector_len) {
                        else => null,
                        1...4 => .{ .vp_q, .broadcast },
                    },
                    65...128 => switch (vector_len) {
                        else => null,
                        1...2 => .{ .v_i128, .broadcast },
                    },
                }) orelse break :avx2;

                const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
                const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
                defer self.register_manager.unlockReg(dst_lock);

                const src_mcv = try self.resolveInst(ty_op.operand);
                if (src_mcv.isBase()) try self.asmRegisterMemory(
                    mir_tag,
                    registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
                    try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }),
                ) else {
                    // The 128-bit broadcast is only emitted with a memory source here.
                    if (mir_tag[0] == .v_i128) break :avx2;
                    try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
                    try self.asmRegisterRegister(
                        mir_tag,
                        registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
                        registerAlias(dst_reg, @intCast(scalar_ty.abiSize(zcu))),
                    );
                }
                break :result .{ .register = dst_reg };
            } else {
                const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
                const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
                defer self.register_manager.unlockReg(dst_lock);

                try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand }, .{});
                if (vector_len == 1) break :result .{ .register = dst_reg };

                // Widen the replicated element step by step: bytes -> words
                // (unpcklbw), words -> dwords (shufl), dwords/qwords -> full
                // register (shuf). Each step is skipped when the scalar is already
                // at least that wide or the lane count does not need it.
                const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu)));
                const scalar_bits = scalar_ty.intInfo(zcu).bits;
                if (switch (scalar_bits) {
                    1...8 => true,
                    9...128 => false,
                    else => unreachable,
                }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister(
                    .{ .vp_, .unpcklbw },
                    dst_alias,
                    dst_alias,
                    dst_alias,
                ) else try self.asmRegisterRegister(
                    .{ .p_, .unpcklbw },
                    dst_alias,
                    dst_alias,
                );
                if (switch (scalar_bits) {
                    1...8 => vector_len > 2,
                    9...16 => true,
                    17...128 => false,
                    else => unreachable,
                }) try self.asmRegisterRegisterImmediate(
                    .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
                    dst_alias,
                    dst_alias,
                    .u(0b00_00_00_00),
                );
                if (switch (scalar_bits) {
                    1...8 => vector_len > 4,
                    9...16 => vector_len > 2,
                    17...64 => true,
                    65...128 => false,
                    else => unreachable,
                }) try self.asmRegisterRegisterImmediate(
                    .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf },
                    dst_alias,
                    dst_alias,
                    .u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00),
                );
                break :result .{ .register = dst_reg };
            },
            .float => switch (scalar_ty.floatBits(self.target.*)) {
                32 => switch (vector_len) {
                    1 => {
                        // A one-lane vector is just the scalar: reuse or copy it.
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
                        break :result .{ .register = dst_reg };
                    },
                    2...4 => {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.hasFeature(.avx)) {
                            const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                            if (src_mcv.isBase()) try self.asmRegisterMemory(
                                .{ .v_ss, .broadcast },
                                dst_reg.to128(),
                                try src_mcv.mem(self, .{ .size = .dword }),
                            ) else {
                                const src_reg = if (src_mcv.isRegister())
                                    src_mcv.getReg().?
                                else
                                    try self.copyToTmpRegister(scalar_ty, src_mcv);
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_ps, .shuf },
                                    dst_reg.to128(),
                                    src_reg.to128(),
                                    src_reg.to128(),
                                    .u(0),
                                );
                            }
                            break :result .{ .register = dst_reg };
                        } else {
                            // Without AVX the shuffle is two-operand, so the scalar
                            // has to live in the destination register first.
                            const dst_mcv = if (src_mcv.isRegister() and
                                self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                                src_mcv
                            else
                                try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
                            const dst_reg = dst_mcv.getReg().?;
                            try self.asmRegisterRegisterImmediate(
                                .{ ._ps, .shuf },
                                dst_reg.to128(),
                                dst_reg.to128(),
                                .u(0),
                            );
                            break :result dst_mcv;
                        }
                    },
                    5...8 => if (self.hasFeature(.avx)) {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isBase()) try self.asmRegisterMemory(
                            .{ .v_ss, .broadcast },
                            dst_reg.to256(),
                            try src_mcv.mem(self, .{ .size = .dword }),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv);
                            if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
                                .{ .v_ss, .broadcast },
                                dst_reg.to256(),
                                src_reg.to128(),
                            ) else {
                                // Fill the low 128 bits, then duplicate them into the high half.
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_ps, .shuf },
                                    dst_reg.to128(),
                                    src_reg.to128(),
                                    src_reg.to128(),
                                    .u(0),
                                );
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_f128, .insert },
                                    dst_reg.to256(),
                                    dst_reg.to256(),
                                    dst_reg.to128(),
                                    .u(1),
                                );
                            }
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                64 => switch (vector_len) {
                    1 => {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
                        break :result .{ .register = dst_reg };
                    },
                    2 => {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (self.hasFeature(.sse3)) {
                            if (src_mcv.isBase()) try self.asmRegisterMemory(
                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                dst_reg.to128(),
                                try src_mcv.mem(self, .{ .size = .qword }),
                            ) else try self.asmRegisterRegister(
                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                dst_reg.to128(),
                                (if (src_mcv.isRegister())
                                    src_mcv.getReg().?
                                else
                                    try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
                            );
                            break :result .{ .register = dst_reg };
                        } else try self.asmRegisterRegister(
                            .{ ._ps, .movlh },
                            dst_reg.to128(),
                            (if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
                        );
                        // NOTE(review): the non-SSE3 path has no `break :result`, so after
                        // emitting the instruction above it reaches the TODO failure at the
                        // end of the block - confirm whether that is intended.
                    },
                    3...4 => if (self.hasFeature(.avx)) {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isBase()) try self.asmRegisterMemory(
                            .{ .v_sd, .broadcast },
                            dst_reg.to256(),
                            try src_mcv.mem(self, .{ .size = .qword }),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv);
                            if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
                                .{ .v_sd, .broadcast },
                                dst_reg.to256(),
                                src_reg.to128(),
                            ) else {
                                // Fill the low 128 bits, then duplicate them into the high half.
                                try self.asmRegisterRegister(
                                    .{ .v_, .movddup },
                                    dst_reg.to128(),
                                    src_reg.to128(),
                                );
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_f128, .insert },
                                    dst_reg.to256(),
                                    dst_reg.to256(),
                                    dst_reg.to128(),
                                    .u(1),
                                );
                            }
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                128 => switch (vector_len) {
                    1 => {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
                        break :result .{ .register = dst_reg };
                    },
                    2 => if (self.hasFeature(.avx)) {
                        const src_mcv = try self.resolveInst(ty_op.operand);
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isBase()) try self.asmRegisterMemory(
                            .{ .v_f128, .broadcast },
                            dst_reg.to256(),
                            try src_mcv.mem(self, .{ .size = .xword }),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv);
                            try self.asmRegisterRegisterRegisterImmediate(
                                .{ .v_f128, .insert },
                                dst_reg.to256(),
                                src_reg.to256(),
                                src_reg.to128(),
                                .u(1),
                            );
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                16, 80 => {},
                else => unreachable,
            },
        }
        // Reached by every scalar type / lane count combination not handled above.
        return self.fail("TODO implement airSplat for {}", .{vector_ty.fmt(pt)});
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}

25265 25266 fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { 25267 const pt = self.pt; 25268 const zcu = pt.zcu; 25269 const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; 25270 const extra = self.air.extraData(Air.Bin, pl_op.payload).data; 25271 const ty = self.typeOfIndex(inst); 25272 const vec_len = ty.vectorLen(zcu); 25273 const elem_ty = ty.childType(zcu); 25274 const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); 25275 const abi_size: u32 = @intCast(ty.abiSize(zcu)); 25276 const pred_ty = self.typeOf(pl_op.operand); 25277 25278 const result = result: { 25279 const has_blend = self.hasFeature(.sse4_1); 25280 const has_avx = self.hasFeature(.avx); 25281 const need_xmm0 = has_blend and !has_avx; 25282 const pred_mcv = try self.resolveInst(pl_op.operand); 25283 const mask_reg = mask: { 25284 switch (pred_mcv) { 25285 .register => |pred_reg| switch (pred_reg.class()) { 25286 .general_purpose => {}, 25287 .sse => if (need_xmm0 and pred_reg.id() != comptime Register.xmm0.id()) { 25288 try self.register_manager.getKnownReg(.xmm0, null); 25289 try self.genSetReg(.xmm0, pred_ty, pred_mcv, .{}); 25290 break :mask .xmm0; 25291 } else break :mask if (has_blend) 25292 pred_reg 25293 else 25294 try self.copyToTmpRegister(pred_ty, pred_mcv), 25295 else => unreachable, 25296 }, 25297 else => {}, 25298 } 25299 const mask_reg: Register = if (need_xmm0) mask_reg: { 25300 try
self.register_manager.getKnownReg(.xmm0, null); 25301 break :mask_reg .xmm0; 25302 } else try self.register_manager.allocReg(null, abi.RegisterClass.sse); 25303 const mask_alias = registerAlias(mask_reg, abi_size); 25304 const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); 25305 defer self.register_manager.unlockReg(mask_lock); 25306 25307 const pred_fits_in_elem = vec_len <= elem_abi_size; 25308 if (self.hasFeature(.avx2) and abi_size <= 32) { 25309 if (pred_mcv.isRegister()) broadcast: { 25310 try self.asmRegisterRegister( 25311 .{ .v_d, .mov }, 25312 mask_reg.to128(), 25313 pred_mcv.getReg().?.to32(), 25314 ); 25315 if (pred_fits_in_elem and vec_len > 1) try self.asmRegisterRegister( 25316 .{ switch (elem_abi_size) { 25317 1 => .vp_b, 25318 2 => .vp_w, 25319 3...4 => .vp_d, 25320 5...8 => .vp_q, 25321 9...16 => { 25322 try self.asmRegisterRegisterRegisterImmediate( 25323 .{ .v_f128, .insert }, 25324 mask_alias, 25325 mask_alias, 25326 mask_reg.to128(), 25327 .u(1), 25328 ); 25329 break :broadcast; 25330 }, 25331 17...32 => break :broadcast, 25332 else => unreachable, 25333 }, .broadcast }, 25334 mask_alias, 25335 mask_reg.to128(), 25336 ); 25337 } else try self.asmRegisterMemory( 25338 .{ switch (vec_len) { 25339 1...8 => .vp_b, 25340 9...16 => .vp_w, 25341 17...32 => .vp_d, 25342 else => unreachable, 25343 }, .broadcast }, 25344 mask_alias, 25345 if (pred_mcv.isBase()) try pred_mcv.mem(self, .{ .size = .byte }) else .{ 25346 .base = .{ .reg = (try self.copyToTmpRegister( 25347 .usize, 25348 pred_mcv.address(), 25349 )).to64() }, 25350 .mod = .{ .rm = .{ .size = .byte } }, 25351 }, 25352 ); 25353 } else if (abi_size <= 16) broadcast: { 25354 try self.asmRegisterRegister( 25355 .{ if (has_avx) .v_d else ._d, .mov }, 25356 mask_alias, 25357 (if (pred_mcv.isRegister()) 25358 pred_mcv.getReg().? 
25359 else 25360 try self.copyToTmpRegister(pred_ty, pred_mcv.address())).to32(), 25361 ); 25362 if (!pred_fits_in_elem or vec_len == 1) break :broadcast; 25363 if (elem_abi_size <= 1) { 25364 if (has_avx) try self.asmRegisterRegisterRegister( 25365 .{ .vp_, .unpcklbw }, 25366 mask_alias, 25367 mask_alias, 25368 mask_alias, 25369 ) else try self.asmRegisterRegister( 25370 .{ .p_, .unpcklbw }, 25371 mask_alias, 25372 mask_alias, 25373 ); 25374 if (abi_size <= 2) break :broadcast; 25375 } 25376 if (elem_abi_size <= 2) { 25377 try self.asmRegisterRegisterImmediate( 25378 .{ if (has_avx) .vp_w else .p_w, .shufl }, 25379 mask_alias, 25380 mask_alias, 25381 .u(0b00_00_00_00), 25382 ); 25383 if (abi_size <= 8) break :broadcast; 25384 } 25385 try self.asmRegisterRegisterImmediate( 25386 .{ if (has_avx) .vp_d else .p_d, .shuf }, 25387 mask_alias, 25388 mask_alias, 25389 .u(switch (elem_abi_size) { 25390 1...2, 5...8 => 0b01_00_01_00, 25391 3...4 => 0b00_00_00_00, 25392 else => unreachable, 25393 }), 25394 ); 25395 } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)}); 25396 const elem_bits: u16 = @intCast(elem_abi_size * 8); 25397 const mask_elem_ty = try pt.intType(.unsigned, elem_bits); 25398 const mask_ty = try pt.vectorType(.{ .len = vec_len, .child = mask_elem_ty.toIntern() }); 25399 if (!pred_fits_in_elem) if (self.hasFeature(.ssse3)) { 25400 var mask_elems: [32]InternPool.Index = undefined; 25401 for (mask_elems[0..vec_len], 0..) 
|*elem, bit| elem.* = try pt.intern(.{ .int = .{ 25402 .ty = mask_elem_ty.toIntern(), 25403 .storage = .{ .u64 = bit / elem_bits }, 25404 } }); 25405 const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ 25406 .ty = mask_ty.toIntern(), 25407 .storage = .{ .elems = mask_elems[0..vec_len] }, 25408 } }))); 25409 const mask_mem: Memory = .{ 25410 .base = .{ .reg = try self.copyToTmpRegister(.usize, mask_mcv.address()) }, 25411 .mod = .{ .rm = .{ .size = self.memSize(ty) } }, 25412 }; 25413 if (has_avx) try self.asmRegisterRegisterMemory( 25414 .{ .vp_b, .shuf }, 25415 mask_alias, 25416 mask_alias, 25417 mask_mem, 25418 ) else try self.asmRegisterMemory( 25419 .{ .p_b, .shuf }, 25420 mask_alias, 25421 mask_mem, 25422 ); 25423 } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)}); 25424 { 25425 var mask_elems: [32]InternPool.Index = undefined; 25426 for (mask_elems[0..vec_len], 0..) |*elem, bit| elem.* = try pt.intern(.{ .int = .{ 25427 .ty = mask_elem_ty.toIntern(), 25428 .storage = .{ .u64 = @as(u32, 1) << @intCast(bit & (elem_bits - 1)) }, 25429 } }); 25430 const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ 25431 .ty = mask_ty.toIntern(), 25432 .storage = .{ .elems = mask_elems[0..vec_len] }, 25433 } }))); 25434 const mask_mem: Memory = .{ 25435 .base = .{ .reg = try self.copyToTmpRegister(.usize, mask_mcv.address()) }, 25436 .mod = .{ .rm = .{ .size = self.memSize(ty) } }, 25437 }; 25438 if (has_avx) { 25439 try self.asmRegisterRegisterMemory( 25440 .{ .vp_, .@"and" }, 25441 mask_alias, 25442 mask_alias, 25443 mask_mem, 25444 ); 25445 try self.asmRegisterRegisterMemory( 25446 .{ .vp_d, .cmpeq }, 25447 mask_alias, 25448 mask_alias, 25449 mask_mem, 25450 ); 25451 } else { 25452 try self.asmRegisterMemory( 25453 .{ .p_, .@"and" }, 25454 mask_alias, 25455 mask_mem, 25456 ); 25457 try self.asmRegisterMemory( 25458 .{ .p_d, .cmpeq }, 25459 mask_alias, 25460 mask_mem, 25461 ); 25462 } 
25463 } 25464 break :mask mask_reg; 25465 }; 25466 const mask_alias = registerAlias(mask_reg, abi_size); 25467 const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); 25468 defer self.register_manager.unlockReg(mask_lock); 25469 25470 const lhs_mcv = try self.resolveInst(extra.lhs); 25471 const lhs_lock = switch (lhs_mcv) { 25472 .register => |lhs_reg| self.register_manager.lockRegAssumeUnused(lhs_reg), 25473 else => null, 25474 }; 25475 defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); 25476 25477 const rhs_mcv = try self.resolveInst(extra.rhs); 25478 const rhs_lock = switch (rhs_mcv) { 25479 .register => |rhs_reg| self.register_manager.lockReg(rhs_reg), 25480 else => null, 25481 }; 25482 defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); 25483 25484 const reuse_mcv = if (has_blend) rhs_mcv else lhs_mcv; 25485 const dst_mcv: MCValue = if (reuse_mcv.isRegister() and self.reuseOperand( 25486 inst, 25487 if (has_blend) extra.rhs else extra.lhs, 25488 @intFromBool(has_blend), 25489 reuse_mcv, 25490 )) reuse_mcv else if (has_avx) 25491 .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } 25492 else 25493 try self.copyToRegisterWithInstTracking(inst, ty, reuse_mcv); 25494 const dst_reg = dst_mcv.getReg().?; 25495 const dst_alias = registerAlias(dst_reg, abi_size); 25496 const dst_lock = self.register_manager.lockReg(dst_reg); 25497 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 25498 25499 const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.childType(zcu).zigTypeTag(zcu)) { 25500 else => null, 25501 .int => switch (abi_size) { 25502 0 => unreachable, 25503 1...16 => if (has_avx) 25504 .{ .vp_b, .blendv } 25505 else if (has_blend) 25506 .{ .p_b, .blendv } 25507 else 25508 .{ .p_, undefined }, 25509 17...32 => if (self.hasFeature(.avx2)) 25510 .{ .vp_b, .blendv } 25511 else 25512 null, 25513 else => null, 25514 }, 25515 .float => switch (ty.childType(zcu).floatBits(self.target.*)) 
{ 25516 else => unreachable, 25517 16, 80, 128 => null, 25518 32 => switch (vec_len) { 25519 0 => unreachable, 25520 1...4 => if (has_avx) .{ .v_ps, .blendv } else .{ ._ps, .blendv }, 25521 5...8 => if (has_avx) .{ .v_ps, .blendv } else null, 25522 else => null, 25523 }, 25524 64 => switch (vec_len) { 25525 0 => unreachable, 25526 1...2 => if (has_avx) .{ .v_pd, .blendv } else .{ ._pd, .blendv }, 25527 3...4 => if (has_avx) .{ .v_pd, .blendv } else null, 25528 else => null, 25529 }, 25530 }, 25531 }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)}); 25532 if (has_avx) { 25533 const rhs_alias = if (rhs_mcv.isRegister()) 25534 registerAlias(rhs_mcv.getReg().?, abi_size) 25535 else rhs: { 25536 try self.genSetReg(dst_reg, ty, rhs_mcv, .{}); 25537 break :rhs dst_alias; 25538 }; 25539 if (lhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister( 25540 mir_tag, 25541 dst_alias, 25542 rhs_alias, 25543 try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }), 25544 mask_alias, 25545 ) else try self.asmRegisterRegisterRegisterRegister( 25546 mir_tag, 25547 dst_alias, 25548 rhs_alias, 25549 registerAlias(if (lhs_mcv.isRegister()) 25550 lhs_mcv.getReg().? 25551 else 25552 try self.copyToTmpRegister(ty, lhs_mcv), abi_size), 25553 mask_alias, 25554 ); 25555 } else if (has_blend) if (lhs_mcv.isBase()) try self.asmRegisterMemoryRegister( 25556 mir_tag, 25557 dst_alias, 25558 try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }), 25559 mask_alias, 25560 ) else try self.asmRegisterRegisterRegister( 25561 mir_tag, 25562 dst_alias, 25563 registerAlias(if (lhs_mcv.isRegister()) 25564 lhs_mcv.getReg().? 
25565 else 25566 try self.copyToTmpRegister(ty, lhs_mcv), abi_size), 25567 mask_alias, 25568 ) else { 25569 const mir_fixes = @as(?Mir.Inst.Fixes, switch (elem_ty.zigTypeTag(zcu)) { 25570 else => null, 25571 .int => .p_, 25572 .float => switch (elem_ty.floatBits(self.target.*)) { 25573 32 => ._ps, 25574 64 => ._pd, 25575 16, 80, 128 => null, 25576 else => unreachable, 25577 }, 25578 }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)}); 25579 try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); 25580 if (rhs_mcv.isBase()) try self.asmRegisterMemory( 25581 .{ mir_fixes, .andn }, 25582 mask_alias, 25583 try rhs_mcv.mem(self, .{ .size = .fromSize(abi_size) }), 25584 ) else try self.asmRegisterRegister( 25585 .{ mir_fixes, .andn }, 25586 mask_alias, 25587 if (rhs_mcv.isRegister()) 25588 rhs_mcv.getReg().? 25589 else 25590 try self.copyToTmpRegister(ty, rhs_mcv), 25591 ); 25592 try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias); 25593 } 25594 break :result dst_mcv; 25595 }; 25596 return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs }); 25597 } 25598

/// Lowers an AIR shuffle of the operands `extra.a` and `extra.b` under the
/// comptime-known mask `extra.mask`.
///
/// Each mask element is decoded into `?i32`:
///   * `null`  - the mask element is undef, so the lane is unconstrained;
///   * `m >= 0` - take element `m` of `extra.a`;
///   * `m < 0`  - take element `~m` of `extra.b`.
///
/// The strategies below are tried in order and the first one whose
/// preconditions hold produces the result:
///   1. every lane undef: just allocate a result location;
///   2. identity on `extra.a`, then identity on `extra.b`: reuse or copy;
///   3. `unpckl` / `unpckh` interleave;
///   4. `pshufd` (single source operand);
///   5. `shufps`, then `shufpd` (two source operands);
///   6. immediate blend (requires `sse4_1`);
///   7. variable blend through a constant lane mask (`blendv` when `sse4_1`
///      is available, otherwise an `and` / `andn` / `or` sequence);
///   8. two `pshufb` shuffles or-ed together (requires `ssse3`).
///
/// Fails with a "TODO implement airShuffle" error when no strategy applies.
fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;

    const dst_ty = self.typeOfIndex(inst);
    const elem_ty = dst_ty.childType(zcu);
    const elem_abi_size: u16 = @intCast(elem_ty.abiSize(zcu));
    const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
    const lhs_ty = self.typeOf(extra.a);
    const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
    const rhs_ty = self.typeOf(extra.b);
    const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu));
    const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size);

    // Masks of up to 32 elements are decoded on the stack; longer masks fall
    // back to `self.gpa`.
    const ExpectedContents = [32]?i32;
    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
        std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
    const allocator = stack.get();

    const mask_elems = try allocator.alloc(?i32, extra.mask_len);
    defer allocator.free(mask_elems);
    for (mask_elems, 0..) |*mask_elem, elem_index| {
        const mask_elem_val =
            Value.fromInterned(extra.mask).elemValue(pt, elem_index) catch unreachable;
        mask_elem.* = if (mask_elem_val.isUndef(zcu))
            null
        else
            @intCast(mask_elem_val.toSignedInt(zcu));
    }

    const has_avx = self.hasFeature(.avx);
    const result = @as(?MCValue, result: {
        // Every lane is undef: any freshly allocated location will do.
        for (mask_elems) |mask_elem| {
            if (mask_elem) |_| break;
        } else break :result try self.allocRegOrMem(inst, true);

        // Every defined lane `i` selects element `i` of `extra.a`.
        for (mask_elems, 0..) |mask_elem, elem_index| {
            if (mask_elem orelse continue != elem_index) break;
        } else {
            const lhs_mcv = try self.resolveInst(extra.a);
            if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv;
            const dst_mcv = try self.allocRegOrMem(inst, true);
            try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{});
            break :result dst_mcv;
        }

        // Every defined lane `i` selects element `i` of `extra.b`.
        for (mask_elems, 0..) |mask_elem, elem_index| {
            if (~(mask_elem orelse continue) != elem_index) break;
        } else {
            const rhs_mcv = try self.resolveInst(extra.b);
            if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv;
            const dst_mcv = try self.allocRegOrMem(inst, true);
            try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{});
            break :result dst_mcv;
        }

        // `break :unpck` leaves the loop body, i.e. moves on to the next variant.
        for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: {
            if (elem_abi_size > 8) break :unpck;
            if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck;

            // sources[0] feeds the even result lanes, sources[1] the odd ones.
            var sources: [2]?u1 = @splat(null);
            for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
                const mask_elem = maybe_mask_elem orelse continue;
                const mask_elem_index =
                    std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck;
                const elem_byte = (elem_index >> 1) * elem_abi_size;
                if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) {
                    .unpckl => 0b0000,
                    .unpckh => 0b1000,
                    else => unreachable,
                }) | (elem_byte << 1 & 0b10000)) break :unpck;

                const source = @intFromBool(mask_elem < 0);
                if (sources[elem_index & 0b00001]) |prev_source| {
                    if (source != prev_source) break :unpck;
                } else sources[elem_index & 0b00001] = source;
            }
            // Both lane parities must be constrained, and by different operands.
            if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck;

            const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
            const operand_tys = [2]Type{ lhs_ty, rhs_ty };
            const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
            const rhs_mcv = try self.resolveInst(operands[sources[1].?]);

            const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
                self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
                lhs_mcv
            else if (has_avx and lhs_mcv.isRegister())
                .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
            else
                try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
            const dst_reg = dst_mcv.getReg().?;
            const dst_alias = registerAlias(dst_reg, max_abi_size);

            const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
                (dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) {
                4 => if (has_avx) .v_ps else ._ps,
                8 => if (has_avx) .v_pd else ._pd,
                else => unreachable,
            }, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) {
                .unpckl => switch (elem_abi_size) {
                    1 => .unpcklbw,
                    2 => .unpcklwd,
                    4 => .unpckldq,
                    8 => .unpcklqdq,
                    else => unreachable,
                },
                .unpckh => switch (elem_abi_size) {
                    1 => .unpckhbw,
                    2 => .unpckhwd,
                    4 => .unpckhdq,
                    8 => .unpckhqdq,
                    else => unreachable,
                },
                else => unreachable,
            } };
            if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory(
                mir_tag,
                dst_alias,
                registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
                try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
            ) else try self.asmRegisterRegisterRegister(
                mir_tag,
                dst_alias,
                registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
            ) else if (rhs_mcv.isBase()) try self.asmRegisterMemory(
                mir_tag,
                dst_alias,
                try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
            ) else try self.asmRegisterRegister(
                mir_tag,
                dst_alias,
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
            );
            break :result dst_mcv;
        }

        pshufd: {
            if (elem_abi_size != 4) break :pshufd;
            if (max_abi_size > self.vectorSize(.float)) break :pshufd;

            var control: u8 = 0b00_00_00_00;
            // All defined lanes must come from this one operand.
            var sources: [1]?u1 = @splat(null);
            for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
                const mask_elem = maybe_mask_elem orelse continue;
                const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
                if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd;

                const source = @intFromBool(mask_elem < 0);
                if (sources[0]) |prev_source| {
                    if (source != prev_source) break :pshufd;
                    // `sources` has exactly one slot. Indexing it with
                    // `(elem_index & 0b010) >> 1` (as the two-operand
                    // strategies do) would go out of bounds whenever the
                    // first defined lane is lane 2 or 3 of a group of four.
                } else sources[0] = source;

                const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
                const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
                if (elem_index & 0b100 == 0)
                    control |= select_mask
                else if (control & @as(u8, 0b11) << select_bit != select_mask) break :pshufd;
            }

            const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
            const operand_tys = [2]Type{ lhs_ty, rhs_ty };
            const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]);

            const dst_reg = if (src_mcv.isRegister() and
                self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv))
                src_mcv.getReg().?
            else
                try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
            const dst_alias = registerAlias(dst_reg, max_abi_size);

            if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
                .{ if (has_avx) .vp_d else .p_d, .shuf },
                dst_alias,
                try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
                .u(control),
            ) else try self.asmRegisterRegisterImmediate(
                .{ if (has_avx) .vp_d else .p_d, .shuf },
                dst_alias,
                registerAlias(if (src_mcv.isRegister())
                    src_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size),
                .u(control),
            );
            break :result .{ .register = dst_reg };
        }

        shufps: {
            if (elem_abi_size != 4) break :shufps;
            if (max_abi_size > self.vectorSize(.float)) break :shufps;

            var control: u8 = 0b00_00_00_00;
            // sources[0] feeds lanes 0-1 of each group of four, sources[1] lanes 2-3.
            var sources: [2]?u1 = @splat(null);
            for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
                const mask_elem = maybe_mask_elem orelse continue;
                const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
                if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps;

                const source = @intFromBool(mask_elem < 0);
                if (sources[(elem_index & 0b010) >> 1]) |prev_source| {
                    if (source != prev_source) break :shufps;
                } else sources[(elem_index & 0b010) >> 1] = source;

                const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
                const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
                if (elem_index & 0b100 == 0)
                    control |= select_mask
                else if (control & @as(u8, 0b11) << select_bit != select_mask) break :shufps;
            }
            if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps;

            const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
            const operand_tys = [2]Type{ lhs_ty, rhs_ty };
            const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
            const rhs_mcv = try self.resolveInst(operands[sources[1].?]);

            const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
                self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
                lhs_mcv
            else if (has_avx and lhs_mcv.isRegister())
                .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
            else
                try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
            const dst_reg = dst_mcv.getReg().?;
            const dst_alias = registerAlias(dst_reg, max_abi_size);

            if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
                .{ .v_ps, .shuf },
                dst_alias,
                registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
                try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
                .u(control),
            ) else try self.asmRegisterRegisterRegisterImmediate(
                .{ .v_ps, .shuf },
                dst_alias,
                registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
                .u(control),
            ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
                .{ ._ps, .shuf },
                dst_alias,
                try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
                .u(control),
            ) else try self.asmRegisterRegisterImmediate(
                .{ ._ps, .shuf },
                dst_alias,
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
                .u(control),
            );
            break :result dst_mcv;
        }

        shufpd: {
            if (elem_abi_size != 8) break :shufpd;
            if (max_abi_size > self.vectorSize(.float)) break :shufpd;

            var control: u4 = 0b0_0_0_0;
            // sources[0] feeds the even result lanes, sources[1] the odd ones.
            var sources: [2]?u1 = @splat(null);
            for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
                const mask_elem = maybe_mask_elem orelse continue;
                const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
                if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd;

                const source = @intFromBool(mask_elem < 0);
                if (sources[elem_index & 0b01]) |prev_source| {
                    if (source != prev_source) break :shufpd;
                } else sources[elem_index & 0b01] = source;

                control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index);
            }
            if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd;

            const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b };
            const operand_tys: [2]Type = .{ lhs_ty, rhs_ty };
            const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
            const rhs_mcv = try self.resolveInst(operands[sources[1].?]);

            const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
                self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
                lhs_mcv
            else if (has_avx and lhs_mcv.isRegister())
                .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
            else
                try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
            const dst_reg = dst_mcv.getReg().?;
            const dst_alias = registerAlias(dst_reg, max_abi_size);

            if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
                .{ .v_pd, .shuf },
                dst_alias,
                registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
                try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
                .u(control),
            ) else try self.asmRegisterRegisterRegisterImmediate(
                .{ .v_pd, .shuf },
                dst_alias,
                registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
                .u(control),
            ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
                .{ ._pd, .shuf },
                dst_alias,
                try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
                .u(control),
            ) else try self.asmRegisterRegisterImmediate(
                .{ ._pd, .shuf },
                dst_alias,
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
                .u(control),
            );
            break :result dst_mcv;
        }

        blend: {
            if (elem_abi_size < 2) break :blend;
            if (dst_abi_size > self.vectorSize(.float)) break :blend;
            if (!self.hasFeature(.sse4_1)) break :blend;

            // Bit `i` of `control` is set when lane `i` comes from `extra.b`.
            // Lanes 8 and up must repeat the choice made for lane `i - 8`.
            var control: u8 = 0b0_0_0_0_0_0_0_0;
            for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
                const mask_elem = maybe_mask_elem orelse continue;
                const mask_elem_index =
                    std.math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend;
                if (mask_elem_index != elem_index) break :blend;

                const select_mask = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index);
                if (elem_index & 0b1000 == 0)
                    control |= select_mask
                else if (control & @as(u8, 0b1) << @truncate(elem_index) != select_mask) break :blend;
            }

            if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: {
                // For 8-byte elements every control bit is widened to two
                // 4-byte lane bits.
                const expanded_control = switch (elem_abi_size) {
                    4 => control,
                    8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
                        @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
                        @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
                        @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00),
                    else => break :vpblendd,
                };

                const lhs_mcv = try self.resolveInst(extra.a);
                const lhs_reg = if (lhs_mcv.isRegister())
                    lhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(dst_ty, lhs_mcv);
                const lhs_lock = self.register_manager.lockReg(lhs_reg);
                defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

                const rhs_mcv = try self.resolveInst(extra.b);
                const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
                if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
                    .{ .vp_d, .blend },
                    registerAlias(dst_reg, dst_abi_size),
                    registerAlias(lhs_reg, dst_abi_size),
                    try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                    .u(expanded_control),
                ) else try self.asmRegisterRegisterRegisterImmediate(
                    .{ .vp_d, .blend },
                    registerAlias(dst_reg, dst_abi_size),
                    registerAlias(lhs_reg, dst_abi_size),
                    registerAlias(if (rhs_mcv.isRegister())
                        rhs_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                    .u(expanded_control),
                );
                break :result .{ .register = dst_reg };
            }

            if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: {
                // Control bits are widened to 2-byte lane bits. For results
                // wider than 16 bytes this is only attempted when the upper
                // and lower halves of `control` agree.
                const expanded_control = switch (elem_abi_size) {
                    2 => control,
                    4 => if (dst_abi_size <= 16 or
                        @as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0)))
                        @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
                            @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
                            @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
                            @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00)
                    else
                        break :pblendw,
                    8 => if (dst_abi_size <= 16 or
                        @as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0)))
                        @as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) |
                            @as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000)
                    else
                        break :pblendw,
                    16 => break :pblendw,
                    else => unreachable,
                };

                const lhs_mcv = try self.resolveInst(extra.a);
                const rhs_mcv = try self.resolveInst(extra.b);

                const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
                    self.reuseOperand(inst, extra.a, 0, lhs_mcv))
                    lhs_mcv
                else if (has_avx and lhs_mcv.isRegister())
                    .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
                else
                    try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
                const dst_reg = dst_mcv.getReg().?;

                if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
                    .{ .vp_w, .blend },
                    registerAlias(dst_reg, dst_abi_size),
                    registerAlias(if (lhs_mcv.isRegister())
                        lhs_mcv.getReg().?
                    else
                        dst_reg, dst_abi_size),
                    try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                    .u(expanded_control),
                ) else try self.asmRegisterRegisterRegisterImmediate(
                    .{ .vp_w, .blend },
                    registerAlias(dst_reg, dst_abi_size),
                    registerAlias(if (lhs_mcv.isRegister())
                        lhs_mcv.getReg().?
                    else
                        dst_reg, dst_abi_size),
                    registerAlias(if (rhs_mcv.isRegister())
                        rhs_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                    .u(expanded_control),
                ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
                    .{ .p_w, .blend },
                    registerAlias(dst_reg, dst_abi_size),
                    try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                    .u(expanded_control),
                ) else try self.asmRegisterRegisterImmediate(
                    .{ .p_w, .blend },
                    registerAlias(dst_reg, dst_abi_size),
                    registerAlias(if (rhs_mcv.isRegister())
                        rhs_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                    .u(expanded_control),
                );
                break :result .{ .register = dst_reg };
            }

            // Remaining case: `blendps` / `blendpd`. A 16-byte element is
            // blended as two 8-byte lanes.
            const expanded_control = switch (elem_abi_size) {
                4, 8 => control,
                16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) |
                    @as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00),
                else => unreachable,
            };

            const lhs_mcv = try self.resolveInst(extra.a);
            const rhs_mcv = try self.resolveInst(extra.b);

            const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
                self.reuseOperand(inst, extra.a, 0, lhs_mcv))
                lhs_mcv
            else if (has_avx and lhs_mcv.isRegister())
                .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
            else
                try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
            const dst_reg = dst_mcv.getReg().?;

            if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
                switch (elem_abi_size) {
                    4 => .{ .v_ps, .blend },
                    8, 16 => .{ .v_pd, .blend },
                    else => unreachable,
                },
                registerAlias(dst_reg, dst_abi_size),
                registerAlias(if (lhs_mcv.isRegister())
                    lhs_mcv.getReg().?
                else
                    dst_reg, dst_abi_size),
                try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                .u(expanded_control),
            ) else try self.asmRegisterRegisterRegisterImmediate(
                switch (elem_abi_size) {
                    4 => .{ .v_ps, .blend },
                    8, 16 => .{ .v_pd, .blend },
                    else => unreachable,
                },
                registerAlias(dst_reg, dst_abi_size),
                registerAlias(if (lhs_mcv.isRegister())
                    lhs_mcv.getReg().?
                else
                    dst_reg, dst_abi_size),
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                .u(expanded_control),
            ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
                switch (elem_abi_size) {
                    4 => .{ ._ps, .blend },
                    8, 16 => .{ ._pd, .blend },
                    else => unreachable,
                },
                registerAlias(dst_reg, dst_abi_size),
                try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                .u(expanded_control),
            ) else try self.asmRegisterRegisterImmediate(
                switch (elem_abi_size) {
                    4 => .{ ._ps, .blend },
                    8, 16 => .{ ._pd, .blend },
                    else => unreachable,
                },
                registerAlias(dst_reg, dst_abi_size),
                registerAlias(if (rhs_mcv.isRegister())
                    rhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                .u(expanded_control),
            );
            break :result .{ .register = dst_reg };
        }

        blendv: {
            if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv;

            // Build a constant vector holding one unsigned integer per lane:
            // all ones where the lane comes from `extra.b`, zero otherwise.
            const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8);
            const select_mask_ty = try pt.vectorType(.{
                .len = @intCast(mask_elems.len),
                .child = select_mask_elem_ty.toIntern(),
            });
            var select_mask_elems: [32]InternPool.Index = undefined;
            for (
                select_mask_elems[0..mask_elems.len],
                mask_elems,
                0..,
            ) |*select_mask_elem, maybe_mask_elem, elem_index| {
                const mask_elem = maybe_mask_elem orelse {
                    // An undef lane may take either operand, but its slot
                    // must still hold a valid interned value because the
                    // whole slice is interned as an aggregate below. Pick
                    // the zero mask (lane from `extra.a`).
                    select_mask_elem.* =
                        (try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern();
                    continue;
                };
                const mask_elem_index =
                    std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv;
                if (mask_elem_index != elem_index) break :blendv;

                select_mask_elem.* = (if (mask_elem < 0)
                    try select_mask_elem_ty.maxIntScalar(pt, select_mask_elem_ty)
                else
                    try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern();
            }
            const select_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
                .ty = select_mask_ty.toIntern(),
                .storage = .{ .elems = select_mask_elems[0..mask_elems.len] },
            } })));

            if (self.hasFeature(.sse4_1)) {
                const mir_tag: Mir.Inst.FixedTag = .{
                    if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
                        (dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) {
                        4 => if (has_avx) .v_ps else ._ps,
                        8 => if (has_avx) .v_pd else ._pd,
                        else => unreachable,
                    } else if (has_avx) .vp_b else .p_b,
                    .blendv,
                };

                // Without AVX the mask is pinned in xmm0. It is loaded with
                // the vector type, exactly as the AVX branch does, so that
                // the whole lane mask rather than a single element ends up
                // in the register.
                const select_mask_reg = if (!has_avx) reg: {
                    try self.register_manager.getKnownReg(.xmm0, null);
                    try self.genSetReg(.xmm0, select_mask_ty, select_mask_mcv, .{});
                    break :reg .xmm0;
                } else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
                const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size);
                const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg);
                defer self.register_manager.unlockReg(select_mask_lock);

                const lhs_mcv = try self.resolveInst(extra.a);
                const rhs_mcv = try self.resolveInst(extra.b);

                const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
                    self.reuseOperand(inst, extra.a, 0, lhs_mcv))
                    lhs_mcv
                else if (has_avx and lhs_mcv.isRegister())
                    .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
                else
                    try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
                const dst_reg = dst_mcv.getReg().?;
                const dst_alias = registerAlias(dst_reg, dst_abi_size);

                if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister(
                    mir_tag,
                    dst_alias,
                    if (lhs_mcv.isRegister())
                        registerAlias(lhs_mcv.getReg().?, dst_abi_size)
                    else
                        dst_alias,
                    try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                    select_mask_alias,
                ) else try self.asmRegisterRegisterRegisterRegister(
                    mir_tag,
                    dst_alias,
                    if (lhs_mcv.isRegister())
                        registerAlias(lhs_mcv.getReg().?, dst_abi_size)
                    else
                        dst_alias,
                    registerAlias(if (rhs_mcv.isRegister())
                        rhs_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                    select_mask_alias,
                ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister(
                    mir_tag,
                    dst_alias,
                    try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
                    select_mask_alias,
                ) else try self.asmRegisterRegisterRegister(
                    mir_tag,
                    dst_alias,
                    registerAlias(if (rhs_mcv.isRegister())
                        rhs_mcv.getReg().?
                    else
                        try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
                    select_mask_alias,
                );
                break :result dst_mcv;
            }

            // No sse4_1: emulate the blend as
            //   dst  = rhs & mask
            //   mask = andn(mask, lhs)
            //   dst |= mask
            const lhs_mcv = try self.resolveInst(extra.a);
            const rhs_mcv = try self.resolveInst(extra.b);

            const dst_mcv: MCValue = if (rhs_mcv.isRegister() and
                self.reuseOperand(inst, extra.b, 1, rhs_mcv))
                rhs_mcv
            else
                try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv);
            const dst_reg = dst_mcv.getReg().?;
            const dst_alias = registerAlias(dst_reg, dst_abi_size);

            const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
            const mask_alias = registerAlias(mask_reg, dst_abi_size);
            const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
            defer self.register_manager.unlockReg(mask_lock);

            const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat())
                switch (elem_ty.floatBits(self.target.*)) {
                    16, 80, 128 => .p_,
                    32 => ._ps,
                    64 => ._pd,
                    else => unreachable,
                }
            else
                .p_;
            try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
            if (lhs_mcv.isBase()) try self.asmRegisterMemory(
                .{ mir_fixes, .andn },
                mask_alias,
                try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
            ) else try self.asmRegisterRegister(
                .{ mir_fixes, .andn },
                mask_alias,
                if (lhs_mcv.isRegister())
                    lhs_mcv.getReg().?
                else
                    try self.copyToTmpRegister(dst_ty, lhs_mcv),
            );
            try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias);
            break :result dst_mcv;
        }

        pshufb: {
            if (max_abi_size > 16) break :pshufb;
            if (!self.hasFeature(.ssse3)) break :pshufb;

            // temp_regs[0] receives `extra.a` and becomes the result;
            // temp_regs[1] receives `extra.b`.
            const temp_regs =
                try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse);
            const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs);
            defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);

            const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size);
            try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{});

            const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size);
            try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{});

            // Byte-level control for the `extra.a` shuffle. Bytes that are
            // out of range, undef, or sourced from `extra.b` get 0b1_00_00000.
            var lhs_mask_elems: [16]InternPool.Index = undefined;
            for (lhs_mask_elems[0..max_abi_size], 0..) |*lhs_mask_elem, byte_index| {
                const elem_index = byte_index / elem_abi_size;
                lhs_mask_elem.* = try pt.intern(.{ .int = .{
                    .ty = .u8_type,
                    .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
                        const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
                        if (mask_elem < 0) break :elem 0b1_00_00000;
                        const mask_elem_index: u31 = @intCast(mask_elem);
                        const byte_off: u32 = @intCast(byte_index % elem_abi_size);
                        break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
                    } },
                } });
            }
            const lhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type });
            const lhs_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
                .ty = lhs_mask_ty.toIntern(),
                .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] },
            } })));
            const lhs_mask_mem: Memory = .{
                .base = .{ .reg = try self.copyToTmpRegister(.usize, lhs_mask_mcv.address()) },
                .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } },
            };
            if (has_avx) try self.asmRegisterRegisterMemory(
                .{ .vp_b, .shuf },
                lhs_temp_alias,
                lhs_temp_alias,
                lhs_mask_mem,
            ) else try self.asmRegisterMemory(
                .{ .p_b, .shuf },
                lhs_temp_alias,
                lhs_mask_mem,
            );

            // Same again for `extra.b`, this time keeping only the negative
            // mask elements.
            var rhs_mask_elems: [16]InternPool.Index = undefined;
            for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| {
                const elem_index = byte_index / elem_abi_size;
                rhs_mask_elem.* = try pt.intern(.{ .int = .{
                    .ty = .u8_type,
                    .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
                        const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
                        if (mask_elem >= 0) break :elem 0b1_00_00000;
                        const mask_elem_index: u31 = @intCast(~mask_elem);
                        const byte_off: u32 = @intCast(byte_index % elem_abi_size);
                        break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
                    } },
                } });
            }
            const rhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type });
            const rhs_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{
                .ty = rhs_mask_ty.toIntern(),
                .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] },
            } })));
            const rhs_mask_mem: Memory = .{
                .base = .{ .reg = try self.copyToTmpRegister(.usize, rhs_mask_mcv.address()) },
                .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } },
            };
            if (has_avx) try self.asmRegisterRegisterMemory(
                .{ .vp_b, .shuf },
                rhs_temp_alias,
                rhs_temp_alias,
                rhs_mask_mem,
            ) else try self.asmRegisterMemory(
                .{ .p_b, .shuf },
                rhs_temp_alias,
                rhs_mask_mem,
            );

            // Merge the two partial results.
            if (has_avx) try self.asmRegisterRegisterRegister(
                .{ switch (elem_ty.zigTypeTag(zcu)) {
                    else => break :result null,
                    .int => .vp_,
                    .float => switch (elem_ty.floatBits(self.target.*)) {
                        32 => .v_ps,
                        64 => .v_pd,
                        16, 80, 128 => break :result null,
                        else => unreachable,
                    },
                }, .@"or" },
                lhs_temp_alias,
                lhs_temp_alias,
                rhs_temp_alias,
            ) else try self.asmRegisterRegister(
                .{ switch (elem_ty.zigTypeTag(zcu)) {
                    else => break :result null,
                    .int => .p_,
                    .float => switch (elem_ty.floatBits(self.target.*)) {
                        32 => ._ps,
                        64 => ._pd,
                        16, 80, 128 => break :result null,
                        else => unreachable,
                    },
                }, .@"or" },
                lhs_temp_alias,
                rhs_temp_alias,
            );
            break :result .{ .register = temp_regs[0] };
        }

        break :result null;
    }) orelse return self.fail("TODO implement airShuffle from {} and {} to {} with {}", .{
        lhs_ty.fmt(pt),
        rhs_ty.fmt(pt),
        dst_ty.fmt(pt),
        Value.fromInterned(extra.mask).fmtValue(pt),
    });
    return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
}

/// Lowers an AIR reduce. Only `.Or` and `.And` over a vector of `bool` are
/// implemented; every other operand type or operation fails with a
/// "TODO implement airReduce" error.
///
/// The result is returned as an `.eflags` condition (`.nz` for `.Or`, `.z`
/// for `.And`), so eflags are spilled first if they are occupied.
fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const reduce = self.air.instructions.items(.data)[@intFromEnum(inst)].reduce;

    const result: MCValue = result: {
        const operand_ty = self.typeOf(reduce.operand);
        if (operand_ty.isVector(zcu) and operand_ty.childType(zcu).toIntern() == .bool_type) {
            try self.spillEflagsIfOccupied();

            const abi_size: u32 = @intCast(operand_ty.abiSize(zcu));
            const operand_mcv = try self.resolveInst(reduce.operand);
            const mask_len = operand_ty.vectorLen(zcu);
            const mask_len_minus_one = (std.math.cast(u6, mask_len - 1) orelse {
                // `mask_len - 1` does not fit in a u6: fold the operand into
                // a general purpose register one 8-byte limb at a time,
                // reading each limb from the operand's memory location.
                const acc_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const acc_lock = self.register_manager.lockRegAssumeUnused(acc_reg);
                defer self.register_manager.unlockReg(acc_lock);
                var limb_offset: i31 = 0;
                while (limb_offset < abi_size) : (limb_offset += 8) {
                    try self.asmRegisterMemory(
                        .{ ._, if (limb_offset == 0) .mov else switch (reduce.operation) {
                            .Or => .@"or",
                            .And => .@"and",
                            else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}),
                        } },
                        acc_reg.to64(),
                        try operand_mcv.mem(self, .{
                            .size = .qword,
                            .disp = limb_offset,
                        }),
                    );
                }
                switch (reduce.operation) {
                    .Or => {
                        try self.asmRegisterRegister(.{ ._, .@"test" }, acc_reg.to64(), acc_reg.to64());
                        break :result .{ .eflags = .nz };
                    },
                    .And => {
                        try self.asmRegisterImmediate(.{ ._, .cmp }, acc_reg.to64(), .s(-1));
                        break :result .{ .eflags = .z };
                    },
                    else => unreachable,
                }
            });
            // The low `mask_len` bits set.
            const mask = @as(u64, std.math.maxInt(u64)) >> ~mask_len_minus_one;
            switch (reduce.operation) {
                .Or => {
                    if (operand_mcv.isBase()) try self.asmMemoryImmediate(
                        .{ ._, .@"test" },
                        try operand_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
                        if (mask_len < abi_size * 8)
                            .u(mask)
                        else
                            .s(-1),
                    ) else {
                        // Use the operand's own register only if it is a
                        // general purpose one; otherwise copy it to a
                        // temporary first.
                        const operand_reg = registerAlias(operand_reg: {
                            if (operand_mcv.isRegister()) {
                                const operand_reg = operand_mcv.getReg().?;
                                if (operand_reg.class() == .general_purpose) break :operand_reg operand_reg;
                            }
                            break :operand_reg try self.copyToTmpRegister(operand_ty, operand_mcv);
                        }, abi_size);
                        const operand_lock = self.register_manager.lockReg(operand_reg);
                        defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);

                        if (mask_len < abi_size * 8) try self.asmRegisterImmediate(
                            .{ ._, .@"test" },
                            operand_reg,
                            .u(mask),
                        ) else try self.asmRegisterRegister(
                            .{ ._, .@"test" },
                            operand_reg,
                            operand_reg,
                        );
                    }
                    break :result .{ .eflags = .nz };
                },
                .And => {
                    // Invert a copy of the operand and test it.
                    const tmp_reg = registerAlias(
                        try self.copyToTmpRegister(operand_ty, operand_mcv),
                        abi_size,
                    );
                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                    defer self.register_manager.unlockReg(tmp_lock);

                    try self.asmRegister(.{ ._, .not }, tmp_reg);
                    if (mask_len < abi_size * 8)
                        try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_reg, .u(mask))
                    else
                        try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_reg, tmp_reg);
                    break :result .{ .eflags = .z };
                },
                else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}),
            }
        }
        return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)});
    };
    return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
}

26500 26501 fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { 26502 const pt = self.pt; 26503 const zcu = pt.zcu; 26504 const result_ty = self.typeOfIndex(inst); 26505 const len: usize = @intCast(result_ty.arrayLen(zcu)); 26506 const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; 26507 const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra[ty_pl.payload..][0..len]); 26508 const result: MCValue = result: { 26509 switch (result_ty.zigTypeTag(zcu)) { 26510 .@"struct" => { 26511 const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); 26512 if (result_ty.containerLayout(zcu) == .@"packed") { 26513 const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern()); 26514 try self.genInlineMemset( 26515 .{ .lea_frame = .{ .index = frame_index } }, 26516 .{ .immediate = 0 }, 26517 .{ .immediate = result_ty.abiSize(zcu) }, 26518 .{}, 26519 ); 26520 for (elements, 0..)
|elem, elem_i_usize| { 26521 const elem_i: u32 = @intCast(elem_i_usize); 26522 if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue; 26523 26524 const elem_ty = result_ty.fieldType(elem_i, zcu); 26525 const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu)); 26526 if (elem_bit_size > 64) { 26527 return self.fail( 26528 "TODO airAggregateInit implement packed structs with large fields", 26529 .{}, 26530 ); 26531 } 26532 const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); 26533 const elem_abi_bits = elem_abi_size * 8; 26534 const elem_off = pt.structPackedFieldBitOffset(loaded_struct, elem_i); 26535 const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size); 26536 const elem_bit_off = elem_off % elem_abi_bits; 26537 const elem_mcv = try self.resolveInst(elem); 26538 const mat_elem_mcv = switch (elem_mcv) { 26539 .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index }, 26540 else => elem_mcv, 26541 }; 26542 const elem_lock = switch (mat_elem_mcv) { 26543 .register => |reg| self.register_manager.lockReg(reg), 26544 .immediate => |imm| lock: { 26545 if (imm == 0) continue; 26546 break :lock null; 26547 }, 26548 else => null, 26549 }; 26550 defer if (elem_lock) |lock| self.register_manager.unlockReg(lock); 26551 26552 const elem_extra_bits = self.regExtraBits(elem_ty); 26553 { 26554 const temp_reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv); 26555 const temp_alias = registerAlias(temp_reg, elem_abi_size); 26556 const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); 26557 defer self.register_manager.unlockReg(temp_lock); 26558 26559 if (elem_bit_off < elem_extra_bits) { 26560 try self.truncateRegister(elem_ty, temp_alias); 26561 } 26562 if (elem_bit_off > 0) try self.genShiftBinOpMir( 26563 .{ ._l, .sh }, 26564 elem_ty, 26565 .{ .register = temp_alias }, 26566 .u8, 26567 .{ .immediate = elem_bit_off }, 26568 ); 26569 try self.genBinOpMir( 26570 .{ ._, .@"or" }, 26571 elem_ty, 26572 .{ .load_frame = 
.{ .index = frame_index, .off = elem_byte_off } }, 26573 .{ .register = temp_alias }, 26574 ); 26575 } 26576 if (elem_bit_off > elem_extra_bits) { 26577 const temp_reg = try self.copyToTmpRegister(elem_ty, mat_elem_mcv); 26578 const temp_alias = registerAlias(temp_reg, elem_abi_size); 26579 const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); 26580 defer self.register_manager.unlockReg(temp_lock); 26581 26582 if (elem_extra_bits > 0) { 26583 try self.truncateRegister(elem_ty, temp_alias); 26584 } 26585 try self.genShiftBinOpMir( 26586 .{ ._r, .sh }, 26587 elem_ty, 26588 .{ .register = temp_reg }, 26589 .u8, 26590 .{ .immediate = elem_abi_bits - elem_bit_off }, 26591 ); 26592 try self.genBinOpMir( 26593 .{ ._, .@"or" }, 26594 elem_ty, 26595 .{ .load_frame = .{ 26596 .index = frame_index, 26597 .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)), 26598 } }, 26599 .{ .register = temp_alias }, 26600 ); 26601 } 26602 } 26603 } else for (elements, 0..) |elem, elem_i| { 26604 if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue; 26605 26606 const elem_ty = result_ty.fieldType(elem_i, zcu); 26607 const elem_off: i32 = @intCast(result_ty.structFieldOffset(elem_i, zcu)); 26608 const elem_mcv = try self.resolveInst(elem); 26609 const mat_elem_mcv = switch (elem_mcv) { 26610 .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index }, 26611 else => elem_mcv, 26612 }; 26613 try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv, .{}); 26614 } 26615 break :result .{ .load_frame = .{ .index = frame_index } }; 26616 }, 26617 .array, .vector => { 26618 const elem_ty = result_ty.childType(zcu); 26619 if (result_ty.isVector(zcu) and elem_ty.toIntern() == .bool_type) { 26620 const result_size: u32 = @intCast(result_ty.abiSize(zcu)); 26621 const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); 26622 try self.asmRegisterRegister( 26623 .{ ._, .xor }, 26624 registerAlias(dst_reg, @min(result_size, 
4)), 26625 registerAlias(dst_reg, @min(result_size, 4)), 26626 ); 26627 26628 for (elements, 0..) |elem, elem_i| { 26629 const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem }); 26630 const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg); 26631 defer self.register_manager.unlockReg(elem_lock); 26632 26633 try self.asmRegisterImmediate( 26634 .{ ._, .@"and" }, 26635 registerAlias(elem_reg, @min(result_size, 4)), 26636 .u(1), 26637 ); 26638 if (elem_i > 0) try self.asmRegisterImmediate( 26639 .{ ._l, .sh }, 26640 registerAlias(elem_reg, result_size), 26641 .u(@intCast(elem_i)), 26642 ); 26643 try self.asmRegisterRegister( 26644 .{ ._, .@"or" }, 26645 registerAlias(dst_reg, result_size), 26646 registerAlias(elem_reg, result_size), 26647 ); 26648 } 26649 break :result .{ .register = dst_reg }; 26650 } else { 26651 const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); 26652 const elem_size: u32 = @intCast(elem_ty.abiSize(zcu)); 26653 26654 for (elements, 0..) 
|elem, elem_i| { 26655 const elem_mcv = try self.resolveInst(elem); 26656 const mat_elem_mcv = switch (elem_mcv) { 26657 .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index }, 26658 else => elem_mcv, 26659 }; 26660 const elem_off: i32 = @intCast(elem_size * elem_i); 26661 try self.genSetMem( 26662 .{ .frame = frame_index }, 26663 elem_off, 26664 elem_ty, 26665 mat_elem_mcv, 26666 .{}, 26667 ); 26668 } 26669 if (result_ty.sentinel(zcu)) |sentinel| try self.genSetMem( 26670 .{ .frame = frame_index }, 26671 @intCast(elem_size * elements.len), 26672 elem_ty, 26673 try self.genTypedValue(sentinel), 26674 .{}, 26675 ); 26676 break :result .{ .load_frame = .{ .index = frame_index } }; 26677 } 26678 }, 26679 else => unreachable, 26680 } 26681 }; 26682 26683 if (elements.len <= Liveness.bpi - 1) { 26684 var buf: [Liveness.bpi - 1]Air.Inst.Ref = @splat(.none); 26685 @memcpy(buf[0..elements.len], elements); 26686 return self.finishAir(inst, result, buf); 26687 } 26688 var bt = self.liveness.iterateBigTomb(inst); 26689 for (elements) |elem| try self.feed(&bt, elem); 26690 return self.finishAirResult(inst, result); 26691 } 26692 26693 fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void { 26694 const pt = self.pt; 26695 const zcu = pt.zcu; 26696 const ip = &zcu.intern_pool; 26697 const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; 26698 const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data; 26699 const result: MCValue = result: { 26700 const union_ty = self.typeOfIndex(inst); 26701 const layout = union_ty.unionGetLayout(zcu); 26702 26703 const src_ty = self.typeOf(extra.init); 26704 const src_mcv = try self.resolveInst(extra.init); 26705 if (layout.tag_size == 0) { 26706 if (layout.abi_size <= src_ty.abiSize(zcu) and 26707 self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv; 26708 26709 const dst_mcv = try self.allocRegOrMem(inst, true); 26710 try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); 26711 break :result 
dst_mcv; 26712 } 26713 26714 const dst_mcv = try self.allocRegOrMem(inst, false); 26715 26716 const loaded_union = zcu.typeToUnion(union_ty).?; 26717 const field_name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index]; 26718 const tag_ty: Type = .fromInterned(loaded_union.enum_tag_ty); 26719 const field_index = tag_ty.enumFieldIndex(field_name, zcu).?; 26720 const tag_val = try pt.enumValueFieldIndex(tag_ty, field_index); 26721 const tag_int_val = try tag_val.intFromEnum(tag_ty, pt); 26722 const tag_int = tag_int_val.toUnsignedInt(zcu); 26723 const tag_off: i32 = @intCast(layout.tagOffset()); 26724 try self.genCopy( 26725 tag_ty, 26726 dst_mcv.address().offset(tag_off).deref(), 26727 .{ .immediate = tag_int }, 26728 .{}, 26729 ); 26730 26731 const pl_off: i32 = @intCast(layout.payloadOffset()); 26732 try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv, .{}); 26733 26734 break :result dst_mcv; 26735 }; 26736 return self.finishAir(inst, result, .{ extra.init, .none, .none }); 26737 } 26738 26739 fn airPrefetch(self: *CodeGen, inst: Air.Inst.Index) !void { 26740 const prefetch = self.air.instructions.items(.data)[@intFromEnum(inst)].prefetch; 26741 return self.finishAir(inst, .unreach, .{ prefetch.ptr, .none, .none }); 26742 } 26743 26744 fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void { 26745 const pt = self.pt; 26746 const zcu = pt.zcu; 26747 const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; 26748 const extra = self.air.extraData(Air.Bin, pl_op.payload).data; 26749 const ty = self.typeOfIndex(inst); 26750 26751 const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand }; 26752 const result = result: { 26753 if (switch (ty.scalarType(zcu).floatBits(self.target.*)) { 26754 16, 80, 128 => true, 26755 32, 64 => !self.hasFeature(.fma), 26756 else => unreachable, 26757 }) { 26758 if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement airMulAdd for {}", .{ 26759 ty.fmt(pt), 26760 }); 
26761 26762 var callee_buf: ["__fma?".len]u8 = undefined; 26763 break :result try self.genCall(.{ .lib = .{ 26764 .return_type = ty.toIntern(), 26765 .param_types = &.{ ty.toIntern(), ty.toIntern(), ty.toIntern() }, 26766 .callee = std.fmt.bufPrint(&callee_buf, "{s}fma{s}", .{ 26767 floatLibcAbiPrefix(ty), 26768 floatLibcAbiSuffix(ty), 26769 }) catch unreachable, 26770 } }, &.{ ty, ty, ty }, &.{ 26771 .{ .air_ref = extra.lhs }, .{ .air_ref = extra.rhs }, .{ .air_ref = pl_op.operand }, 26772 }, .{}); 26773 } 26774 26775 var mcvs: [3]MCValue = undefined; 26776 var locks: [3]?RegisterManager.RegisterLock = @splat(null); 26777 defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); 26778 var order: [3]u2 = @splat(0); 26779 var unused: std.StaticBitSet(3) = .initFull(); 26780 for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| { 26781 const op_index: u2 = @intCast(op_i); 26782 mcv.* = try self.resolveInst(op); 26783 if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) { 26784 order[op_index] = 1; 26785 unused.unset(0); 26786 } else if (unused.isSet(2) and mcv.isBase()) { 26787 order[op_index] = 3; 26788 unused.unset(2); 26789 } 26790 switch (mcv.*) { 26791 .register => |reg| lock.* = self.register_manager.lockReg(reg), 26792 else => {}, 26793 } 26794 } 26795 for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| { 26796 if (mop_index.* != 0) continue; 26797 mop_index.* = 1 + @as(u2, @intCast(unused.toggleFirstSet().?)); 26798 if (mop_index.* > 1 and mcv.isRegister()) continue; 26799 const reg = try self.copyToTmpRegister(ty, mcv.*); 26800 mcv.* = .{ .register = reg }; 26801 if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock); 26802 lock.* = self.register_manager.lockRegAssumeUnused(reg); 26803 } 26804 26805 const mir_tag = @as(?Mir.Inst.FixedTag, if (std.mem.eql(u2, &order, &.{ 1, 3, 2 }) or 26806 std.mem.eql(u2, &order, &.{ 3, 1, 2 })) 26807 switch (ty.zigTypeTag(zcu)) { 26808 .float 
=> switch (ty.floatBits(self.target.*)) { 26809 32 => .{ .v_ss, .fmadd132 }, 26810 64 => .{ .v_sd, .fmadd132 }, 26811 16, 80, 128 => null, 26812 else => unreachable, 26813 }, 26814 .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { 26815 .float => switch (ty.childType(zcu).floatBits(self.target.*)) { 26816 32 => switch (ty.vectorLen(zcu)) { 26817 1 => .{ .v_ss, .fmadd132 }, 26818 2...8 => .{ .v_ps, .fmadd132 }, 26819 else => null, 26820 }, 26821 64 => switch (ty.vectorLen(zcu)) { 26822 1 => .{ .v_sd, .fmadd132 }, 26823 2...4 => .{ .v_pd, .fmadd132 }, 26824 else => null, 26825 }, 26826 16, 80, 128 => null, 26827 else => unreachable, 26828 }, 26829 else => unreachable, 26830 }, 26831 else => unreachable, 26832 } 26833 else if (std.mem.eql(u2, &order, &.{ 2, 1, 3 }) or std.mem.eql(u2, &order, &.{ 1, 2, 3 })) 26834 switch (ty.zigTypeTag(zcu)) { 26835 .float => switch (ty.floatBits(self.target.*)) { 26836 32 => .{ .v_ss, .fmadd213 }, 26837 64 => .{ .v_sd, .fmadd213 }, 26838 16, 80, 128 => null, 26839 else => unreachable, 26840 }, 26841 .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { 26842 .float => switch (ty.childType(zcu).floatBits(self.target.*)) { 26843 32 => switch (ty.vectorLen(zcu)) { 26844 1 => .{ .v_ss, .fmadd213 }, 26845 2...8 => .{ .v_ps, .fmadd213 }, 26846 else => null, 26847 }, 26848 64 => switch (ty.vectorLen(zcu)) { 26849 1 => .{ .v_sd, .fmadd213 }, 26850 2...4 => .{ .v_pd, .fmadd213 }, 26851 else => null, 26852 }, 26853 16, 80, 128 => null, 26854 else => unreachable, 26855 }, 26856 else => unreachable, 26857 }, 26858 else => unreachable, 26859 } 26860 else if (std.mem.eql(u2, &order, &.{ 2, 3, 1 }) or std.mem.eql(u2, &order, &.{ 3, 2, 1 })) 26861 switch (ty.zigTypeTag(zcu)) { 26862 .float => switch (ty.floatBits(self.target.*)) { 26863 32 => .{ .v_ss, .fmadd231 }, 26864 64 => .{ .v_sd, .fmadd231 }, 26865 16, 80, 128 => null, 26866 else => unreachable, 26867 }, 26868 .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { 26869 .float => 
switch (ty.childType(zcu).floatBits(self.target.*)) { 26870 32 => switch (ty.vectorLen(zcu)) { 26871 1 => .{ .v_ss, .fmadd231 }, 26872 2...8 => .{ .v_ps, .fmadd231 }, 26873 else => null, 26874 }, 26875 64 => switch (ty.vectorLen(zcu)) { 26876 1 => .{ .v_sd, .fmadd231 }, 26877 2...4 => .{ .v_pd, .fmadd231 }, 26878 else => null, 26879 }, 26880 16, 80, 128 => null, 26881 else => unreachable, 26882 }, 26883 else => unreachable, 26884 }, 26885 else => unreachable, 26886 } 26887 else 26888 unreachable) orelse return self.fail("TODO implement airMulAdd for {}", .{ty.fmt(pt)}); 26889 26890 var mops: [3]MCValue = undefined; 26891 for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv; 26892 26893 const abi_size: u32 = @intCast(ty.abiSize(zcu)); 26894 const mop1_reg = registerAlias(mops[0].getReg().?, abi_size); 26895 const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); 26896 if (mops[2].isRegister()) try self.asmRegisterRegisterRegister( 26897 mir_tag, 26898 mop1_reg, 26899 mop2_reg, 26900 registerAlias(mops[2].getReg().?, abi_size), 26901 ) else try self.asmRegisterRegisterMemory( 26902 mir_tag, 26903 mop1_reg, 26904 mop2_reg, 26905 try mops[2].mem(self, .{ .size = .fromSize(abi_size) }), 26906 ); 26907 break :result mops[0]; 26908 }; 26909 return self.finishAir(inst, result, ops); 26910 } 26911 26912 fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { 26913 const pt = self.pt; 26914 const zcu = pt.zcu; 26915 const va_list_ty = self.air.instructions.items(.data)[@intFromEnum(inst)].ty; 26916 const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque); 26917 26918 const result: MCValue = switch (self.fn_type.fnCallingConvention(zcu)) { 26919 .x86_64_sysv => result: { 26920 const info = self.va_info.sysv; 26921 const dst_fi = try self.allocFrameIndex(.initSpill(va_list_ty, zcu)); 26922 var field_off: u31 = 0; 26923 // gp_offset: c_uint, 26924 try self.genSetMem( 26925 .{ .frame = dst_fi }, 26926 field_off, 26927 .c_uint, 26928 .{ .immediate = 
info.gp_count * 8 }, 26929 .{}, 26930 ); 26931 field_off += @intCast(Type.c_uint.abiSize(zcu)); 26932 // fp_offset: c_uint, 26933 try self.genSetMem( 26934 .{ .frame = dst_fi }, 26935 field_off, 26936 .c_uint, 26937 .{ .immediate = abi.SysV.c_abi_int_param_regs.len * 8 + info.fp_count * 16 }, 26938 .{}, 26939 ); 26940 field_off += @intCast(Type.c_uint.abiSize(zcu)); 26941 // overflow_arg_area: *anyopaque, 26942 try self.genSetMem( 26943 .{ .frame = dst_fi }, 26944 field_off, 26945 ptr_anyopaque_ty, 26946 .{ .lea_frame = info.overflow_arg_area }, 26947 .{}, 26948 ); 26949 field_off += @intCast(ptr_anyopaque_ty.abiSize(zcu)); 26950 // reg_save_area: *anyopaque, 26951 try self.genSetMem( 26952 .{ .frame = dst_fi }, 26953 field_off, 26954 ptr_anyopaque_ty, 26955 .{ .lea_frame = info.reg_save_area }, 26956 .{}, 26957 ); 26958 field_off += @intCast(ptr_anyopaque_ty.abiSize(zcu)); 26959 break :result .{ .load_frame = .{ .index = dst_fi } }; 26960 }, 26961 .x86_64_win => return self.fail("TODO implement c_va_start for Win64", .{}), 26962 else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}), 26963 }; 26964 return self.finishAir(inst, result, .{ .none, .none, .none }); 26965 } 26966 26967 fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { 26968 const pt = self.pt; 26969 const zcu = pt.zcu; 26970 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 26971 const ty = self.typeOfIndex(inst); 26972 const promote_ty = self.promoteVarArg(ty); 26973 const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque); 26974 const unused = self.liveness.isUnused(inst); 26975 26976 const result: MCValue = switch (self.fn_type.fnCallingConvention(zcu)) { 26977 .x86_64_sysv => result: { 26978 try self.spillEflagsIfOccupied(); 26979 26980 const tmp_regs = 26981 try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); 26982 const offset_reg = tmp_regs[0].to32(); 26983 const addr_reg = tmp_regs[1].to64(); 26984 const 
tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); 26985 defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); 26986 26987 const promote_mcv = try self.allocTempRegOrMem(promote_ty, true); 26988 const promote_lock = switch (promote_mcv) { 26989 .register => |reg| self.register_manager.lockRegAssumeUnused(reg), 26990 else => null, 26991 }; 26992 defer if (promote_lock) |lock| self.register_manager.unlockReg(lock); 26993 26994 const ptr_arg_list_reg = 26995 try self.copyToTmpRegister(self.typeOf(ty_op.operand), .{ .air_ref = ty_op.operand }); 26996 const ptr_arg_list_lock = self.register_manager.lockRegAssumeUnused(ptr_arg_list_reg); 26997 defer self.register_manager.unlockReg(ptr_arg_list_lock); 26998 26999 const gp_offset: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 0 } }; 27000 const fp_offset: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 4 } }; 27001 const overflow_arg_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 8 } }; 27002 const reg_save_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 16 } }; 27003 27004 const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target.*, .arg), .none); 27005 switch (classes[0]) { 27006 .integer => { 27007 assert(classes.len == 1); 27008 27009 try self.genSetReg(offset_reg, .c_uint, gp_offset, .{}); 27010 try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u( 27011 abi.SysV.c_abi_int_param_regs.len * 8, 27012 )); 27013 const mem_reloc = try self.asmJccReloc(.ae, undefined); 27014 27015 try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{}); 27016 if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{ 27017 .base = .{ .reg = addr_reg }, 27018 .mod = .{ .rm = .{ 27019 .size = .qword, 27020 .index = offset_reg.to64(), 27021 } }, 27022 }); 27023 try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{ 27024 .base = .{ .reg = offset_reg.to64() }, 27025 .mod = .{ .rm = .{ 27026 .size = 
.qword, 27027 .disp = 8, 27028 } }, 27029 }); 27030 try self.genCopy(.c_uint, gp_offset, .{ .register = offset_reg }, .{}); 27031 const done_reloc = try self.asmJmpReloc(undefined); 27032 27033 self.performReloc(mem_reloc); 27034 try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{}); 27035 try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{ 27036 .base = .{ .reg = addr_reg }, 27037 .mod = .{ .rm = .{ 27038 .size = .qword, 27039 .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)), 27040 } }, 27041 }); 27042 try self.genCopy( 27043 ptr_anyopaque_ty, 27044 overflow_arg_area, 27045 .{ .register = offset_reg.to64() }, 27046 .{}, 27047 ); 27048 27049 self.performReloc(done_reloc); 27050 if (!unused) try self.genCopy(promote_ty, promote_mcv, .{ 27051 .indirect = .{ .reg = addr_reg }, 27052 }, .{}); 27053 }, 27054 .sse => { 27055 assert(classes.len == 1); 27056 27057 try self.genSetReg(offset_reg, .c_uint, fp_offset, .{}); 27058 try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u( 27059 abi.SysV.c_abi_int_param_regs.len * 8 + abi.SysV.c_abi_sse_param_regs.len * 16, 27060 )); 27061 const mem_reloc = try self.asmJccReloc(.ae, undefined); 27062 27063 try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{}); 27064 if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{ 27065 .base = .{ .reg = addr_reg }, 27066 .mod = .{ .rm = .{ 27067 .size = .qword, 27068 .index = offset_reg.to64(), 27069 } }, 27070 }); 27071 try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{ 27072 .base = .{ .reg = offset_reg.to64() }, 27073 .mod = .{ .rm = .{ 27074 .size = .qword, 27075 .disp = 16, 27076 } }, 27077 }); 27078 try self.genCopy(.c_uint, fp_offset, .{ .register = offset_reg }, .{}); 27079 const done_reloc = try self.asmJmpReloc(undefined); 27080 27081 self.performReloc(mem_reloc); 27082 try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{}); 27083 try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{ 27084 
.base = .{ .reg = addr_reg }, 27085 .mod = .{ .rm = .{ 27086 .size = .qword, 27087 .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)), 27088 } }, 27089 }); 27090 try self.genCopy( 27091 ptr_anyopaque_ty, 27092 overflow_arg_area, 27093 .{ .register = offset_reg.to64() }, 27094 .{}, 27095 ); 27096 27097 self.performReloc(done_reloc); 27098 if (!unused) try self.genCopy(promote_ty, promote_mcv, .{ 27099 .indirect = .{ .reg = addr_reg }, 27100 }, .{}); 27101 }, 27102 .memory => { 27103 assert(classes.len == 1); 27104 unreachable; 27105 }, 27106 else => return self.fail("TODO implement c_va_arg for {} on SysV", .{promote_ty.fmt(pt)}), 27107 } 27108 27109 if (unused) break :result .unreach; 27110 if (ty.toIntern() == promote_ty.toIntern()) break :result promote_mcv; 27111 27112 if (!promote_ty.isRuntimeFloat()) { 27113 const dst_mcv = try self.allocRegOrMem(inst, true); 27114 try self.genCopy(ty, dst_mcv, promote_mcv, .{}); 27115 break :result dst_mcv; 27116 } 27117 27118 assert(ty.toIntern() == .f32_type and promote_ty.toIntern() == .f64_type); 27119 const dst_mcv = if (promote_mcv.isRegister()) 27120 promote_mcv 27121 else 27122 try self.copyToRegisterWithInstTracking(inst, ty, promote_mcv); 27123 const dst_reg = dst_mcv.getReg().?.to128(); 27124 const dst_lock = self.register_manager.lockReg(dst_reg); 27125 defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); 27126 27127 if (self.hasFeature(.avx)) if (promote_mcv.isBase()) try self.asmRegisterRegisterMemory( 27128 .{ .v_ss, .cvtsd2 }, 27129 dst_reg, 27130 dst_reg, 27131 try promote_mcv.mem(self, .{ .size = .qword }), 27132 ) else try self.asmRegisterRegisterRegister( 27133 .{ .v_ss, .cvtsd2 }, 27134 dst_reg, 27135 dst_reg, 27136 (if (promote_mcv.isRegister()) 27137 promote_mcv.getReg().? 
27138 else 27139 try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(), 27140 ) else if (promote_mcv.isBase()) try self.asmRegisterMemory( 27141 .{ ._ss, .cvtsd2 }, 27142 dst_reg, 27143 try promote_mcv.mem(self, .{ .size = .qword }), 27144 ) else try self.asmRegisterRegister( 27145 .{ ._ss, .cvtsd2 }, 27146 dst_reg, 27147 (if (promote_mcv.isRegister()) 27148 promote_mcv.getReg().? 27149 else 27150 try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(), 27151 ); 27152 break :result promote_mcv; 27153 }, 27154 .x86_64_win => return self.fail("TODO implement c_va_arg for Win64", .{}), 27155 else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}), 27156 }; 27157 return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); 27158 } 27159 27160 fn airVaCopy(self: *CodeGen, inst: Air.Inst.Index) !void { 27161 const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; 27162 const ptr_va_list_ty = self.typeOf(ty_op.operand); 27163 27164 const dst_mcv = try self.allocRegOrMem(inst, true); 27165 try self.load(dst_mcv, ptr_va_list_ty, .{ .air_ref = ty_op.operand }); 27166 return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); 27167 } 27168 27169 fn airVaEnd(self: *CodeGen, inst: Air.Inst.Index) !void { 27170 const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; 27171 return self.finishAir(inst, .unreach, .{ un_op, .none, .none }); 27172 } 27173 27174 fn resolveInst(self: *CodeGen, ref: Air.Inst.Ref) InnerError!MCValue { 27175 const zcu = self.pt.zcu; 27176 const ty = self.typeOf(ref); 27177 27178 // If the type has no codegen bits, no need to store it. 
27179 if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return .none; 27180 27181 const mcv = if (ref.toIndex()) |inst| mcv: { 27182 break :mcv self.inst_tracking.getPtr(inst).?.short; 27183 } else mcv: { 27184 const ip_index = ref.toInterned().?; 27185 const gop = try self.const_tracking.getOrPut(self.gpa, ip_index); 27186 if (!gop.found_existing) gop.value_ptr.* = .init(init: { 27187 const const_mcv = try self.genTypedValue(.fromInterned(ip_index)); 27188 switch (const_mcv) { 27189 .lea_tlv => |tlv_sym| switch (self.bin_file.tag) { 27190 .elf, .macho => { 27191 if (self.mod.pic) { 27192 try self.spillRegisters(&.{ .rdi, .rax }); 27193 } else { 27194 try self.spillRegisters(&.{.rax}); 27195 } 27196 const frame_index = try self.allocFrameIndex(.init(.{ 27197 .size = 8, 27198 .alignment = .@"8", 27199 })); 27200 try self.genSetMem( 27201 .{ .frame = frame_index }, 27202 0, 27203 .usize, 27204 .{ .lea_symbol = .{ .sym_index = tlv_sym } }, 27205 .{}, 27206 ); 27207 break :init .{ .load_frame = .{ .index = frame_index } }; 27208 }, 27209 else => break :init const_mcv, 27210 }, 27211 else => break :init const_mcv, 27212 } 27213 }); 27214 break :mcv gop.value_ptr.short; 27215 }; 27216 27217 switch (mcv) { 27218 .none, .unreach, .dead => unreachable, 27219 else => return mcv, 27220 } 27221 } 27222 27223 fn getResolvedInstValue(self: *CodeGen, inst: Air.Inst.Index) *InstTracking { 27224 const tracking = self.inst_tracking.getPtr(inst).?; 27225 return switch (tracking.short) { 27226 .none, .unreach, .dead => unreachable, 27227 else => tracking, 27228 }; 27229 } 27230 27231 /// If the MCValue is an immediate, and it does not fit within this type, 27232 /// we put it in a register. 27233 /// A potential opportunity for future optimization here would be keeping track 27234 /// of the fact that the instruction is available both as an immediate 27235 /// and as a register. 
fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) !MCValue {
    const int_info = @typeInfo(T).int;
    // The immediate is unsigned, so for a signed `T` the sign bit is excluded
    // from the width used for the range check below.
    const Unsigned = std.meta.Int(
        .unsigned,
        int_info.bits - @intFromBool(int_info.signedness == .signed),
    );

    const resolved = try self.resolveInst(operand);
    return switch (resolved) {
        // An immediate at or above `maxInt(Unsigned)` is copied into a
        // temporary register; anything smaller is returned unchanged.
        .immediate => |imm| if (imm >= std.math.maxInt(Unsigned))
            MCValue{ .register = try self.copyToTmpRegister(.usize, resolved) }
        else
            resolved,
        else => resolved,
    };
}

/// Lowers `val` through `codegen.genTypedValue` and translates the generic
/// result into this backend's `MCValue`.
/// A `.fail` result is turned into an error via `failMsg`.
fn genTypedValue(self: *CodeGen, val: Value) InnerError!MCValue {
    const lowered = try codegen.genTypedValue(
        self.bin_file,
        self.pt,
        self.src_loc,
        val,
        self.target.*,
    );
    const generic_mcv = switch (lowered) {
        .mcv => |generic| generic,
        .fail => |msg| return self.failMsg(msg),
    };
    return switch (generic_mcv) {
        .none => .none,
        .undef => .undef,
        .immediate => |imm| .{ .immediate = imm },
        .memory => |addr| .{ .memory = addr },
        .load_symbol => |sym_index| .{ .load_symbol = .{ .sym_index = sym_index } },
        .lea_symbol => |sym_index| .{ .lea_symbol = .{ .sym_index = sym_index } },
        .load_direct => |sym_index| .{ .load_direct = sym_index },
        .lea_direct => |sym_index| .{ .lea_direct = sym_index },
        // Note: the generic GOT and TLV loads are mapped to the `lea_*` variants here.
        .load_got => |sym_index| .{ .lea_got = sym_index },
        .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
    };
}

/// Result of `resolveCallingConventionValues`.
/// `args` is heap-allocated and must be released with `deinit`.
const CallMCValues = struct {
    /// Location of each argument; allocated from the `CodeGen`'s `gpa`.
    args: []MCValue,
    /// Tracking for where the return value lives.
    return_value: InstTracking,
    /// Stack bytes used by the call (presumably for stack-passed arguments; confirm against the producer).
    stack_byte_count: u31,
    /// Stack alignment associated with the call.
    stack_align: InternPool.Alignment,
    /// Presumably the number of general-purpose argument registers used; confirm against the producer.
    gp_count: u32,
    /// Presumably the number of floating-point argument registers used; confirm against the producer.
    fp_count: u32,

    /// Frees `args` with `func.gpa` and leaves `self` undefined.
    fn deinit(self: *CallMCValues, func: *CodeGen) void {
        func.gpa.free(self.args);
        self.* = undefined;
    }
};

/// Caller must call `CallMCValues.deinit`.
/// Chooses a machine location for the return value and for every parameter of
/// a function of type `fn_info` called with the extra variadic arguments
/// `var_args` (each promoted through `promoteVarArg`).
///
/// Stack-passed parameters are described as `load_frame` values relative to
/// `stack_frame_base`. On success the returned `args` slice is owned by the
/// caller; on error it is freed before returning.
///
/// Calling conventions other than `.naked`, `.x86_64_sysv`, `.x86_64_win` and
/// `.auto` are reported with `fail`, as is a Win64 by-reference argument that
/// no longer fits in the integer parameter registers.
fn resolveCallingConventionValues(
    self: *CodeGen,
    fn_info: InternPool.Key.FuncType,
    var_args: []const Type,
    stack_frame_base: FrameIndex,
) !CallMCValues {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ip = &zcu.intern_pool;
    const cc = fn_info.cc;
    const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len);
    defer self.gpa.free(param_types);

    // Declared parameters first, followed by the promoted variadic arguments.
    for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src|
        dest.* = .fromInterned(src);
    for (param_types[fn_info.param_types.len..], var_args) |*param_ty, arg_ty|
        param_ty.* = self.promoteVarArg(arg_ty);

    var result: CallMCValues = .{
        .args = try self.gpa.alloc(MCValue, param_types.len),
        // These undefined values must be populated before returning from this function.
        .return_value = undefined,
        .stack_byte_count = 0,
        .stack_align = undefined,
        .gp_count = 0,
        .fp_count = 0,
    };
    errdefer self.gpa.free(result.args);

    const ret_ty: Type = .fromInterned(fn_info.return_type);
    switch (cc) {
        .naked => {
            assert(result.args.len == 0);
            result.return_value = .init(.unreach);
            result.stack_align = switch (self.target.cpu.arch) {
                else => unreachable,
                .x86 => .@"4",
                .x86_64 => .@"8",
            };
        },
        .x86_64_sysv, .x86_64_win => |cc_opts| {
            var ret_int_reg_i: u32 = 0;
            var ret_sse_reg_i: u32 = 0;
            var param_int_reg_i: u32 = 0;
            var param_sse_reg_i: u32 = 0;
            result.stack_align = .fromByteUnits(cc_opts.incoming_stack_alignment orelse 16);

            // Win64 starts its argument area with 4 * 8 reserved bytes.
            switch (cc) {
                .x86_64_sysv => {},
                .x86_64_win => result.stack_byte_count += @intCast(4 * 8),
                else => unreachable,
            }

            // Return values
            if (ret_ty.isNoReturn(zcu)) {
                result.return_value = .init(.unreach);
            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
                // TODO: is this even possible for C calling convention?
                result.return_value = .init(.none);
            } else {
                var ret_tracking: [4]InstTracking = undefined;
                var ret_tracking_i: usize = 0;

                const classes = switch (cc) {
                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none),
                    .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu)},
                    else => unreachable,
                };
                for (classes) |class| switch (class) {
                    .integer => {
                        const ret_int_reg = registerAlias(
                            abi.getCAbiIntReturnRegs(cc)[ret_int_reg_i],
                            @intCast(@min(ret_ty.abiSize(zcu), 8)),
                        );
                        ret_int_reg_i += 1;

                        ret_tracking[ret_tracking_i] = .init(.{ .register = ret_int_reg });
                        ret_tracking_i += 1;
                    },
                    .sse, .float, .float_combine, .win_i128 => {
                        const ret_sse_regs = abi.getCAbiSseReturnRegs(cc);
                        const abi_size: u32 = @intCast(ret_ty.abiSize(zcu));
                        const reg_size = @min(abi_size, self.vectorSize(.float));
                        var byte_offset: u32 = 0;
                        while (byte_offset < abi_size) : (byte_offset += reg_size) {
                            const ret_sse_reg = registerAlias(ret_sse_regs[ret_sse_reg_i], reg_size);
                            ret_sse_reg_i += 1;

                            ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg });
                            ret_tracking_i += 1;
                        }
                    },
                    .sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse),
                    .x87 => {
                        ret_tracking[ret_tracking_i] = .init(.{ .register = abi.getCAbiX87ReturnRegs(cc)[0] });
                        ret_tracking_i += 1;
                    },
                    .x87up => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .x87),
                    .complex_x87 => {
                        ret_tracking[ret_tracking_i] = .init(.{ .register_pair = abi.getCAbiX87ReturnRegs(cc)[0..2].* });
                        ret_tracking_i += 1;
                    },
                    .memory => {
                        // Returned indirectly: `short` uses the first integer return
                        // register and `long` the first integer parameter register,
                        // which is consumed here.
                        const ret_int_reg = abi.getCAbiIntReturnRegs(cc)[ret_int_reg_i].to64();
                        ret_int_reg_i += 1;
                        const ret_indirect_reg = abi.getCAbiIntParamRegs(cc)[param_int_reg_i];
                        param_int_reg_i += 1;

                        ret_tracking[ret_tracking_i] = .{
                            .short = .{ .indirect = .{ .reg = ret_int_reg } },
                            .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                        };
                        ret_tracking_i += 1;
                    },
                    .none, .integer_per_element => unreachable,
                };
                // Merge the per-class registers into a single tracked value.
                result.return_value = switch (ret_tracking_i) {
                    else => unreachable,
                    1 => ret_tracking[0],
                    2 => .init(.{ .register_pair = .{
                        ret_tracking[0].short.register,
                        ret_tracking[1].short.register,
                    } }),
                    3 => .init(.{ .register_triple = .{
                        ret_tracking[0].short.register,
                        ret_tracking[1].short.register,
                        ret_tracking[2].short.register,
                    } }),
                    4 => .init(.{ .register_quadruple = .{
                        ret_tracking[0].short.register,
                        ret_tracking[1].short.register,
                        ret_tracking[2].short.register,
                        ret_tracking[3].short.register,
                    } }),
                };
            }

            // Input params
            for (param_types, result.args) |ty, *arg| {
                assert(ty.hasRuntimeBitsIgnoreComptime(zcu));
                switch (cc) {
                    .x86_64_sysv => {},
                    .x86_64_win => {
                        // Keep both register counters equal so that every Win64
                        // parameter advances the integer and SSE positions together.
                        param_int_reg_i = @max(param_int_reg_i, param_sse_reg_i);
                        param_sse_reg_i = param_int_reg_i;
                    },
                    else => unreachable,
                }

                var arg_mcv: [4]MCValue = undefined;
                var arg_mcv_i: usize = 0;

                const classes = switch (cc) {
                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none),
                    .x86_64_win => &.{abi.classifyWindows(ty, zcu)},
                    else => unreachable,
                };
                // Breaking out of this loop skips its `else` branch and falls
                // through to the stack-passing code below.
                classes: for (classes) |class| switch (class) {
                    .integer => {
                        const param_int_regs = abi.getCAbiIntParamRegs(cc);
                        if (param_int_reg_i >= param_int_regs.len) break;

                        const param_int_reg =
                            registerAlias(param_int_regs[param_int_reg_i], @intCast(@min(ty.abiSize(zcu), 8)));
                        param_int_reg_i += 1;

                        arg_mcv[arg_mcv_i] = .{ .register = param_int_reg };
                        arg_mcv_i += 1;
                    },
                    .sse, .float, .float_combine => {
                        const param_sse_regs = abi.getCAbiSseParamRegs(cc);
                        const abi_size: u32 = @intCast(ty.abiSize(zcu));
                        const reg_size = @min(abi_size, self.vectorSize(.float));
                        var byte_offset: u32 = 0;
                        while (byte_offset < abi_size) : (byte_offset += reg_size) {
                            if (param_sse_reg_i >= param_sse_regs.len) break :classes;

                            const param_sse_reg = registerAlias(param_sse_regs[param_sse_reg_i], reg_size);
                            param_sse_reg_i += 1;

                            arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg };
                            arg_mcv_i += 1;
                        }
                    },
                    .sseup => assert(arg_mcv[arg_mcv_i - 1].register.class() == .sse),
                    .x87, .x87up, .complex_x87, .memory, .win_i128 => switch (cc) {
                        .x86_64_sysv => switch (class) {
                            .x87, .x87up, .complex_x87, .memory => break,
                            else => unreachable,
                        },
                        .x86_64_win => if (ty.abiSize(zcu) > 8) {
                            const param_int_regs = abi.getCAbiIntParamRegs(cc);
                            // Without this check a by-reference argument that comes
                            // after the last integer register would index past the
                            // end of `param_int_regs`.
                            if (param_int_reg_i >= param_int_regs.len) return self.fail(
                                "TODO implement passing {} indirectly on the stack for {} on x86_64",
                                .{ ty.fmt(pt), cc },
                            );
                            const param_int_reg = param_int_regs[param_int_reg_i].to64();
                            param_int_reg_i += 1;

                            arg_mcv[arg_mcv_i] = .{ .indirect = .{ .reg = param_int_reg } };
                            arg_mcv_i += 1;
                        } else break,
                        else => unreachable,
                    },
                    .none => unreachable,
                    .integer_per_element => {
                        // Uses up every remaining integer register, one element
                        // each; the remaining elements go to the frame.
                        const param_int_regs_len: u32 =
                            @intCast(abi.getCAbiIntParamRegs(cc).len);
                        const remaining_param_int_regs: u3 =
                            @intCast(param_int_regs_len - param_int_reg_i);
                        param_int_reg_i = param_int_regs_len;

                        const frame_elem_align = 8;
                        const frame_elems_len = ty.vectorLen(zcu) - remaining_param_int_regs;
                        const frame_elem_size = std.mem.alignForward(
                            u64,
                            ty.childType(zcu).abiSize(zcu),
                            frame_elem_align,
                        );
                        const frame_size: u31 = @intCast(frame_elems_len * frame_elem_size);

                        result.stack_byte_count =
                            std.mem.alignForward(u31, result.stack_byte_count, frame_elem_align);
                        arg_mcv[arg_mcv_i] = .{ .elementwise_regs_then_frame = .{
                            .regs = remaining_param_int_regs,
                            .frame_off = @intCast(result.stack_byte_count),
                            .frame_index = stack_frame_base,
                        } };
                        arg_mcv_i += 1;
                        result.stack_byte_count += frame_size;
                    },
                } else {
                    // Every class was placed: merge the pieces into one value.
                    arg.* = switch (arg_mcv_i) {
                        else => unreachable,
                        1 => arg_mcv[0],
                        2 => .{ .register_pair = .{
                            arg_mcv[0].register,
                            arg_mcv[1].register,
                        } },
                        3 => .{ .register_triple = .{
                            arg_mcv[0].register,
                            arg_mcv[1].register,
                            arg_mcv[2].register,
                        } },
                        4 => .{ .register_quadruple = .{
                            arg_mcv[0].register,
                            arg_mcv[1].register,
                            arg_mcv[2].register,
                            arg_mcv[3].register,
                        } },
                    };
                    continue;
                }

                // Passed on the stack, aligned to at least 8 bytes.
                const param_align = ty.abiAlignment(zcu).max(.@"8");
                result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count));
                result.stack_align = result.stack_align.max(param_align);
                arg.* = .{ .load_frame = .{
                    .index = stack_frame_base,
                    .off = result.stack_byte_count,
                } };
                result.stack_byte_count += @intCast(ty.abiSize(zcu));
            }
            assert(param_int_reg_i <= 6);
            result.gp_count = param_int_reg_i;
            assert(param_sse_reg_i <= 16);
            result.fp_count = param_sse_reg_i;
        },
        .auto => {
            result.stack_align = abi.zigcc.stack_align orelse .fromByteUnits(self.vectorSize(.float));

            // Remaining parameter registers of each class; a register is
            // consumed by slicing it off the front.
            var param_gpr = abi.getCAbiIntParamRegs(cc);
            var param_x87 = abi.getCAbiX87ParamRegs(cc);
            var param_sse = abi.getCAbiSseParamRegs(cc);

            // Return values
            result.return_value = if (ret_ty.isNoReturn(zcu))
                .init(.unreach)
            else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu))
                .init(.none)
            else return_value: {
                const ret_gpr = abi.getCAbiIntReturnRegs(cc);
                const ret_size: u31 = @intCast(ret_ty.abiSize(zcu));
                if (abi.zigcc.return_in_regs) switch (self.regClassForType(ret_ty)) {
                    .general_purpose => if (ret_size <= @as(u4, switch (self.target.cpu.arch) {
                        else => unreachable,
                        .x86 => 4,
                        .x86_64 => 8,
                    }))
                        break :return_value .init(.{ .register = registerAlias(ret_gpr[0], ret_size) })
                    else if (ret_gpr.len >= 2 and ret_ty.isSliceAtRuntime(zcu))
                        break :return_value .init(.{ .register_pair = ret_gpr[0..2].* }),
                    .segment, .mmx, .ip => unreachable,
                    .x87 => break :return_value .init(.{ .register = .st0 }),
                    .sse => if (ret_size <= self.vectorSize(.float)) break :return_value .init(.{
                        .register = registerAlias(abi.getCAbiSseReturnRegs(cc)[0], @max(ret_size, 16)),
                    }),
                };
                // No register fits: return indirectly, consuming the first
                // integer parameter register for the `long` location.
                const ret_indirect_reg = param_gpr[0];
                param_gpr = param_gpr[1..];
                break :return_value .{
                    .short = .{ .indirect = .{ .reg = ret_gpr[0] } },
                    .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
                };
            };

            // Input params
            for (param_types, result.args) |param_ty, *arg| {
                if (!param_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
                    arg.* = .none;
                    continue;
                }
                const param_size: u31 = @intCast(param_ty.abiSize(zcu));
                if (abi.zigcc.params_in_regs) switch (self.regClassForType(param_ty)) {
                    .general_purpose => if (param_gpr.len >= 1 and param_size <= @as(u4, switch (self.target.cpu.arch) {
                        else => unreachable,
                        .x86 => 4,
                        .x86_64 => 8,
                    })) {
                        arg.* = .{ .register = registerAlias(param_gpr[0], param_size) };
                        param_gpr = param_gpr[1..];
                        continue;
                    } else if (param_gpr.len >= 2 and param_ty.isSliceAtRuntime(zcu)) {
                        arg.* = .{ .register_pair = param_gpr[0..2].* };
                        param_gpr = param_gpr[2..];
                        continue;
                    },
                    .segment, .mmx, .ip => unreachable,
                    .x87 => if (param_x87.len >= 1) {
                        arg.* = .{ .register = param_x87[0] };
                        param_x87 = param_x87[1..];
                        continue;
                    },
                    .sse => if (param_sse.len >= 1 and param_size <= self.vectorSize(.float)) {
                        arg.* = .{
                            .register = registerAlias(param_sse[0], @max(param_size, 16)),
                        };
                        param_sse = param_sse[1..];
                        continue;
                    },
                };
                // Not placed in a register: pass on the stack.
                const param_align = param_ty.abiAlignment(zcu);
                result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count));
                result.stack_align = result.stack_align.max(param_align);
                arg.* = .{ .load_frame = .{
                    .index = stack_frame_base,
                    .off = result.stack_byte_count,
                } };
                result.stack_byte_count += param_size;
            }
        },
        else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}),
    }

    // Round the argument area up to its own alignment.
    result.stack_byte_count = @intCast(result.stack_align.forward(result.stack_byte_count));
    return result;
}

/// Reports a formatted codegen failure against the current owner (a nav or a
/// lazy symbol's type) and returns the resulting error.
fn fail(self: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } {
    @branchHint(.cold);
    const zcu = self.pt.zcu;
    switch (self.owner) {
        .nav_index => |i| return zcu.codegenFail(i, format, args),
        .lazy_sym => |s| return zcu.codegenFailType(s.ty, format, args),
    }
    return error.CodegenFail;
}

/// Same as `fail`, but for an already constructed error message.
fn failMsg(self: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } {
    @branchHint(.cold);
    const zcu = self.pt.zcu;
    switch (self.owner) {
        .nav_index => |i| return zcu.codegenFailMsg(i, msg),
        .lazy_sym => |s| return zcu.codegenFailTypeMsg(s.ty, msg),
    }
    return error.CodegenFail;
}

/// Looks up a register by name, preferring `Register.parseRegName` when that
/// declaration exists and falling back to a plain enum-name lookup otherwise.
/// Returns `null` for an unknown name.
fn parseRegName(name: []const u8) ?Register {
    if (@hasDecl(Register, "parseRegName")) {
        return Register.parseRegName(name);
    }
    return std.meta.stringToEnum(Register, name);
}

/// Returns register wide enough to hold at least `size_bytes`.
fn registerAlias(reg: Register, size_bytes: u32) Register {
    return switch (reg.class()) {
        .general_purpose => switch (size_bytes) {
            0 => unreachable, // should be comptime-known
            1 => reg.to8(),
            2 => reg.to16(),
            3...4 => reg.to32(),
            5...8 => reg.to64(),
            else => unreachable,
        },
        .segment => switch (size_bytes) {
            0...2 => reg,
            else => unreachable,
        },
        .x87 => switch (size_bytes) {
            16 => reg,
            else => unreachable,
        },
        .mmx => switch (size_bytes) {
            0...8 => reg,
            else => unreachable,
        },
        .sse => switch (size_bytes) {
            0...16 => reg.to128(),
            17...32 => reg.to256(),
            else => unreachable,
        },
        .ip => switch (size_bytes) {
            0...2 => .ip,
            3...4 => .eip,
            5...8 => .rip,
            else => unreachable,
        },
    };
}

/// Returns the memory operand size for `ty`: floats use their bit width on
/// the target, everything else its ABI size in bytes.
fn memSize(self: *CodeGen, ty: Type) Memory.Size {
    const zcu = self.pt.zcu;
    if (ty.zigTypeTag(zcu) == .float) return .fromBitSize(ty.floatBits(self.target.*));
    return .fromSize(@intCast(ty.abiSize(zcu)));
}

/// Splits `ty` into `parts_len` types.
///
/// A vector whose length divides evenly yields `parts_len` equal smaller
/// vectors. Otherwise the parts follow the System V classification of `ty`,
/// and are only returned when there are exactly `parts_len` classes whose
/// part sizes add up to the ABI size of `ty`. Anything else is reported as a
/// TODO through `fail`.
fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Type {
    const pt = self.pt;
    const zcu = pt.zcu;

    if (ty.isVector(zcu)) {
        if (std.math.divExact(u32, ty.vectorLen(zcu), parts_len)) |part_vec_len| {
            const part_ty = try pt.vectorType(.{
                .len = part_vec_len,
                .child = ty.scalarType(zcu).toIntern(),
            });
            return [1]Type{part_ty} ** parts_len;
        } else |err| switch (err) {
            error.DivisionByZero => unreachable,
            // Not evenly divisible: fall back to the class-based split.
            error.UnexpectedRemainder => {},
        }
    }

    var result: [parts_len]Type = undefined;
    const sysv_classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
    if (sysv_classes.len == parts_len) for (&result, sysv_classes, 0..) |*slot, class, index| {
        slot.* = switch (class) {
            .integer => if (index < parts_len - 1)
                .u64
            else last: {
                // The final integer part covers whatever bytes remain, as one
                // integer or an array of integers of the element size.
                const unit_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?;
                const unit_ty = try pt.intType(.unsigned, @intCast(unit_size * 8));
                break :last switch (@divExact(ty.abiSize(zcu) - index * 8, unit_size)) {
                    1 => unit_ty,
                    else => |count| try pt.arrayType(.{ .len = count, .child = unit_ty.toIntern() }),
                };
            },
            .float => .f32,
            .float_combine => try pt.arrayType(.{ .len = 2, .child = .f32_type }),
            .sse => .f64,
            // Unsupported class: skip the size check below and report a TODO.
            else => break,
        };
    } else {
        var total_size: u64 = 0;
        for (result) |part_ty| total_size += part_ty.abiSize(zcu);
        if (total_size == ty.abiSize(zcu)) return result;
    };
    return self.fail("TODO implement splitType({d}, {})", .{ parts_len, ty.fmt(pt) });
}

/// Truncates the value in the register in place.
/// Clobbers any remaining bits.
fn truncateRegister(self: *CodeGen, ty: Type, reg: Register) !void {
    const zcu = self.pt.zcu;
    const info: InternPool.Key.IntType = if (ty.isAbiInt(zcu))
        ty.intInfo(zcu)
    else
        .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) };

    // When the bit count is a multiple of 64 the shift would be 64, which
    // does not fit in a `u6`; there is nothing to truncate in that case.
    const unused_bits = std.math.cast(u6, 64 - info.bits % 64) orelse return;
    try self.spillEflagsIfOccupied();

    const dst: MCValue = .{ .register = reg };
    switch (info.signedness) {
        .signed => {
            // Shift left, then arithmetic shift right by the same amount.
            const amount: MCValue = .{ .immediate = unused_bits };
            try self.genShiftBinOpMir(.{ ._l, .sa }, .isize, dst, .u8, amount);
            try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, dst, .u8, amount);
        },
        .unsigned => {
            const keep_mask = ~@as(u64, 0) >> unused_bits;
            if (info.bits <= 32) {
                try self.genBinOpMir(.{ ._, .@"and" }, .u32, dst, .{ .immediate = keep_mask });
            } else {
                // Wider masks are first loaded into a temporary register.
                const mask_reg = try self.copyToTmpRegister(.usize, .{ .immediate = keep_mask });
                try self.genBinOpMir(.{ ._, .@"and" }, .usize, dst, .{ .register = mask_reg });
            }
        },
    }
}

/// Returns the width in bits of the register used for a value of type `ty`,
/// derived from its ABI size: 8/16/32/64 for non-floats, 128/256 for floats.
fn regBitSize(self: *CodeGen, ty: Type) u64 {
    const zcu = self.pt.zcu;
    const size = ty.abiSize(zcu);
    if (ty.zigTypeTag(zcu) == .float) return switch (size) {
        1...16 => 128,
        17...32 => 256,
        else => unreachable,
    };
    return switch (size) {
        1 => 8,
        2 => 16,
        3...4 => 32,
        5...8 => 64,
        else => unreachable,
    };
}

/// Returns how many bits of the register from `regBitSize` are not used by `ty`.
fn regExtraBits(self: *CodeGen, ty: Type) u64 {
    const reg_bits = self.regBitSize(ty);
    return reg_bits - ty.bitSize(self.pt.zcu);
}

/// Reports whether `feature` should be treated as available.
///
/// `.@"64bit"` follows the target architecture and `.mmx` is always reported
/// as absent. In `ReleaseSmall`, the listed `false_deps_*`/`slow_*` features
/// are reported as absent and the listed `fast_*` features as present.
/// Everything else is read from the target's CPU feature set.
fn hasFeature(cg: *CodeGen, feature: std.Target.x86.Feature) bool {
    const optimize_for_size = cg.mod.optimize_mode == .ReleaseSmall;
    const forced: ?bool = switch (feature) {
        .@"64bit" => switch (cg.target.cpu.arch) {
            else => unreachable,
            .x86 => false,
            .x86_64 => true,
        },
        .false_deps_getmant,
        .false_deps_lzcnt_tzcnt,
        .false_deps_mulc,
        .false_deps_mullq,
        .false_deps_perm,
        .false_deps_popcnt,
        .false_deps_range,
        .slow_3ops_lea,
        .slow_incdec,
        .slow_lea,
        .slow_pmaddwd,
        .slow_pmulld,
        .slow_shld,
        .slow_two_mem_ops,
        .slow_unaligned_mem_16,
        .slow_unaligned_mem_32,
        => if (optimize_for_size) false else null,
        .fast_11bytenop,
        .fast_15bytenop,
        .fast_7bytenop,
        .fast_bextr,
        .fast_dpwssd,
        .fast_gather,
        .fast_hops,
        .fast_imm16,
        .fast_lzcnt,
        .fast_movbe,
        .fast_scalar_fsqrt,
        .fast_scalar_shift_masks,
        .fast_shld_rotate,
        .fast_variable_crosslane_shuffle,
        .fast_variable_perlane_shuffle,
        .fast_vector_fsqrt,
        .fast_vector_shift_masks,
        => if (optimize_for_size) true else null,
        .mmx => false,
        else => null,
    };
    return forced orelse std.Target.x86.featureSetHas(cg.target.cpu.features, feature);
}

/// Returns the type of the AIR reference `inst`.
fn typeOf(self: *CodeGen, inst: Air.Inst.Ref) Type {
    return self.air.typeOf(inst, &self.pt.zcu.intern_pool);
}

/// Returns the type of `inst`, which may be either an AIR instruction or a
/// temporary. For `loop_switch_br` the type of the switch operand is used.
fn typeOfIndex(self: *CodeGen, inst: Air.Inst.Index) Type {
    const as_temp: Temp = .{ .index = inst };
    switch (as_temp.unwrap(self)) {
        .temp => return as_temp.typeOf(self),
        .ref => {},
    }
    const ip = &self.pt.zcu.intern_pool;
    return switch (self.air.instructions.items(.tag)[@intFromEnum(inst)]) {
        .loop_switch_br => self.typeOf(self.air.unwrapSwitch(inst).operand),
        else => self.air.typeOfIndex(inst, ip),
    };
}

/// Returns the letter used for an integer of `int_bits` bits in compiler-rt
/// routine names: 's' up to 32 bits, 'd' up to 64, 't' up to 128.
fn intCompilerRtAbiName(int_bits: u32) u8 {
    if (int_bits == 0 or int_bits > 128) unreachable;
    if (int_bits <= 32) return 's';
    if (int_bits <= 64) return 'd';
    return 't';
}
27886 fn floatCompilerRtAbiName(float_bits: u32) u8 { 27887 return switch (float_bits) { 27888 16 => 'h', 27889 32 => 's', 27890 64 => 'd', 27891 80 => 'x', 27892 128 => 't', 27893 else => unreachable, 27894 }; 27895 } 27896 27897 fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type { 27898 if (ty.toIntern() == .f16_type and 27899 (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and 27900 self.target.isDarwin()) return .u16; 27901 return ty; 27902 } 27903 27904 fn floatLibcAbiPrefix(ty: Type) []const u8 { 27905 return switch (ty.toIntern()) { 27906 .f16_type, .f80_type => "__", 27907 .f32_type, .f64_type, .f128_type, .c_longdouble_type => "", 27908 else => unreachable, 27909 }; 27910 } 27911 27912 fn floatLibcAbiSuffix(ty: Type) []const u8 { 27913 return switch (ty.toIntern()) { 27914 .f16_type => "h", 27915 .f32_type => "f", 27916 .f64_type => "", 27917 .f80_type => "x", 27918 .f128_type => "q", 27919 .c_longdouble_type => "l", 27920 else => unreachable, 27921 }; 27922 } 27923 27924 fn promoteInt(self: *CodeGen, ty: Type) Type { 27925 const pt = self.pt; 27926 const zcu = pt.zcu; 27927 const int_info: InternPool.Key.IntType = switch (ty.toIntern()) { 27928 .bool_type => .{ .signedness = .unsigned, .bits = 1 }, 27929 else => if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else return ty, 27930 }; 27931 for ([_]Type{ 27932 .c_int, .c_uint, 27933 .c_long, .c_ulong, 27934 .c_longlong, .c_ulonglong, 27935 }) |promote_ty| { 27936 const promote_info = promote_ty.intInfo(zcu); 27937 if (int_info.signedness == .signed and promote_info.signedness == .unsigned) continue; 27938 if (int_info.bits + @intFromBool(int_info.signedness == .unsigned and 27939 promote_info.signedness == .signed) <= promote_info.bits) return promote_ty; 27940 } 27941 return ty; 27942 } 27943 27944 fn promoteVarArg(self: *CodeGen, ty: Type) Type { 27945 if (!ty.isRuntimeFloat()) return self.promoteInt(ty); 27946 switch (ty.floatBits(self.target.*)) { 27947 32, 64 => 
return .f64, 27948 else => |float_bits| { 27949 assert(float_bits == self.target.cTypeBitSize(.longdouble)); 27950 return .c_longdouble; 27951 }, 27952 } 27953 } 27954 27955 const Temp = struct { 27956 index: Air.Inst.Index, 27957 27958 fn unwrap(temp: Temp, cg: *CodeGen) union(enum) { 27959 ref: Air.Inst.Ref, 27960 temp: Index, 27961 } { 27962 switch (temp.index.unwrap()) { 27963 .ref => |ref| return .{ .ref = ref }, 27964 .target => |target_index| { 27965 const temp_index: Index = @enumFromInt(target_index); 27966 assert(temp_index.isValid(cg)); 27967 return .{ .temp = temp_index }; 27968 }, 27969 } 27970 } 27971 27972 fn typeOf(temp: Temp, cg: *CodeGen) Type { 27973 return switch (temp.unwrap(cg)) { 27974 .ref => |ref| cg.typeOf(ref), 27975 .temp => |temp_index| temp_index.typeOf(cg), 27976 }; 27977 } 27978 27979 fn isMut(temp: Temp, cg: *CodeGen) bool { 27980 return switch (temp.unwrap(cg)) { 27981 .ref => false, 27982 .temp => |temp_index| switch (temp_index.tracking(cg).short) { 27983 .none, 27984 .unreach, 27985 .dead, 27986 .undef, 27987 .immediate, 27988 .eflags, 27989 .register_offset, 27990 .register_mask, 27991 .memory, 27992 .load_symbol, 27993 .lea_symbol, 27994 .indirect, 27995 .load_direct, 27996 .lea_direct, 27997 .load_got, 27998 .lea_got, 27999 .load_tlv, 28000 .lea_tlv, 28001 .lea_frame, 28002 .elementwise_regs_then_frame, 28003 .reserved_frame, 28004 .air_ref, 28005 => false, 28006 .register, 28007 .register_pair, 28008 .register_triple, 28009 .register_quadruple, 28010 .register_overflow, 28011 => true, 28012 .load_frame => |frame_addr| !frame_addr.index.isNamed(), 28013 }, 28014 }; 28015 } 28016 28017 fn tracking(temp: Temp, cg: *CodeGen) InstTracking { 28018 return cg.inst_tracking.get(temp.index).?; 28019 } 28020 28021 fn getOffset(temp: Temp, off: i32, cg: *CodeGen) !Temp { 28022 const new_temp_index = cg.next_temp_index; 28023 cg.temp_type[@intFromEnum(new_temp_index)] = .usize; 28024 cg.next_temp_index = 
@enumFromInt(@intFromEnum(new_temp_index) + 1); 28025 switch (temp.tracking(cg).short) { 28026 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), 28027 .register => |reg| { 28028 const new_reg = 28029 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28030 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28031 try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ 28032 .base = .{ .reg = reg.to64() }, 28033 .mod = .{ .rm = .{ 28034 .size = .qword, 28035 .disp = off, 28036 } }, 28037 }); 28038 }, 28039 .register_offset => |reg_off| { 28040 const new_reg = 28041 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28042 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28043 try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ 28044 .base = .{ .reg = reg_off.reg.to64() }, 28045 .mod = .{ .rm = .{ 28046 .size = .qword, 28047 .disp = reg_off.off + off, 28048 } }, 28049 }); 28050 }, 28051 .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{ 28052 .sym_index = sym_off.sym_index, 28053 .off = sym_off.off + off, 28054 } }), 28055 .load_frame => |frame_addr| { 28056 const new_reg = 28057 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28058 new_temp_index.tracking(cg).* = .init(.{ .register_offset = .{ 28059 .reg = new_reg, 28060 .off = off, 28061 } }); 28062 try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ 28063 .base = .{ .frame = frame_addr.index }, 28064 .mod = .{ .rm = .{ 28065 .size = .qword, 28066 .disp = frame_addr.off, 28067 } }, 28068 }); 28069 }, 28070 .lea_frame => |frame_addr| new_temp_index.tracking(cg).* = .init(.{ .lea_frame = .{ 28071 .index = frame_addr.index, 28072 .off = frame_addr.off + off, 28073 } }), 28074 } 28075 return .{ .index = new_temp_index.toIndex() }; 28076 } 28077 28078 fn toOffset(temp: *Temp, off: i32, cg: *CodeGen) !void { 28079 if (off == 0) 
return; 28080 switch (temp.unwrap(cg)) { 28081 .ref => {}, 28082 .temp => |temp_index| { 28083 const temp_tracking = temp_index.tracking(cg); 28084 switch (temp_tracking.short) { 28085 else => {}, 28086 .register => |reg| { 28087 try cg.freeValue(temp_tracking.long); 28088 temp_tracking.* = .init(.{ .register_offset = .{ 28089 .reg = reg, 28090 .off = off, 28091 } }); 28092 return; 28093 }, 28094 .register_offset => |reg_off| { 28095 try cg.freeValue(temp_tracking.long); 28096 temp_tracking.* = .init(.{ .register_offset = .{ 28097 .reg = reg_off.reg, 28098 .off = reg_off.off + off, 28099 } }); 28100 return; 28101 }, 28102 .lea_symbol => |sym_off| { 28103 assert(std.meta.eql(temp_tracking.long.lea_symbol, sym_off)); 28104 temp_tracking.* = .init(.{ .lea_symbol = .{ 28105 .sym_index = sym_off.sym_index, 28106 .off = sym_off.off + off, 28107 } }); 28108 return; 28109 }, 28110 .lea_frame => |frame_addr| { 28111 assert(std.meta.eql(temp_tracking.long.lea_frame, frame_addr)); 28112 temp_tracking.* = .init(.{ .lea_frame = .{ 28113 .index = frame_addr.index, 28114 .off = frame_addr.off + off, 28115 } }); 28116 return; 28117 }, 28118 } 28119 }, 28120 } 28121 const new_temp = try temp.getOffset(off, cg); 28122 try temp.die(cg); 28123 temp.* = new_temp; 28124 } 28125 28126 fn getLimb(temp: Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !Temp { 28127 const new_temp_index = cg.next_temp_index; 28128 cg.temp_type[@intFromEnum(new_temp_index)] = limb_ty; 28129 switch (temp.tracking(cg).short) { 28130 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), 28131 .immediate => |imm| { 28132 assert(limb_index == 0); 28133 new_temp_index.tracking(cg).* = .init(.{ .immediate = imm }); 28134 }, 28135 .register => |reg| { 28136 assert(limb_index == 0); 28137 const new_reg = 28138 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28139 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28140 try cg.asmRegisterRegister(.{ 
._, .mov }, new_reg.to64(), reg.to64()); 28141 }, 28142 .register_pair => |regs| { 28143 const new_reg = 28144 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28145 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28146 try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64()); 28147 }, 28148 .register_offset => |reg_off| { 28149 assert(limb_index == 0); 28150 const new_reg = 28151 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28152 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28153 try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ 28154 .base = .{ .reg = reg_off.reg.to64() }, 28155 .mod = .{ .rm = .{ 28156 .size = .qword, 28157 .disp = reg_off.off + @as(u31, limb_index) * 8, 28158 } }, 28159 }); 28160 }, 28161 .load_symbol => |sym_off| { 28162 const new_reg = 28163 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28164 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28165 try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ 28166 .base = .{ .reloc = sym_off.sym_index }, 28167 .mod = .{ .rm = .{ 28168 .size = .qword, 28169 .disp = sym_off.off + @as(u31, limb_index) * 8, 28170 } }, 28171 }); 28172 }, 28173 .lea_symbol => |sym_off| { 28174 assert(limb_index == 0); 28175 new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = sym_off }); 28176 }, 28177 .load_frame => |frame_addr| { 28178 const new_reg = 28179 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28180 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28181 try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ 28182 .base = .{ .frame = frame_addr.index }, 28183 .mod = .{ .rm = .{ 28184 .size = .qword, 28185 .disp = frame_addr.off + @as(u31, limb_index) * 8, 28186 } }, 28187 }); 28188 }, 28189 .lea_frame => |frame_addr| { 28190 assert(limb_index == 0); 28191 
new_temp_index.tracking(cg).* = .init(.{ .lea_frame = frame_addr }); 28192 }, 28193 } 28194 cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); 28195 return .{ .index = new_temp_index.toIndex() }; 28196 } 28197 28198 fn toLimb(temp: *Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !void { 28199 switch (temp.unwrap(cg)) { 28200 .ref => {}, 28201 .temp => |temp_index| { 28202 const temp_tracking = temp_index.tracking(cg); 28203 switch (temp_tracking.short) { 28204 else => {}, 28205 .register, .lea_symbol, .lea_frame => { 28206 assert(limb_index == 0); 28207 cg.temp_type[@intFromEnum(temp_index)] = limb_ty; 28208 return; 28209 }, 28210 .register_pair => |regs| { 28211 switch (temp_tracking.long) { 28212 .none, .reserved_frame => {}, 28213 else => temp_tracking.long = 28214 temp_tracking.long.address().offset(@as(u31, limb_index) * 8).deref(), 28215 } 28216 for (regs, 0..) |reg, reg_index| if (reg_index != limb_index) 28217 cg.register_manager.freeReg(reg); 28218 temp_tracking.* = .init(.{ .register = regs[limb_index] }); 28219 cg.temp_type[@intFromEnum(temp_index)] = limb_ty; 28220 return; 28221 }, 28222 .load_symbol => |sym_off| { 28223 assert(std.meta.eql(temp_tracking.long.load_symbol, sym_off)); 28224 temp_tracking.* = .init(.{ .load_symbol = .{ 28225 .sym_index = sym_off.sym_index, 28226 .off = sym_off.off + @as(u31, limb_index) * 8, 28227 } }); 28228 cg.temp_type[@intFromEnum(temp_index)] = limb_ty; 28229 return; 28230 }, 28231 .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) { 28232 assert(std.meta.eql(temp_tracking.long.load_frame, frame_addr)); 28233 temp_tracking.* = .init(.{ .load_frame = .{ 28234 .index = frame_addr.index, 28235 .off = frame_addr.off + @as(u31, limb_index) * 8, 28236 } }); 28237 cg.temp_type[@intFromEnum(temp_index)] = limb_ty; 28238 return; 28239 }, 28240 } 28241 }, 28242 } 28243 const new_temp = try temp.getLimb(limb_ty, limb_index, cg); 28244 try temp.die(cg); 28245 temp.* = new_temp; 28246 } 28247 28248 
fn toSlicePtr(temp: *Temp, cg: *CodeGen) !void { 28249 const temp_ty = temp.typeOf(cg); 28250 if (temp_ty.isSlice(cg.pt.zcu)) try temp.toLimb(temp_ty.slicePtrFieldType(cg.pt.zcu), 0, cg); 28251 } 28252 28253 fn toSliceLen(temp: *Temp, cg: *CodeGen) !void { 28254 try temp.toLimb(.usize, 1, cg); 28255 } 28256 28257 fn toReg(temp: *Temp, new_reg: Register, cg: *CodeGen) !bool { 28258 const val, const ty = val_ty: switch (temp.unwrap(cg)) { 28259 .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, 28260 .temp => |temp_index| { 28261 const temp_tracking = temp_index.tracking(cg); 28262 if (temp_tracking.short == .register and 28263 temp_tracking.short.register == new_reg) return false; 28264 break :val_ty .{ temp_tracking.short, temp_index.typeOf(cg) }; 28265 }, 28266 }; 28267 const new_temp_index = cg.next_temp_index; 28268 try cg.register_manager.getReg(new_reg, new_temp_index.toIndex()); 28269 cg.temp_type[@intFromEnum(new_temp_index)] = ty; 28270 try cg.genSetReg(new_reg, ty, val, .{}); 28271 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 28272 try temp.die(cg); 28273 cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); 28274 temp.* = .{ .index = new_temp_index.toIndex() }; 28275 return true; 28276 } 28277 28278 fn toRegClass(temp: *Temp, mut: bool, rc: Register.Class, cg: *CodeGen) !bool { 28279 const val = temp.tracking(cg).short; 28280 if (!mut or temp.isMut(cg)) switch (val) { 28281 else => {}, 28282 .register => |reg| if (reg.class() == rc) return false, 28283 .register_offset => |reg_off| if (reg_off.reg.class() == rc and reg_off.off == 0) return false, 28284 }; 28285 const ty = temp.typeOf(cg); 28286 const new_temp_index = cg.next_temp_index; 28287 cg.temp_type[@intFromEnum(new_temp_index)] = ty; 28288 const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc)); 28289 try cg.genSetReg(new_reg, ty, val, .{}); 28290 new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); 
try temp.die(cg);
        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
        temp.* = .{ .index = new_temp_index.toIndex() };
        return true;
    }

    /// Fuses two temps into a single temp tracked as a `.register_pair`.
    ///
    /// Both operands are first forced into general purpose registers; the
    /// pass is restarted whenever `toRegClass` reports that it moved a temp.
    /// Ownership of both registers is then handed to a freshly numbered temp
    /// via `reuseTemp`, and `first_temp` is overwritten with that temp.
    /// `second_temp` is not reassigned by this function.
    ///
    /// Both operands must be real temps (not refs): `unwrap(cg).temp` is
    /// accessed unconditionally.
    fn toPair(first_temp: *Temp, second_temp: *Temp, cg: *CodeGen) !void {
        while (true) for ([_]*Temp{ first_temp, second_temp }) |part_temp| {
            if (try part_temp.toRegClass(true, .general_purpose, cg)) break;
        } else break;
        const first_temp_tracking = first_temp.unwrap(cg).temp.tracking(cg);
        const second_temp_tracking = second_temp.unwrap(cg).temp.tracking(cg);
        const result: MCValue = .{ .register_pair = .{
            first_temp_tracking.short.register,
            second_temp_tracking.short.register,
        } };
        const result_temp_index = cg.next_temp_index;
        const result_temp: Temp = .{ .index = result_temp_index.toIndex() };
        assert(cg.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking));
        assert(cg.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking));
        // NOTE(review): the pair is always typed as `.slice_const_u8`, whatever
        // the operand types were — presumably a stand-in two-word type; confirm.
        cg.temp_type[@intFromEnum(result_temp_index)] = .slice_const_u8;
        result_temp_index.tracking(cg).* = .init(result);
        // Claim the slot. Without this the next temp allocation would be handed
        // the same index and overwrite the pair's tracking and type.
        cg.next_temp_index = @enumFromInt(@intFromEnum(result_temp_index) + 1);
        first_temp.* = result_temp;
    }

    /// Retags a temp that is currently tracked in a single register as a
    /// `.register_mask` over that same register, described by `info`.
    /// Asserts that `info.scalar` is not `.none`. `temp` must be a real temp
    /// whose short tracking is `.register`.
    fn asMask(temp: Temp, info: MaskInfo, cg: *CodeGen) void {
        assert(info.scalar != .none);
        const mcv = &temp.unwrap(cg).temp.tracking(cg).short;
        const reg = mcv.register;
        mcv.* = .{ .register_mask = .{ .reg = reg, .info = info } };
    }

    fn toLea(temp: *Temp, cg: *CodeGen) !bool {
        switch (temp.tracking(cg).short) {
            .none,
            .unreach,
            .dead,
            .undef,
            .eflags,
            .register_pair,
            .register_triple,
            .register_quadruple,
            .register_overflow,
            .register_mask,
            .elementwise_regs_then_frame,
            .reserved_frame,
            .air_ref,
            => unreachable, // not a valid pointer
            .immediate,
            .register,
            .register_offset,
            .lea_direct,
            .lea_got,
            .lea_tlv,
.lea_frame, 28346 => return false, 28347 .memory, 28348 .indirect, 28349 .load_symbol, 28350 .load_direct, 28351 .load_got, 28352 .load_tlv, 28353 .load_frame, 28354 => return temp.toRegClass(true, .general_purpose, cg), 28355 .lea_symbol => |sym_off| { 28356 const off = sym_off.off; 28357 if (off == 0) return false; 28358 try temp.toOffset(-off, cg); 28359 while (try temp.toRegClass(true, .general_purpose, cg)) {} 28360 try temp.toOffset(off, cg); 28361 return true; 28362 }, 28363 } 28364 } 28365 28366 fn toMemory(temp: *Temp, cg: *CodeGen) !bool { 28367 const temp_tracking = temp.tracking(cg); 28368 if (temp_tracking.short.isMemory()) return false; 28369 const new_temp_index = cg.next_temp_index; 28370 const ty = temp.typeOf(cg); 28371 cg.temp_type[@intFromEnum(new_temp_index)] = ty; 28372 const new_frame_index = try cg.allocFrameIndex(.initSpill(ty, cg.pt.zcu)); 28373 try cg.genSetMem(.{ .frame = new_frame_index }, 0, ty, temp_tracking.short, .{}); 28374 new_temp_index.tracking(cg).* = .init(.{ .load_frame = .{ .index = new_frame_index } }); 28375 try temp.die(cg); 28376 cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); 28377 temp.* = .{ .index = new_temp_index.toIndex() }; 28378 return true; 28379 } 28380 28381 // hack around linker relocation bugs 28382 fn toBase(temp: *Temp, cg: *CodeGen) !bool { 28383 const temp_tracking = temp.tracking(cg); 28384 if (temp_tracking.short.isBase()) return false; 28385 if (try temp.toMemory(cg)) return true; 28386 const new_temp_index = cg.next_temp_index; 28387 cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg); 28388 const new_reg = 28389 try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); 28390 try cg.genSetReg(new_reg, .usize, temp_tracking.short.address(), .{}); 28391 new_temp_index.tracking(cg).* = .init(.{ .indirect = .{ .reg = new_reg } }); 28392 try temp.die(cg); 28393 cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); 28394 temp.* = .{ 
.index = new_temp_index.toIndex() }; 28395 return true; 28396 } 28397 28398 const AccessOptions = struct { 28399 disp: i32 = 0, 28400 safe: bool = false, 28401 }; 28402 28403 fn load(ptr: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp { 28404 const val = try cg.tempAlloc(val_ty); 28405 try ptr.toOffset(opts.disp, cg); 28406 while (try ptr.toLea(cg)) {} 28407 const val_mcv = val.tracking(cg).short; 28408 switch (val_mcv) { 28409 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), 28410 .register => |val_reg| try ptr.loadReg(val_ty, registerAlias( 28411 val_reg, 28412 @intCast(val_ty.abiSize(cg.pt.zcu)), 28413 ), cg), 28414 inline .register_pair, 28415 .register_triple, 28416 .register_quadruple, 28417 => |val_regs| for (val_regs) |val_reg| { 28418 try ptr.loadReg(val_ty, val_reg, cg); 28419 try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); 28420 while (try ptr.toLea(cg)) {} 28421 }, 28422 .register_offset => |val_reg_off| switch (val_reg_off.off) { 28423 0 => try ptr.loadReg(val_ty, registerAlias( 28424 val_reg_off.reg, 28425 @intCast(val_ty.abiSize(cg.pt.zcu)), 28426 ), cg), 28427 else => unreachable, 28428 }, 28429 .memory, .indirect, .load_frame, .load_symbol => { 28430 var val_ptr = try cg.tempInit(.usize, val_mcv.address()); 28431 var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); 28432 try val_ptr.memcpy(ptr, &len, cg); 28433 try val_ptr.die(cg); 28434 try len.die(cg); 28435 }, 28436 } 28437 return val; 28438 } 28439 28440 fn store(ptr: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void { 28441 const val_ty = val.typeOf(cg); 28442 try ptr.toOffset(opts.disp, cg); 28443 while (try ptr.toLea(cg)) {} 28444 val_to_gpr: while (true) : (while (try ptr.toLea(cg) or 28445 try val.toRegClass(false, .general_purpose, cg)) 28446 {}) { 28447 const val_mcv = val.tracking(cg).short; 28448 switch (val_mcv) { 28449 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), 28450 .undef => 
if (opts.safe) { 28451 var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa }); 28452 var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); 28453 try ptr.memset(&pat, &len, cg); 28454 try pat.die(cg); 28455 try len.die(cg); 28456 }, 28457 .immediate => |val_imm| { 28458 const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| 28459 .u(val_uimm31) 28460 else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| 28461 .s(val_simm32) 28462 else 28463 continue :val_to_gpr; 28464 // hack around linker relocation bugs 28465 switch (ptr.tracking(cg).short) { 28466 else => {}, 28467 .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {}, 28468 } 28469 try cg.asmMemoryImmediate( 28470 .{ ._, .mov }, 28471 try ptr.tracking(cg).short.deref().mem(cg, .{ 28472 .size = cg.memSize(val_ty), 28473 }), 28474 val_op, 28475 ); 28476 }, 28477 .eflags => |cc| { 28478 // hack around linker relocation bugs 28479 switch (ptr.tracking(cg).short) { 28480 else => {}, 28481 .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {}, 28482 } 28483 try cg.asmSetccMemory( 28484 cc, 28485 try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }), 28486 ); 28487 }, 28488 .register => |val_reg| try ptr.storeReg(val_ty, registerAlias( 28489 val_reg, 28490 @intCast(val_ty.abiSize(cg.pt.zcu)), 28491 ), cg), 28492 inline .register_pair, 28493 .register_triple, 28494 .register_quadruple, 28495 => |val_regs| for (val_regs) |val_reg| { 28496 try ptr.storeReg(val_ty, val_reg, cg); 28497 try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); 28498 while (try ptr.toLea(cg)) {} 28499 }, 28500 .register_offset => |val_reg_off| switch (val_reg_off.off) { 28501 0 => try ptr.storeReg(val_ty, registerAlias( 28502 val_reg_off.reg, 28503 @intCast(val_ty.abiSize(cg.pt.zcu)), 28504 ), cg), 28505 else => continue :val_to_gpr, 28506 }, 28507 .register_overflow => |val_reg_ov| { 28508 const ip = &cg.pt.zcu.intern_pool; 28509 const 
first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) { 28510 .tuple_type => |tuple_type| { 28511 const tuple_field_types = tuple_type.types.get(ip); 28512 assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type); 28513 break :first_ty tuple_field_types[0]; 28514 }, 28515 .opt_type => |opt_child| { 28516 assert(!val_ty.optionalReprIsPayload(cg.pt.zcu)); 28517 break :first_ty opt_child; 28518 }, 28519 else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }), 28520 }); 28521 const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu)); 28522 try ptr.storeReg(first_ty, registerAlias(val_reg_ov.reg, first_size), cg); 28523 try ptr.toOffset(first_size, cg); 28524 try cg.asmSetccMemory( 28525 val_reg_ov.eflags, 28526 try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }), 28527 ); 28528 }, 28529 .lea_frame, .lea_symbol => continue :val_to_gpr, 28530 .memory, .indirect, .load_frame, .load_symbol => { 28531 var val_ptr = try cg.tempInit(.usize, val_mcv.address()); 28532 var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); 28533 try ptr.memcpy(&val_ptr, &len, cg); 28534 try val_ptr.die(cg); 28535 try len.die(cg); 28536 }, 28537 } 28538 break; 28539 } 28540 } 28541 28542 fn read(src: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp { 28543 var val = try cg.tempAlloc(val_ty); 28544 while (try src.toBase(cg)) {} 28545 const val_mcv = val.tracking(cg).short; 28546 switch (val_mcv) { 28547 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), 28548 .register => |val_reg| try src.readReg(opts.disp, val_ty, registerAlias( 28549 val_reg, 28550 @intCast(val_ty.abiSize(cg.pt.zcu)), 28551 ), cg), 28552 inline .register_pair, .register_triple, .register_quadruple => |val_regs| { 28553 var disp = opts.disp; 28554 for (val_regs) |val_reg| { 28555 try src.readReg(disp, val_ty, val_reg, cg); 28556 disp += @divExact(val_reg.bitSize(), 8); 28557 } 28558 }, 28559 
.register_offset => |val_reg_off| switch (val_reg_off.off) { 28560 0 => try src.readReg(opts.disp, val_ty, registerAlias( 28561 val_reg_off.reg, 28562 @intCast(val_ty.abiSize(cg.pt.zcu)), 28563 ), cg), 28564 else => unreachable, 28565 }, 28566 .memory, .indirect, .load_frame, .load_symbol => { 28567 var val_ptr = try cg.tempInit(.usize, val_mcv.address()); 28568 var src_ptr = 28569 try cg.tempInit(.usize, src.tracking(cg).short.address().offset(opts.disp)); 28570 var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); 28571 try val_ptr.memcpy(&src_ptr, &len, cg); 28572 try val_ptr.die(cg); 28573 try src_ptr.die(cg); 28574 try len.die(cg); 28575 }, 28576 } 28577 return val; 28578 } 28579 28580 fn write(dst: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void { 28581 const val_ty = val.typeOf(cg); 28582 while (try dst.toBase(cg)) {} 28583 val_to_gpr: while (true) : (while (try dst.toBase(cg) or 28584 try val.toRegClass(false, .general_purpose, cg)) 28585 {}) { 28586 const val_mcv = val.tracking(cg).short; 28587 switch (val_mcv) { 28588 else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), 28589 .immediate => |val_imm| { 28590 const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| 28591 .u(val_uimm31) 28592 else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| 28593 .s(val_simm32) 28594 else 28595 continue :val_to_gpr; 28596 try cg.asmMemoryImmediate( 28597 .{ ._, .mov }, 28598 try dst.tracking(cg).short.mem(cg, .{ 28599 .size = cg.memSize(val_ty), 28600 .disp = opts.disp, 28601 }), 28602 val_op, 28603 ); 28604 }, 28605 .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias( 28606 val_reg, 28607 @intCast(val_ty.abiSize(cg.pt.zcu)), 28608 ), cg), 28609 inline .register_pair, .register_triple, .register_quadruple => |val_regs| { 28610 var disp = opts.disp; 28611 for (val_regs) |val_reg| { 28612 try dst.writeReg(disp, val_ty, val_reg, cg); 28613 disp += 
@divExact(val_reg.bitSize(), 8); 28614 } 28615 }, 28616 .register_offset => |val_reg_off| switch (val_reg_off.off) { 28617 0 => try dst.writeReg(opts.disp, val_ty, registerAlias( 28618 val_reg_off.reg, 28619 @intCast(val_ty.abiSize(cg.pt.zcu)), 28620 ), cg), 28621 else => continue :val_to_gpr, 28622 }, 28623 .lea_frame, .lea_symbol => continue :val_to_gpr, 28624 .memory, .indirect, .load_frame, .load_symbol => { 28625 var dst_ptr = 28626 try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp)); 28627 var val_ptr = try cg.tempInit(.usize, val_mcv.address()); 28628 var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); 28629 try dst_ptr.memcpy(&val_ptr, &len, cg); 28630 try dst_ptr.die(cg); 28631 try val_ptr.die(cg); 28632 try len.die(cg); 28633 }, 28634 } 28635 break; 28636 } 28637 } 28638 28639 fn loadReg(ptr: *Temp, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void { 28640 const dst_rc = dst_reg.class(); 28641 const strat = try cg.moveStrategy(dst_ty, dst_rc, false); 28642 // hack around linker relocation bugs 28643 switch (ptr.tracking(cg).short) { 28644 else => {}, 28645 .lea_symbol => |sym_off| if (dst_rc != .general_purpose or sym_off.off != 0) 28646 while (try ptr.toRegClass(false, .general_purpose, cg)) {}, 28647 } 28648 try strat.read(cg, dst_reg, try ptr.tracking(cg).short.deref().mem(cg, .{ 28649 .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())), 28650 })); 28651 } 28652 28653 fn storeReg(ptr: *Temp, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { 28654 const src_rc = src_reg.class(); 28655 const src_abi_size = src_ty.abiSize(cg.pt.zcu); 28656 const strat = try cg.moveStrategy(src_ty, src_rc, false); 28657 // hack around linker relocation bugs 28658 switch (ptr.tracking(cg).short) { 28659 else => {}, 28660 .lea_symbol => |sym_off| if (src_rc != .general_purpose or sym_off.off != 0) 28661 while (try ptr.toRegClass(false, .general_purpose, cg)) {}, 28662 } 28663 if (src_rc == 
.x87 or std.math.isPowerOfTwo(src_abi_size)) {
            try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{
                .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
            }), src_reg);
        } else {
            const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu);
            const frame_index = try cg.allocFrameIndex(frame_alloc);
            const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size);
            try strat.write(cg, .{
                .base = .{ .frame = frame_index },
                .mod = .{ .rm = .{ .size = frame_size } },
            }, src_reg);
            var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
            var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
            try ptr.memcpy(&src_ptr, &len, cg);
            try src_ptr.die(cg);
            try len.die(cg);
        }
    }

    /// Reads a value of type `dst_ty` into `dst_reg` from the memory tracked by
    /// `src`, displaced by `disp` bytes. The access width is the smaller of the
    /// type's ABI size (in bits) and the register's bit size.
    fn readReg(src: Temp, disp: i32, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
        const strat = try cg.moveStrategy(dst_ty, dst_reg.class(), false);
        try strat.read(cg, dst_reg, try src.tracking(cg).short.mem(cg, .{
            .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())),
            .disp = disp,
        }));
    }

    /// Writes `src_reg`, holding a value of type `src_ty`, to the memory tracked
    /// by `dst`, displaced by `disp` bytes.
    ///
    /// x87 registers and power-of-two sized types are written with a single
    /// move. Any other size is first written to a fresh spill slot and then
    /// copied byte-wise to the destination, so that exactly `abiSize` bytes of
    /// the destination are touched.
    fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void {
        const src_rc = src_reg.class();
        const src_abi_size = src_ty.abiSize(cg.pt.zcu);
        const strat = try cg.moveStrategy(src_ty, src_rc, false);
        if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
            try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
                .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
                .disp = disp,
            }), src_reg);
        } else {
            const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu);
            const frame_index = try cg.allocFrameIndex(frame_alloc);
            const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size);
            try strat.write(cg, .{
                .base = .{ .frame = frame_index },
                .mod = .{ .rm = .{ .size = frame_size } },
            }, src_reg);
            // The destination address must carry `disp`, exactly as the direct
            // path above does; otherwise every odd-sized write lands at offset 0.
            var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(disp));
            var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
            var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
            try dst_ptr.memcpy(&src_ptr, &len, cg);
            try dst_ptr.die(cg);
            try src_ptr.die(cg);
            try len.die(cg);
        }
    }

    /// Copies `len` bytes from `src` to `dst`. The three temps are moved into
    /// `rdi`, `rsi` and `rcx` respectively (restarting the pass whenever `toReg`
    /// reports that it moved something) before the `.{ .@"rep _sb", .mov }`
    /// instruction is emitted. All three temps may be replaced as a result.
    fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) !void {
        while (true) for ([_]*Temp{ dst, src, len }, [_]Register{ .rdi, .rsi, .rcx }) |temp, reg| {
            if (try temp.toReg(reg, cg)) break;
        } else break;
        try cg.asmOpOnly(.{ .@"rep _sb", .mov });
    }

    /// Fills `len` bytes at `dst` with `val`. The three temps are moved into
    /// `rdi`, `rax` and `rcx` respectively (restarting the pass whenever `toReg`
    /// reports that it moved something) before the `.{ .@"rep _sb", .sto }`
    /// instruction is emitted. All three temps may be replaced as a result.
    fn memset(dst: *Temp, val: *Temp, len: *Temp, cg: *CodeGen) !void {
        while (true) for ([_]*Temp{ dst, val, len }, [_]Register{ .rdi, .rax, .rcx }) |temp, reg| {
            if (try temp.toReg(reg, cg)) break;
        } else break;
        try cg.asmOpOnly(.{ .@"rep _sb", .sto });
    }

    /// Makes `temp` the result of `inst`.
    ///
    /// If liveness reports `inst` as unused the temp is simply killed. A ref is
    /// copied into newly allocated storage for `inst`; a real temp hands its
    /// tracking over to `inst` without copying.
    fn moveTo(temp: Temp, inst: Air.Inst.Index, cg: *CodeGen) !void {
        if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) {
            .ref => {
                const result = try cg.allocRegOrMem(inst, true);
                try cg.genCopy(cg.typeOfIndex(inst), result, temp.tracking(cg).short, .{});
                tracking_log.debug("{} => {} (birth)", .{ inst, result });
                cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result));
            },
            .temp => |temp_index| {
                const temp_tracking = temp_index.tracking(cg);
                tracking_log.debug("{} => {} (birth)", .{ inst, temp_tracking.short });
                cg.inst_tracking.putAssumeCapacityNoClobber(inst, temp_tracking.*);
                assert(cg.reuseTemp(inst, temp_index.toIndex(), temp_tracking));
            },
        }
    }

    /// Ends the lifetime of `temp`. Refs are left alone; a real temp has its
    /// tracking killed.
    fn die(temp: Temp, cg: *CodeGen) !void {
        switch (temp.unwrap(cg)) {
            .ref => {},
            .temp => |temp_index| try temp_index.tracking(cg).die(cg,
temp_index.toIndex()), 28753 } 28754 } 28755 28756 const Index = enum(u4) { 28757 _, 28758 28759 fn toIndex(index: Index) Air.Inst.Index { 28760 return .fromTargetIndex(@intFromEnum(index)); 28761 } 28762 28763 fn fromIndex(index: Air.Inst.Index) Index { 28764 return @enumFromInt(index.toTargetIndex()); 28765 } 28766 28767 fn tracking(index: Index, cg: *CodeGen) *InstTracking { 28768 return &cg.inst_tracking.values()[@intFromEnum(index)]; 28769 } 28770 28771 fn isValid(index: Index, cg: *CodeGen) bool { 28772 return index.tracking(cg).short != .dead; 28773 } 28774 28775 fn typeOf(index: Index, cg: *CodeGen) Type { 28776 assert(index.isValid(cg)); 28777 return cg.temp_type[@intFromEnum(index)]; 28778 } 28779 28780 const max = std.math.maxInt(@typeInfo(Index).@"enum".tag_type); 28781 const Set = std.StaticBitSet(max); 28782 const SafetySet = if (std.debug.runtime_safety) Set else struct { 28783 inline fn initEmpty() @This() { 28784 return .{}; 28785 } 28786 28787 inline fn isSet(_: @This(), index: usize) bool { 28788 assert(index < max); 28789 return true; 28790 } 28791 28792 inline fn set(_: @This(), index: usize) void { 28793 assert(index < max); 28794 } 28795 28796 inline fn eql(_: @This(), _: @This()) bool { 28797 return true; 28798 } 28799 }; 28800 }; 28801 }; 28802 28803 fn resetTemps(cg: *CodeGen) void { 28804 for (0..@intFromEnum(cg.next_temp_index)) |temp_index| { 28805 const temp: Temp.Index = @enumFromInt(temp_index); 28806 assert(!temp.isValid(cg)); 28807 cg.temp_type[temp_index] = undefined; 28808 } 28809 cg.next_temp_index = @enumFromInt(0); 28810 } 28811 28812 fn reuseTemp( 28813 cg: *CodeGen, 28814 new_inst: Air.Inst.Index, 28815 old_inst: Air.Inst.Index, 28816 tracking: *InstTracking, 28817 ) bool { 28818 switch (tracking.short) { 28819 .register, 28820 .register_pair, 28821 .register_offset, 28822 .register_overflow, 28823 .register_mask, 28824 .indirect, 28825 => for (tracking.short.getRegs()) |tracked_reg| { 28826 if 
(RegisterManager.indexOfRegIntoTracked(tracked_reg)) |tracked_index| { 28827 cg.register_manager.registers[tracked_index] = new_inst; 28828 } 28829 }, 28830 .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false, 28831 else => {}, 28832 } 28833 switch (tracking.short) { 28834 .eflags, .register_overflow => cg.eflags_inst = new_inst, 28835 else => {}, 28836 } 28837 tracking.reuse(cg, new_inst, old_inst); 28838 return true; 28839 } 28840 28841 fn tempAlloc(cg: *CodeGen, ty: Type) !Temp { 28842 const temp_index = cg.next_temp_index; 28843 temp_index.tracking(cg).* = .init( 28844 try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true), 28845 ); 28846 cg.temp_type[@intFromEnum(temp_index)] = ty; 28847 cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); 28848 return .{ .index = temp_index.toIndex() }; 28849 } 28850 28851 fn tempAllocReg(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet) !Temp { 28852 const temp_index = cg.next_temp_index; 28853 temp_index.tracking(cg).* = .init( 28854 .{ .register = try cg.register_manager.allocReg(temp_index.toIndex(), rs) }, 28855 ); 28856 cg.temp_type[@intFromEnum(temp_index)] = ty; 28857 cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); 28858 return .{ .index = temp_index.toIndex() }; 28859 } 28860 28861 fn tempAllocRegPair(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet) !Temp { 28862 const temp_index = cg.next_temp_index; 28863 temp_index.tracking(cg).* = .init( 28864 .{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rs) }, 28865 ); 28866 cg.temp_type[@intFromEnum(temp_index)] = ty; 28867 cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); 28868 return .{ .index = temp_index.toIndex() }; 28869 } 28870 28871 fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp { 28872 const temp_index = cg.next_temp_index; 28873 temp_index.tracking(cg).* = .init( 28874 try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), false), 28875 ); 28876 
cg.temp_type[@intFromEnum(temp_index)] = ty; 28877 cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); 28878 return .{ .index = temp_index.toIndex() }; 28879 } 28880 28881 fn tempInit(cg: *CodeGen, ty: Type, value: MCValue) !Temp { 28882 const temp_index = cg.next_temp_index; 28883 temp_index.tracking(cg).* = .init(value); 28884 cg.temp_type[@intFromEnum(temp_index)] = ty; 28885 try cg.getValue(value, temp_index.toIndex()); 28886 cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); 28887 return .{ .index = temp_index.toIndex() }; 28888 } 28889 28890 fn tempFromValue(cg: *CodeGen, value: Value) !Temp { 28891 return cg.tempInit(value.typeOf(cg.pt.zcu), try cg.genTypedValue(value)); 28892 } 28893 28894 fn tempFromOperand( 28895 cg: *CodeGen, 28896 inst: Air.Inst.Index, 28897 op_index: Liveness.OperandInt, 28898 op_ref: Air.Inst.Ref, 28899 ignore_death: bool, 28900 ) !Temp { 28901 const zcu = cg.pt.zcu; 28902 const ip = &zcu.intern_pool; 28903 28904 if (ignore_death or !cg.liveness.operandDies(inst, op_index)) { 28905 if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst }; 28906 const val = op_ref.toInterned().?; 28907 const gop = try cg.const_tracking.getOrPut(cg.gpa, val); 28908 if (!gop.found_existing) gop.value_ptr.* = .init(init: { 28909 const const_mcv = try cg.genTypedValue(.fromInterned(val)); 28910 switch (const_mcv) { 28911 .lea_tlv => |tlv_sym| switch (cg.bin_file.tag) { 28912 .elf, .macho => { 28913 if (cg.mod.pic) { 28914 try cg.spillRegisters(&.{ .rdi, .rax }); 28915 } else { 28916 try cg.spillRegisters(&.{.rax}); 28917 } 28918 const frame_index = try cg.allocFrameIndex(.init(.{ 28919 .size = 8, 28920 .alignment = .@"8", 28921 })); 28922 try cg.genSetMem( 28923 .{ .frame = frame_index }, 28924 0, 28925 .usize, 28926 .{ .lea_symbol = .{ .sym_index = tlv_sym } }, 28927 .{}, 28928 ); 28929 break :init .{ .load_frame = .{ .index = frame_index } }; 28930 }, 28931 else => break :init const_mcv, 28932 }, 28933 else => break :init 
const_mcv, 28934 } 28935 }); 28936 return cg.tempInit(.fromInterned(ip.typeOf(val)), gop.value_ptr.short); 28937 } 28938 28939 const temp_index = cg.next_temp_index; 28940 const temp: Temp = .{ .index = temp_index.toIndex() }; 28941 const op_inst = op_ref.toIndex().?; 28942 const tracking = cg.getResolvedInstValue(op_inst); 28943 temp_index.tracking(cg).* = tracking.*; 28944 if (!cg.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? }; 28945 cg.temp_type[@intFromEnum(temp_index)] = cg.typeOf(op_ref); 28946 cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); 28947 return temp; 28948 } 28949 28950 inline fn tempsFromOperands(cg: *CodeGen, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp { 28951 var temps: [op_refs.len]Temp = undefined; 28952 inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| { 28953 temp.* = try cg.tempFromOperand(inst, op_index, op_ref, inline for (0..op_index) |prev_op_index| { 28954 if (op_ref == op_refs[prev_op_index]) break true; 28955 } else false); 28956 } 28957 return temps; 28958 } 28959 28960 const Operand = union(enum) { 28961 none, 28962 reg: Register, 28963 mem: Memory, 28964 imm: Immediate, 28965 inst: Mir.Inst.Index, 28966 }; 28967 28968 const Select = struct { 28969 cg: *CodeGen, 28970 temps: [@intFromEnum(Select.Operand.Ref.none)]Temp, 28971 labels: [@intFromEnum(Label._)]struct { 28972 backward: ?Mir.Inst.Index, 28973 forward: [1]?Mir.Inst.Index, 28974 }, 28975 28976 fn emitLabel(s: *Select, label_index: Label) void { 28977 if (label_index == ._) return; 28978 const label = &s.labels[@intFromEnum(label_index)]; 28979 for (&label.forward) |*reloc| { 28980 if (reloc.*) |r| s.cg.performReloc(r); 28981 reloc.* = null; 28982 } 28983 label.backward = @intCast(s.cg.mir_instructions.len); 28984 } 28985 28986 fn emit(s: *Select, inst: Instruction) !void { 28987 s.emitLabel(inst[0]); 28988 const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] }; 28989 var mir_ops: 
[4]CodeGen.Operand = undefined; 28990 inline for (&mir_ops, 3..) |*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s); 28991 s.cg.asmOps(mir_tag, mir_ops) catch |err| switch (err) { 28992 error.InvalidInstruction => { 28993 const fixes = @tagName(mir_tag[0]); 28994 const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?; 28995 return s.cg.fail( 28996 "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", 28997 .{ 28998 fixes[0..fixes_blank], 28999 @tagName(mir_tag[1]), 29000 fixes[fixes_blank + 1 ..], 29001 @tagName(mir_ops[0]), 29002 @tagName(mir_ops[1]), 29003 @tagName(mir_ops[2]), 29004 @tagName(mir_ops[3]), 29005 }, 29006 ); 29007 }, 29008 else => |e| return e, 29009 }; 29010 } 29011 29012 const Case = struct { 29013 required_features: [4]?std.Target.x86.Feature = @splat(null), 29014 dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any), 29015 src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any), 29016 patterns: []const Select.Pattern, 29017 extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused), 29018 dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused), 29019 clobbers: struct { eflags: bool = false } = .{}, 29020 each: union(enum) { 29021 once: []const Instruction, 29022 }, 29023 }; 29024 29025 const Constraint = union(enum) { 29026 any, 29027 any_bool_vec, 29028 any_int, 29029 any_signed_int, 29030 any_float, 29031 po2_any, 29032 bool_vec: Memory.Size, 29033 vec: Memory.Size, 29034 signed_int_vec: Memory.Size, 29035 signed_int_or_full_vec: Memory.Size, 29036 unsigned_int_vec: Memory.Size, 29037 int_or_vec: Memory.Size, 29038 exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size }, 29039 int: Memory.Size, 29040 scalar_int: Memory.Size, 29041 scalar_signed_int: 
/// Decides whether the type `ty` satisfies `constraint` on the current target.
///
/// Size-carrying constraints compare `Memory.Size.bitSize(cg.target)` against a
/// width derived from `ty`. Runtime pointers are measured with
/// `cg.target.ptrBitWidth()`. Several integer constraints also accept `bool`,
/// either unconditionally or as a 1-bit value.
///
/// The "remainder" constraints compare against `(bits - 1) % of + 1`, where
/// `of` is the bit size of the `of` field and `bits` is the width of the value.
fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool {
    const zcu = cg.pt.zcu;
    return switch (constraint) {
        .any => true,
        .any_bool_vec => ty.isVector(zcu) and ty.childType(zcu).toIntern() == .bool_type,
        .any_int => ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu),
        .any_signed_int => ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed,
        .any_float => ty.isRuntimeFloat(),
        .po2_any => std.math.isPowerOfTwo(ty.abiSize(zcu)),

        // Bool vectors are limited by their element count, not their ABI size.
        .bool_vec => |size| blk: {
            if (!ty.isVector(zcu)) break :blk false;
            if (ty.scalarType(zcu).toIntern() != .bool_type) break :blk false;
            break :blk size.bitSize(cg.target) >= ty.vectorLen(zcu);
        },
        // NOTE(review): this compares a bit count against `abiSize` (bytes), while the
        // neighbouring vector constraints use `8 * ty.abiSize(zcu)` or `ty.bitSize(zcu)`.
        // Possibly a missing `8 *`; behaviour is kept as-is until confirmed.
        .vec => |size| blk: {
            if (!ty.isVector(zcu)) break :blk false;
            if (ty.scalarType(zcu).toIntern() == .bool_type) break :blk false;
            break :blk size.bitSize(cg.target) >= ty.abiSize(zcu);
        },
        .signed_int_vec => |size| blk: {
            if (!ty.isVector(zcu)) break :blk false;
            if (size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) break :blk false;
            const scalar_ty = ty.scalarType(zcu);
            break :blk scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .signed;
        },
        .signed_int_or_full_vec => |size| blk: {
            if (!ty.isVector(zcu)) break :blk false;
            if (size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) break :blk false;
            const scalar_ty = ty.scalarType(zcu);
            if (scalar_ty.isPtrAtRuntime(zcu)) break :blk true;
            if (!scalar_ty.isAbiInt(zcu)) break :blk false;
            const scalar_int_info = scalar_ty.intInfo(zcu);
            // Unsigned elements only qualify when they are a power-of-two width of at least 8 bits.
            break :blk scalar_int_info.signedness == .signed or
                (scalar_int_info.bits >= 8 and std.math.isPowerOfTwo(scalar_int_info.bits));
        },
        .unsigned_int_vec => |size| blk: {
            if (!ty.isVector(zcu)) break :blk false;
            if (size.bitSize(cg.target) < ty.bitSize(zcu)) break :blk false;
            const scalar_ty = ty.scalarType(zcu);
            break :blk scalar_ty.isPtrAtRuntime(zcu) or
                (scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .unsigned);
        },
        .int_or_vec => |size| blk: {
            if (ty.isVector(zcu)) break :blk ty.scalarType(zcu).toIntern() != .bool_type and
                size.bitSize(cg.target) >= 8 * ty.abiSize(zcu);
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            break :blk ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits;
        },
        .exact_remainder_int_or_vec => |of_is| blk: {
            if (ty.isVector(zcu)) break :blk ty.scalarType(zcu).toIntern() != .bool_type and
                of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1;
            if (ty.isPtrAtRuntime(zcu)) break :blk of_is.is.bitSize(cg.target) ==
                (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
            if (!ty.isAbiInt(zcu)) break :blk false;
            break :blk of_is.is.bitSize(cg.target) ==
                (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
        },

        .int => |size| blk: {
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            break :blk ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits;
        },
        .scalar_int => |size| blk: {
            const scalar_ty = ty.scalarType(zcu);
            if (scalar_ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            break :blk scalar_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= scalar_ty.intInfo(zcu).bits;
        },
        .scalar_signed_int => |size| blk: {
            const scalar_ty = ty.scalarType(zcu);
            if (!scalar_ty.isAbiInt(zcu)) break :blk false;
            const scalar_int_info = scalar_ty.intInfo(zcu);
            if (scalar_int_info.signedness != .signed) break :blk false;
            break :blk size.bitSize(cg.target) >= scalar_int_info.bits;
        },
        .scalar_unsigned_int => |size| blk: {
            const scalar_ty = ty.scalarType(zcu);
            if (scalar_ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            if (!scalar_ty.isAbiInt(zcu)) break :blk false;
            const scalar_int_info = scalar_ty.intInfo(zcu);
            if (scalar_int_info.signedness != .unsigned) break :blk false;
            break :blk size.bitSize(cg.target) >= scalar_int_info.bits;
        },
        .scalar_remainder_int => |of_is| blk: {
            const scalar_ty = ty.scalarType(zcu);
            if (scalar_ty.isPtrAtRuntime(zcu)) break :blk of_is.is.bitSize(cg.target) >=
                (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
            if (!scalar_ty.isAbiInt(zcu)) break :blk false;
            break :blk of_is.is.bitSize(cg.target) >=
                (scalar_ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
        },

        .exact_int => |bit_size| blk: {
            if (ty.toIntern() == .bool_type) break :blk bit_size == 1;
            if (ty.isPtrAtRuntime(zcu)) break :blk bit_size == cg.target.ptrBitWidth();
            break :blk ty.isAbiInt(zcu) and bit_size == ty.intInfo(zcu).bits;
        },
        .exact_signed_int => |bit_size| blk: {
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            if (int_info.signedness != .signed) break :blk false;
            break :blk bit_size == int_info.bits;
        },
        .exact_unsigned_int => |bit_size| blk: {
            if (ty.toIntern() == .bool_type) break :blk bit_size == 1;
            if (ty.isPtrAtRuntime(zcu)) break :blk bit_size == cg.target.ptrBitWidth();
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            if (int_info.signedness != .unsigned) break :blk false;
            break :blk bit_size == int_info.bits;
        },

        .signed_or_exact_int => |size| blk: {
            if (ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) == cg.target.ptrBitWidth();
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            break :blk if (int_info.signedness == .signed)
                size.bitSize(cg.target) >= int_info.bits
            else
                size.bitSize(cg.target) == int_info.bits;
        },
        // NOTE(review): pointers are accepted here without comparing against `size`,
        // unlike `.unsigned_int` below, which requires `size >= ptrBitWidth()`.
        // Confirm that this is intentional.
        .unsigned_or_exact_int => |size| blk: {
            if (ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu)) break :blk true;
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            break :blk if (int_info.signedness == .signed)
                size.bitSize(cg.target) == int_info.bits
            else
                size.bitSize(cg.target) >= int_info.bits;
        },
        .po2_int => |size| blk: {
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            if (!ty.isAbiInt(zcu)) break :blk false;
            const bit_size = ty.intInfo(zcu).bits;
            break :blk std.math.isPowerOfTwo(bit_size) and size.bitSize(cg.target) >= bit_size;
        },
        .signed_po2_int => |size| blk: {
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            if (int_info.signedness != .signed) break :blk false;
            break :blk std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits;
        },
        .unsigned_po2_or_exact_int => |size| blk: {
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            break :blk if (int_info.signedness == .signed)
                size.bitSize(cg.target) == int_info.bits
            else
                std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits;
        },

        .remainder_int => |of_is| blk: {
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk of_is.is.bitSize(cg.target) >=
                (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
            if (!ty.isAbiInt(zcu)) break :blk false;
            break :blk of_is.is.bitSize(cg.target) >=
                (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
        },
        .exact_remainder_int => |of_is| blk: {
            if (ty.isPtrAtRuntime(zcu)) break :blk of_is.is.bitSize(cg.target) ==
                (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
            if (!ty.isAbiInt(zcu)) break :blk false;
            break :blk of_is.is.bitSize(cg.target) ==
                (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1;
        },
        .signed_or_exact_remainder_int => |of_is| blk: {
            if (ty.isPtrAtRuntime(zcu)) break :blk of_is.is.bitSize(cg.target) ==
                (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            break :blk if (int_info.signedness == .signed)
                of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
            else
                of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1;
        },
        .unsigned_or_exact_remainder_int => |of_is| blk: {
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk of_is.is.bitSize(cg.target) >=
                (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1;
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            break :blk if (int_info.signedness == .signed)
                of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
            else
                of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1;
        },

        .signed_int => |size| blk: {
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            if (int_info.signedness != .signed) break :blk false;
            break :blk size.bitSize(cg.target) >= int_info.bits;
        },
        .unsigned_int => |size| blk: {
            if (ty.toIntern() == .bool_type) break :blk true;
            if (ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            if (!ty.isAbiInt(zcu)) break :blk false;
            const int_info = ty.intInfo(zcu);
            if (int_info.signedness != .unsigned) break :blk false;
            break :blk size.bitSize(cg.target) >= int_info.bits;
        },

        // The remaining constraints inspect `ty.elemType2(zcu)` instead of `ty` itself.
        .elem_size_is => |size| size == ty.elemType2(zcu).abiSize(zcu),
        .po2_elem_size => std.math.isPowerOfTwo(ty.elemType2(zcu).abiSize(zcu)),
        .elem_int => |size| blk: {
            const elem_ty = ty.elemType2(zcu);
            if (elem_ty.toIntern() == .bool_type) break :blk true;
            if (elem_ty.isPtrAtRuntime(zcu)) break :blk size.bitSize(cg.target) >= cg.target.ptrBitWidth();
            break :blk elem_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= elem_ty.intInfo(zcu).bits;
        },
    };
}
/// Placement requirements for the two source operands of a selection case.
const Pattern = struct {
    /// One requirement per source operand.
    src: [2]Src,
    commute: struct { u8, u8 } = .{ 0, 0 },

    /// Where a source operand has to live.
    ///
    /// As implemented by `matches` and `convert` below:
    /// * plain names (`mem`, `gpr`, `mm`, `xmm`, `ymm`) require the operand to
    ///   already be tracked in that location;
    /// * `to_*` names only check the operand's ABI size (or nothing, for memory);
    /// * `mut_*` names additionally require `temp.isMut(cg)`.
    const Src = enum {
        none,
        any,
        imm8,
        imm16,
        imm32,
        simm32,
        mem,
        to_mem,
        mut_mem,
        to_mut_mem,
        gpr,
        to_gpr,
        mut_gpr,
        to_mut_gpr,
        mm,
        to_mm,
        mut_mm,
        to_mut_mm,
        xmm,
        to_xmm,
        mut_xmm,
        to_mut_xmm,
        ymm,
        to_ymm,
        mut_ymm,
        to_mut_ymm,

        /// True when `temp` is tracked as an immediate that fits in `UnsignedImm`.
        fn trackedImmFits(comptime UnsignedImm: type, temp: Temp, cg: *CodeGen) bool {
            return switch (temp.tracking(cg).short) {
                .immediate => |imm| std.math.cast(UnsignedImm, imm) != null,
                else => false,
            };
        }

        /// True when `temp` is tracked in a register of class `rc`, either directly
        /// or as a register-plus-offset whose offset is zero.
        fn trackedInRegClass(temp: Temp, rc: Register.Class, cg: *CodeGen) bool {
            return switch (temp.tracking(cg).short) {
                .register => |reg| reg.class() == rc,
                .register_offset => |reg_off| reg_off.reg.class() == rc and reg_off.off == 0,
                else => false,
            };
        }

        /// Reports whether `temp` can be used for this requirement.
        /// Must not be called on `.none`.
        fn matches(src: Src, temp: Temp, cg: *CodeGen) bool {
            switch (src) {
                .none => unreachable,
                .any, .to_mem, .to_mut_mem => return true,

                .imm8 => return trackedImmFits(u8, temp, cg),
                .imm16 => return trackedImmFits(u16, temp, cg),
                .imm32 => return trackedImmFits(u32, temp, cg),
                // The stored immediate is reinterpreted as signed before the range check.
                .simm32 => return switch (temp.tracking(cg).short) {
                    .immediate => |imm| std.math.cast(i32, @as(i64, @bitCast(imm))) != null,
                    else => false,
                },

                .mem => return temp.tracking(cg).short.isMemory(),
                .mut_mem => return temp.isMut(cg) and temp.tracking(cg).short.isMemory(),

                .gpr => return temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and
                    trackedInRegClass(temp, .general_purpose, cg),
                .mut_gpr => return temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and
                    trackedInRegClass(temp, .general_purpose, cg),
                .to_gpr, .to_mut_gpr => return temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8,

                .mm => return temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and
                    trackedInRegClass(temp, .mmx, cg),
                .mut_mm => return temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and
                    trackedInRegClass(temp, .mmx, cg),
                .to_mm, .to_mut_mm => return temp.typeOf(cg).abiSize(cg.pt.zcu) == 8,

                .xmm => return temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and
                    trackedInRegClass(temp, .sse, cg),
                .mut_xmm => return temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and
                    trackedInRegClass(temp, .sse, cg),
                .to_xmm, .to_mut_xmm => return temp.typeOf(cg).abiSize(cg.pt.zcu) == 16,

                // ymm operands use the same `.sse` register class as xmm; only the size differs.
                .ymm => return temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and
                    trackedInRegClass(temp, .sse, cg),
                .mut_ymm => return temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and
                    trackedInRegClass(temp, .sse, cg),
                .to_ymm, .to_mut_ymm => return temp.typeOf(cg).abiSize(cg.pt.zcu) == 32,
            }
        }

        /// Moves `temp` into the location this requirement names, forwarding the
        /// result of `toBase` / `toRegClass`. Immediate and `.any` requirements
        /// never move anything and yield `false`.
        /// NOTE(review): the returned bool presumably reports whether `temp` was
        /// changed, and the first `toRegClass` argument presumably requests a
        /// mutable location - confirm against `Temp`.
        fn convert(src: Src, temp: *Temp, cg: *CodeGen) !bool {
            switch (src) {
                .none => unreachable,
                .any, .imm8, .imm16, .imm32, .simm32 => return false,
                .mem, .to_mem, .mut_mem, .to_mut_mem => return temp.toBase(cg),
                .gpr, .to_gpr => return temp.toRegClass(false, .general_purpose, cg),
                .mut_gpr, .to_mut_gpr => return temp.toRegClass(true, .general_purpose, cg),
                .mm, .to_mm => return temp.toRegClass(false, .mmx, cg),
                .mut_mm, .to_mut_mm => return temp.toRegClass(true, .mmx, cg),
                .xmm, .to_xmm, .ymm, .to_ymm => return temp.toRegClass(false, .sse, cg),
                .mut_xmm, .to_mut_xmm, .mut_ymm, .to_mut_ymm => return temp.toRegClass(true, .sse, cg),
            }
        }
    };
};
/// Describes one temporary of a selection case: its type and how to obtain it.
const TempSpec = struct {
    type: Type = .noreturn,
    kind: Kind,

    const unused: TempSpec = .{ .kind = .unused };

    const Kind = union(enum) {
        unused,
        any,
        cc: Condition,
        reg: Register,
        rc: Register.Class,
        rc_mask: struct { rc: Register.Class, info: MaskInfo },
        mem,
        smin_mem: Select.Operand.Ref,
        smax_mem: Select.Operand.Ref,
        umin_mem: Select.Operand.Ref,
        umax_mem: Select.Operand.Ref,
        ref: Select.Operand.Ref,
        ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo },

        /// Marks `temp` as a mask for the two mask-carrying kinds; every other
        /// kind needs no finishing step.
        fn finish(kind: Kind, temp: Temp, s: *const Select) void {
            switch (kind) {
                .rc_mask => |rc_mask| temp.asMask(rc_mask.info, s.cg),
                .ref_mask => |ref_mask| temp.asMask(ref_mask.info, s.cg),
                else => {},
            }
        }
    };

    /// Produces the temporary described by `spec`, or `null` for `.unused`.
    ///
    /// The `*min_mem` / `*max_mem` kinds build a constant from the type of the
    /// referenced operand: the signed/unsigned minimum or maximum of its scalar
    /// integer type (or the matching `bool`), repeated across all elements when
    /// the operand is a vector, and passed to `tempFromValue`.
    fn create(spec: TempSpec, s: *Select) !?Temp {
        const cg = s.cg;
        switch (spec.kind) {
            .unused => return null,
            .any => return try cg.tempAlloc(spec.type),
            .cc => |cc| return try cg.tempInit(spec.type, .{ .eflags = cc }),
            .reg => |reg| return try cg.tempInit(spec.type, .{ .register = reg }),
            .rc => |rc| return try cg.tempAllocReg(spec.type, regSetForRegClass(rc)),
            .rc_mask => |rc_mask| return try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)),
            .mem => return try cg.tempAllocMem(spec.type),
            .ref => |ref| return ref.deref(s),
            .ref_mask => |ref_mask| return ref_mask.ref.deref(s),
            .smin_mem, .smax_mem, .umin_mem, .umax_mem => |ty_ref| {
                const pt = cg.pt;
                const zcu = pt.zcu;
                const ip = &zcu.intern_pool;
                const ty = ty_ref.deref(s).typeOf(s.cg);
                const vector_len, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) {
                    else => .{ null, ty },
                    .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) },
                };
                const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) {
                    // For `bool`, signed-min and unsigned-max are `true`; the other two are `false`.
                    .bool_type => .{
                        scalar_ty,
                        .fromInterned(switch (spec.kind) {
                            .smin_mem, .umax_mem => .bool_true,
                            .smax_mem, .umin_mem => .bool_false,
                            else => unreachable,
                        }),
                    },
                    else => {
                        // Non-integer scalars are treated as unsigned integers of the same bit size.
                        const scalar_info: InternPool.Key.IntType = if (scalar_ty.isAbiInt(zcu))
                            scalar_ty.intInfo(zcu)
                        else
                            .{ .signedness = .unsigned, .bits = @intCast(scalar_ty.bitSize(zcu)) };
                        const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits);

                        if (scalar_info.bits <= 64) {
                            // Start from the 64-bit limit and shift it down to the scalar width.
                            const int_val: i64 = switch (spec.kind) {
                                .smin_mem => std.math.minInt(i64),
                                .smax_mem => std.math.maxInt(i64),
                                .umin_mem => 0,
                                .umax_mem => -1,
                                else => unreachable,
                            };
                            const shift: u6 = @intCast(64 - scalar_info.bits);
                            const small_val = switch (scalar_info.signedness) {
                                .signed => try pt.intValue_i64(scalar_int_ty, int_val >> shift),
                                .unsigned => try pt.intValue_u64(scalar_int_ty, @as(u64, @bitCast(int_val)) >> shift),
                            };
                            break :res_scalar .{ scalar_int_ty, small_val };
                        }

                        // Wider than 64 bits: compute the limit with a big integer.
                        const limit: std.math.big.int.TwosCompIntLimit = switch (spec.kind) {
                            .smin_mem, .umin_mem => .min,
                            .smax_mem, .umax_mem => .max,
                            else => unreachable,
                        };
                        const limit_signedness: std.builtin.Signedness = switch (spec.kind) {
                            .smin_mem, .smax_mem => .signed,
                            .umin_mem, .umax_mem => .unsigned,
                            else => unreachable,
                        };
                        var big_int: std.math.big.int.Managed = try .init(cg.gpa);
                        defer big_int.deinit();
                        try big_int.setTwosCompIntLimit(limit, limit_signedness, scalar_info.bits);
                        // Reinterpret the limit in the signedness of the scalar type itself.
                        try big_int.truncate(&big_int, scalar_info.signedness, scalar_info.bits);
                        break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) };
                    },
                };
                const res_val: Value = if (vector_len) |len| .fromInterned(try pt.intern(.{ .aggregate = .{
                    .ty = (try pt.vectorType(.{
                        .len = len,
                        .child = res_scalar_ty.toIntern(),
                    })).toIntern(),
                    .storage = .{ .repeated_elem = res_scalar_val.toIntern() },
                } })) else res_scalar_val;
                return try cg.tempFromValue(res_val);
            },
        }
    }
};

/// One instruction of a selection body: a label, the MIR fixes and tag, and up
/// to four operands.
const Instruction = struct {
    Label,
    Mir.Inst.Fixes,
    Mir.Inst.Tag,
    Select.Operand,
    Select.Operand,
    Select.Operand,
    Select.Operand,
};
/// Label attached to an instruction; `_` is used for "no label".
const Label = enum { @"0:", @"1:", @"_" };
/// Kind of operand an `Operand` describes.
const Tag = enum {
    none,
    backward_label,
    forward_label,
    ref,
    simm,
    uimm,
    lea,
    mem,
};
/// A compact description of a value added to an operand's immediate.
///
/// `adjustedImm` evaluates it as `factor * scale * amount + imm`, where
/// `amount` is looked up from the operand types at selection time.
/// The struct is packed into 8 bits: a 2-bit signed `factor`, a 2-bit `scale`
/// and a 4-bit `amount`.
const Adjust = packed struct(u8) {
    factor: i2,
    scale: Memory.Scale,
    amount: enum(u4) {
        none,
        ptr_size,
        ptr_bit_size,
        size,
        src0_size,
        bit_size,
        src0_bit_size,
        len,
        elem_limbs,
        src0_elem_size,
        src0_elem_size_times_src1,
        log2_src0_elem_size,
        smin,
        smax,
        umax,
    },

    const none: Adjust = .{ .factor = 0, .scale = .@"1", .amount = .none };
    const sub_ptr_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .ptr_size };
    const add_ptr_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .ptr_bit_size };
    const add_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .size };
    const sub_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .size };
    const add_src0_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_size };
    const sub_src0_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_size };
    const add_2_bit_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .bit_size };
    const add_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .bit_size };
    const sub_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .bit_size };
    const add_src0_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_bit_size };
    const sub_src0_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_bit_size };
    const add_8_len: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .len };
    const add_4_len: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .len };
    // `scale` occupies 2 bits, and the tags `@"1"`, `@"2"`, `@"4"` and `@"8"` are all in
    // use, so a `.@"3"` tag cannot exist. With `factor` limited to -2..1, no combination
    // of `factor * scale` equals 3 either. The previous initializer
    // (`.scale = .@"3"`) therefore only compiled while nothing referenced this name;
    // the name is kept so that any use fails with a clear message.
    const add_3_len: Adjust = @compileError("Adjust cannot encode 3 * len: Memory.Scale only provides 1, 2, 4 and 8");
    const add_2_len: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .len };
    const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len };
    const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len };
    const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size };
    const add_2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .src0_elem_size };
    const add_4_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .src0_elem_size };
    const add_8_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .src0_elem_size };
    const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size };
    const add_src0_elem_size_times_src1: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size_times_src1 };
    const sub_src0_elem_size_times_src1: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size_times_src1 };
    const add_log2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .log2_src0_elem_size };
    const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs };
    const add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax };
};
/// Names one of the temporaries of a selection; the enum value is the index
/// into `Select.temps` (see `deref`).
const Ref = enum(u4) {
    tmp0,
    tmp1,
    tmp2,
    tmp3,
    tmp4,
    tmp5,
    dst0,
    src0,
    src1,
    none,

    /// A `Ref` paired with an operand size, packed into one byte.
    /// The constants are named `<ref><suffix>`, with the suffix selecting the size:
    /// none = `.none`, `b` = byte, `w` = word, `d` = dword, `p` = ptr,
    /// `q` = qword, `x` = xword, `y` = yword.
    const Sized = packed struct(u8) {
        ref: Ref,
        size: Memory.Size,

        /// Pairs `ref` with `size`.
        fn of(ref: Ref, size: Memory.Size) Sized {
            return .{ .ref = ref, .size = size };
        }

        const none: Sized = .of(.none, .none);

        const tmp0: Sized = .of(.tmp0, .none);
        const tmp0b: Sized = .of(.tmp0, .byte);
        const tmp0w: Sized = .of(.tmp0, .word);
        const tmp0d: Sized = .of(.tmp0, .dword);
        const tmp0p: Sized = .of(.tmp0, .ptr);
        const tmp0q: Sized = .of(.tmp0, .qword);
        const tmp0x: Sized = .of(.tmp0, .xword);
        const tmp0y: Sized = .of(.tmp0, .yword);

        const tmp1: Sized = .of(.tmp1, .none);
        const tmp1b: Sized = .of(.tmp1, .byte);
        const tmp1w: Sized = .of(.tmp1, .word);
        const tmp1d: Sized = .of(.tmp1, .dword);
        const tmp1p: Sized = .of(.tmp1, .ptr);
        const tmp1q: Sized = .of(.tmp1, .qword);
        const tmp1x: Sized = .of(.tmp1, .xword);
        const tmp1y: Sized = .of(.tmp1, .yword);

        const tmp2: Sized = .of(.tmp2, .none);
        const tmp2b: Sized = .of(.tmp2, .byte);
        const tmp2w: Sized = .of(.tmp2, .word);
        const tmp2d: Sized = .of(.tmp2, .dword);
        const tmp2p: Sized = .of(.tmp2, .ptr);
        const tmp2q: Sized = .of(.tmp2, .qword);
        const tmp2x: Sized = .of(.tmp2, .xword);
        const tmp2y: Sized = .of(.tmp2, .yword);

        const tmp3: Sized = .of(.tmp3, .none);
        const tmp3b: Sized = .of(.tmp3, .byte);
        const tmp3w: Sized = .of(.tmp3, .word);
        const tmp3d: Sized = .of(.tmp3, .dword);
        const tmp3p: Sized = .of(.tmp3, .ptr);
        const tmp3q: Sized = .of(.tmp3, .qword);
        const tmp3x: Sized = .of(.tmp3, .xword);
        const tmp3y: Sized = .of(.tmp3, .yword);

        const tmp4: Sized = .of(.tmp4, .none);
        const tmp4b: Sized = .of(.tmp4, .byte);
        const tmp4w: Sized = .of(.tmp4, .word);
        const tmp4d: Sized = .of(.tmp4, .dword);
        const tmp4p: Sized = .of(.tmp4, .ptr);
        const tmp4q: Sized = .of(.tmp4, .qword);
        const tmp4x: Sized = .of(.tmp4, .xword);
        const tmp4y: Sized = .of(.tmp4, .yword);

        const tmp5: Sized = .of(.tmp5, .none);
        const tmp5b: Sized = .of(.tmp5, .byte);
        const tmp5w: Sized = .of(.tmp5, .word);
        const tmp5d: Sized = .of(.tmp5, .dword);
        const tmp5p: Sized = .of(.tmp5, .ptr);
        const tmp5q: Sized = .of(.tmp5, .qword);
        const tmp5x: Sized = .of(.tmp5, .xword);
        const tmp5y: Sized = .of(.tmp5, .yword);

        const dst0: Sized = .of(.dst0, .none);
        const dst0b: Sized = .of(.dst0, .byte);
        const dst0w: Sized = .of(.dst0, .word);
        const dst0d: Sized = .of(.dst0, .dword);
        const dst0p: Sized = .of(.dst0, .ptr);
        const dst0q: Sized = .of(.dst0, .qword);
        const dst0x: Sized = .of(.dst0, .xword);
        const dst0y: Sized = .of(.dst0, .yword);

        const src0: Sized = .of(.src0, .none);
        const src0b: Sized = .of(.src0, .byte);
        const src0w: Sized = .of(.src0, .word);
        const src0d: Sized = .of(.src0, .dword);
        const src0p: Sized = .of(.src0, .ptr);
        const src0q: Sized = .of(.src0, .qword);
        const src0x: Sized = .of(.src0, .xword);
        const src0y: Sized = .of(.src0, .yword);

        const src1: Sized = .of(.src1, .none);
        const src1b: Sized = .of(.src1, .byte);
        const src1w: Sized = .of(.src1, .word);
        const src1d: Sized = .of(.src1, .dword);
        const src1p: Sized = .of(.src1, .ptr);
        const src1q: Sized = .of(.src1, .qword);
        const src1x: Sized = .of(.src1, .xword);
        const src1y: Sized = .of(.src1, .yword);
    };

    /// Looks up the temporary this reference names in `s.temps`.
    fn deref(ref: Ref, s: *const Select) Temp {
        const slot: usize = @intFromEnum(ref);
        return s.temps[slot];
    }
};
/// The empty operand.
const @"_": Select.Operand = .{ .tag = .none };

// Label operands. The label slot (0 or 1) is carried in `base.ref` as `tmp0` / `tmp1`;
// the `b` / `f` suffix selects a backward or forward label.
const @"0b": Select.Operand = .{ .tag = .backward_label, .base = .tmp0 };
const @"0f": Select.Operand = .{ .tag = .forward_label, .base = .tmp0 };
const @"1b": Select.Operand = .{ .tag = .backward_label, .base = .tmp1 };
const @"1f": Select.Operand = .{ .tag = .forward_label, .base = .tmp1 };

/// Builds a `.ref` operand that refers to `base` at its recorded size.
fn sizedRef(base: Ref.Sized) Select.Operand {
    return .{ .tag = .ref, .base = base };
}

// Direct references to a temporary at a given size; the names mirror `Ref.Sized`.
const tmp0b: Select.Operand = sizedRef(.tmp0b);
const tmp0w: Select.Operand = sizedRef(.tmp0w);
const tmp0d: Select.Operand = sizedRef(.tmp0d);
const tmp0p: Select.Operand = sizedRef(.tmp0p);
const tmp0q: Select.Operand = sizedRef(.tmp0q);
const tmp0x: Select.Operand = sizedRef(.tmp0x);
const tmp0y: Select.Operand = sizedRef(.tmp0y);

const tmp1b: Select.Operand = sizedRef(.tmp1b);
const tmp1w: Select.Operand = sizedRef(.tmp1w);
const tmp1d: Select.Operand = sizedRef(.tmp1d);
const tmp1p: Select.Operand = sizedRef(.tmp1p);
const tmp1q: Select.Operand = sizedRef(.tmp1q);
const tmp1x: Select.Operand = sizedRef(.tmp1x);
const tmp1y: Select.Operand = sizedRef(.tmp1y);

const tmp2b: Select.Operand = sizedRef(.tmp2b);
const tmp2w: Select.Operand = sizedRef(.tmp2w);
const tmp2d: Select.Operand = sizedRef(.tmp2d);
const tmp2p: Select.Operand = sizedRef(.tmp2p);
const tmp2q: Select.Operand = sizedRef(.tmp2q);
const tmp2x: Select.Operand = sizedRef(.tmp2x);
const tmp2y: Select.Operand = sizedRef(.tmp2y);

const tmp3b: Select.Operand = sizedRef(.tmp3b);
const tmp3w: Select.Operand = sizedRef(.tmp3w);
const tmp3d: Select.Operand = sizedRef(.tmp3d);
const tmp3p: Select.Operand = sizedRef(.tmp3p);
const tmp3q: Select.Operand = sizedRef(.tmp3q);
const tmp3x: Select.Operand = sizedRef(.tmp3x);
const tmp3y: Select.Operand = sizedRef(.tmp3y);

const tmp4b: Select.Operand = sizedRef(.tmp4b);
const tmp4w: Select.Operand = sizedRef(.tmp4w);
const tmp4d: Select.Operand = sizedRef(.tmp4d);
const tmp4p: Select.Operand = sizedRef(.tmp4p);
const tmp4q: Select.Operand = sizedRef(.tmp4q);
const tmp4x: Select.Operand = sizedRef(.tmp4x);
const tmp4y: Select.Operand = sizedRef(.tmp4y);

const tmp5b: Select.Operand = sizedRef(.tmp5b);
const tmp5w: Select.Operand = sizedRef(.tmp5w);
const tmp5d: Select.Operand = sizedRef(.tmp5d);
const tmp5p: Select.Operand = sizedRef(.tmp5p);
const tmp5q: Select.Operand = sizedRef(.tmp5q);
const tmp5x: Select.Operand = sizedRef(.tmp5x);
const tmp5y: Select.Operand = sizedRef(.tmp5y);

const dst0b: Select.Operand = sizedRef(.dst0b);
const dst0w: Select.Operand = sizedRef(.dst0w);
const dst0d: Select.Operand = sizedRef(.dst0d);
const dst0p: Select.Operand = sizedRef(.dst0p);
const dst0q: Select.Operand = sizedRef(.dst0q);
const dst0x: Select.Operand = sizedRef(.dst0x);
const dst0y: Select.Operand = sizedRef(.dst0y);

const src0b: Select.Operand = sizedRef(.src0b);
const src0w: Select.Operand = sizedRef(.src0w);
const src0d: Select.Operand = sizedRef(.src0d);
const src0p: Select.Operand = sizedRef(.src0p);
const src0q: Select.Operand = sizedRef(.src0q);
const src0x: Select.Operand = sizedRef(.src0x);
const src0y: Select.Operand = sizedRef(.src0y);

const src1b: Select.Operand = sizedRef(.src1b);
const src1w: Select.Operand = sizedRef(.src1w);
const src1d: Select.Operand = sizedRef(.src1d);
const src1p: Select.Operand = sizedRef(.src1p);
const src1q: Select.Operand = sizedRef(.src1q);
const src1x: Select.Operand = sizedRef(.src1x);
const src1y: Select.Operand = sizedRef(.src1y);

// Immediate operands. `s*` builds a `.simm` operand and `u*` a `.uimm` operand;
// the `i` forms take a literal `imm`, the `a` forms take a `base` / `adjust`
// pair, and the `ia` forms take both.

/// `.simm` operand with a literal value.
fn si(imm: i32) Select.Operand {
    return sia(imm, .none, .none);
}
/// `.simm` operand whose value comes from `adjust` applied to `base`.
fn sa(base: Ref.Sized, adjust: Adjust) Select.Operand {
    return sia(0, base, adjust);
}
/// `.simm` operand combining a literal with an adjustment.
fn sia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
    return .{ .tag = .simm, .base = base, .adjust = adjust, .imm = imm };
}
/// `.uimm` operand with a literal value.
fn ui(imm: i32) Select.Operand {
    return uia(imm, .none, .none);
}
/// `.uimm` operand whose value comes from `adjust` applied to `base`.
fn ua(base: Ref.Sized, adjust: Adjust) Select.Operand {
    return uia(0, base, adjust);
}
/// `.uimm` operand combining a literal with an adjustment.
fn uia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
    return .{ .tag = .uimm, .base = base, .adjust = adjust, .imm = imm };
}
// Constructors for `.lea` operands. The letters after `lea` name the optional
// parts that are supplied, in parameter order: `s` = index scale, `i` = index,
// `a` = adjust, `d` = displacement. Anything not supplied keeps the field
// default (no index, scale 1, no adjust, displacement 0). All of them funnel
// into `leasiad`.

/// `.lea` operand with only a base.
fn lea(size: Memory.Size, base: Ref) Select.Operand {
    return leasiad(size, base, .@"1", .none, .none, 0);
}
/// `.lea` operand with a base and an adjust.
fn leaa(size: Memory.Size, base: Ref, adjust: Adjust) Select.Operand {
    return leasiad(size, base, .@"1", .none, adjust, 0);
}
/// `.lea` operand with a base and a displacement.
fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand {
    return leasiad(size, base, .@"1", .none, .none, disp);
}
/// `.lea` operand with a base and an unscaled index.
fn leai(size: Memory.Size, base: Ref, index: Ref) Select.Operand {
    return leasiad(size, base, .@"1", index, .none, 0);
}
/// `.lea` operand with a base, an unscaled index and an adjust.
fn leaia(size: Memory.Size, base: Ref, index: Ref, adjust: Adjust) Select.Operand {
    return leasiad(size, base, .@"1", index, adjust, 0);
}
/// `.lea` operand with a base, an unscaled index and a displacement.
fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand {
    return leasiad(size, base, .@"1", index, .none, disp);
}
/// `.lea` operand with a base and a scaled index.
fn leasi(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref) Select.Operand {
    return leasiad(size, base, scale, index, .none, 0);
}
/// `.lea` operand with a base, a scaled index and a displacement.
fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand {
    return leasiad(size, base, scale, index, .none, disp);
}
/// `.lea` operand with every part spelled out.
fn leasiad(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand {
    return .{
        .tag = .lea,
        .base = .{ .ref = base, .size = size },
        .index = .{ .ref = index, .scale = scale },
        .adjust = adjust,
        .imm = disp,
    };
}
}, 29906 .adjust = adjust, 29907 }; 29908 } 29909 fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand { 29910 return .{ 29911 .tag = .mem, 29912 .base = base, 29913 .index = .{ .ref = index, .scale = scale }, 29914 .imm = disp, 29915 }; 29916 } 29917 fn memsiad(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand { 29918 return .{ 29919 .tag = .mem, 29920 .base = base, 29921 .index = .{ .ref = index, .scale = scale }, 29922 .adjust = adjust, 29923 .imm = disp, 29924 }; 29925 } 29926 29927 fn adjustedImm(op: Select.Operand, comptime SignedImm: type, s: *const Select) SignedImm { 29928 const UnsignedImm = @Type(.{ 29929 .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits }, 29930 }); 29931 return @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) { 29932 .none => 0, 29933 .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), 29934 .ptr_bit_size => s.cg.target.ptrBitWidth(), 29935 .size => @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)), 29936 .src0_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)), 29937 .bit_size => @intCast(op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), 29938 .src0_bit_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), 29939 .len => @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu)), 29940 .elem_limbs => @intCast(@divExact( 29941 op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), 29942 @divExact(op.base.size.bitSize(s.cg.target), 8), 29943 )), 29944 .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), 29945 .src0_elem_size_times_src1 => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) * 29946 
Select.Operand.Ref.src1.deref(s).tracking(s.cg).short.immediate), 29947 .log2_src0_elem_size => @intCast(std.math.log2(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))), 29948 .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate( 29949 -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), 29950 ), 29951 .smax => @as(SignedImm, std.math.maxInt(SignedImm)) >> @truncate( 29952 -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), 29953 ), 29954 .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate( 29955 -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), 29956 )), 29957 }) + op.imm; 29958 } 29959 29960 fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand { 29961 return switch (op.tag) { 29962 .none => .none, 29963 .backward_label => .{ .inst = s.labels[@intFromEnum(op.base.ref)].backward.? }, 29964 .forward_label => for (&s.labels[@intFromEnum(op.base.ref)].forward) |*label| { 29965 if (label.*) |_| continue; 29966 label.* = @intCast(s.cg.mir_instructions.len); 29967 break .{ .inst = undefined }; 29968 } else unreachable, 29969 .ref => switch (op.base.ref.deref(s).tracking(s.cg).short) { 29970 .immediate => |imm| .{ .imm = switch (op.base.size) { 29971 .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, @intCast(imm))), 29972 .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))), 29973 .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))), 29974 .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm), 29975 else => unreachable, 29976 } }, 29977 else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) }, 29978 .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) }, 
29979 }, 29980 .simm => .{ .imm = .s(op.adjustedImm(i32, s)) }, 29981 .uimm => .{ .imm = .u(@bitCast(op.adjustedImm(i64, s))) }, 29982 .lea => .{ .mem = .{ 29983 .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) }, 29984 .mod = .{ .rm = .{ 29985 .size = op.base.size, 29986 .index = switch (op.index.ref) { 29987 else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), 29988 .none => .none, 29989 }, 29990 .scale = op.index.scale, 29991 .disp = op.adjustedImm(i32, s), 29992 } }, 29993 } }, 29994 .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{ 29995 .size = op.base.size, 29996 .index = switch (op.index.ref) { 29997 else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), 29998 .none => .none, 29999 }, 30000 .scale = op.index.scale, 30001 .disp = op.adjustedImm(i32, s), 30002 }) }, 30003 }; 30004 } 30005 }; 30006 }; 30007 fn select( 30008 cg: *CodeGen, 30009 dst_temps: []Temp, 30010 dst_tys: []const Type, 30011 src_temps: []Temp, 30012 cases: []const Select.Case, 30013 ) !void { 30014 cases: for (cases) |case| { 30015 for (case.required_features) |required_feature| if (required_feature) |feature| if (!cg.hasFeature(feature)) continue :cases; 30016 for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases; 30017 for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases; 30018 if (std.debug.runtime_safety) { 30019 for (case.dst_constraints[dst_temps.len..]) |dst_constraint| assert(dst_constraint == .any); 30020 for (case.src_constraints[src_temps.len..]) |src_constraint| assert(src_constraint == .any); 30021 } 30022 patterns: for (case.patterns) |pattern| { 30023 for 
(pattern.src[0..src_temps.len], src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; 30024 if (std.debug.runtime_safety) for (pattern.src[src_temps.len..]) |src_pattern| assert(src_pattern == .none); 30025 30026 var s: Select = .{ 30027 .cg = cg, 30028 .temps = undefined, 30029 .labels = @splat(.{ .forward = @splat(null), .backward = null }), 30030 }; 30031 const tmp_slots = s.temps[@intFromEnum(Select.Operand.Ref.tmp0)..@intFromEnum(Select.Operand.Ref.dst0)]; 30032 const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)]; 30033 const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)]; 30034 30035 @memcpy(src_slots[0..src_temps.len], src_temps); 30036 std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]); 30037 for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue; 30038 30039 while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| { 30040 if (try src_pattern.convert(src_temp, cg)) break; 30041 } else break; 30042 @memcpy(src_slots[0..src_temps.len], src_temps); 30043 std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]); 30044 30045 if (case.clobbers.eflags or case.each != .once) try cg.spillEflagsIfOccupied(); 30046 30047 for (dst_temps, dst_tys, case.dst_temps[0..dst_temps.len]) |*dst_temp, dst_ty, dst_kind| 30048 dst_temp.* = (try Select.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, &s)).?; 30049 @memcpy(dst_slots[0..dst_temps.len], dst_temps); 30050 30051 switch (case.each) { 30052 .once => |body| { 30053 for (body) |inst| try s.emit(inst); 30054 s.emitLabel(.@"0:"); 30055 }, 30056 } 30057 30058 for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, &s); 30059 for (case.extra_temps, tmp_slots) |spec, temp| if (spec.kind != .unused) try temp.die(cg); 
30060 return; 30061 } 30062 } 30063 return error.SelectFailed; 30064 }