diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3be4356873..f20d079035 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2389,7 +2389,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(23_100); + @setEvalBranchQuota(23_600); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2427,7 +2427,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { // zig fmt: off .select => try cg.airSelect(inst), .shuffle => try cg.airShuffle(inst), - .reduce_optimized => try cg.airReduce(inst), // zig fmt: on .arg => if (cg.debug_output != .none) { @@ -67795,7 +67794,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .movbe, .dst0w, .src0w, ._, ._ }, + .{ ._, ._be, .mov, .dst0w, .src0w, ._, ._ }, } }, }, .{ .src_constraints = .{ .{ .exact_int = 16 }, .any, .any }, @@ -67815,7 +67814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .movbe, .dst0d, .src0d, ._, ._ }, + .{ ._, ._be, .mov, .dst0d, .src0d, ._, ._ }, } }, }, .{ .src_constraints = .{ .{ .exact_int = 32 }, .any, .any }, @@ -67824,7 +67823,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .dst0d, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0d, ._, ._, ._ }, } }, }, .{ .required_features = .{ .movbe, null, null, null }, @@ -67835,7 +67834,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ 
._, ._, .movbe, .dst0d, .src0d, ._, ._ }, + .{ ._, ._be, .mov, .dst0d, .src0d, ._, ._ }, .{ ._, ._r, .sa, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67846,7 +67845,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .dst0d, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0d, ._, ._, ._ }, .{ ._, ._r, .sa, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67858,7 +67857,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .movbe, .dst0d, .src0d, ._, ._ }, + .{ ._, ._be, .mov, .dst0d, .src0d, ._, ._ }, .{ ._, ._r, .sh, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67869,7 +67868,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .dst0d, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0d, ._, ._, ._ }, .{ ._, ._r, .sh, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67880,7 +67879,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .movbe, .dst0q, .src0q, ._, ._ }, + .{ ._, ._be, .mov, .dst0q, .src0q, ._, ._ }, } }, }, .{ .src_constraints = .{ .{ .exact_int = 64 }, .any, .any }, @@ -67889,7 +67888,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, } }, }, .{ .required_features = .{ .movbe, null, null, null }, @@ -67900,7 +67899,7 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .movbe, .dst0q, .src0q, ._, ._ }, + .{ ._, ._be, .mov, .dst0q, .src0q, ._, ._ }, .{ ._, ._r, .sa, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67911,7 +67910,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, .{ ._, ._r, .sa, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67923,7 +67922,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .movbe, .dst0q, .src0q, ._, ._ }, + .{ ._, ._be, .mov, .dst0q, .src0q, ._, ._ }, .{ ._, ._r, .sh, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67934,7 +67933,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, .{ ._, ._r, .sh, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -67962,7 +67961,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_size), ._, ._ }, .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, + .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -67992,7 +67991,7 @@ fn genBody(cg: *CodeGen, body: 
[]const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_size), ._, ._ }, .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp2q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp2q, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, @@ -68026,7 +68025,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, + .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -68059,7 +68058,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp2q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp2q, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, @@ -68091,7 +68090,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, .{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, + .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -68122,7 +68121,7 
@@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, .{ .@"0:", ._, .mov, .tmp2q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp2q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp2q, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, @@ -68155,7 +68154,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .movsx, .tmp2q, .mem(.src0b), ._, ._ }, .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68192,7 +68191,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ }, .{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68227,7 +68226,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, 
.sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68263,7 +68262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ }, .{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68298,7 +68297,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ ._, ._, .movsx, .tmp2q, .mem(.src0b), ._, ._ }, .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68334,7 +68333,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .movsx, .tmp2q, .mem(.src0b), ._, ._ }, .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ }, .{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68368,7 +68367,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_size), ._, ._ }, .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp3q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, 
._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -68403,7 +68402,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ .@"0:", ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp3q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp3q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp3q, ._, ._ }, .{ ._, ._rd, .sh, .tmp3q, .tmp2q, .uia(64, .src0, .sub_bit_size_rem_64), ._ }, .{ ._, ._, .mov, .tmp2q, .tmp4q, ._, ._ }, @@ -69742,7 +69741,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .src0d, ._, ._, ._ }, + .{ ._, .b_, .swap, .src0d, ._, ._, ._ }, .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, .{ ._, ._r, .sh, .src0d, .ui(4), ._, ._ }, .{ ._, ._l, .sh, .tmp0d, .ui(4), ._, ._ }, @@ -69897,7 +69896,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .src0d, ._, ._, ._ }, + .{ ._, .b_, .swap, .src0d, ._, ._, ._ }, .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, .{ ._, ._r, .sh, .src0d, .ui(4), ._, ._ }, .{ ._, ._l, .sh, .tmp0d, .ui(4), ._, ._ }, @@ -70053,7 +70052,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .src0d, ._, ._, ._ }, + .{ ._, .b_, .swap, .src0d, ._, ._, ._ }, .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, .{ ._, ._r, .sh, .src0d, .ui(4), ._, ._ }, .{ ._, ._l, .sh, .tmp0d, .ui(4), ._, ._ }, @@ -70150,7 +70149,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .dst0p, .mem(.tmp0), 
._, ._ }, .{ ._, ._b, .gf2p8affineq, .src0x, .lea(.dst0x), .ui(0), ._ }, .{ ._, ._q, .mov, .dst0q, .src0x, ._, ._ }, - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null, null, null }, @@ -70174,7 +70173,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .src0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .src0q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, .{ ._, ._, .mov, .tmp1q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, @@ -70222,7 +70221,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ }, .{ ._, .v_b, .gf2p8affineq, .tmp1x, .src0x, .lea(.dst0x), .ui(0) }, .{ ._, .v_q, .mov, .dst0q, .tmp1x, ._, ._ }, - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, .{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -70250,7 +70249,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ }, .{ ._, ._b, .gf2p8affineq, .src0x, .lea(.dst0x), .ui(0), ._ }, .{ ._, ._q, .mov, .dst0q, .src0x, ._, ._ }, - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, .{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -70275,7 +70274,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .src0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .src0q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, .{ ._, ._, .mov, .tmp1q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ 
._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, @@ -70378,7 +70377,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .dst0p, .mem(.tmp0), ._, ._ }, .{ ._, ._b, .gf2p8affineq, .src0x, .lea(.dst0x), .ui(0), ._ }, .{ ._, ._q, .mov, .dst0q, .src0x, ._, ._ }, - .{ ._, ._, .bswap, .dst0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .dst0q, ._, ._, ._ }, .{ ._, ._r, .sh, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, } }, }, .{ @@ -70403,7 +70402,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .bswap, .src0q, ._, ._, ._ }, + .{ ._, .b_, .swap, .src0q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, .{ ._, ._, .mov, .tmp1q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, @@ -70646,10 +70645,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .vector_32_u8, .kind = .forward_bits_mem }, - .{ .type = .vector_32_u8, .kind = .{ .pshufb_bswap_mem = .{ .repeat = 2, .size = .xword } } }, + .{ .type = .vector_16_u8, .kind = .{ .pshufb_bswap_mem = .{ .size = .xword } } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -70661,9 +70660,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp2p, .mem(.tmp0), ._, ._ }, .{ ._, .v_b, .gf2p8affineq, .dst0y, .src0y, .lea(.tmp2y), .ui(0) }, + .{ ._, .v_f128, .extract, .tmp3x, .dst0y, .ui(1), ._ }, .{ ._, ._, .lea, .tmp2p, .mem(.tmp1), ._, ._ }, - .{ ._, .vp_b, .shuf, .dst0y, .dst0y, .lea(.tmp2y), ._ }, - .{ ._, .v_pd, .perm, .dst0y, .dst0y, .ui(0b01_00_11_10), ._ }, + .{ ._, .v_dqa, .mov, .tmp4x, 
.lea(.tmp2x), ._, ._ }, + .{ ._, .vp_b, .shuf, .dst0x, .dst0x, .tmp4x, ._ }, + .{ ._, .vp_b, .shuf, .tmp3x, .tmp3x, .tmp4x, ._ }, + .{ ._, .v_f128, .insert, .dst0y, .tmp3y, .dst0x, .ui(1) }, } }, }, .{ .required_features = .{ .avx2, .gfni, null, null }, @@ -70701,42 +70703,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .avx, .gfni, null, null }, - .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .yword, .is = 256 } }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mem, .none, .none } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .vector_32_u8, .kind = .forward_bits_mem }, - .{ .type = .vector_32_u8, .kind = .{ .pshufb_bswap_mem = .{ .repeat = 2, .size = .xword } } }, - .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, - .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, - .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp4y, .lea(.tmp0y), ._, ._ }, - .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp5y, .lea(.tmp0y), ._, ._ }, - .{ ._, ._, .mov, .tmp0d, .sia(-32, .dst0, .add_size), ._, ._ }, - .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, - .{ .@"0:", .v_pd, .perm, .tmp6y, .lea(.tmp1y), .ui(0b01_00_11_10), ._ }, - .{ ._, .v_b, .gf2p8affineq, .tmp6y, .tmp6y, .tmp4y, .ui(0) }, - .{ ._, .vp_b, .shuf, .tmp6y, .tmp6y, .tmp5y, ._ }, - .{ ._, .v_dqu, .mov, .memi(.dst0y, .tmp0), .tmp6y, ._, ._ }, - .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 32), ._, ._ }, - .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, - .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, - } }, }, 
.{ .required_features = .{ .avx, .gfni, null, null }, .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = 128 } }, .any, .any }, @@ -70819,7 +70785,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ }, .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, .{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ }, - .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, + .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -70870,7 +70836,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ }, .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, .{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ }, - .{ ._, ._, .bswap, .tmp5q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp5q, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, @@ -70926,7 +70892,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ }, .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, .{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ }, - .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, + .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -70981,7 +70947,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ }, .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, .{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ }, - .{ ._, ._, .bswap, .tmp5q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp5q, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, 
.lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, @@ -71034,7 +71000,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp6q, .ui(1), ._, ._ }, .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, .{ ._, ._, .@"or", .tmp5q, .tmp6q, ._, ._ }, - .{ ._, ._, .movbe, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, + .{ ._, ._be, .mov, .memi(.dst0q, .tmp0), .tmp5q, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -71068,7 +71034,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, .{ .@"0:", ._, .mov, .tmp5q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp5q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp5q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp6q, .tmp5q, ._, ._ }, .{ ._, ._, .@"and", .tmp6q, .tmp2q, ._, ._ }, .{ ._, ._r, .sh, .tmp5q, .ui(4), ._, ._ }, @@ -71123,7 +71089,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71182,7 +71148,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, 
.repeat), ._, ._ }, .{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71238,7 +71204,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71295,7 +71261,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, .{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71352,7 +71318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, 
.{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71410,7 +71376,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, .{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71465,7 +71431,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3q, .uia(0b00001111000011110000111100001111, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, - .{ .@"0:", ._, .movbe, .tmp6q, .lea(.tmp1q), ._, ._ }, + .{ .@"0:", ._be, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -71521,7 +71487,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp4q, .uia(0b00110011001100110011001100110011, .none, .repeat), ._, ._ }, .{ ._, ._, .mov, .tmp5q, .uia(0b01010101010101010101010101010101, .none, .repeat), ._, ._ }, .{ .@"0:", ._, .mov, .tmp6q, .lea(.tmp1q), ._, ._ }, - .{ ._, ._, .bswap, .tmp6q, ._, ._, ._ }, + .{ ._, .b_, .swap, .tmp6q, ._, ._, ._ }, .{ ._, ._, .mov, .tmp7q, .tmp6q, ._, ._ }, .{ ._, ._, .@"and", .tmp7q, .tmp3q, ._, ._ }, .{ ._, ._r, .sh, .tmp6q, .ui(4), ._, ._ }, @@ -98883,7 +98849,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, 
.mem(.tmp1), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2x, .lea(.tmp0x), ._, ._ }, .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, .{ .@"0:", .v_ps, .cvtph2, .tmp3x, .memsia(.src0q, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .v_, .cvttps2dq, .tmp3x, .tmp3x, ._, ._ }, @@ -118968,26 +118934,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, } }, } ++ [_]Select.Case{ .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .byte }, .any }, .src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any }, @@ -119627,7 +119573,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -119669,7 +119615,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each 
= .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -119717,6 +119663,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -119755,6 +119702,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -119793,6 +119741,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -119838,6 +119787,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -119897,6 +119847,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -119948,6 +119899,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, 
.unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -119999,6 +119951,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -120025,7 +119978,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .slow_incdec, null, null, null }, .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -120053,7 +120006,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -120079,26 +120032,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ 
.int = .word }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -120573,7 +120506,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -120613,7 +120546,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -120659,6 +120592,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -120695,6 +120629,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -120731,6 +120666,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, 
.unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -120773,6 +120709,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -120830,6 +120767,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -120879,6 +120817,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -120928,6 +120867,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -120950,7 +120890,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -120976,26 +120916,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { 
.{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -121316,7 +121236,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -121354,7 +121274,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -121398,6 +121318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, 
-32), ._, ._ }, @@ -121432,6 +121353,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -121466,6 +121388,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -121505,6 +121428,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -121560,6 +121484,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -121607,6 +121532,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -121654,6 +121580,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, 
._ }, @@ -121673,7 +121600,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -121699,27 +121626,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .qword }, .any }, @@ -121898,7 +121804,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -121934,7 +121840,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - 
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -121976,6 +121882,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -122008,6 +121915,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -122040,6 +121948,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -122077,6 +121986,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122130,6 +122040,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122175,6 +122086,7 @@ 
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122220,6 +122132,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -122238,7 +122151,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -122264,25 +122177,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx2, null, null, null }, 
.dst_constraints = .{ .{ .int = .xword }, .any }, @@ -122353,7 +122247,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -122408,6 +122302,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -122438,6 +122333,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -122450,7 +122346,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -122468,6 +122364,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, 
._ }, @@ -122503,6 +122400,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122554,6 +122452,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122597,6 +122496,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122640,6 +122540,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -122653,25 +122554,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ 
.to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx512f, null, null, null }, .dst_constraints = .{ .{ .int = .yword }, .any }, @@ -122679,7 +122561,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_, mir_tag, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -122687,7 +122569,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -122705,6 +122587,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -122715,7 +122598,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -122733,6 +122616,7 @@ fn genBody(cg: 
*CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -122768,6 +122652,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122817,6 +122702,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122858,6 +122744,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -122871,14 +122758,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .dst_constraints = .{ .any_int, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -122920,26 +122799,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .Min, .Max => unreachable, .Add => 
comptime &.{ .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .byte }, .any }, .src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any }, @@ -123513,7 +123372,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_b, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -123548,7 +123407,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -123588,6 +123447,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, 
.add_unaligned_size, -32), ._, ._ }, @@ -123626,6 +123486,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -123662,6 +123523,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -123700,6 +123562,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -123744,6 +123607,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -123784,6 +123648,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -123824,6 +123689,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, 
.dst0x, .lea(.tmp0x), ._, ._ }, @@ -123846,7 +123712,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .slow_incdec, null, null, null }, .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -123874,7 +123740,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -123901,25 +123767,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null, null, null }, + .required_features = .{ .avx, .fast_hops, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .word } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, + .dst_temps = .{ .{ .mut_rc = .{ .ref 
= .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_w, .add, .dst0x, .src0x, .src0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -123945,6 +123802,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp0x, .src0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -123971,6 +123839,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_w, .add, .dst0x, .src0x, .src0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -123998,6 +123878,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, 
null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124026,6 +123918,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_w, .add, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .vp_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .tmp0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124053,6 +123971,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .dst0x, .src0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .tmp0x, .dst0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ 
.to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .ph_w, .add, .dst0x, .src0x, ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124080,6 +124024,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .srl, .dst0x, .ui(16), ._, ._ }, .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_w, .add, .dst0x, .src0x, .src0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124109,6 +124066,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .ph_w, .add, 
.dst0x, .src0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124139,6 +124109,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124170,6 +124168,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = 
.general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124202,6 +124228,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ }, .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124233,6 +124274,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = 
&.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124264,6 +124320,36 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124297,6 +124383,36 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, 
.tmp2x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124350,7 +124466,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_w, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -124383,7 +124499,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, 
@@ -124400,6 +124516,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124421,6 +124571,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -124436,6 +124587,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .vp_w, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124457,6 +124640,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -124470,6 +124654,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + 
.dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .p_w, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124491,6 +124707,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -124526,6 +124743,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -124547,6 +124765,42 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + 
.required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ .@"0:", .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124568,6 +124822,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -124585,6 +124840,42 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + 
.required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .vp_w, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_w, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124606,6 +124897,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -124623,6 +124915,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + 
.required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", .p_w, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_w, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -124644,6 +124970,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -124662,7 +124989,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, .patterns = &.{ .{ .src = .{ 
.to_mem, .none, .none } }, }, @@ -124689,25 +125016,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null, null, null }, + .required_features = .{ .avx, .fast_hops, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .dword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_d, .add, .dst0x, .src0x, .src0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124733,6 +125051,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .src0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .ph_d, .add, .dst0x, .src0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, 
null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124758,6 +125087,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_d, .add, .dst0x, .src0x, .src0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124785,6 +125126,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .ph_d, .add, .dst0x, .src0x, ._, ._ }, + .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124812,6 +125165,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + 
.src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vph_d, .add, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .vp_d, .shuf, .dst0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .tmp0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124839,6 +125218,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .tmp0x, .dst0x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .ph_d, .add, .dst0x, .src0x, ._, ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124866,6 +125271,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ }, .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features 
= .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124895,6 +125314,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124924,6 +125357,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, } }, + }, .{ + .required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ 
.src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -124955,6 +125417,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, 
.tmp2x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -125006,7 +125497,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_d, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -125037,7 +125529,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -125052,6 +125545,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, 
.dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -125073,6 +125599,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -125086,6 +125613,37 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .vp_d, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + 
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -125107,6 +125665,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -125118,6 +125677,37 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .p_d, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any 
}, @@ -125139,6 +125729,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -125171,6 +125762,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -125190,6 +125782,41 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .avx2, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ .@"0:", .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ 
}, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -125211,6 +125838,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -125226,6 +125854,41 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .vp_d, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, 
._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vph_d, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -125262,6 +125925,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, } }, + }, .{ + .required_features = .{ .ssse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", .p_d, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .ph_d, .add, .dst0x, .dst0x, ._, ._ }, + } }, }, .{ .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -125283,6 +125979,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, 
.lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -125298,7 +125995,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -125324,27 +126021,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .qword }, .any }, @@ -125523,7 +126199,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_q, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -125552,7 +126228,7 @@ fn genBody(cg: 
*CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, @@ -125586,6 +126262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -125618,6 +126295,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -125648,6 +126326,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -125678,6 +126357,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -125716,6 +126396,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ 
}, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -125750,6 +126431,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -125784,6 +126466,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, @@ -125798,7 +126481,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -125824,52 +126507,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ 
.once = &.{} }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .any_int, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -125910,26 +126547,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, } }, .Mul => comptime &.{ .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = 
.byte }, .any }, .src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any }, @@ -126528,7 +127145,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .v_dqa, .mov, .tmp0y, .memd(.src0y, 32), ._, ._ }, @@ -126566,7 +127183,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -126610,6 +127227,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -126649,6 +127267,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -126686,6 +127305,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -126725,6 +127345,7 @@ fn genBody(cg: *CodeGen, 
body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -126765,6 +127386,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -126815,6 +127437,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ }, @@ -126857,6 +127480,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .tmp2y, .lea(.tmp0y), ._, ._ }, @@ -126903,6 +127527,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, @@ -126946,6 +127571,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, @@ -126972,7 +127598,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ 
.required_features = .{ .slow_incdec, null, null, null }, .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -127000,7 +127626,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .byte }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -127026,26 +127652,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .word }, .any }, @@ -127489,7 +128095,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ 
.{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_w, .mull, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -127522,7 +128128,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -127562,6 +128168,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -127598,6 +128205,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -127632,6 +128240,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -127667,6 +128276,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -127711,6 +128321,7 @@ fn genBody(cg: *CodeGen, 
body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ }, @@ -127750,6 +128361,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .tmp2y, .lea(.tmp0y), ._, ._ }, @@ -127791,6 +128403,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, @@ -127811,7 +128424,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .word }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -127837,26 +128450,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 
}, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -128163,7 +128756,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, .{ ._, .vp_d, .mull, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, @@ -128194,7 +128787,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -128232,6 +128825,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, @@ -128266,6 +128860,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -128298,6 +128893,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once 
= &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -128330,6 +128926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, @@ -128365,6 +128962,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, @@ -128407,6 +129005,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ }, @@ -128444,6 +129043,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .v_ps, .mova, .tmp2y, .lea(.tmp0y), ._, ._ }, @@ -128483,6 +129083,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, @@ -128519,6 +129120,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ 
._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, @@ -128539,7 +129141,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .dst_constraints = .{ .{ .int = .dword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -128565,31 +129167,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .to_mut_gpr, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, - .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -128615,52 +129196,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, .{ 
._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .xword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, - .{ .src = .{ .mut_gpr, .none, .none } }, - .{ .src = .{ .to_mut_sse, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .{ .int = .yword }, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, - }, .{ - .dst_constraints = .{ .any_int, .any }, - .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, - .patterns = &.{ - .{ .src = .{ .to_mut_mem, .none, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .each = .{ .once = &.{} }, }, .{ .required_features = .{ .@"64bit", .bmi2, .adx, null }, .dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -128937,6 +129472,2289 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } try res[0].finish(inst, &.{reduce.operand}, &ops, cg); }, + .reduce_optimized => |air_tag| if (use_old) try cg.airReduce(inst) 
else fallback: { + const reduce = air_datas[@intFromEnum(inst)].reduce; + switch (reduce.operation) { + .And, .Or, .Xor => unreachable, + .Min, .Max => break :fallback try cg.airReduce(inst), + .Add => {}, + .Mul => break :fallback try cg.airReduce(inst), + } + var ops = try cg.tempsFromOperands(inst, .{reduce.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (reduce.operation) { + .And, .Or, .Xor => unreachable, + .Min, .Max => unreachable, + .Add => comptime &.{ .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .dword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .dword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, 
null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .vh_ps, .add, .tmp0x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .shuf, .dst0x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ss, .add, .dst0x, .tmp0x, .dst0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = 
.qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + 
.unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float 
= .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ 
.exclusive_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), 
._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0y, .tmp0x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + 
.unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0y, .tmp0x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp0y, ._ }, + .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, .f16c, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .zword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .memd(.src0y, 32), ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .mem(.src0y), ._ }, + .{ ._, .v_f128, .extract, .tmp3x, .tmp2y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ }, 
+ .{ ._, .v_ps, .add, .tmp2y, .tmp2y, .tmp3y, ._ }, + .{ ._, .v_f128, .extract, .tmp3x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp3y, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, .f16c, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .zword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .mova, .tmp1y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .tmp1y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .tmp1y, .tmp1x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .add, .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp1y, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, 
.dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp1y, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, 
.kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp1y, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, .f16c, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .zword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ 
._, ._, .mov, .tmp0d, .sia(-80, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ ._, .v_f128, .extract, .tmp3x, .tmp2y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ }, + .{ ._, .v_ps, .add, .tmp2y, .tmp2y, .tmp3y, ._ }, + .{ ._, .v_f128, .extract, .tmp3x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp3y, .tmp3x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp3y, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = 
.{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, .dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .cvtph2, .dst0y, 
.dst0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .tmp2x, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addhf3" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, .vp_, .xor, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .vp_w, .insr, .dst0x, .dst0x, .memad(.src0w, .add_unaligned_size, -2), .ui(0) }, + .{ .@"0:", .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ }, + .{ ._, .vp_w, .insr, .tmp1x, .tmp1x, .memi(.src0w, .tmp0), .ui(0) }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ 
.sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addhf3" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, .p_, .xor, .dst0x, .dst0x, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .p_w, .insr, .dst0x, .memad(.src0w, .add_unaligned_size, -2), .ui(0), ._ }, + .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_w, .insr, .tmp1x, .memi(.src0w, .tmp0), .ui(0), ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .ax } }, + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addhf3" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._ps, .xor, .dst0x, 
.dst0x, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .movzx, .tmp1d, .memad(.src0w, .add_unaligned_size, -2), ._, ._ }, + .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, + .{ ._, ._ss, .mov, .dst0x, .mem(.tmp2d), ._, ._ }, + .{ .@"0:", ._ps, .xor, .tmp3x, .tmp3x, ._, ._ }, + .{ ._, ._ss, .mov, .tmp3x, .memi(.src0d, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_ps, .add, .dst0x, .src0x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .shuf, .tmp0x, .src0x, .src0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + 
.each = .{ .once = &.{ + .{ ._, .h_ps, .add, .dst0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._ps, .mova, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._ps, .shuf, .tmp0x, .tmp0x, .ui(0b01_01_01_01), ._ }, + .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_ps, .add, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .v_ps, .movhl, .dst0x, .src0x, .src0x, ._ }, + .{ ._, .v_ss, .add, .dst0x, .tmp0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + 
}, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .movhl, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .v_ps, .add, .tmp0x, .src0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .dst0x, .src0x, .src0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .tmp0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ }, + .{ ._, .h_ps, .add, .dst0x, .src0x, ._, ._ }, + .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._ss, .add, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._ps, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float 
= .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_ps, .add, .dst0x, .src0x, .src0x, ._ }, + .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .movhl, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .src0x, .src0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .h_ps, .add, .dst0x, .src0x, ._, ._ }, + .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = 
.{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._ps, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._ps, .mova, .tmp0x, .dst0x, ._, ._ }, + .{ ._, ._ps, .shuf, .tmp0x, .tmp0x, .ui(0b01_01_01_01), ._ }, + .{ ._, ._ss, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = 
.vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, .src0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .src0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + 
.unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp1x, .src0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .src0x, .tmp1x, ._ }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .zword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, + .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ 
.exact_scalar_float = .{ .of = .zword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .mova, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp0y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp0y, ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .movhl, .tmp0x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .shuf, .tmp0x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .v_ps, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vh_ps, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, 
.tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .v_ps, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, 
.tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._ps, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._ps, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp1x, .dst0x, ._, ._ }, + .{ ._, ._ps, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .mova, .tmp1x, .dst0x, ._, ._ }, + .{ ._, ._ps, .shuf, .tmp1x, .tmp1x, .ui(0b01_01_01_01), ._ }, + .{ ._, ._ss, .add, .dst0x, .tmp1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .zword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = 
.general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ .@"0:", .v_ps, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_ps, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, 
.unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .v_ps, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vh_ps, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_ps, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .v_ps, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_ps, .add, .dst0x, 
.dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, + .{ ._, .v_ss, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._ps, .mova, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, ._ps, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ }, + .{ ._, .h_ps, .add, .dst0x, .dst0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ 
._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._ps, .mova, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, ._ps, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", ._ps, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp2x, .dst0x, ._, ._ }, + .{ ._, ._ps, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .dst0x, ._, ._ }, + .{ ._, ._ps, .shuf, .tmp2x, .tmp2x, .ui(0b01_01_01_01), ._ }, + .{ ._, ._ss, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_pd, .add, .dst0x, .src0x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_ps, .movhl, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .v_sd, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of 
= .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .h_pd, .add, .dst0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._ps, .xor, .tmp0x, .tmp0x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._sd, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_pd, .add, .tmp0x, .src0x, .src0x, ._ }, + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .v_sd, .add, .dst0x, .tmp0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = 
.vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, .tmp0x, .src0x, .tmp0x, ._ }, + .{ ._, .v_ps, .movhl, .dst0x, .src0x, .src0x, ._ }, + .{ ._, .v_sd, .add, .dst0x, .tmp0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vh_pd, .add, .dst0y, .src0y, .src0y, ._ }, + .{ ._, .v_f128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .v_ps, .movhl, .tmp0x, .dst0x, .dst0x, ._ }, + .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp0x, 
._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exclusive_scalar_float = .{ .of = .zword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .mova, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_pd, .mova, .tmp2y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, + .{ ._, .v_pd, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, + .{ ._, .v_pd, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_float = .{ .of = .zword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_pd, .mova, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_pd, .mova, .tmp0y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .v_pd, .add, .dst0y, .dst0y, .tmp0y, ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, 
.ui(1), ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .v_ps, .movhl, .tmp0x, .dst0x, .dst0x, ._ }, + .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_pd, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .v_pd, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vh_pd, .add, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, 
.add_unaligned_size), ._, ._ }, + .{ ._, .v_pd, .mova, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .v_pd, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._pd, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .h_pd, .add, .dst0x, .dst0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = 
.{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._pd, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp1x, .dst0x, ._, ._ }, + .{ ._, ._sd, .add, .dst0x, .tmp1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .zword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_pd, .mova, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ }, + .{ ._, .v_pd, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ .@"0:", .v_pd, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ }, + .{ ._, .v_pd, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_pd, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, 
.dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_sd, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .v_pd, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vh_pd, .add, .dst0x, .dst0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + 
}, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_pd, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .v_pd, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, + .{ ._, .v_pd, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse3, .fast_hops, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._pd, .mova, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, ._pd, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .h_pd, .add, .dst0x, .dst0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ 
.multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._pd, .mova, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, ._pd, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", ._pd, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .movhl, .tmp2x, .dst0x, ._, ._ }, + .{ ._, ._pd, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .x87, null, null, null }, + .dst_constraints = .{ .{ .float = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f64, .kind = .{ .reg = .st7 } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .f_, .ld, .memad(.src0q, .add_unaligned_size, -8), ._, ._, ._ }, + .{ .@"0:", .f_, .add, .memi(.src0q, .tmp0), ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + 
.{ ._, .f_p, .st, .dst0q, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .x87, null, null, null }, + .dst_constraints = .{ .{ .float = .tbyte }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .tbyte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .f_, .ld, .memad(.src0t, .add_unaligned_size, -16), ._, ._, ._ }, + .{ .@"0:", .f_, .ld, .memi(.src0t, .tmp0), ._, ._, ._ }, + .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addtf3" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", 
.v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addtf3" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__addtf3" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .xmm0 }, .unused }, + .clobbers = .{ .eflags = true, 
.caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._ps, .mova, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + } }, + .Mul => unreachable, + }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(reduce.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + try res[0].finish(inst, &.{reduce.operand}, &ops, cg); + }, .splat => |air_tag| if (use_old) try cg.airSplat(inst) else fallback: { const ty_op = air_datas[@intFromEnum(inst)].ty_op; if (cg.typeOf(ty_op.operand).toIntern() == .bool_type) break :fallback try cg.airSplat(inst); @@ -137624,7 +140442,7 @@ fn genByteSwap( return src_mcv; }, 3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv); + try self.genUnOpMir(.{ .b_, .swap }, src_ty, src_mcv); return src_mcv; }, 9...16 => { @@ -137635,7 +140453,7 @@ fn genByteSwap( break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } }; }, .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64()); + for (src_regs) |src_reg| try self.asmRegister(.{ .b_, .swap }, src_reg.to64()); return .{ .register_pair = .{ src_regs[1], src_regs[0] } }; } else src_mcv, else => src_mcv, @@ -137649,18 +140467,18 @@ fn genByteSwap( for (dst_regs, 0..) 
|dst_reg, limb_index| { if (mat_src_mcv.isBase()) { try self.asmRegisterMemory( - .{ ._, if (has_movbe) .movbe else .mov }, + .{ if (has_movbe) ._be else ._, .mov }, dst_reg.to64(), try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), ); - if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); + if (!has_movbe) try self.asmRegister(.{ .b_, .swap }, dst_reg.to64()); } else { try self.asmRegisterRegister( .{ ._, .mov }, dst_reg.to64(), mat_src_mcv.register_pair[limb_index].to64(), ); - try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); + try self.asmRegister(.{ .b_, .swap }, dst_reg.to64()); } } return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } }; @@ -137679,7 +140497,7 @@ fn genByteSwap( const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); try self.asmRegisterMemory( - .{ ._, if (has_movbe) .movbe else .mov }, + .{ if (has_movbe) ._be else ._, .mov }, temp_regs[2].to64(), .{ .base = .{ .frame = dst_mcv.load_frame.index }, @@ -137692,7 +140510,7 @@ fn genByteSwap( }, ); try self.asmRegisterMemory( - .{ ._, if (has_movbe) .movbe else .mov }, + .{ if (has_movbe) ._be else ._, .mov }, temp_regs[3].to64(), .{ .base = .{ .frame = dst_mcv.load_frame.index }, @@ -137705,8 +140523,8 @@ fn genByteSwap( }, ); if (!has_movbe) { - try self.asmRegister(.{ ._, .bswap }, temp_regs[2].to64()); - try self.asmRegister(.{ ._, .bswap }, temp_regs[3].to64()); + try self.asmRegister(.{ .b_, .swap }, temp_regs[2].to64()); + try self.asmRegister(.{ .b_, .swap }, temp_regs[3].to64()); } try self.asmMemoryRegister(.{ ._, .mov }, .{ .base = .{ .frame = dst_mcv.load_frame.index }, @@ -137751,9 +140569,9 @@ fn genByteSwap( switch (abi_size) { else => unreachable, 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), - 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv), + 3...8 => try self.genUnOpMir(.{ .b_, .swap }, src_ty, dst_mcv), } - } else try self.genBinOpMir(.{ ._, 
.movbe }, src_ty, dst_mcv, src_mcv); + } else try self.genBinOpMir(.{ ._be, .mov }, src_ty, dst_mcv, src_mcv); return dst_mcv; } @@ -145626,16 +148444,16 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .pseudo } else for (std.enums.values(Mir.Inst.Fixes)) |fixes| { const fixes_name = @tagName(fixes); - const space_i = std.mem.indexOfScalar(u8, fixes_name, ' '); - const fixes_prefix = if (space_i) |i| - std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).? + const space_index = std.mem.indexOfScalar(u8, fixes_name, ' '); + const fixes_prefix = if (space_index) |index| + std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..index]).? else .none; if (fixes_prefix != prefix) continue; - const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..]; - const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; - const mnem_prefix = pattern[0..wildcard_i]; - const mnem_suffix = pattern[wildcard_i + "_".len ..]; + const pattern = fixes_name[if (space_index) |index| index + " ".len else 0..]; + const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?; + const mnem_prefix = pattern[0..wildcard_index]; + const mnem_suffix = pattern[wildcard_index + "_".len ..]; if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue; if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue; break .{ fixes, std.meta.stringToEnum( @@ -157348,7 +160166,6 @@ const Select = struct { bool, bool_vec: Memory.Size, exact_bool_vec: u16, - vec_len: u32, ptr_any_bool_vec, ptr_bool_vec: Memory.Size, remainder_bool_vec: OfIsSizes, @@ -157390,8 +160207,11 @@ const Select = struct { float: Memory.Size, scalar_any_float: Memory.Size, scalar_float: OfIsSizes, + exclusive_scalar_float: OfIsSizes, + exact_scalar_float: OfIsSizes, multiple_scalar_any_float: Memory.Size, multiple_scalar_float: OfIsSizes, + unaligned_multiple_scalar_float: OfIsSizes, exact_int: u16, exact_signed_int: u16, exact_unsigned_int: u16, @@ -157435,7 +160255,6 @@ const 
Select = struct { size.bitSize(cg.target) >= ty.vectorLen(zcu), .exact_bool_vec => |size| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and size == ty.vectorLen(zcu), - .vec_len => |len| ty.isVector(zcu) and ty.vectorLen(zcu) == len, .ptr_any_bool_vec => switch (zcu.intern_pool.indexToKey(ty.childType(zcu).toIntern())) { .vector_type => |vector_type| vector_type.child == .bool_type, else => false, @@ -157559,10 +160378,16 @@ const Select = struct { cg.floatBits(ty.scalarType(zcu)) != null, .scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false, + .exclusive_scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) > cg.unalignedSize(ty) and + if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false, + .exact_scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) == cg.unalignedSize(ty) and + if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false, .multiple_scalar_any_float => |size| ty.abiSize(zcu) % @divExact(size.bitSize(cg.target), 8) == 0 and cg.floatBits(ty.scalarType(zcu)) != null, .multiple_scalar_float => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false, + .unaligned_multiple_scalar_float => |of_is| cg.unalignedSize(ty) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and + if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false, .exact_int => |bit_size| if (cg.intInfo(ty)) |int_info| bit_size == int_info.bits else false, .exact_signed_int => |bit_size| if (cg.intInfo(ty)) |int_info| switch (int_info.signedness) { .signed => bit_size == int_info.bits, diff --git a/src/arch/x86_64/Encoding.zig 
b/src/arch/x86_64/Encoding.zig index a1dbedcaa1..32df7925c4 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -313,7 +313,7 @@ pub const Mnemonic = enum { @"or", out, outs, outsb, outsd, outsw, pause, pop, popf, popfd, popfq, push, pushfq, rcl, rcr, - rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp, + rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdsspd, rdsspq, rdtsc, rdtscp, ret, rol, ror, rsm, sahf, sal, sar, sbb, scas, scasb, scasd, scasq, scasw, @@ -436,6 +436,7 @@ pub const Mnemonic = enum { pblendvb, pblendw, pcmpeqq, pextrb, pextrd, pextrq, + phminposuw, pinsrb, pinsrd, pinsrq, pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq, @@ -494,19 +495,19 @@ pub const Mnemonic = enum { vpblendvb, vpblendw, vpclmulqdq, vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw, vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw, - vphaddw, vphaddsw, vphaddd, vphsubw, vphsubsw, vphsubd, vperm2f128, vpermilpd, vpermilps, vpextrb, vpextrd, vpextrq, vpextrw, + vphaddw, vphaddsw, vphaddd, vphminposuw, vphsubw, vphsubsw, vphsubd, vpinsrb, vpinsrd, vpinsrq, vpinsrw, + vpmaddubsw, vpmaddwd, vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, - vpmaddubsw, vpmovmskb, vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq, vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq, - vpmuldq, vpmulhrsw, vpmulhw, vpmulld, vpmullw, vpmuludq, + vpmuldq, vpmulhrsw, vpmulhuw, vpmulhw, vpmulld, vpmullw, vpmuludq, vpor, - vpshufb, vpshufd, vpshufhw, vpshuflw, + vpsadbw, vpshufb, vpshufd, vpshufhw, vpshuflw, vpsignb, vpsignd, vpsignw, vpslld, vpslldq, vpsllq, vpsllw, vpsrad, vpsraq, vpsraw, @@ -1029,7 +1030,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(5_800); + 
@setEvalBranchQuota(5_900); const ModrmExt = u3; const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, ModrmExt, Mode, Feature }; const encodings: []const Entry = @import("encodings.zon"); @@ -1038,17 +1039,17 @@ const mnemonic_to_encodings_map = init: { var mnemonic_map: [mnemonic_count][]Data = @splat(&.{}); for (encodings) |entry| mnemonic_map[@intFromEnum(entry[0])].len += 1; var data_storage: [encodings.len]Data = undefined; - var storage_i: usize = 0; + var storage_index: usize = 0; for (&mnemonic_map) |*value| { - value.ptr = data_storage[storage_i..].ptr; - storage_i += value.len; + value.ptr = data_storage[storage_index..].ptr; + storage_index += value.len; } - var mnemonic_i: [mnemonic_count]usize = @splat(0); + var mnemonic_index: [mnemonic_count]usize = @splat(0); const ops_len = @typeInfo(@FieldType(Data, "ops")).array.len; const opc_len = @typeInfo(@FieldType(Data, "opc")).array.len; for (encodings) |entry| { - const i = &mnemonic_i[@intFromEnum(entry[0])]; - mnemonic_map[@intFromEnum(entry[0])][i.*] = .{ + const index = &mnemonic_index[@intFromEnum(entry[0])]; + mnemonic_map[@intFromEnum(entry[0])][index.*] = .{ .op_en = entry[1], .ops = (entry[2] ++ .{.none} ** (ops_len - entry[2].len)).*, .opc_len = entry[3].len, @@ -1057,14 +1058,14 @@ const mnemonic_to_encodings_map = init: { .mode = entry[5], .feature = entry[6], }; - i.* += 1; + index.* += 1; } const final_storage = data_storage; var final_map: [mnemonic_count][]const Data = @splat(&.{}); - storage_i = 0; + storage_index = 0; for (&final_map, mnemonic_map) |*final_value, value| { - final_value.* = final_storage[storage_i..][0..value.len]; - storage_i += value.len; + final_value.* = final_storage[storage_index..][0..value.len]; + storage_index += value.len; } break :init final_map; }; diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 9e9793c7b0..7a69d58abc 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -567,7 +567,7 @@ fn emit(lower: 
*Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) } fn generic(lower: *Lower, inst: Mir.Inst) Error!void { - @setEvalBranchQuota(2_600); + @setEvalBranchQuota(2_800); const fixes = switch (inst.ops) { .none => inst.data.none.fixes, .inst => inst.data.inst.fixes, @@ -601,9 +601,9 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { var buf: [max_len]u8 = undefined; const fixes_name = @tagName(fixes); - const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + 1 else 0..]; - const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; - const parts = .{ pattern[0..wildcard_i], @tagName(inst.tag), pattern[wildcard_i + 1 ..] }; + const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + " ".len else 0..]; + const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?; + const parts = .{ pattern[0..wildcard_index], @tagName(inst.tag), pattern[wildcard_index + "_".len ..] }; const err_msg = "unsupported mnemonic: "; const mnemonic = std.fmt.bufPrint(&buf, "{s}{s}{s}", parts) catch return lower.fail(err_msg ++ "'{s}{s}{s}'", parts); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 8ca610fdb4..bd846e55fc 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -34,6 +34,16 @@ pub const Inst = struct { /// ___ 4 _4, + /// ___ Demote + _demote, + /// ___ Flush + _flush, + /// ___ Flush Optimized + _flushopt, + /// ___ Instructions With T0 Hint + _it0, + /// ___ Instructions With T1 Hint + _it1, /// ___ With NTA Hint _nta, /// System Call ___ @@ -44,6 +54,8 @@ pub const Inst = struct { _t1, /// ___ With T2 Hint _t2, + /// ___ Write Back + _wb, /// ___ With Intent to Write and T1 Hint _wt1, @@ -53,6 +65,8 @@ pub const Inst = struct { _csspq, /// ___ FS Segment Base _fsbase, + /// ___ GS + _gs, /// ___ GS Segment Base _gsbase, /// ___ Model Specific Register @@ -67,8 +81,14 @@ pub const Inst = struct { _pmc, /// ___ Random Number _rand, + /// ___ r Busy Flag in a 
Supervisor Shadow Stack token + _rssbsy, /// ___ Random Seed _seed, + /// ___ Shadow Stack Doubleword + _ssd, + /// ___ Shadow Stack Quadword + _ssq, /// ___ Shadow Stack Pointer Doubleword _sspd, /// ___ Shadow Stack Pointer Quadword @@ -77,9 +97,15 @@ pub const Inst = struct { _tsc, /// ___ Time-Stamp Counter And Processor ID _tscp, + /// ___ User Shadow Stack Doubleword + _ussd, + /// ___ User Shadow Stack Quadword + _ussq, /// VEX-Encoded ___ MXCSR v_mxcsr, + /// Byte ___ + b_, /// Interrupt ___ /// Integer ___ i_, @@ -118,6 +144,8 @@ pub const Inst = struct { _ld, /// ___ Left Without Affecting Flags _lx, + /// ___ Mask + _msk, /// ___ Right /// ___ For Reading /// ___ Register @@ -139,6 +167,7 @@ pub const Inst = struct { /// ___ Below _b, /// ___ Below Or Equal + /// ___ Big Endian _be, /// ___ Carry /// ___ Carry Flag @@ -212,8 +241,12 @@ pub const Inst = struct { _w, /// ___ Doubleword //_d, + /// ___ Double Quadword to Quadword + _dq2q, /// ___ QuadWord _q, + /// ___ Quadword to Double Quadword + _q2dq, /// ___ String //_s, @@ -369,6 +402,8 @@ pub const Inst = struct { fn_sw, /// Float Extended ___ fx_, + /// Float Extended ___ 64 + fx_64, /// ___ in 32-bit and Compatibility Mode _32, @@ -390,6 +425,14 @@ pub const Inst = struct { p_dq, /// Packed ___ Unsigned Doubleword to Quadword p_udq, + /// Packed Carry-Less ___ Quadword to Double Quadword + pcl_qdq, + /// Packed Half ___ Doubleword + ph_d, + /// Packed Half ___ Saturate Word + ph_sw, + /// Packed Half ___ Word + ph_w, /// ___ Aligned Packed Integer Values _dqa, /// ___ Unaligned Packed Integer Values @@ -403,6 +446,10 @@ pub const Inst = struct { //_sd, /// ___ Packed Double-Precision Values _pd, + /// Half ___ Packed Single-Precision Values + h_ps, + /// Half ___ Packed Double-Precision Values + h_pd, /// ___ Internal Caches //_d, @@ -430,7 +477,7 @@ pub const Inst = struct { v_w, /// VEX-Encoded ___ Doubleword v_d, - /// VEX-Encoded ___ QuadWord + /// VEX-Encoded ___ Quadword v_q, /// VEX-Encoded 
___ Aligned Packed Integer Values v_dqa, @@ -453,6 +500,14 @@ pub const Inst = struct { vp_dq, /// VEX-Encoded Packed ___ Unsigned Doubleword to Quadword vp_udq, + /// VEX-Encoded Packed Carry-Less ___ Quadword to Double Quadword + vpcl_qdq, + /// VEX-Encoded Packed Half ___ Doubleword + vph_d, + /// VEX-Encoded Packed Half ___ Saturate Word + vph_sw, + /// VEX-Encoded Packed Half ___ Word + vph_w, /// VEX-Encoded ___ Scalar Single-Precision Values v_ss, /// VEX-Encoded ___ Packed Single-Precision Values @@ -463,6 +518,10 @@ pub const Inst = struct { v_pd, /// VEX-Encoded ___ 128-Bits Of Floating-Point Data v_f128, + /// VEX-Encoded Half ___ Packed Single-Precision Values + vh_ps, + /// VEX-Encoded Half ___ Packed Double-Precision Values + vh_pd, /// ___ 128-bit key with key locker _128, @@ -510,6 +569,10 @@ pub const Inst = struct { /// Add scalar single-precision floating-point values /// Add packed double-precision floating-point values /// Add scalar double-precision floating-point values + /// Packed single-precision floating-point horizontal add + /// Packed double-precision floating-point horizontal add + /// Packed horizontal add + /// Packed horizontal add and saturate add, /// Logical and /// Bitwise logical and of packed single-precision floating-point values @@ -521,12 +584,15 @@ pub const Inst = struct { /// Bit scan reverse bs, /// Byte swap - bswap, + /// Swap GS base register + swap, /// Bit test /// Bit test and complement /// Bit test and reset /// Bit test and set bt, + /// Check array index against bounds + bound, /// Call /// Fast system call call, @@ -542,17 +608,12 @@ pub const Inst = struct { /// Clear interrupt flag /// Clear task-switched flag in CR0 /// Clear user interrupt flag - cl, /// Cache line demote - cldemote, /// Flush cache line - clflush, /// Flush cache line optimized - clflushopt, /// Clear busy flag in a supervisor shadow stack token - clrssbsy, /// Cache line write back - clwb, + cl, /// Complement carry flag cmc, /// 
Conditional move @@ -650,15 +711,16 @@ pub const Inst = struct { lzcnt, /// Move /// Move data from string to string + /// Move data after swapping bytes /// Move scalar single-precision floating-point value /// Move scalar double-precision floating-point value /// Move doubleword /// Move quadword /// Move aligned packed integer values /// Move unaligned packed integer values + /// Move quadword from XMM to MMX technology register + /// Move quadword from MMX technology to XMM register mov, - /// Move data after swapping bytes - movbe, /// Move with sign extension movsx, /// Move with zero extension @@ -671,6 +733,7 @@ pub const Inst = struct { /// Multiply scalar double-precision floating-point values /// Multiply packed unsigned doubleword integers /// Multiply packed doubleword integers + /// Carry-less multiplication quadword mul, /// Two's complement negation neg, @@ -737,6 +800,8 @@ pub const Inst = struct { sca, /// Send user interprocessor interrupt senduipi, + /// Serialize instruction execution + serialize, /// Set byte on condition set, /// Logical shift left @@ -758,6 +823,10 @@ pub const Inst = struct { /// Subtract scalar single-precision floating-point values /// Subtract packed double-precision floating-point values /// Subtract scalar double-precision floating-point values + /// Packed single-precision floating-point horizontal subtract + /// Packed double-precision floating-point horizontal subtract + /// Packed horizontal subtract + /// Packed horizontal subtract and saturate sub, /// Set carry flag /// Set direction flag @@ -772,8 +841,6 @@ pub const Inst = struct { st, /// Store string sto, - /// Swap GS base register - swapgs, /// Test condition /// Logical compare /// Packed bit test @@ -788,6 +855,8 @@ pub const Inst = struct { /// Write to model specific register /// Write to model specific register /// Write to model specific register + /// Write to shadow stack + /// Write to user shadow stack wr, /// Exchange and add xadd, @@ -904,6 
+973,10 @@ pub const Inst = struct { cmpgt, /// Empty MMX technology state emms, + /// Multiply and add packed signed and unsigned bytes + maddubs, + /// Multiply and add packed integers + maddw, /// Multiply packed signed integers and store low result mull, /// Multiply packed signed integers and store high result @@ -932,6 +1005,8 @@ pub const Inst = struct { unpcklwd, // SSE + /// Average packed integers + avg, /// Convert packed doubleword integers to packed single-precision floating-point values /// Convert packed doubleword integers to packed double-precision floating-point values cvtpi2, @@ -994,9 +1069,13 @@ pub const Inst = struct { /// Move unaligned packed single-precision floating-point values /// Move unaligned packed double-precision floating-point values movu, + /// Multiply packed unsigned integers and store high result + mulhu, /// Prefetch data into caches /// Prefetch data into caches with intent to write prefetch, + /// Compute sum of absolute differences + sadb, /// Packed interleave shuffle of quadruplets of single-precision floating-point values /// Packed interleave shuffle of pairs of double-precision floating-point values /// Shuffle packed doublewords @@ -1056,9 +1135,6 @@ pub const Inst = struct { /// Packed single-precision floating-point add/subtract /// Packed double-precision floating-point add/subtract addsub, - /// Packed single-precision floating-point horizontal add - /// Packed double-precision floating-point horizontal add - hadd, /// Replicate double floating-point values movddup, /// Replicate single floating-point values @@ -1069,6 +1145,10 @@ pub const Inst = struct { // SSSE3 /// Packed align right alignr, + /// Packed multiply high with round and scale + mulhrs, + /// Packed sign + sign, // SSE4.1 /// Pack with unsigned saturation @@ -1090,6 +1170,8 @@ pub const Inst = struct { /// Extract packed floating-point values /// Extract packed integer values extract, + /// Packed horizontal word minimum + hminposu, /// Insert 
scalar single-precision floating-point value /// Insert packed floating-point values insert, @@ -1111,10 +1193,6 @@ pub const Inst = struct { /// Accumulate CRC32 value crc32, - // PCLMUL - /// Carry-less multiplication quadword - clmulq, - // AES /// Perform one round of an AES decryption flow /// Perform ten rounds of AES decryption flow with key locker using 128-bit key @@ -1634,12 +1712,51 @@ pub const Inst = struct { reg_list: RegisterList, }; - // Make sure we don't accidentally make instructions bigger than expected. - // Note that in safety builds, Zig is allowed to insert a secret field for safety checks. comptime { if (!std.debug.runtime_safety) { + // Make sure we don't accidentally make instructions bigger than expected. + // Note that in safety builds, Zig is allowed to insert a secret field for safety checks. assert(@sizeOf(Data) == 8); } + const Mnemonic = @import("Encoding.zig").Mnemonic; + if (@typeInfo(Mnemonic).@"enum".fields.len != 977 or + @typeInfo(Fixes).@"enum".fields.len != 231 or + @typeInfo(Tag).@"enum".fields.len != 251) + { + const cond_src = (struct { + fn src() std.builtin.SourceLocation { + return @src(); + } + }).src(); + @setEvalBranchQuota(1_750_000); + for (@typeInfo(Mnemonic).@"enum".fields) |mnemonic| { + if (mnemonic.name[0] == '.') continue; + for (@typeInfo(Fixes).@"enum".fields) |fixes| { + const pattern = fixes.name[if (std.mem.indexOfScalar(u8, fixes.name, ' ')) |index| index + " ".len else 0..]; + const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?; + const mnem_prefix = pattern[0..wildcard_index]; + const mnem_suffix = pattern[wildcard_index + "_".len ..]; + if (!std.mem.startsWith(u8, mnemonic.name, mnem_prefix)) continue; + if (!std.mem.endsWith(u8, mnemonic.name, mnem_suffix)) continue; + if (@hasField( + Tag, + mnemonic.name[mnem_prefix.len .. 
mnemonic.name.len - mnem_suffix.len], + )) break; + } else @compileError("'" ++ mnemonic.name ++ "' is not encodable in Mir"); + } + @compileError(std.fmt.comptimePrint( + \\All mnemonics are encodable in Mir! You may now change the condition at {s}:{d} to: + \\if (@typeInfo(Mnemonic).@"enum".fields.len != {d} or + \\ @typeInfo(Fixes).@"enum".fields.len != {d} or + \\ @typeInfo(Tag).@"enum".fields.len != {d}) + , .{ + cond_src.file, + cond_src.line - 6, + @typeInfo(Mnemonic).@"enum".fields.len, + @typeInfo(Fixes).@"enum".fields.len, + @typeInfo(Tag).@"enum".fields.len, + })); + } } }; diff --git a/src/arch/x86_64/encodings.zon b/src/arch/x86_64/encodings.zon index 53c6bcffe1..388189afbf 100644 --- a/src/arch/x86_64/encodings.zon +++ b/src/arch/x86_64/encodings.zon @@ -684,8 +684,8 @@ .{ .rdseed, .m, .{ .r32 }, .{ 0x0f, 0xc7 }, 7, .none, .rdseed }, .{ .rdseed, .m, .{ .r64 }, .{ 0x0f, 0xc7 }, 7, .long, .rdseed }, - .{ .rdssd, .m, .{ .r32 }, .{ 0xf3, 0x0f, 0x1e }, 1, .none, .shstk }, - .{ .rdssq, .m, .{ .r64 }, .{ 0xf3, 0x0f, 0x1e }, 1, .long, .shstk }, + .{ .rdsspd, .m, .{ .r32 }, .{ 0xf3, 0x0f, 0x1e }, 1, .none, .shstk }, + .{ .rdsspq, .m, .{ .r64 }, .{ 0xf3, 0x0f, 0x1e }, 1, .long, .shstk }, .{ .rdtsc, .z, .{}, .{ 0x0f, 0x31 }, 0, .none, .none }, @@ -1524,6 +1524,8 @@ .{ .pinsrw, .rmi, .{ .xmm, .r32_m16, .imm8 }, .{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, + .{ .pmaddwd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf5 }, 0, .none, .sse2 }, + .{ .pmaxsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 }, .{ .pmaxub, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 }, @@ -1532,6 +1534,8 @@ .{ .pminub, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 }, + .{ .pmulhuw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe4 }, 0, .none, .sse2 }, + .{ .pmulhw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 }, .{ .pmullw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 }, @@ -1540,6 +1544,8 @@ 
.{ .por, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 }, + .{ .psadbw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf6 }, 0, .none, .sse2 }, + .{ .pshufd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .none, .sse2 }, .{ .pshufhw, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 }, @@ -1642,8 +1648,26 @@ .{ .palignr, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x0f }, 0, .none, .ssse3 }, + .{ .phaddw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x01 }, 0, .none, .ssse3 }, + .{ .phaddd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x02 }, 0, .none, .ssse3 }, + + .{ .phaddsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x03 }, 0, .none, .ssse3 }, + + .{ .phsubw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x05 }, 0, .none, .ssse3 }, + .{ .phsubd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x06 }, 0, .none, .ssse3 }, + + .{ .phsubsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x07 }, 0, .none, .ssse3 }, + + .{ .pmaddubsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x04 }, 0, .none, .ssse3 }, + + .{ .pmulhrsw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0b }, 0, .none, .ssse3 }, + .{ .pshufb, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .none, .ssse3 }, + .{ .psignb, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x08 }, 0, .none, .ssse3 }, + .{ .psignw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x09 }, 0, .none, .ssse3 }, + .{ .psignd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0a }, 0, .none, .ssse3 }, + // SSE4.1 .{ .blendpd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 }, @@ -1678,6 +1702,8 @@ .{ .pextrw, .mri, .{ .r32_m16, .xmm, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, + .{ .phminposuw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x41 }, 0, .none, .sse4_1 }, + .{ .pinsrb, .rmi, .{ .xmm, .r32_m8, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .none, .sse4_1 }, .{ .pinsrd, .rmi, .{ 
.xmm, .rm32, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 }, .{ .pinsrq, .rmi, .{ .xmm, .rm64, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 }, @@ -2129,12 +2155,28 @@ .{ .vpextrw, .rmi, .{ .r32, .xmm, .imm8 }, .{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_w0, .avx }, .{ .vpextrw, .mri, .{ .r32_m16, .xmm, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_w0, .avx }, + .{ .vphaddw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x01 }, 0, .vex_128_wig, .avx }, + .{ .vphaddd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x02 }, 0, .vex_128_wig, .avx }, + + .{ .vphaddsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x03 }, 0, .vex_128_wig, .avx }, + + .{ .vphminposuw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x41 }, 0, .vex_128_wig, .avx }, + + .{ .vphsubw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x05 }, 0, .vex_128_wig, .avx }, + .{ .vphsubd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x06 }, 0, .vex_128_wig, .avx }, + + .{ .vphsubsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x07 }, 0, .vex_128_wig, .avx }, + .{ .vpinsrb, .rvmi, .{ .xmm, .xmm, .r32_m8, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, .{ .vpinsrd, .rvmi, .{ .xmm, .xmm, .rm32, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, .{ .vpinsrq, .rvmi, .{ .xmm, .xmm, .rm64, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, .{ .vpinsrw, .rvmi, .{ .xmm, .xmm, .r32_m16, .imm8 }, .{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_w0, .avx }, + .{ .vpmaddubsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x04 }, 0, .vex_128_wig, .avx }, + + .{ .vpmaddwd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf5 }, 0, .vex_128_wig, .avx }, + .{ .vpmaxsb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx }, .{ .vpmaxsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx }, .{ .vpmaxsd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 
0x3d }, 0, .vex_128_wig, .avx }, @@ -2172,6 +2214,10 @@ .{ .vpmuldq, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vpmulhrsw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0b }, 0, .vex_128_wig, .avx }, + + .{ .vpmulhuw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe4 }, 0, .vex_128_wig, .avx }, + .{ .vpmulhw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx }, .{ .vpmulld, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx }, @@ -2182,6 +2228,8 @@ .{ .vpor, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx }, + .{ .vpsadbw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf6 }, 0, .vex_128_wig, .avx }, + .{ .vpshufb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx }, .{ .vpshufd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .vex_128_wig, .avx }, @@ -2190,6 +2238,10 @@ .{ .vpshuflw, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0xf2, 0x0f, 0x70 }, 0, .vex_128_wig, .avx }, + .{ .vpsignb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x08 }, 0, .vex_128_wig, .avx }, + .{ .vpsignw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x09 }, 0, .vex_128_wig, .avx }, + .{ .vpsignd, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x0a }, 0, .vex_128_wig, .avx }, + .{ .vpsllw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf1 }, 0, .vex_128_wig, .avx }, .{ .vpsllw, .vmi, .{ .xmm, .xmm, .imm8 }, .{ 0x66, 0x0f, 0x71 }, 6, .vex_128_wig, .avx }, .{ .vpslld, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf2 }, 0, .vex_128_wig, .avx }, @@ -2447,6 +2499,16 @@ .{ .vpcmpgtq, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_256_wig, .avx2 }, + .{ .vphaddw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x01 }, 0, .vex_256_wig, .avx2 }, + .{ .vphaddd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x02 }, 0, 
.vex_256_wig, .avx2 }, + + .{ .vphaddsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x03 }, 0, .vex_256_wig, .avx2 }, + + .{ .vphsubw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x05 }, 0, .vex_256_wig, .avx2 }, + .{ .vphsubd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x06 }, 0, .vex_256_wig, .avx2 }, + + .{ .vphsubsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x07 }, 0, .vex_256_wig, .avx2 }, + .{ .vperm2i128, .rvmi, .{ .ymm, .ymm, .ymm_m256, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x46 }, 0, .vex_256_w0, .avx2 }, .{ .vpermd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x36 }, 0, .vex_256_w0, .avx2 }, @@ -2457,6 +2519,10 @@ .{ .vpermq, .rmi, .{ .ymm, .ymm_m256, .imm8 }, .{ 0x66, 0x0f, 0x3a, 0x00 }, 0, .vex_256_w1, .avx2 }, + .{ .vpmaddubsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x04 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmaddwd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xf5 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaskmovd, .rvm, .{ .xmm, .xmm, .m128 }, .{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_128_w0, .avx2 }, .{ .vpmaskmovd, .rvm, .{ .ymm, .ymm, .m256 }, .{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_256_w0, .avx2 }, .{ .vpmaskmovq, .rvm, .{ .xmm, .xmm, .m128 }, .{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_128_w1, .avx2 }, @@ -2503,6 +2569,10 @@ .{ .vpmuldq, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmulhrsw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x0b }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmulhuw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xe4 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmulhw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 }, .{ .vpmulld, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 }, @@ -2513,6 +2583,8 @@ .{ .vpor, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 }, + .{ .vpsadbw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66,
0x0f, 0xf6 }, 0, .vex_256_wig, .avx2 }, + .{ .vpshufb, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 }, .{ .vpshufd, .rmi, .{ .ymm, .ymm_m256, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 }, @@ -2520,6 +2592,10 @@ .{ .vpshuflw, .rmi, .{ .ymm, .ymm_m256, .imm8 }, .{ 0xf2, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsignb, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x08 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsignw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x09 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsignd, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x0a }, 0, .vex_256_wig, .avx2 }, + .{ .vpsllw, .rvm, .{ .ymm, .ymm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf1 }, 0, .vex_256_wig, .avx2 }, .{ .vpsllw, .vmi, .{ .ymm, .ymm, .imm8 }, .{ 0x66, 0x0f, 0x71 }, 6, .vex_256_wig, .avx2 }, .{ .vpslld, .rvm, .{ .ymm, .ymm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf2 }, 0, .vex_256_wig, .avx2 }, diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index bab5613d3c..bbd376c784 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -290,14 +290,21 @@ test "vector cmp f128" { } test "vector cmp f80/c_longdouble" { - if (true) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .powerpc64le) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try testCmpVector(f80); try comptime testCmpVector(f80); try testCmpVector(c_longdouble); try comptime testCmpVector(c_longdouble); } + fn testCmpVector(comptime T: type) !void { + 
@setEvalBranchQuota(2_000); var edges = [_]T{ -math.inf(T), -math.floatMax(T), diff --git a/test/behavior/x86_64/build.zig b/test/behavior/x86_64/build.zig index 92ccede99e..8a580714ca 100644 --- a/test/behavior/x86_64/build.zig +++ b/test/behavior/x86_64/build.zig @@ -87,7 +87,7 @@ pub fn build(b: *std.Build) void { .{ .cpu_arch = .x86_64, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 }, - .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .gfni, .pclmul }), + .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .fast_hops, .gfni, .pclmul, .slow_incdec }), }, .{ .cpu_arch = .x86_64, @@ -97,6 +97,7 @@ pub fn build(b: *std.Build) void { .{ .cpu_arch = .x86_64, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, + .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .fast_hops, .gfni, .pclmul, .slow_incdec }), .cpu_features_sub = std.Target.x86.featureSet(&.{.avx2}), }, .{ @@ -106,12 +107,11 @@ pub fn build(b: *std.Build) void { .{ .cpu_arch = .x86_64, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, - .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .gfni, .pclmul }), + .cpu_features_add = std.Target.x86.featureSet(&.{ .adx, .fast_hops, .gfni, .slow_incdec, .vpclmulqdq }), }, .{ .cpu_arch = .x86_64, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 }, - .cpu_features_add = std.Target.x86.featureSet(&.{.vpclmulqdq}), }, }) |query| { const target = b.resolveTargetQuery(query); diff --git a/test/behavior/x86_64/unary.zig b/test/behavior/x86_64/unary.zig index e492aad7f9..839feb02de 100644 --- a/test/behavior/x86_64/unary.zig +++ b/test/behavior/x86_64/unary.zig @@ -4451,141 +4451,354 @@ fn unary(comptime op: anytype, comptime opts: struct { } fn testFloatVectors() !void { try testArgs(@Vector(1, f16), .{ - -0x1.17cp-12, + 0x1.7d8p12, }); try testArgs(@Vector(2, f16), .{ - 0x1.47cp9, 0x1.3acp9, + -0x0.054p-14, -0x1.c6cp10, + }); + try testArgs(@Vector(3, f16), .{ + -0x1.39cp-3, -0x1.088p4, -0x0.644p-14, }); try 
testArgs(@Vector(4, f16), .{ - 0x1.ab4p0, -0x1.7fcp-7, -0x1.1cp0, -0x1.f14p12, + -0x1.108p11, 0x1.364p-3, 0x1.8f4p-2, -0x0.8acp-14, + }); + try testArgs(@Vector(5, f16), .{ + 0x1.e1p8, 0x1.ddp11, 0x0.388p-14, 0x1.7p-7, -0x0.a08p-14, + }); + try testArgs(@Vector(7, f16), .{ + 0x1.988p-14, -0x1.f7p-14, 0x1.38cp12, 0x0.0fp-14, -0x1.774p2, -0x1.de4p11, -0x1.9bp-10, }); try testArgs(@Vector(8, f16), .{ - -0x1.8d8p8, 0x1.83p10, -0x1.5ap-1, -0x1.d78p13, -0x1.608p12, 0x1.e8p-9, -0x1.688p-10, -0x1.738p9, + 0x1.6ecp12, -0x1.834p9, -0x1.2c8p13, 0x1.e7cp3, -0x1.418p3, 0x1.15cp-1, 0x1.fecp-2, 0x1.1dp-3, + }); + try testArgs(@Vector(9, f16), .{ + 0x1.da8p-1, 0x1.d44p-11, 0x1.884p-10, -0x1.898p1, 0x1.5ccp-5, 0x1.68p0, 0x1.618p14, -0x1.c34p2, + -0x1.318p6, + }); + try testArgs(@Vector(15, f16), .{ + 0x1.41cp11, 0x1.edp-1, 0x1.1c8p-12, -0x0.0ecp-14, -0x1.abp8, 0x1.34p0, -0x1.24cp-4, -0x1.214p1, + -0x1.604p9, -0x1.364p-1, 0x1.adp0, 0x0.63p-14, 0x0.60cp-14, 0x1.6ep-6, 0x0.84cp-14, }); try testArgs(@Vector(16, f16), .{ - 0x1.da8p-1, -0x1.ed4p-10, -0x1.dc8p1, 0x1.b78p-14, nan(f16), 0x1.9d8p8, nan(f16), 0x1.d5p13, - -0x1.2dp13, 0x1.6c4p12, 0x1.a9cp-11, -0x1.0ecp8, 0x0.4ccp-14, -0x1.0a8p-6, -0x1.5bcp-14, 0x1.6d8p-9, + 0x1.308p6, -0x1.078p-1, 0x0.81p-14, 0x1.1b4p-14, 0x1.4ep-7, 0x1.75p12, 0x1.264p-8, 0x1.a6p2, + 0x1.9a4p-3, 0x1.e9p4, -0x1.a4p-6, 0x1.6acp-1, 0x1.7e8p-12, -0x1.02cp6, -0x1.0ccp-14, 0x1.edp-12, + }); + try testArgs(@Vector(17, f16), .{ + 0x1.2c4p-1, 0x1.91cp-3, 0x1.bf8p10, -0x0.25p-14, 0x1.45p-9, 0x1.cap-2, 0x1.e9cp8, 0x1.b7p8, + 0x1.21cp9, -0x0.ba4p-14, -0x1.ddcp-4, -0x1.bcp9, -0x1.7dcp-3, 0x1.6a4p-12, 0x1.ca8p-8, -0x1.558p11, + 0x0.26cp-14, + }); + try testArgs(@Vector(31, f16), .{ + -0x1.f94p7, 0x1.55cp9, -0x1.f78p11, -0x0.f48p-14, -0x1.b6p-2, 0x1.85cp1, -0x1.114p4, -0x1.97cp-5, + -0x1.6f8p2, 0x1.79cp-3, 0x1.e58p-9, -0x1.f5cp-10, 0x1.a74p5, -0x0.1e8p-14, 0x1.15cp-14, 0x1.814p-7, + -0x0.318p-14, -0x1.b5p-5, -0x1.058p-10, 0x1.124p0, -0x1.20cp-1, 0x1.978p10, -0x1.808p-8, 
0x1.528p-6, + -0x1.ba8p9, 0x0.294p-14, 0x1.11cp0, 0x1.e5p5, 0x1.904p-11, 0x1.d78p11, -0x1.c1p5, }); try testArgs(@Vector(32, f16), .{ - 0x1.d5cp-6, -0x1.a98p5, 0x1.49cp5, -0x1.e4p-1, -0x1.21p-13, -0x1.c94p-1, -0x1.adcp-5, -0x1.524p-1, - -0x1.0d8p-3, -0x1.5c4p-2, 0x1.f84p-2, 0x1.664p1, -0x1.f64p13, -0x1.bf4p4, -0x1.4b8p0, -0x0.f64p-14, - -0x1.3f8p1, 0x1.098p2, -0x1.a44p8, 0x1.048p13, 0x1.fd4p-11, 0x1.18p-9, -0x1.504p2, 0x1.d04p7, - -nan(f16), 0x1.a94p2, 0x0.5e8p-14, -0x1.7acp-7, 0x1.4c8p-3, 0x1.518p-4, nan(f16), 0x1.8f8p10, + -0x0.11p-14, 0x0.29cp-14, 0x1.7a8p5, 0x1.49cp-11, 0x1.6c4p-3, -0x1.85cp-11, 0x1.ap-8, -0x0.49cp-14, + 0x1.dfp2, -0x1.4cp1, 0x1.138p-5, -0x1.45p-9, 0x0.88cp-14, 0x1.6acp10, 0x1.594p3, 0x1.704p6, + -0x1.c34p13, 0x1.44cp0, -0x1.cfcp-10, 0x1.5c8p-4, -0x1.b2cp-10, -0x1.178p1, -0x1.b74p7, -0x1.d18p0, + 0x1.0fcp-9, 0x1.b6p-11, -0x1.ff4p-2, -0x0.0b8p-14, 0x1.4dcp-10, -0x1.af4p-5, -0x1.eap2, -0x1.79cp-4, + }); + try testArgs(@Vector(33, f16), .{ + -0x1.6e8p0, -0x1.304p-12, 0x1.558p11, 0x1.cf4p13, 0x1.cc4p-9, 0x1.d88p-11, 0x1.838p8, -0x1.2ecp-10, + -0x1.65cp-1, -0x1.644p8, -0x1.048p10, 0x0.114p-14, 0x1.8a4p13, 0x1.c9p-3, 0x1.dfp-6, -0x1.774p12, + -0x0.4dp-14, 0x1.2ccp-12, 0x0.98p-14, -0x1.b18p-6, 0x0.1ecp-14, 0x0.86cp-14, 0x0.6e8p-14, -0x1.6dp14, + 0x1.9e8p-3, 0x1.1ep10, -0x1.6cp13, -0x1.d44p1, -0x1.f54p-12, -0x1.fe8p-14, 0x1.968p-1, -0x1.ab4p-9, + 0x1.f0cp0, + }); + try testArgs(@Vector(63, f16), .{ + -0x1.3ecp-1, 0x0.04p-14, -0x1.1cp-2, 0x1.0dp10, 0x1.ddcp-12, -0x1.57cp-11, -0x1.84p-9, 0x1.dfp4, + 0x1.6e4p-9, 0x0.5d4p-14, -0x0.51cp-14, -0x1.bp2, -0x1.8ecp-14, 0x1.268p-2, -0x0.69p-14, -0x1.b98p7, + -0x0.cb4p-14, -0x1.accp-3, 0x1.cdcp6, -0x1.e6p7, 0x1.4ep-14, 0x1.5fp5, -0x1.95p8, 0x1.044p8, + -0x1.e14p9, 0x1.e84p14, 0x1.ee8p-10, -0x1.0a4p8, 0x1.b14p-8, -0x1.5dp9, 0x0.e68p-14, -0x0.1acp-14, + -0x1.7ccp-11, 0x1.45p-10, 0x0.044p-14, 0x1.078p4, 0x1.c8p-1, -0x1.8fp11, -0x1.cbp0, -0x1.208p-10, + -0x1.a5p-1, -0x1.164p-8, -0x1.304p-3, -0x1.038p-10, -0x1.4dp11, 
0x0.248p-14, 0x1.09cp-4, -0x1.a7cp14, + -0x1.a38p-6, -0x1.0bp-9, -0x1.fecp-14, -0x1.c78p-10, -0x1.e38p-11, 0x1.47p-5, -0x1.3bcp5, 0x1.6a4p9, + 0x0.728p-14, 0x1.9c8p9, 0x1.88p12, -0x1.e6p0, 0x1.5dcp-2, -0x1.7f4p-4, -0x1.a6p3, }); try testArgs(@Vector(64, f16), .{ - -0x1.c2p2, 0x0.2fcp-14, 0x1.de8p0, -0x1.714p2, 0x1.f9p-7, -0x1.11cp-13, -0x1.558p10, -0x1.2acp-7, - 0x1.348p14, 0x1.2dcp7, -0x1.8acp-12, -0x1.2cp2, 0x1.868p1, -0x1.1f8p-14, 0x1.638p7, -0x1.734p-5, - 0x0.b98p-14, -0x1.7f4p-12, -0x1.38cp15, 0x1.50cp15, 0x1.91cp8, 0x1.cb4p-1, 0x1.fc4p-13, 0x1.9a4p0, - 0x1.18p-4, 0x1.60cp10, 0x1.6fp-12, 0x1.b48p6, 0x1.37cp-11, 0x1.424p7, 0x1.44cp13, 0x1.aep5, - 0x1.968p14, 0x1.e8p13, -0x1.bp2, -0x1.644p5, 0x1.de4p-8, -0x1.5b4p-14, -0x1.4ap1, -0x1.868p9, - -0x1.d14p0, 0x1.d7cp15, 0x1.3c8p14, 0x1.2ccp-14, -0x1.ee4p8, 0x1.49p-3, 0x1.35cp12, 0x1.d34p6, - 0x1.7acp3, -0x1.fa4p2, 0x1.7b4p13, -0x1.cf4p-12, -0x1.ebcp-10, -0x1.5p-3, 0x1.4bp-6, 0x1.83p12, - -0x1.f9cp-8, -0x1.43p-8, -0x1.99p-1, -0x1.dacp3, -0x1.728p-4, -0x1.03cp4, 0x1.604p-2, -0x1.0ep13, + -0x1.67cp-13, 0x1.f2cp-10, 0x1.69cp11, -0x1.0dp-2, 0x1.a8p9, 0x1.7dp-11, 0x1.908p-5, -0x1.37cp0, + 0x1.8f8p5, 0x1.38p11, 0x1.d2p8, 0x1.b74p-10, -0x1.188p-7, 0x1.578p5, 0x1.68p-11, -0x1.b9cp8, + -0x1.ba4p2, 0x0.b78p-14, 0x1.458p-8, 0x0.054p-14, -0x0.63p-14, 0x1.83p10, 0x1.94cp-2, -0x1.d7p2, + -0x1.62p4, 0x1.b34p4, -0x1.4cp-11, -0x1.714p9, -0x1.ce4p1, 0x1.75p-3, -0x1.cbp-13, 0x1.714p6, + -0x1.cb8p7, -0x1.b98p-4, 0x1.facp-13, -0x1.1f4p8, -0x1.92p-3, 0x0.144p-14, 0x1.504p-4, 0x1.a9p-10, + 0x1.a94p3, 0x1.708p-2, 0x1.c84p-14, 0x1.77cp9, -0x0.1e4p-14, -0x0.3d8p-14, -0x1.f8p4, -0x1.2bp5, + 0x1.5b8p-14, 0x1.898p14, -0x1.e2p3, -0x1.0e8p-5, 0x1.4dcp-12, 0x1.368p8, 0x1.968p-7, -0x1.98cp-5, + 0x1.39cp-13, 0x1.23p2, 0x1.8e8p6, 0x1.344p7, 0x1.70cp-5, -0x1.f24p11, -0x1.54p-7, -0x1.904p3, + }); + try testArgs(@Vector(65, f16), .{ + -0x1.d78p-4, 0x1.ea8p-8, -0x1.b4cp6, -0x1.c7cp4, 0x1.dfcp7, 0x1.a8cp6, -0x1.768p11, 0x0.0fp-14, + -0x1.a3p-4, -0x1.868p-9, 
0x1.23p-1, -0x1.2e8p3, -0x1.9e8p-12, 0x1.8a8p3, 0x1.168p-5, -0x1.608p8, + -0x1.9d4p-4, -0x1.17cp-1, -0x1.f2p1, -0x1.d38p-11, 0x1.f38p-12, -0x1.92p-11, 0x1.c44p6, 0x1.4fp-3, + 0x0.18p-14, 0x1.3dp11, -0x1.ce4p9, -0x1.bf8p-12, 0x0.88cp-14, -0x1.998p-9, 0x1.788p-2, -0x1.5c4p2, + 0x0.08cp-14, -0x0.6f8p-14, 0x1.c7cp-10, -0x0.1p-14, -0x1.0fcp-9, -0x1.5a4p6, -0x1.8c8p-12, 0x0.57p-14, + -0x1.96cp-9, 0x1.6ecp10, -0x1.c18p1, -0x1.0ap5, -0x0.768p-14, -0x1.f8cp-6, 0x0.44p-14, -0x1.2b4p-2, + 0x1.efcp-13, -0x1.434p-13, 0x1.434p-3, 0x1.a6p-2, 0x1.bc4p7, -0x0.e1p-14, -0x1.d9cp-7, -0x1.f94p-9, + 0x1.448p-6, 0x1.0d8p3, -0x0.4a4p-14, -0x1.25cp-10, 0x1.c18p12, 0x0.1ccp-14, -0x1.ep14, -0x1.42cp6, + 0x1.14p8, }); try testArgs(@Vector(1, f32), .{ - -0x1.17cp-12, + 0x1.12e082p8, }); try testArgs(@Vector(2, f32), .{ - -0x1.a3123ap90, -0x1.4a2ec6p-54, + -0x1.f04666p17, 0x1.27d624p4, + }); + try testArgs(@Vector(3, f32), .{ + -0x1.c3168cp-85, -0x1.169cdcp9, -0x1.4bdb2ap13, }); try testArgs(@Vector(4, f32), .{ - -0x1.8a41p77, -0x1.7c54e2p-61, -0x1.498556p-41, 0x1.d77c22p-20, + -0x1.a8b1d6p29, -0x1.b94e32p-76, 0x1.f4d9aap-43, 0x1.e6c654p44, + }); + try testArgs(@Vector(5, f32), .{ + 0x1.37c57ep-53, -0x1.832c84p49, -0x1.04256ep-110, -0x1.de4454p-37, + -0x1.a36832p-34, + }); + try testArgs(@Vector(7, f32), .{ + -0x1.35df86p87, -0x1.d96a52p62, 0x1.f9d3ecp-12, 0x1.5f4cc6p112, + 0x1.176cfap94, 0x1.bb86fcp69, 0x1.015e56p0, }); try testArgs(@Vector(8, f32), .{ - 0x1.943da4p-86, 0x1.528792p95, -0x1.9c9bfap-26, -0x1.8df936p-90, - -0x1.6a70cep56, 0x1.626638p-48, 0x1.7bb2bap-57, -0x1.ac5104p94, + -0x1.9dd6cap3, 0x1.726066p-42, 0x1.5b1f5ep-20, -0x1.347ed6p29, + 0x1.bfb5d4p-126, -0x1.b0e8dp45, 0x1.5577bep45, -0x1.9d1608p2, + }); + try testArgs(@Vector(9, f32), .{ + -0x1.4159b2p76, 0x1.bea7b8p-107, -0x1.b47036p-82, -0x1.4635ap-26, + -0x1.27bc98p-47, 0x1.1e0ap-116, 0x1.0f628p-118, 0x1.2e63bcp-62, + 0x1.d0e45ep-57, + }); + try testArgs(@Vector(15, f32), .{ + 0x1.65e0bcp-12, 0x1.d947c6p-42, -0x1.4596acp64, 
-0x1.2a897cp75, + 0x1.cb074ap-8, 0x1.e44a98p-62, -0x1.3edb2p74, 0x1.07aecep-2, + -0x1.fda1f8p14, 0x1.2f2c7ap-95, 0x1.9814e6p-33, 0x1.6d6a58p3, + 0x1.6a1478p-3, -0x1.85886ap64, -0x1.e2b9bcp-114, }); try testArgs(@Vector(16, f32), .{ - 0x1.157044p115, -0x1.416c04p-111, 0x1.a8f164p-104, 0x1.9b6678p84, - -0x1.9d065cp9, -0x1.e8c4b4p126, -0x1.ddb968p84, -0x1.fec8c8p74, - 0x1.64ffb2p59, 0x1.548922p20, 0x1.7270fcp22, -0x1.abac68p33, - 0x1.faabfp33, -0x1.8aee82p55, 0x1.1bf8fp75, 0x1.33c46ap-66, + 0x1.348b38p103, 0x1.bbc8e4p8, -0x1.03f48ap-119, -0x1.90f87cp115, + -0x1.88aaaep28, -0x1.21ec4p-94, 0x1.e1f21cp-57, 0x1.0e7dd2p-37, + -0x1.5963a2p-24, 0x1.4c314cp-61, -0x1.753d5ap113, -0x1.65705p-12, + -0x1.e34902p-54, -0x1.ab8022p87, -0x1.5cc252p-99, 0x1.4f4fe6p41, + }); + try testArgs(@Vector(17, f32), .{ + 0x1.6be79ap-19, -0x1.38819p-21, -0x1.8551dp2, -0x1.43155ep-126, + 0x1.96e6p108, 0x1.58abaap41, 0x1.145ffcp124, -0x1.8e314ep-41, + -0x1.63151p42, 0x1.9585e8p124, 0x1.4bdd42p-66, 0x1.858674p-45, + -0x1.bccb68p66, -0x1.88e0e8p-14, -0x1.e0461cp-116, 0x1.3c1e2ep120, + -0x1.0076dep14, + }); + try testArgs(@Vector(31, f32), .{ + 0x1.8d5b34p-49, -0x1.bd019cp-83, -0x1.1d06e2p-95, -0x1.d9ac6ap-45, + 0x1.f942dap10, -0x1.c23402p121, -0x1.8e5656p-32, 0x1.925222p-53, + -0x1.16440ep-117, 0x1.b146cep107, -0x1.b58cdep-52, 0x1.713f34p8, + 0x1.3de424p99, -0x1.3e6d6ep-28, -0x1.8261b4p-69, 0x1.043d66p-91, + -0x1.fbcd6ep113, 0x1.7934dcp-47, 0x1.fa8152p99, 0x1.c29968p-58, + 0x1.77f26ap82, 0x1.4602aap-57, -0x1.8a4cb4p8, 0x1.d48cdap113, + 0x1.636a7ep29, 0x1.730262p57, 0x1.29e668p7, 0x1.58592cp20, + 0x1.d09ebp-107, 0x1.7a85c6p-39, 0x1.38e1d6p44, }); try testArgs(@Vector(32, f32), .{ - -0x1.039b68p37, -0x1.34de4ap-74, -0x1.05d78ap-76, -0x1.be0f5ap-47, - 0x1.032204p-38, 0x1.ef8e2ap-78, -0x1.b013ecp-80, 0x1.71fe4cp99, - 0x1.abdadap-14, 0x1.56a9a8p-48, -0x1.8bbd7ep9, 0x1.edd308p-72, - -0x1.92fafcp-121, -0x1.50812p19, 0x1.f4ddc4p28, -0x1.6f0b12p-50, - -0x1.12ab02p127, 0x1.24df48p21, -0x1.993c3p-14, 
-0x1.4cc476p-112, - 0x1.13d9a8p-40, 0x1.a6e652p-9, -0x1.9c730cp-21, -0x1.a75aaap-70, - -0x1.39e632p-111, 0x1.8e8da8p-45, 0x1.b5652cp31, 0x1.258366p44, - 0x1.d473aap92, -0x1.951b64p9, 0x1.542edp15, -0x0.f6222ap-126, + -0x1.95dec4p-65, 0x1.3833cp65, -0x1.0ef5ap-53, 0x1.86e4c8p101, + -0x1.713132p24, -0x1.c6fd0ep123, -0x1.75aadcp88, -0x1.b8f0fp18, + 0x1.0f5b8ep-34, -0x1.0d0d66p-15, 0x0.842836p-126, -0x1.157782p22, + -0x1.025e8ap-100, 0x1.be825ep117, 0x1.d3efc6p-45, 0x1.ed8462p-34, + -0x1.b373c8p-118, -0x1.dbfd16p4, 0x1.73ee9p-56, -0x1.cdff48p-69, + 0x1.1b806ep-78, 0x1.65a58ap-4, -0x1.0d851cp77, 0x1.442c12p41, + 0x1.215116p47, -0x1.75f266p-48, 0x1.2273d4p89, 0x1.1bab24p-100, + -0x1.0300ep-22, 0x1.8c199cp-70, -0x1.70e08cp-66, 0x1.aa6b3ep-24, + }); + try testArgs(@Vector(33, f32), .{ + -0x1.4eddccp-116, 0x1.724e18p-94, -0x1.9d40bep54, -0x1.0afc5p-14, + 0x1.576c2p92, 0x1.cf52b6p110, -0x1.7e67ep117, -0x1.7db66ep90, + 0x1.3eac22p-38, 0x1.6ba068p72, -0x1.72dc2cp97, -0x1.4193f4p72, + 0x1.aa81f6p86, 0x1.984268p53, -0x1.14ba6ep-45, 0x1.15603ep-122, + 0x1.85e75p-56, 0x1.108a82p-121, 0x1.569ecp62, -0x1.7f3268p-68, + -0x1.d0964ep0, 0x0.f7a596p-126, -0x1.367646p-11, 0x1.2065bp-26, + 0x1.cc954ap125, -0x1.956e1cp65, 0x1.774dep112, 0x1.69dfcep-16, + -0x1.b0efb2p76, 0x1.14c54p70, -0x1.7c6b08p25, 0x1.ae20b4p31, + -0x1.73c584p-118, }); try testArgs(@Vector(1, f64), .{ - -0x1.0114613df6f97p816, + 0x1.58849bfb1303cp-254, }); try testArgs(@Vector(2, f64), .{ - -0x1.8404dad72003cp720, -0x1.6b14b40bcf3b7p-176, + -0x1.b4a24030f3facp215, -0x1.c1bdddbc41cdep950, + }); + try testArgs(@Vector(3, f64), .{ + -0x1.7d154dcee386cp-284, -0x1.2fdda9cbabfap-84, + 0x1.00c86a9c3de5cp-46, }); try testArgs(@Vector(4, f64), .{ - -0x1.04e1acbfddd9cp681, -0x1.ed553cc056da7p-749, - 0x1.3d3f703a0c893p-905, 0x1.0b35633fa78fp691, + 0x1.70f298f25a9bfp826, 0x1.4b944832c8eecp-319, + -0x1.d801afafdbc01p-708, -0x1.65d0b4b097a57p-872, + }); + try testArgs(@Vector(5, f64), .{ + -0x1.4796bdf4c112bp938, 0x1.3661030c6a2fp-156, 
+ -0x1.20d194f89bc7fp-9, -0x1.f545d17a1d9e8p604, + 0x1.c786013e7205ep-514, + }); + try testArgs(@Vector(7, f64), .{ + -0x1.8f6d6e549941fp501, -0x1.56374640d779p-762, + -0x1.4ea02d12bd9cfp209, -0x1.ab85b639e78c6p-879, + -0x1.fcd56fe4f85abp47, -0x1.8963745584169p-957, + -0x1.581a8a0033e8p915, }); try testArgs(@Vector(8, f64), .{ - -0x1.901a2a60f0562p-301, -0x1.2516175ad61ecp-447, - 0x1.e7b12124846bfp564, 0x1.9291384bd7259p209, - -0x1.a7bf62f803c98p900, 0x1.4e2e26257bb3p987, - -0x1.413ca9a32d894p811, 0x1.61b1dd9432e95p479, + -0x1.2a8fb1782b7f2p-126, -0x1.b246d12815c21p606, + 0x1.6bc24f2a268b9p837, 0x1.1d550478ebd71p1016, + 0x1.d2ba52815edc2p252, 0x1.a8d87e5eb97ecp-450, + -0x1.c8a3d899aa89p601, -0x1.1fa47083d9a8fp289, + }); + try testArgs(@Vector(9, f64), .{ + -0x1.312d39a09757p-567, -0x1.4b0ef2ac9424ep-10, + 0x1.84302715c6852p930, -0x1.01565f82fd32p761, + -0x1.36ad9c057719ap-351, 0x1.dc4929f2400c8p793, + -0x1.e90f3ae855d3dp-474, 0x1.4e65fb145865ep-834, + 0x1.4236a94937ee3p-987, + }); + try testArgs(@Vector(15, f64), .{ + 0x1.df73a72937309p351, -0x1.73506ab182b9p-23, + 0x1.b2c954612187p-997, 0x1.7c5ee7c602989p-93, + -0x1.5edba35428d13p762, -0x1.e3bc1f194dc8cp-386, + 0x1.ca056fb59bdb9p651, 0x1.e59b99b174a0dp-528, + 0x1.7a995c7651aa7p929, -0x1.a25d3d5153405p413, + 0x1.e5579317d4b37p-50, 0x1.f9d5578c67f67p-90, + -0x1.5da751d423506p611, 0x1.9a2cba7bf2467p488, + 0x1.db3d45f662c4ep-619, }); try testArgs(@Vector(16, f64), .{ - -0x1.8fc7286d95f54p-235, -0x1.796a7ea8372b6p-837, - -0x1.8c0f930539acbp-98, -0x1.ec80dfbf0b931p-430, - -0x1.e3d80c640652fp-1019, 0x1.8241238fb542fp161, - -0x1.e1f1a79d50263p137, -0x1.9ac5cb2771c28p-791, - 0x1.4d8f00fe881e7p-401, -0x1.87fbd7bfd99d7p346, - -0x1.a8a7cc575335ep1017, 0x1.37bb88dc3fd8bp-355, - 0x1.9d53d346c0e65p929, -0x1.bbae3d0229c34p289, - -0x1.cb8ef994d5ce5p25, 0x1.ba20af512616ap50, + 0x1.fd61de463a33cp898, -0x1.47be52b4f1241p-18, + 0x1.729aa777312a3p-930, -0x1.2db258cd9984dp895, + 0x1.a1fbc900c10cbp517, -0x1.e93dfa8923807p815, + 
-0x1.e8f19fc0aa2a8p191, -0x1.1b084206321d5p861, + -0x1.0be3c6310c58ep457, 0x1.816c3bcf4b9f5p-504, + 0x1.ec4b026b00c91p-831, 0x1.e42d18f5c7e4bp924, + -0x1.f1483ecd74646p560, -0x1.cc5aea97d2264p447, + -0x1.a0b1e5b69d166p597, 0x1.e9a109fcf1358p694, + }); + try testArgs(@Vector(17, f64), .{ + -0x1.cd163cf2878e5p-934, -0x1.ce0ad5b67552p196, + -0x1.da0fd3a62b298p508, 0x1.1981c99b14943p3, + 0x1.d2f6461a9d1a9p390, -0x1.e8e877d3b4e96p-539, + -0x1.8ad9d3e185c43p864, 0x1.61786be9783eep-110, + -0x1.1f4be91d90cc3p-500, 0x1.71cacdd984837p956, + 0x1.7b6ae301fd95ep-661, 0x1.24571ba56e32p343, + 0x1.b1a9454ab9481p648, -0x1.887873f8044fep842, + -0x1.2f4ee57b9de22p-967, -0x1.c931346ced885p-983, + 0x1.fe31b9923796bp-772, }); try testArgs(@Vector(1, f80), .{ - -0x1.a2e9410a7dfedabp-2324, + -0x1.482098130df28b74p12578, }); try testArgs(@Vector(2, f80), .{ - -0x1.a2e9410a7dfedabp-2324, - 0x1.2b17da3b9746885p-8665, + -0x1.275157565b1eee5ep14003, + 0x1.a27b82ef4be6132ap3681, + }); + try testArgs(@Vector(3, f80), .{ + 0x1.9825fbd9b22021fep-10432, + -0x1.b8c8c4e5e3911ca8p13568, + 0x1.aa99cc199c8e524p9865, }); try testArgs(@Vector(4, f80), .{ - -0x1.c488fedb7ab646cep-13007, - 0x1.e914deaccaa50016p2073, - -0x1.d1c7ae8ec3c9df86p10642, - -0x1.2da1658f337fa01p9893, + -0x1.9d8ab0a36953d0f6p-760, + 0x1.869b464121ce6576p-13660, + 0x1.a54b1d1e8ae2b62ap12073, + -0x1.2abe41c9a9d89ea4p-13141, + }); + try testArgs(@Vector(5, f80), .{ + 0x1.0fb10e205522f5aep-15041, + -0x1.13e0c338580504dap10809, + 0x1.50e7c6666fd851acp-5508, + -0x1.e2231120481fc762p-8351, + 0x1.4fae86dc45b06fe2p10741, + }); + try testArgs(@Vector(7, f80), .{ + -0x1.fe8f8caa4e8697ecp-2992, + 0x1.2623c910a340e286p-14518, + 0x1.c5524642a438569p-9469, + 0x1.3d416ca0a47c73cep2981, + 0x1.a3a1eb1243923114p-6689, + -0x1.a55df9ded3010b1cp-5798, + -0x1.3d593df395b03e5ap-14382, }); try testArgs(@Vector(8, f80), .{ - -0x1.bed8a74c43750656p890, - -0x1.7bf57f38004ac976p8481, - -0x1.9cdc10ac0657d328p7884, - 0x1.c86f61883da149fp12293, - 
-0x1.528d6957df6bfdd8p14125, - -0x1.5ebb4006d0243bfep14530, - -0x1.94b9b18636d12402p-1845, - -0x1.25439a6d68add188p5962, + -0x1.9bb73ea024f4167cp3116, + 0x1.adf6241753b29ed2p-4428, + -0x1.1494fa8680f9f5f4p2008, + -0x1.c68a673c59edeb24p2377, + 0x1.26c7ab4021afb6dcp1376, + 0x1.c829b0b3935a2ac6p-11758, + -0x1.11e39b110c2fb122p-3836, + -0x1.6db14745e291d466p1604, + }); + try testArgs(@Vector(9, f80), .{ + 0x1.f6e537676c132cc6p-10213, + -0x1.b86eadf24d8c80eep808, + -0x1.54bc27c9a9a2348cp-2369, + -0x1.99453820b245bc5p-840, + -0x1.93c299090fd981e6p-5264, + -0x1.c742059979281ec4p-6347, + -0x1.e3efe7b892591d3p-1877, + -0x1.350c20a2d59c67dap-8972, + -0x1.e3879f20ffc62ff2p-2600, }); try testArgs(@Vector(1, f128), .{ - -0x1.d1e6fc3b1e66632e7b79051a47dap14300, + -0x1.274ece23c1832bfe66a1bc59cf87p-8354, }); try testArgs(@Vector(2, f128), .{ - 0x1.84b3ac8ffe5893b2c6af8d68de9dp-83, - -0x1.438ca2c8a0d8e3ee9062d351c46ep-10235, + 0x1.838a4e7ba1e2191cebe701eac5d4p6581, + 0x1.cdfbda51a2adbce757d7c2e0981bp446, + }); + try testArgs(@Vector(3, f128), .{ + -0x1.ff45938938f76db417c980c368c6p-7215, + -0x1.277a316793a0172e49c7227952ccp10618, + 0x1.d85027eb4f4ed3512c10bff9a199p-8465, }); try testArgs(@Vector(4, f128), .{ - 0x1.04eb03882d4fd1b090e714d3e5ep806, - -0x1.4082b29f7c26e701764c915642ffp-6182, - -0x1.b6f1e8565e5040415110f18b519ap13383, - 0x1.1c29f8c162cead9061c5797ea15ap11957, + -0x1.43d8ecf283d4ec6fc4993f385386p-12233, + -0x1.384424d239aa2ed9719d2c2d1e58p7346, + -0x1.d33fd11001f0ab6d0f9a2790b41cp14692, + -0x1.40219a635ef4b042cfb9d7bd9781p900, + }); + try testArgs(@Vector(5, f128), .{ + -0x1.3273c97faf4619baedaebb51148fp9085, + -0x1.f381263ad1033a071dff3a143b14p-13649, + -0x1.24b24810f9a1f9b5d1542e2b5841p1425, + -0x1.df9e062d482c2bbae0b8fcb07efep-5044, + -0x1.15cbca8b8384412d7d09ff76bfe4p-2424, + }); + try testArgs(@Vector(7, f128), .{ + -0x1.0972e6da79fa8bcd49431d813ea5p12192, + 0x1.568e3e61ac4fb17303e4ead041dcp-2542, + 0x1.a55c3f0014942187e6d40c72f12p-13437, + 
-0x1.31fb0ec6dbdf7e4ea8ecc307e6f4p13767, + -0x1.5dcc12514e3e540fea9dbd257935p-8938, + -0x1.32471cd1d5d2a36e9148a8ce879ap-3274, + -0x1.3fd3eb6d86a14567e49f358cf029p-4569, }); try testArgs(@Vector(8, f128), .{ - -0x1.53d7f00cd204d80e5ff5bb665773p11218, - -0x1.4daa1c81cffe28e8fa5cd703c287p2362, - -0x1.cc6a71c3ad4560871efdbd025cd7p-8116, - -0x1.87f8553cf8772fb6b78e7df3e3bap14523, - -0x1.14b6880f6678f86dfb543dde1c6ep2105, - 0x1.9d2d4398414da9d857e76e8fd7ccp-13668, - 0x1.a37f07af240ded458d103c022064p-1158, - 0x1.425d53e6bd6070b847e5da1ed593p1394, + -0x1.05fe5035b415bdc5f8f9ae4c8815p455, + -0x1.fafde904d5cad82413daee7b88b8p-244, + 0x1.53041230913c654449b12eb4d89bp2214, + -0x1.12d9f4b006063e9c0c7bdf19f61ap-2483, + 0x1.aee9d4ba013f668773e4f0fd9002p5461, + 0x1.a6776670633403e78a3cc6fcf8fdp8324, + -0x1.392aa756df3b993ea9db22def53ep15136, + 0x1.823ef104549bdd4624961a44736cp-1097, + }); + try testArgs(@Vector(9, f128), .{ + -0x1.bde12739521a2bff70e510a6aca3p12384, + -0x1.0001c77658eb15cd7cb631b4836bp2147, + -0x1.f24c72b8cde26d95bd40f689a2aep-1416, + -0x1.61957e7946030c0432af0381f64ap-9492, + -0x1.631851492fa27fe7adc7441e0d21p16144, + -0x1.9dd39ece97e7a70c6d36e7e3026p-15761, + 0x1.b044e441d7377755389d0bab3256p-1181, + 0x1.5c11719701b7ff21384fbbf32922p-1671, + -0x1.1a2944a4dff2a4f96732bf03e8f7p-10567, }); } }; @@ -4897,6 +5110,15 @@ test reduceMul { try test_reduce_mul.testIntVectors(); } +inline fn reduceAddOptimized(comptime Type: type, rhs: Type) @typeInfo(Type).vector.child { + @setFloatMode(.optimized); + return @reduce(.Add, rhs); +} +test reduceAddOptimized { + const test_reduce_add_optimized = unary(reduceAddOptimized, .{ .compare = .approx }); + try test_reduce_add_optimized.testFloatVectors(); +} + inline fn splat(comptime Type: type, rhs: Type) Type { return @splat(rhs[0]); } diff --git a/test/cases/float_mode_optimized_reduce.zig b/test/cases/float_mode_optimized_reduce.zig index 55d71f4709..d03eccb86d 100644 --- a/test/cases/float_mode_optimized_reduce.zig +++ 
b/test/cases/float_mode_optimized_reduce.zig @@ -8,5 +8,5 @@ pub fn main() void { } // run -// backend=llvm -// +// backend=stage2,llvm +// target=x86_64-linux