diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig
index 0e659bea06..870270f089 100644
--- a/src/Air/Legalize.zig
+++ b/src/Air/Legalize.zig
@@ -42,6 +42,7 @@ pub const Feature = enum {
     scalarize_shl_sat,
     scalarize_xor,
     scalarize_not,
+    scalarize_bitcast,
    scalarize_clz,
    scalarize_ctz,
    scalarize_popcount,
@@ -76,7 +77,7 @@ pub const Feature = enum {
    scalarize_mul_add,

    /// Legalize (shift lhs, (splat rhs)) -> (shift lhs, rhs)
-    remove_shift_vector_rhs_splat,
+    unsplat_shift_rhs,

    /// Legalize reduce of a one element vector to a bitcast
    reduce_one_elem_to_bitcast,
@@ -121,6 +122,7 @@ pub const Feature = enum {
            .shl_sat => .scalarize_shl_sat,
            .xor => .scalarize_xor,
            .not => .scalarize_not,
+            .bitcast => .scalarize_bitcast,
            .clz => .scalarize_clz,
            .ctz => .scalarize_ctz,
            .popcount => .scalarize_popcount,
@@ -259,9 +261,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
            => |air_tag| done: {
                const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
                if (!l.typeOf(bin_op.rhs).isVector(zcu)) break :done;
-                if (l.features.contains(comptime .scalarize(air_tag))) {
-                    continue :inst try l.scalarize(inst, .bin_op);
-                } else if (l.features.contains(.remove_shift_vector_rhs_splat)) {
+                if (l.features.contains(.unsplat_shift_rhs)) {
                    if (bin_op.rhs.toInterned()) |rhs_ip_index| switch (ip.indexToKey(rhs_ip_index)) {
                        else => {},
                        .aggregate => |aggregate| switch (aggregate.storage) {
@@ -282,6 +282,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
                        }
                    }
                }
+                if (l.features.contains(comptime .scalarize(air_tag))) continue :inst try l.scalarize(inst, .bin_op);
            },
            inline .not,
            .clz,
@@ -302,8 +303,14 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
                const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
                if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
            },
-            .bitcast,
-            => {},
+            inline .bitcast,
+            => |air_tag| if (l.features.contains(comptime .scalarize(air_tag))) {
+                const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
+                const to_ty = ty_op.ty.toType();
+                const from_ty = l.typeOf(ty_op.operand);
+                if (to_ty.isVector(zcu) and from_ty.isVector(zcu) and to_ty.vectorLen(zcu) == from_ty.vectorLen(zcu))
+                    continue :inst try l.scalarize(inst, .ty_op);
+            },
            .block,
            .loop,
            => {
diff --git a/src/Sema.zig b/src/Sema.zig
index 34c1bb4df7..c18f71d1fa 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -10165,16 +10165,7 @@ fn zirIntFromPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!
    try sema.requireRuntimeBlock(block, block.nodeOffset(inst_data.src_node), ptr_src);
    try sema.validateRuntimeValue(block, ptr_src, operand);
    try sema.checkLogicalPtrOperation(block, ptr_src, ptr_ty);
-    if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
-        return block.addBitCast(dest_ty, operand);
-    }
-    const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
-    for (new_elems, 0..) |*new_elem, i| {
-        const idx_ref = try pt.intRef(.usize, i);
-        const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
-        new_elem.* = try block.addBitCast(.usize, old_elem);
-    }
-    return block.addAggregateInit(dest_ty, new_elems);
+    return block.addBitCast(dest_ty, operand);
 }

 fn zirFieldVal(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -10640,17 +10631,7 @@ fn zirFloatCast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
    if (dst_bits >= src_bits) {
        return sema.coerce(block, dest_ty, operand, operand_src);
    }
-    if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
-        return block.addTyOp(.fptrunc, dest_ty, operand);
-    }
-    const vec_len = operand_ty.vectorLen(zcu);
-    const new_elems = try sema.arena.alloc(Air.Inst.Ref, vec_len);
-    for (new_elems, 0..) |*new_elem, i| {
-        const idx_ref = try pt.intRef(.usize, i);
-        const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
-        new_elem.* = try block.addTyOp(.fptrunc, dest_scalar_ty, old_elem);
-    }
-    return block.addAggregateInit(dest_ty, new_elems);
+    return block.addTyOp(.fptrunc, dest_ty, operand);
 }

 fn zirElemVal(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -20722,16 +20703,7 @@ fn zirIntFromBool(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
            .storage = .{ .elems = new_elems },
        } }));
    }
-    if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
-        return block.addBitCast(dest_ty, operand);
-    }
-    const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
-    for (new_elems, 0..) |*new_elem, i| {
-        const idx_ref = try pt.intRef(.usize, i);
-        const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
-        new_elem.* = try block.addBitCast(.u1, old_elem);
-    }
-    return block.addAggregateInit(dest_ty, new_elems);
+    return block.addBitCast(dest_ty, operand);
 }

 fn zirErrorName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -22327,42 +22299,23 @@ fn zirIntFromFloat(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
            .storage = .{ .repeated_elem = (try pt.intValue(dest_scalar_ty, 0)).toIntern() },
        } }));
    }
-    if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
-        const result = try block.addTyOp(if (block.float_mode == .optimized) .int_from_float_optimized else .int_from_float, dest_ty, operand);
-        if (block.wantSafety()) {
-            const back = try block.addTyOp(.float_from_int, operand_ty, result);
-            const diff = try block.addBinOp(if (block.float_mode == .optimized) .sub_optimized else .sub, operand, back);
-            const ok = if (is_vector) ok: {
-                const ok_pos = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, 1.0))).toIntern()), .lt);
-                const ok_neg = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, -1.0))).toIntern()), .gt);
-                const ok = try block.addBinOp(.bit_and, ok_pos, ok_neg);
-                break :ok try block.addReduce(ok, .And);
-            } else ok: {
-                const ok_pos = try block.addBinOp(if (block.float_mode == .optimized) .cmp_lt_optimized else .cmp_lt, diff, Air.internedToRef((try pt.floatValue(operand_ty, 1.0)).toIntern()));
-                const ok_neg = try block.addBinOp(if (block.float_mode == .optimized) .cmp_gt_optimized else .cmp_gt, diff, Air.internedToRef((try pt.floatValue(operand_ty, -1.0)).toIntern()));
-                break :ok try block.addBinOp(.bool_and, ok_pos, ok_neg);
-            };
-            try sema.addSafetyCheck(block, src, ok, .integer_part_out_of_bounds);
-        }
-        return result;
+    const result = try block.addTyOp(if (block.float_mode == .optimized) .int_from_float_optimized else .int_from_float, dest_ty, operand);
+    if (block.wantSafety()) {
+        const back = try block.addTyOp(.float_from_int, operand_ty, result);
+        const diff = try block.addBinOp(if (block.float_mode == .optimized) .sub_optimized else .sub, operand, back);
+        const ok = if (is_vector) ok: {
+            const ok_pos = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, 1.0))).toIntern()), .lt);
+            const ok_neg = try block.addCmpVector(diff, Air.internedToRef((try sema.splat(operand_ty, try pt.floatValue(operand_scalar_ty, -1.0))).toIntern()), .gt);
+            const ok = try block.addBinOp(.bit_and, ok_pos, ok_neg);
+            break :ok try block.addReduce(ok, .And);
+        } else ok: {
+            const ok_pos = try block.addBinOp(if (block.float_mode == .optimized) .cmp_lt_optimized else .cmp_lt, diff, Air.internedToRef((try pt.floatValue(operand_ty, 1.0)).toIntern()));
+            const ok_neg = try block.addBinOp(if (block.float_mode == .optimized) .cmp_gt_optimized else .cmp_gt, diff, Air.internedToRef((try pt.floatValue(operand_ty, -1.0)).toIntern()));
+            break :ok try block.addBinOp(.bool_and, ok_pos, ok_neg);
+        };
+        try sema.addSafetyCheck(block, src, ok, .integer_part_out_of_bounds);
    }
-    const len = dest_ty.vectorLen(zcu);
-    const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
-    for (new_elems, 0..) |*new_elem, i| {
-        const idx_ref = try pt.intRef(.usize, i);
-        const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
-        const result = try block.addTyOp(if (block.float_mode == .optimized) .int_from_float_optimized else .int_from_float, dest_scalar_ty, old_elem);
-        if (block.wantSafety()) {
-            const back = try block.addTyOp(.float_from_int, operand_scalar_ty, result);
-            const diff = try block.addBinOp(.sub, old_elem, back);
-            const ok_pos = try block.addBinOp(if (block.float_mode == .optimized) .cmp_lt_optimized else .cmp_lt, diff, Air.internedToRef((try pt.floatValue(operand_scalar_ty, 1.0)).toIntern()));
-            const ok_neg = try block.addBinOp(if (block.float_mode == .optimized) .cmp_gt_optimized else .cmp_gt, diff, Air.internedToRef((try pt.floatValue(operand_scalar_ty, -1.0)).toIntern()));
-            const ok = try block.addBinOp(.bool_and, ok_pos, ok_neg);
-            try sema.addSafetyCheck(block, src, ok, .integer_part_out_of_bounds);
-        }
-        new_elem.* = result;
-    }
-    return block.addAggregateInit(dest_ty, new_elems);
+    return result;
 }

 fn zirFloatFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -22377,7 +22330,6 @@ fn zirFloatFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
    const operand_ty = sema.typeOf(operand);

    try sema.checkVectorizableBinaryOperands(block, operand_src, dest_ty, operand_ty, src, operand_src);
-    const is_vector = dest_ty.zigTypeTag(zcu) == .vector;
    const dest_scalar_ty = dest_ty.scalarType(zcu);
    const operand_scalar_ty = operand_ty.scalarType(zcu);

@@ -22393,17 +22345,7 @@ fn zirFloatFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
    }

    try sema.requireRuntimeBlock(block, src, operand_src);
-    if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
-        return block.addTyOp(.float_from_int, dest_ty, operand);
-    }
-    const len = operand_ty.vectorLen(zcu);
-    const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
-    for (new_elems, 0..) |*new_elem, i| {
-        const idx_ref = try pt.intRef(.usize, i);
-        const old_elem = try block.addBinOp(.array_elem_val, operand, idx_ref);
-        new_elem.* = try block.addTyOp(.float_from_int, dest_scalar_ty, old_elem);
-    }
-    return block.addAggregateInit(dest_ty, new_elems);
+    return block.addTyOp(.float_from_int, dest_ty, operand);
 }

 fn zirPtrFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -22473,69 +22415,34 @@ fn zirPtrFromInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!
    }
    try sema.requireRuntimeBlock(block, src, operand_src);
    try sema.checkLogicalPtrOperation(block, src, ptr_ty);
-    if (!is_vector or zcu.backendSupportsFeature(.all_vector_instructions)) {
-        if (block.wantSafety() and (try elem_ty.hasRuntimeBitsSema(pt) or elem_ty.zigTypeTag(zcu) == .@"fn")) {
-            if (!ptr_ty.isAllowzeroPtr(zcu)) {
-                const is_non_zero = if (is_vector) all_non_zero: {
-                    const zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
-                    const is_non_zero = try block.addCmpVector(operand_coerced, zero_usize, .neq);
-                    break :all_non_zero try block.addReduce(is_non_zero, .And);
-                } else try block.addBinOp(.cmp_neq, operand_coerced, .zero_usize);
-                try sema.addSafetyCheck(block, src, is_non_zero, .cast_to_null);
-            }
-            if (ptr_align.compare(.gt, .@"1")) {
-                const align_bytes_minus_1 = ptr_align.toByteUnits().? - 1;
-                const align_mask = Air.internedToRef((try sema.splat(operand_ty, try pt.intValue(
-                    .usize,
-                    if (elem_ty.fnPtrMaskOrNull(zcu)) |mask|
-                        align_bytes_minus_1 & mask
-                    else
-                        align_bytes_minus_1,
-                ))).toIntern());
-                const remainder = try block.addBinOp(.bit_and, operand_coerced, align_mask);
-                const is_aligned = if (is_vector) all_aligned: {
-                    const splat_zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
-                    const is_aligned = try block.addCmpVector(remainder, splat_zero_usize, .eq);
-                    break :all_aligned try block.addReduce(is_aligned, .And);
-                } else try block.addBinOp(.cmp_eq, remainder, .zero_usize);
-                try sema.addSafetyCheck(block, src, is_aligned, .incorrect_alignment);
-            }
-        }
-        return block.addBitCast(dest_ty, operand_coerced);
-    }
-
-    const len = dest_ty.vectorLen(zcu);
    if (block.wantSafety() and (try elem_ty.hasRuntimeBitsSema(pt) or elem_ty.zigTypeTag(zcu) == .@"fn")) {
-        for (0..len) |i| {
-            const idx_ref = try pt.intRef(.usize, i);
-            const elem_coerced = try block.addBinOp(.array_elem_val, operand_coerced, idx_ref);
-            if (!ptr_ty.isAllowzeroPtr(zcu)) {
-                const is_non_zero = try block.addBinOp(.cmp_neq, elem_coerced, .zero_usize);
-                try sema.addSafetyCheck(block, src, is_non_zero, .cast_to_null);
-            }
-            if (ptr_align.compare(.gt, .@"1")) {
-                const align_bytes_minus_1 = ptr_align.toByteUnits().? - 1;
-                const align_mask = Air.internedToRef((try pt.intValue(
-                    .usize,
-                    if (elem_ty.fnPtrMaskOrNull(zcu)) |mask|
-                        align_bytes_minus_1 & mask
-                    else
-                        align_bytes_minus_1,
-                )).toIntern());
-                const remainder = try block.addBinOp(.bit_and, elem_coerced, align_mask);
-                const is_aligned = try block.addBinOp(.cmp_eq, remainder, .zero_usize);
-                try sema.addSafetyCheck(block, src, is_aligned, .incorrect_alignment);
-            }
+        if (!ptr_ty.isAllowzeroPtr(zcu)) {
+            const is_non_zero = if (is_vector) all_non_zero: {
+                const zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
+                const is_non_zero = try block.addCmpVector(operand_coerced, zero_usize, .neq);
+                break :all_non_zero try block.addReduce(is_non_zero, .And);
+            } else try block.addBinOp(.cmp_neq, operand_coerced, .zero_usize);
+            try sema.addSafetyCheck(block, src, is_non_zero, .cast_to_null);
+        }
+        if (ptr_align.compare(.gt, .@"1")) {
+            const align_bytes_minus_1 = ptr_align.toByteUnits().? - 1;
+            const align_mask = Air.internedToRef((try sema.splat(operand_ty, try pt.intValue(
+                .usize,
+                if (elem_ty.fnPtrMaskOrNull(zcu)) |mask|
+                    align_bytes_minus_1 & mask
+                else
+                    align_bytes_minus_1,
+            ))).toIntern());
+            const remainder = try block.addBinOp(.bit_and, operand_coerced, align_mask);
+            const is_aligned = if (is_vector) all_aligned: {
+                const splat_zero_usize = Air.internedToRef((try sema.splat(operand_ty, .zero_usize)).toIntern());
+                const is_aligned = try block.addCmpVector(remainder, splat_zero_usize, .eq);
+                break :all_aligned try block.addReduce(is_aligned, .And);
+            } else try block.addBinOp(.cmp_eq, remainder, .zero_usize);
+            try sema.addSafetyCheck(block, src, is_aligned, .incorrect_alignment);
        }
    }
-
-    const new_elems = try sema.arena.alloc(Air.Inst.Ref, len);
-    for (new_elems, 0..) |*new_elem, i| {
-        const idx_ref = try pt.intRef(.usize, i);
-        const old_elem = try block.addBinOp(.array_elem_val, operand_coerced, idx_ref);
-        new_elem.* = try block.addBitCast(ptr_ty, old_elem);
-    }
-    return block.addAggregateInit(dest_ty, new_elems);
+    return block.addBitCast(dest_ty, operand_coerced);
 }

 fn ptrFromIntVal(
diff --git a/src/Zcu.zig b/src/Zcu.zig
index c49a1d46b1..38e926298e 100644
--- a/src/Zcu.zig
+++ b/src/Zcu.zig
@@ -3840,15 +3840,6 @@ pub const Feature = enum {
    safety_checked_instructions,
    /// If the backend supports running from another thread.
    separate_thread,
-    /// If the backend supports the following AIR instructions with vector types:
-    /// * `Air.Inst.Tag.bit_and`
-    /// * `Air.Inst.Tag.bit_or`
-    /// * `Air.Inst.Tag.bitcast`
-    /// * `Air.Inst.Tag.float_from_int`
-    /// * `Air.Inst.Tag.fptrunc`
-    /// * `Air.Inst.Tag.int_from_float`
-    /// If not supported, Sema will scalarize the operation.
-    all_vector_instructions,
 };

 pub fn backendSupportsFeature(zcu: *const Zcu, comptime feature: Feature) bool {
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index 6fd23cfd18..88fdeb9831 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -40,6 +40,10 @@ const gp = abi.RegisterClass.gp;

 const InnerError = CodeGenError || error{OutOfRegisters};

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 gpa: Allocator,
 pt: Zcu.PerThread,
 air: Air,
@@ -2261,12 +2265,13 @@ fn shiftExact(
    rhs_ty: Type,
    maybe_inst: ?Air.Inst.Index,
 ) InnerError!MCValue {
-    _ = rhs_ty;
-
    const pt = self.pt;
    const zcu = pt.zcu;
    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO binary operations on vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO vector shift with scalar rhs", .{})
+        else
+            return self.fail("TODO binary operations on vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 64) {
@@ -2317,7 +2322,10 @@ fn shiftNormal(
    const pt = self.pt;
    const zcu = pt.zcu;
    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO binary operations on vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO vector shift with scalar rhs", .{})
+        else
+            return self.fail("TODO binary operations on vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 64) {
@@ -2874,7 +2882,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!void {
    const overflow_bit_offset = @as(u32, @intCast(tuple_ty.structFieldOffset(1, zcu)));

    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO implement shl_with_overflow for vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO implement vector shl_with_overflow with scalar rhs", .{})
+        else
+            return self.fail("TODO implement shl_with_overflow for vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 64) {
@@ -2993,8 +3004,14 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!void {
 }

 fn airShlSat(self: *Self, inst: Air.Inst.Index) InnerError!void {
+    const zcu = self.pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
+    const result: MCValue = if (self.liveness.isUnused(inst))
+        .dead
+    else if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+        return self.fail("TODO implement vector shl_sat with scalar rhs for {}", .{self.target.cpu.arch})
+    else
+        return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }

diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig
index e9d0e91db1..a018302566 100644
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@@ -41,6 +41,10 @@ const gp = abi.RegisterClass.gp;

 const InnerError = CodeGenError || error{OutOfRegisters};

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 gpa: Allocator,
 pt: Zcu.PerThread,
 air: Air,
@@ -1857,7 +1861,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
    const overflow_bit_offset: u32 = @intCast(tuple_ty.structFieldOffset(1, zcu));

    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO implement shl_with_overflow for vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO implement vector shl_with_overflow with scalar rhs", .{})
+        else
+            return self.fail("TODO implement shl_with_overflow for vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 32) {
@@ -1978,8 +1985,14 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
 }

 fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
+    const zcu = self.pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
+    const result: MCValue = if (self.liveness.isUnused(inst))
+        .dead
+    else if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+        return self.fail("TODO implement vector shl_sat with scalar rhs for {}", .{self.target.cpu.arch})
+    else
+        return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }

@@ -3788,7 +3801,10 @@ fn shiftExact(
    const pt = self.pt;
    const zcu = pt.zcu;
    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO ARM binary operations on vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO ARM vector shift with scalar rhs", .{})
+        else
+            return self.fail("TODO ARM binary operations on vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 32) {
@@ -3828,7 +3844,10 @@ fn shiftNormal(
    const pt = self.pt;
    const zcu = pt.zcu;
    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO ARM binary operations on vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO ARM vector shift with scalar rhs", .{})
+        else
+            return self.fail("TODO ARM binary operations on vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 32) {
diff --git a/src/arch/powerpc/CodeGen.zig b/src/arch/powerpc/CodeGen.zig
index 6334b65ff8..a3a4615b4a 100644
--- a/src/arch/powerpc/CodeGen.zig
+++ b/src/arch/powerpc/CodeGen.zig
@@ -10,6 +10,10 @@ const Zcu = @import("../../Zcu.zig");
 const assert = std.debug.assert;
 const log = std.log.scoped(.codegen);

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 pub fn generate(
    bin_file: *link.File,
    pt: Zcu.PerThread,
diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig
index edba985beb..5732f4cd41 100644
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -51,6 +51,10 @@ const Instruction = encoding.Instruction;

 const InnerError = CodeGenError || error{OutOfRegisters};

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 pt: Zcu.PerThread,
 air: Air,
 liveness: Air.Liveness,
@@ -2764,6 +2768,7 @@ fn genBinOp(
        .shl,
        .shl_exact,
        => {
+            if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) return func.fail("TODO: vector shift with scalar rhs", .{});
            if (bit_size > 64) return func.fail("TODO: genBinOp shift > 64 bits, {}", .{bit_size});
            try func.truncateRegister(rhs_ty, rhs_reg);

@@ -3248,8 +3253,14 @@ fn airMulWithOverflow(func: *Func, inst: Air.Inst.Index) !void {
 }

 fn airShlWithOverflow(func: *Func, inst: Air.Inst.Index) !void {
+    const zcu = func.pt.zcu;
    const bin_op = func.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-    const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else return func.fail("TODO implement airShlWithOverflow", .{});
+    const result: MCValue = if (func.liveness.isUnused(inst))
+        .unreach
+    else if (func.typeOf(bin_op.lhs).isVector(zcu) and !func.typeOf(bin_op.rhs).isVector(zcu))
+        return func.fail("TODO implement vector airShlWithOverflow with scalar rhs", .{})
+    else
+        return func.fail("TODO implement airShlWithOverflow", .{});
    return func.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }

@@ -3266,8 +3277,14 @@ fn airMulSat(func: *Func, inst: Air.Inst.Index) !void {
 }

 fn airShlSat(func: *Func, inst: Air.Inst.Index) !void {
+    const zcu = func.pt.zcu;
    const bin_op = func.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-    const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else return func.fail("TODO implement airShlSat", .{});
+    const result: MCValue = if (func.liveness.isUnused(inst))
+        .unreach
+    else if (func.typeOf(bin_op.lhs).isVector(zcu) and !func.typeOf(bin_op.rhs).isVector(zcu))
+        return func.fail("TODO implement vector airShlSat with scalar rhs", .{})
+    else
+        return func.fail("TODO implement airShlSat", .{});
    return func.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }

diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig
index 4235de94f5..ead7ef3e99 100644
--- a/src/arch/sparc64/CodeGen.zig
+++ b/src/arch/sparc64/CodeGen.zig
@@ -41,6 +41,10 @@ const Self = @This();

 const InnerError = CodeGenError || error{OutOfRegisters};

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 const RegisterView = enum(u1) {
    caller,
    callee,
@@ -2270,8 +2274,14 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
 }

 fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
+    const zcu = self.pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
+    const result: MCValue = if (self.liveness.isUnused(inst))
+        .dead
+    else if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+        return self.fail("TODO implement vector shl_sat with scalar rhs for {}", .{self.target.cpu.arch})
+    else
+        return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }

@@ -2287,7 +2297,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
    const rhs_ty = self.typeOf(extra.rhs);

    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO implement vector shl_with_overflow with scalar rhs", .{})
+        else
+            return self.fail("TODO implement shl_with_overflow for vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 64) {
@@ -3002,7 +3015,10 @@ fn binOp(

    // Truncate if necessary
    switch (lhs_ty.zigTypeTag(zcu)) {
-        .vector => return self.fail("TODO binary operations on vectors", .{}),
+        .vector => if (!rhs_ty.isVector(zcu))
+            return self.fail("TODO vector shift with scalar rhs", .{})
+        else
+            return self.fail("TODO binary operations on vectors", .{}),
        .int => {
            const int_info = lhs_ty.intInfo(zcu);
            if (int_info.bits <= 64) {
@@ -3024,7 +3040,10 @@ fn binOp(
        .shr_exact,
        => {
            switch (lhs_ty.zigTypeTag(zcu)) {
-                .vector => return self.fail("TODO binary operations on vectors", .{}),
+                .vector => if (!rhs_ty.isVector(zcu))
+                    return self.fail("TODO vector shift with scalar rhs", .{})
+                else
+                    return self.fail("TODO binary operations on vectors", .{}),
                .int => {
                    const int_info = lhs_ty.intInfo(zcu);
                    if (int_info.bits <= 64) {
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index a48f7012f5..a130f1508d 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -31,6 +31,10 @@ const libcFloatSuffix = target_util.libcFloatSuffix;
 const compilerRtFloatAbbrev = target_util.compilerRtFloatAbbrev;
 const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 /// Reference to the function declaration the code
 /// section belongs to
 owner_nav: InternPool.Nav.Index,
@@ -2638,6 +2642,10 @@ fn airBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
    // For big integers we can ignore this as we will call into compiler-rt which handles this.
    const result = switch (op) {
        .shr, .shl => result: {
+            if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) {
+                return cg.fail("TODO: implement vector '{s}' with scalar rhs", .{@tagName(op)});
+            }
+
            const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse {
                return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)});
            };
@@ -3055,8 +3063,12 @@ fn airWrapBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
    const lhs_ty = cg.typeOf(bin_op.lhs);
    const rhs_ty = cg.typeOf(bin_op.rhs);

-    if (lhs_ty.zigTypeTag(zcu) == .vector or rhs_ty.zigTypeTag(zcu) == .vector) {
-        return cg.fail("TODO: Implement wrapping arithmetic for vectors", .{});
+    if (lhs_ty.isVector(zcu)) {
+        if ((op == .shr or op == .shl) and !rhs_ty.isVector(zcu)) {
+            return cg.fail("TODO: implement wrapping vector '{s}' with scalar rhs", .{@tagName(op)});
+        } else {
+            return cg.fail("TODO: implement wrapping '{s}' for vectors", .{@tagName(op)});
+        }
    }

    // For certain operations, such as shifting, the types are different.
@@ -6067,13 +6079,17 @@ fn airShlWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
    const ty = cg.typeOf(extra.lhs);
    const rhs_ty = cg.typeOf(extra.rhs);

-    if (ty.zigTypeTag(zcu) == .vector) {
-        return cg.fail("TODO: Implement overflow arithmetic for vectors", .{});
+    if (ty.isVector(zcu)) {
+        if (!rhs_ty.isVector(zcu)) {
+            return cg.fail("TODO: implement vector 'shl_with_overflow' with scalar rhs", .{});
+        } else {
+            return cg.fail("TODO: implement vector 'shl_with_overflow'", .{});
+        }
    }

    const int_info = ty.intInfo(zcu);
    const wasm_bits = toWasmBits(int_info.bits) orelse {
-        return cg.fail("TODO: Implement shl_with_overflow for integer bitsize: {d}", .{int_info.bits});
+        return cg.fail("TODO: implement 'shl_with_overflow' for integer bitsize: {d}", .{int_info.bits});
    };

    // Ensure rhs is coerced to lhs as they must have the same WebAssembly types
@@ -6994,6 +7010,11 @@ fn airShlSat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {

    const pt = cg.pt;
    const zcu = pt.zcu;
+
+    if (cg.typeOf(bin_op.lhs).isVector(zcu) and !cg.typeOf(bin_op.rhs).isVector(zcu)) {
+        return cg.fail("TODO: implement vector 'shl_sat' with scalar rhs", .{});
+    }
+
    const ty = cg.typeOfIndex(inst);
    const int_info = ty.intInfo(zcu);
    const is_signed = int_info.signedness == .signed;
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 8db15122d0..49a32fc66c 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -32,7 +32,7 @@ const FrameIndex = bits.FrameIndex;

 const InnerError = codegen.CodeGenError || error{OutOfRegisters};

-pub inline fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features {
+pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features {
    @setEvalBranchQuota(1_200);
    return switch (target.ofmt == .coff) {
        inline false, true => |use_old| comptime &.init(.{
@@ -86,7 +86,7 @@ pub inline fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Fe
            .scalarize_float_from_int = use_old,
            .scalarize_mul_add = use_old,

-            .remove_shift_vector_rhs_splat = false,
+            .unsplat_shift_rhs = false,

            .reduce_one_elem_to_bitcast = true,
        }),
    };
diff --git a/src/codegen.zig b/src/codegen.zig
index d482cca4b2..8d609682f1 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -52,7 +52,7 @@ fn importBackend(comptime backend: std.builtin.CompilerBackend) type {
 pub fn legalizeFeatures(pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) *const Air.Legalize.Features {
    const zcu = pt.zcu;
    const target = &zcu.navFileScope(nav_index).mod.?.resolved_target.result;
-    switch (target_util.zigBackend(target.*, zcu.comp.config.use_llvm)) {
+    return switch (target_util.zigBackend(target.*, zcu.comp.config.use_llvm)) {
        else => unreachable,
        inline .stage2_llvm,
        .stage2_c,
@@ -65,11 +65,8 @@ pub fn legalizeFeatures(pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) *con
        .stage2_sparc64,
        .stage2_spirv64,
        .stage2_powerpc,
-        => |backend| {
-            const Backend = importBackend(backend);
-            return if (@hasDecl(Backend, "legalizeFeatures")) Backend.legalizeFeatures(target) else comptime &.initEmpty();
-        },
-    }
+        => |backend| importBackend(backend).legalizeFeatures(target),
+    };
 }

 pub fn generateFunction(
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index d83eb8f771..2d3b236a86 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -20,6 +20,10 @@ const Alignment = InternPool.Alignment;
 const BigIntLimb = std.math.big.Limb;
 const BigInt = std.math.big.int;

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 pub const CType = @import("c/Type.zig");

 pub const CValue = union(enum) {
@@ -4179,7 +4183,7 @@ fn airOverflow(f: *Function, inst: Air.Inst.Index, operation: []const u8, info:
    try v.elem(f, w);
    try w.writeAll(", ");
    try f.writeCValue(w, rhs, .FunctionArgument);
-    try v.elem(f, w);
+    if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, w);
    try f.object.dg.renderBuiltinInfo(w, scalar_ty, info);
    try w.writeAll(");\n");
    try v.end(f, inst, w);
@@ -6536,7 +6540,7 @@ fn airBinBuiltinCall(
    try v.elem(f, writer);
    try writer.writeAll(", ");
    try f.writeCValue(writer, rhs, .FunctionArgument);
-    try v.elem(f, writer);
+    if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, writer);
    try f.object.dg.renderBuiltinInfo(writer, scalar_ty, info);
    try writer.writeAll(");\n");
    try v.end(f, inst, writer);
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 1820faf90c..6f411e7490 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -36,6 +36,10 @@ const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;

 const Error = error{ OutOfMemory, CodegenFail };

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 fn subArchName(features: std.Target.Cpu.Feature.Set, arch: anytype, mappings: anytype) ?[]const u8 {
    inline for (mappings) |mapping| {
        if (arch.featureSetHas(features, mapping[0])) return mapping[1];
@@ -8923,6 +8927,8 @@ pub const FuncGen = struct {
        const rhs = try self.resolveInst(extra.rhs);

        const lhs_ty = self.typeOf(extra.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
        const lhs_scalar_ty = lhs_ty.scalarType(zcu);
        const dest_ty = self.typeOfIndex(inst);

@@ -8992,6 +8998,8 @@ pub const FuncGen = struct {
        const rhs = try self.resolveInst(bin_op.rhs);

        const lhs_ty = self.typeOf(bin_op.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
        const lhs_scalar_ty = lhs_ty.scalarType(zcu);

        const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
@@ -9003,14 +9011,17 @@ pub const FuncGen = struct {

    fn airShl(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
        const o = self.ng.object;
+        const zcu = o.pt.zcu;
        const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

        const lhs = try self.resolveInst(bin_op.lhs);
        const rhs = try self.resolveInst(bin_op.rhs);

-        const lhs_type = self.typeOf(bin_op.lhs);
+        const lhs_ty = self.typeOf(bin_op.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});

-        const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_type), "");
+        const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
        return self.wip.bin(.shl, lhs, casted_rhs, "");
    }

@@ -9029,6 +9040,8 @@ pub const FuncGen = struct {
        const llvm_lhs_scalar_ty = llvm_lhs_ty.scalarType(&o.builder);

        const rhs_ty = self.typeOf(bin_op.rhs);
+        if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
        const rhs_info = rhs_ty.intInfo(zcu);
        assert(rhs_info.signedness == .unsigned);
        const llvm_rhs_ty = try o.lowerType(rhs_ty);
@@ -9101,6 +9114,8 @@ pub const FuncGen = struct {
        const rhs = try self.resolveInst(bin_op.rhs);

        const lhs_ty = self.typeOf(bin_op.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
        const lhs_scalar_ty = lhs_ty.scalarType(zcu);

        const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
@@ -9255,8 +9270,6 @@ pub const FuncGen = struct {
        const operand_ty = self.typeOf(ty_op.operand);
        const dest_ty = self.typeOfIndex(inst);
        const target = zcu.getTarget();
-        const dest_bits = dest_ty.floatBits(target);
-        const src_bits = operand_ty.floatBits(target);

        if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
            return self.wip.cast(.fptrunc, operand, try o.lowerType(dest_ty), "");
@@ -9264,6 +9277,8 @@ pub const FuncGen = struct {

        const operand_llvm_ty = try o.lowerType(operand_ty);
        const dest_llvm_ty = try o.lowerType(dest_ty);
+        const dest_bits = dest_ty.floatBits(target);
+        const src_bits = operand_ty.floatBits(target);
        const fn_name = try o.builder.strtabStringFmt("__trunc{s}f{s}f2", .{
            compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
        });
@@ -9348,11 +9363,12 @@ pub const FuncGen = struct {
            return self.wip.conv(.unsigned, operand, llvm_dest_ty, "");
        }

-        if (operand_ty.zigTypeTag(zcu) == .int and inst_ty.isPtrAtRuntime(zcu)) {
+        const operand_scalar_ty = operand_ty.scalarType(zcu);
+        const inst_scalar_ty = inst_ty.scalarType(zcu);
+        if (operand_scalar_ty.zigTypeTag(zcu) == .int and inst_scalar_ty.isPtrAtRuntime(zcu)) {
            return self.wip.cast(.inttoptr, operand, llvm_dest_ty, "");
        }
-
-        if (operand_ty.isPtrAtRuntime(zcu) and inst_ty.zigTypeTag(zcu) == .int) {
+        if (operand_scalar_ty.isPtrAtRuntime(zcu) and inst_scalar_ty.zigTypeTag(zcu) == .int) {
            return self.wip.cast(.ptrtoint, operand, llvm_dest_ty, "");
        }

diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index 5041634a75..c51e38ac7e 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -28,6 +28,10 @@ const SpvAssembler = @import("spirv/Assembler.zig");

 const InstMap = std.AutoHashMapUnmanaged(Air.Inst.Index, IdRef);

+pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initEmpty();
+}
+
 pub const zig_call_abi_ver = 3;
 pub const big_int_bits = 32;

@@ -3380,6 +3384,10 @@ const NavGen = struct {
        const zcu = self.pt.zcu;
        const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;

+        if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) {
+            return self.fail("vector shift with scalar rhs", .{});
+        }
+
        const base = try self.temporary(bin_op.lhs);
        const shift = try self.temporary(bin_op.rhs);

@@ -3866,6 +3874,10 @@ const NavGen = struct {
        const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
        const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;

+        if (self.typeOf(extra.lhs).isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu)) {
+            return self.fail("vector shift with scalar rhs", .{});
+        }
+
        const base = try self.temporary(extra.lhs);
        const shift = try self.temporary(extra.rhs);

diff --git a/src/target.zig b/src/target.zig
index 6119b002a4..c33588b1b5 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -850,9 +850,5 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
            .stage2_llvm => false,
            else => true,
        },
-        .all_vector_instructions => switch (backend) {
-            .stage2_x86_64 => true,
-            else => false,
-        },
    };
 }
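
Note on wiring: every backend now exports legalizeFeatures, so codegen.zig can call importBackend(backend).legalizeFeatures(target) unconditionally instead of probing with @hasDecl. The sketch below is hypothetical (no backend in this diff enables either flag; x86_64 even sets unsplat_shift_rhs = false), but it shows the shape a backend would use to opt in once it handles scalar shift amounts natively and wants vector bitcasts broken up for it:

    // Hypothetical opt-in, not part of this diff.
    pub inline fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
        return comptime &.init(.{
            // Legalize rewrites (shift lhs, (splat rhs)) into (shift lhs, rhs);
            // the "vector shift with scalar rhs" TODO paths added above guard
            // exactly the instructions this feature produces.
            .unsplat_shift_rhs = true,
            // Legalize splits a same-length vector-to-vector bitcast into
            // per-element bitcasts via scalarize(inst, .ty_op).
            .scalarize_bitcast = true,
        });
    }

This mirrors the x86_64 pattern of building the feature set at comptime and returning a pointer to it, so the per-function legalize query stays allocation-free.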