zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

avx512vlvbmi2intrin.h (26633B) - Raw


      1 /*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
      2  *
      3  *
      4  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      5  * See https://llvm.org/LICENSE.txt for license information.
      6  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      7  *
      8  *===-----------------------------------------------------------------------===
      9  */
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
     12 #endif
     13 
     14 #ifndef __AVX512VLVBMI2INTRIN_H
     15 #define __AVX512VLVBMI2INTRIN_H
     16 
     17 /* Define the default attributes for the functions in this file. */
     18 #define __DEFAULT_FN_ATTRS128                                                  \
     19   __attribute__((__always_inline__, __nodebug__,                               \
     20                  __target__("avx512vl,avx512vbmi2,no-evex512"),                \
     21                  __min_vector_width__(128)))
     22 #define __DEFAULT_FN_ATTRS256                                                  \
     23   __attribute__((__always_inline__, __nodebug__,                               \
     24                  __target__("avx512vl,avx512vbmi2,no-evex512"),                \
     25                  __min_vector_width__(256)))
     26 
     27 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     28 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
     29 {
     30   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
     31               (__v8hi) __S,
     32               __U);
     33 }
     34 
     35 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     36 _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
     37 {
     38   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
     39               (__v8hi) _mm_setzero_si128(),
     40               __U);
     41 }
     42 
     43 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     44 _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
     45 {
     46   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
     47               (__v16qi) __S,
     48               __U);
     49 }
     50 
     51 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     52 _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
     53 {
     54   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
     55               (__v16qi) _mm_setzero_si128(),
     56               __U);
     57 }
     58 
     59 static __inline__ void __DEFAULT_FN_ATTRS128
     60 _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
     61 {
     62   __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
     63               __U);
     64 }
     65 
     66 static __inline__ void __DEFAULT_FN_ATTRS128
     67 _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
     68 {
     69   __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
     70               __U);
     71 }
     72 
     73 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     74 _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
     75 {
     76   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
     77               (__v8hi) __S,
     78               __U);
     79 }
     80 
     81 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     82 _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
     83 {
     84   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
     85               (__v8hi) _mm_setzero_si128(),
     86               __U);
     87 }
     88 
     89 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     90 _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
     91 {
     92   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
     93               (__v16qi) __S,
     94               __U);
     95 }
     96 
     97 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     98 _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
     99 {
    100   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
    101               (__v16qi) _mm_setzero_si128(),
    102               __U);
    103 }
    104 
    105 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    106 _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
    107 {
    108   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
    109               (__v8hi) __S,
    110               __U);
    111 }
    112 
    113 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    114 _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
    115 {
    116   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
    117               (__v8hi) _mm_setzero_si128(),
    118               __U);
    119 }
    120 
    121 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    122 _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
    123 {
    124   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
    125               (__v16qi) __S,
    126               __U);
    127 }
    128 
    129 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    130 _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
    131 {
    132   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
    133               (__v16qi) _mm_setzero_si128(),
    134               __U);
    135 }
    136 
    137 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    138 _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
    139 {
    140   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
    141               (__v16hi) __S,
    142               __U);
    143 }
    144 
    145 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    146 _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
    147 {
    148   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
    149               (__v16hi) _mm256_setzero_si256(),
    150               __U);
    151 }
    152 
    153 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    154 _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
    155 {
    156   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
    157               (__v32qi) __S,
    158               __U);
    159 }
    160 
    161 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    162 _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
    163 {
    164   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
    165               (__v32qi) _mm256_setzero_si256(),
    166               __U);
    167 }
    168 
    169 static __inline__ void __DEFAULT_FN_ATTRS256
    170 _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
    171 {
    172   __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
    173               __U);
    174 }
    175 
    176 static __inline__ void __DEFAULT_FN_ATTRS256
    177 _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
    178 {
    179   __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
    180               __U);
    181 }
    182 
    183 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    184 _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
    185 {
    186   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
    187               (__v16hi) __S,
    188               __U);
    189 }
    190 
    191 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    192 _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
    193 {
    194   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
    195               (__v16hi) _mm256_setzero_si256(),
    196               __U);
    197 }
    198 
    199 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    200 _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
    201 {
    202   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
    203               (__v32qi) __S,
    204               __U);
    205 }
    206 
    207 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    208 _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
    209 {
    210   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
    211               (__v32qi) _mm256_setzero_si256(),
    212               __U);
    213 }
    214 
    215 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    216 _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
    217 {
    218   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
    219               (__v16hi) __S,
    220               __U);
    221 }
    222 
    223 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    224 _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
    225 {
    226   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
    227               (__v16hi) _mm256_setzero_si256(),
    228               __U);
    229 }
    230 
    231 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    232 _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
    233 {
    234   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
    235               (__v32qi) __S,
    236               __U);
    237 }
    238 
    239 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    240 _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
    241 {
    242   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
    243               (__v32qi) _mm256_setzero_si256(),
    244               __U);
    245 }
    246 
    247 #define _mm256_shldi_epi64(A, B, I) \
    248   ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
    249                                       (__v4di)(__m256i)(B), (int)(I)))
    250 
    251 #define _mm256_mask_shldi_epi64(S, U, A, B, I) \
    252   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
    253                                      (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
    254                                      (__v4di)(__m256i)(S)))
    255 
    256 #define _mm256_maskz_shldi_epi64(U, A, B, I) \
    257   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
    258                                      (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
    259                                      (__v4di)_mm256_setzero_si256()))
    260 
    261 #define _mm_shldi_epi64(A, B, I) \
    262   ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
    263                                       (__v2di)(__m128i)(B), (int)(I)))
    264 
    265 #define _mm_mask_shldi_epi64(S, U, A, B, I) \
    266   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
    267                                        (__v2di)_mm_shldi_epi64((A), (B), (I)), \
    268                                        (__v2di)(__m128i)(S)))
    269 
    270 #define _mm_maskz_shldi_epi64(U, A, B, I) \
    271   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
    272                                        (__v2di)_mm_shldi_epi64((A), (B), (I)), \
    273                                        (__v2di)_mm_setzero_si128()))
    274 
    275 #define _mm256_shldi_epi32(A, B, I) \
    276   ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
    277                                       (__v8si)(__m256i)(B), (int)(I)))
    278 
    279 #define _mm256_mask_shldi_epi32(S, U, A, B, I) \
    280   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
    281                                      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
    282                                      (__v8si)(__m256i)(S)))
    283 
    284 #define _mm256_maskz_shldi_epi32(U, A, B, I) \
    285   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
    286                                      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
    287                                      (__v8si)_mm256_setzero_si256()))
    288 
    289 #define _mm_shldi_epi32(A, B, I) \
    290   ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
    291                                       (__v4si)(__m128i)(B), (int)(I)))
    292 
    293 #define _mm_mask_shldi_epi32(S, U, A, B, I) \
    294   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
    295                                        (__v4si)_mm_shldi_epi32((A), (B), (I)), \
    296                                        (__v4si)(__m128i)(S)))
    297 
    298 #define _mm_maskz_shldi_epi32(U, A, B, I) \
    299   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
    300                                        (__v4si)_mm_shldi_epi32((A), (B), (I)), \
    301                                        (__v4si)_mm_setzero_si128()))
    302 
    303 #define _mm256_shldi_epi16(A, B, I) \
    304   ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
    305                                       (__v16hi)(__m256i)(B), (int)(I)))
    306 
    307 #define _mm256_mask_shldi_epi16(S, U, A, B, I) \
    308   ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
    309                                     (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
    310                                     (__v16hi)(__m256i)(S)))
    311 
    312 #define _mm256_maskz_shldi_epi16(U, A, B, I) \
    313   ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
    314                                     (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
    315                                     (__v16hi)_mm256_setzero_si256()))
    316 
    317 #define _mm_shldi_epi16(A, B, I) \
    318   ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
    319                                       (__v8hi)(__m128i)(B), (int)(I)))
    320 
    321 #define _mm_mask_shldi_epi16(S, U, A, B, I) \
    322   ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
    323                                        (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
    324                                        (__v8hi)(__m128i)(S)))
    325 
    326 #define _mm_maskz_shldi_epi16(U, A, B, I) \
    327   ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
    328                                        (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
    329                                        (__v8hi)_mm_setzero_si128()))
    330 
    331 #define _mm256_shrdi_epi64(A, B, I) \
    332   ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
    333                                       (__v4di)(__m256i)(B), (int)(I)))
    334 
    335 #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
    336   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
    337                                      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
    338                                      (__v4di)(__m256i)(S)))
    339 
    340 #define _mm256_maskz_shrdi_epi64(U, A, B, I) \
    341   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
    342                                      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
    343                                      (__v4di)_mm256_setzero_si256()))
    344 
    345 #define _mm_shrdi_epi64(A, B, I) \
    346   ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
    347                                       (__v2di)(__m128i)(B), (int)(I)))
    348 
    349 #define _mm_mask_shrdi_epi64(S, U, A, B, I) \
    350   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
    351                                        (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
    352                                        (__v2di)(__m128i)(S)))
    353 
    354 #define _mm_maskz_shrdi_epi64(U, A, B, I) \
    355   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
    356                                        (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
    357                                        (__v2di)_mm_setzero_si128()))
    358 
    359 #define _mm256_shrdi_epi32(A, B, I) \
    360   ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
    361                                       (__v8si)(__m256i)(B), (int)(I)))
    362 
    363 #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
    364   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
    365                                      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
    366                                      (__v8si)(__m256i)(S)))
    367 
    368 #define _mm256_maskz_shrdi_epi32(U, A, B, I) \
    369   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
    370                                      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
    371                                      (__v8si)_mm256_setzero_si256()))
    372 
    373 #define _mm_shrdi_epi32(A, B, I) \
    374   ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
    375                                       (__v4si)(__m128i)(B), (int)(I)))
    376 
    377 #define _mm_mask_shrdi_epi32(S, U, A, B, I) \
    378   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
    379                                        (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
    380                                        (__v4si)(__m128i)(S)))
    381 
    382 #define _mm_maskz_shrdi_epi32(U, A, B, I) \
    383   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
    384                                        (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
    385                                        (__v4si)_mm_setzero_si128()))
    386 
    387 #define _mm256_shrdi_epi16(A, B, I) \
    388   ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
    389                                       (__v16hi)(__m256i)(B), (int)(I)))
    390 
    391 #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
    392   ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
    393                                     (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
    394                                     (__v16hi)(__m256i)(S)))
    395 
    396 #define _mm256_maskz_shrdi_epi16(U, A, B, I) \
    397   ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
    398                                     (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
    399                                     (__v16hi)_mm256_setzero_si256()))
    400 
    401 #define _mm_shrdi_epi16(A, B, I) \
    402   ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
    403                                       (__v8hi)(__m128i)(B), (int)(I)))
    404 
    405 #define _mm_mask_shrdi_epi16(S, U, A, B, I) \
    406   ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
    407                                        (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
    408                                        (__v8hi)(__m128i)(S)))
    409 
    410 #define _mm_maskz_shrdi_epi16(U, A, B, I) \
    411   ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
    412                                        (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
    413                                        (__v8hi)_mm_setzero_si128()))
    414 
    415 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    416 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
    417 {
    418   return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
    419                                              (__v4di)__C);
    420 }
    421 
    422 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    423 _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
    424 {
    425   return (__m256i)__builtin_ia32_selectq_256(__U,
    426                                       (__v4di)_mm256_shldv_epi64(__A, __B, __C),
    427                                       (__v4di)__A);
    428 }
    429 
    430 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    431 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
    432 {
    433   return (__m256i)__builtin_ia32_selectq_256(__U,
    434                                       (__v4di)_mm256_shldv_epi64(__A, __B, __C),
    435                                       (__v4di)_mm256_setzero_si256());
    436 }
    437 
    438 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    439 _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
    440 {
    441   return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
    442                                              (__v2di)__C);
    443 }
    444 
    445 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    446 _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
    447 {
    448   return (__m128i)__builtin_ia32_selectq_128(__U,
    449                                          (__v2di)_mm_shldv_epi64(__A, __B, __C),
    450                                          (__v2di)__A);
    451 }
    452 
    453 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    454 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
    455 {
    456   return (__m128i)__builtin_ia32_selectq_128(__U,
    457                                          (__v2di)_mm_shldv_epi64(__A, __B, __C),
    458                                          (__v2di)_mm_setzero_si128());
    459 }
    460 
    461 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    462 _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
    463 {
    464   return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
    465                                              (__v8si)__C);
    466 }
    467 
    468 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    469 _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
    470 {
    471   return (__m256i)__builtin_ia32_selectd_256(__U,
    472                                       (__v8si)_mm256_shldv_epi32(__A, __B, __C),
    473                                       (__v8si)__A);
    474 }
    475 
    476 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    477 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
    478 {
    479   return (__m256i)__builtin_ia32_selectd_256(__U,
    480                                       (__v8si)_mm256_shldv_epi32(__A, __B, __C),
    481                                       (__v8si)_mm256_setzero_si256());
    482 }
    483 
    484 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    485 _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
    486 {
    487   return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
    488                                              (__v4si)__C);
    489 }
    490 
    491 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    492 _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
    493 {
    494   return (__m128i)__builtin_ia32_selectd_128(__U,
    495                                          (__v4si)_mm_shldv_epi32(__A, __B, __C),
    496                                          (__v4si)__A);
    497 }
    498 
    499 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    500 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
    501 {
    502   return (__m128i)__builtin_ia32_selectd_128(__U,
    503                                          (__v4si)_mm_shldv_epi32(__A, __B, __C),
    504                                          (__v4si)_mm_setzero_si128());
    505 }
    506 
    507 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    508 _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
    509 {
    510   return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
    511                                              (__v16hi)__C);
    512 }
    513 
    514 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    515 _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
    516 {
    517   return (__m256i)__builtin_ia32_selectw_256(__U,
    518                                       (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
    519                                       (__v16hi)__A);
    520 }
    521 
    522 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    523 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
    524 {
    525   return (__m256i)__builtin_ia32_selectw_256(__U,
    526                                       (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
    527                                       (__v16hi)_mm256_setzero_si256());
    528 }
    529 
    530 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    531 _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
    532 {
    533   return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
    534                                              (__v8hi)__C);
    535 }
    536 
    537 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    538 _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
    539 {
    540   return (__m128i)__builtin_ia32_selectw_128(__U,
    541                                          (__v8hi)_mm_shldv_epi16(__A, __B, __C),
    542                                          (__v8hi)__A);
    543 }
    544 
    545 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    546 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
    547 {
    548   return (__m128i)__builtin_ia32_selectw_128(__U,
    549                                          (__v8hi)_mm_shldv_epi16(__A, __B, __C),
    550                                          (__v8hi)_mm_setzero_si128());
    551 }
    552 
    553 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    554 _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
    555 {
    556   return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
    557                                              (__v4di)__C);
    558 }
    559 
    560 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    561 _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
    562 {
    563   return (__m256i)__builtin_ia32_selectq_256(__U,
    564                                       (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
    565                                       (__v4di)__A);
    566 }
    567 
    568 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    569 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
    570 {
    571   return (__m256i)__builtin_ia32_selectq_256(__U,
    572                                       (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
    573                                       (__v4di)_mm256_setzero_si256());
    574 }
    575 
    576 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    577 _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
    578 {
    579   return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
    580                                              (__v2di)__C);
    581 }
    582 
    583 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    584 _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
    585 {
    586   return (__m128i)__builtin_ia32_selectq_128(__U,
    587                                          (__v2di)_mm_shrdv_epi64(__A, __B, __C),
    588                                          (__v2di)__A);
    589 }
    590 
    591 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    592 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
    593 {
    594   return (__m128i)__builtin_ia32_selectq_128(__U,
    595                                          (__v2di)_mm_shrdv_epi64(__A, __B, __C),
    596                                          (__v2di)_mm_setzero_si128());
    597 }
    598 
    599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    600 _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
    601 {
    602   return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
    603                                              (__v8si)__C);
    604 }
    605 
    606 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    607 _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
    608 {
    609   return (__m256i)__builtin_ia32_selectd_256(__U,
    610                                       (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
    611                                       (__v8si)__A);
    612 }
    613 
    614 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    615 _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
    616 {
    617   return (__m256i)__builtin_ia32_selectd_256(__U,
    618                                       (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
    619                                       (__v8si)_mm256_setzero_si256());
    620 }
    621 
    622 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    623 _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
    624 {
    625   return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
    626                                              (__v4si)__C);
    627 }
    628 
    629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    630 _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
    631 {
    632   return (__m128i)__builtin_ia32_selectd_128(__U,
    633                                          (__v4si)_mm_shrdv_epi32(__A, __B, __C),
    634                                          (__v4si)__A);
    635 }
    636 
    637 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    638 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
    639 {
    640   return (__m128i)__builtin_ia32_selectd_128(__U,
    641                                          (__v4si)_mm_shrdv_epi32(__A, __B, __C),
    642                                          (__v4si)_mm_setzero_si128());
    643 }
    644 
    645 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    646 _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
    647 {
    648   return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
    649                                              (__v16hi)__C);
    650 }
    651 
    652 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    653 _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
    654 {
    655   return (__m256i)__builtin_ia32_selectw_256(__U,
    656                                      (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
    657                                      (__v16hi)__A);
    658 }
    659 
    660 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    661 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
    662 {
    663   return (__m256i)__builtin_ia32_selectw_256(__U,
    664                                      (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
    665                                      (__v16hi)_mm256_setzero_si256());
    666 }
    667 
    668 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    669 _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
    670 {
    671   return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
    672                                              (__v8hi)__C);
    673 }
    674 
    675 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    676 _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
    677 {
    678   return (__m128i)__builtin_ia32_selectw_128(__U,
    679                                          (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
    680                                          (__v8hi)__A);
    681 }
    682 
    683 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    684 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
    685 {
    686   return (__m128i)__builtin_ia32_selectw_128(__U,
    687                                          (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
    688                                          (__v8hi)_mm_setzero_si128());
    689 }
    690 
    691 
    692 #undef __DEFAULT_FN_ATTRS128
    693 #undef __DEFAULT_FN_ATTRS256
    694 
    695 #endif